# EM algorithm to find MLE of coeff in mixed effects model Classic List Threaded 5 messages Open this post in threaded view
|

 Dear Paul, Thank you for your suggestion. I was moved by the fact that people are so nice to help learners and ask for nothing. With your help, I made some changes to my code and add more comments, try to make things more clear. If this R email list allow me to upload a pdf file to illustrate the formula, it will be great. The reason I do not use lme4 is that later I plan to use this for a more complicated model (Y,T), Y is the same response of mixed model and T is a drop out process. (Frankly I did it for the more complicated model first and got NaN hessian after one iteration, so I turned to the simple model.) In the code, the m loop is the EM iterations. About efficiency, thank you again for pointing it out. I compared sapply and for loop, they are tie and for loop is even better sometimes. In a paper by Bates, he mentioned that the asymptotic properties of beta is kind of independent of the other two parameters. But it seems that paper was submitted but I did not find it on google scholar. Not sure if it is the reason for my results. That is why I hope some expert who have done some simulation similar may be willing to give some suggestion. I may turn to other methods to calculate the conditional expection of the latent variable as the same time. Modified code is as below: # library(PerformanceAnalytics) # install.packages("gregmisc") # install.packages("statmod") library(gregmisc) library(MASS) library(statmod) ## function to calculate loglikelihood loglike <- function(datai = data.list[[i]], vvar = var.old,     beta = beta.old, psi = psi.old) {     temp1 <- -0.5 * log(det(vvar * diag(nrow(datai$Zi)) + datai$Zi %*%         psi %*% t(datai$Zi))) temp2 <- -0.5 * t(datai$yi - datai$Xi %*% beta) %*% solve(vvar * diag(nrow(datai$Zi)) + datai$Zi %*% psi %*% t(datai$Zi)) %*%         (datai$yi - datai$Xi %*% beta)     temp1 + temp2 } ## functions to evaluate the conditional expection, saved as Efun v2.R ## Eh1new=E(bibi') ## Eh2new=E(X'(y-Zbi)) ## Eh3new=E(bi'Z'Zbi) ## Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) ## Eh5new=E(Xibeta-yi)'Zibi ## Eh6new=E(bi) Eh1new <- function(datai = data.list[[i]], weights.m = weights.mat) {     bi <- datai$b tempb <- bi * rep(sqrt(weights.m[, 1] * weights.m[, 2]), 2) #quadratic b, so need sqrt t(tempb) %*% tempb/pi } # end of Eh1 # Eh2new=E(X'(y-Zbi)) Eh2new <- function(datai = data.list[[i]], weights.m = weights.mat) { bi <- datai$b     tempb <- bi * rep(weights.m[, 1] * weights.m[, 2], 2)     tt <- t(datai$Xi) %*% datai$yi - t(datai$Xi) %*% datai$Zi %*%         colSums(tempb)/pi     c(tt) }  # end of Eh2 # Eh3new=E(b'Z'Zbi) Eh3new <- function(datai = data.list[[i]], weights.m = weights.mat) {     bi <- datai$b tempb <- bi * rep(sqrt(weights.m[, 1] * weights.m[, 2]), 2) #quadratic b, so need sqrt sum(sapply(as.list(data.frame(datai$Zi %*% t(tempb))),         function(s) {             sum(s^2)         }))/pi }  # end of Eh3 # Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) Eh4new <- function(datai = data.list[[i]], weights.m = weights.mat,     beta = beta.old) {     bi <- datai$b tt <- sapply(as.list(ata.frame(c(datai$yi - datai$Xi %*% beta) - datai$Zi %*% t(bi))), function(s) {         sum(s^2)     }) * (weights.m[, 1] * weights.m[, 2])     sum(tt)/pi }  # end of Eh4 Eh4newv2 <- function(datai = data.list[[i]], weights.m = weights.mat,     beta = beta.old) {     bi <- datai$b v <- weights.m[, 1] * weights.m[, 2] temp <- c() for (i in 1:length(v)) { temp[i] <- sum(((datai$yi - datai$Xi %*% beta - datai$Zi %*%             bi[i, ]) * sqrt(v[i]))^2)     }     sum(temp)/pi }  # end of Eh4 # Eh5new=E(Xibeta-yi)'Zib Eh5new <- function(datai = data.list[[i]], weights.m = weights.mat,     beta = beta.old) {     bi <- datai$b tempb <- bi * rep(weights.m[, 1] * weights.m[, 2], 2) t(datai$Xi %*% beta - datai$yi) %*% (datai$Zi %*% t(tempb))     sum(t(datai$Xi %*% beta - datai$yi) %*% (datai$Zi %*% t(tempb)))/pi } Eh6new <- function(datai = data.list[[i]], weights.m = weights.mat) { bi <- datai$b     tempw <- weights.m[, 1] * weights.m[, 2]     for (i in 1:nrow(bi)) {         bi[i, ] <- bi[i, ] * tempw[i]     }     colMeans(bi)/pi }  # end of Eh6 ## the main R script ################### initial the values and generate the data ################## n <- 100                                               #number of observations beta <- c(-0.5, 1)                               #true coefficient of fixed effects vvar <- 2  #sigma^2=2                                  #true error variance of epsilon psi <- matrix(c(1, 0.2, 0.2, 1), 2, 2)                 #covariance matrix of random effects b b.m.true <- mvrnorm(n = n, mu = c(0, 0), Sigma = psi)  #100*2 matrix, each row is the b_i Xi <- cbind(rnorm(7, mean = 2, sd = 0.5), log(2:8)) Zi <- Xi y.m <- matrix(NA, nrow = n, ncol = nrow(Xi))            #100*7, each row is a y_i Zi=Xi for( i in 1:n) { y.m[i,]=mvrnorm(1,mu=(Xi%*%beta+Zi%*%b.m.true[i,]),vvar*diag(nrow(Xi))) } b.list <- as.list(data.frame(t(b.m.true))) psi.old <- matrix(c(0.5, 0.4, 0.4, 0.5), 2, 2)          #starting psi, beta and var,not the true values beta.old <- c(-0.3, 0.7) var.old <- 1.7 gausspar <- gauss.quad(10, kind = "hermite", alpha = 0, beta = 0) # data.list.wob contains X,Y and Z, but no b's data.list.wob <- list() for (i in 1:n) {     data.list.wob[[i]] <- list(Xi = Xi, yi = y.m[i, ], Zi = Zi) } # compute true loglikelihood and initial loglikelihood truelog <- 0 for (i in 1:length(data.list.wob)) {     truelog <- truelog + loglike(data.list.wob[[i]], vvar, beta, psi) } truelog loglikeseq <- c() loglikeseq <- sum(sapply(data.list.wob, loglike)) ECM <- F ############################# E-M steps ################################################ # m loop is the EM iteration for (m in 1:300) {     Sig.hat <- Zi %*% psi.old %*% t(Zi) + var.old * diag(nrow(Zi)) #estimated covariance matrix of y     Sig.hat.inv <- solve(Sig.hat)     W.hat <- psi.old - psi.old %*% t(Zi) %*% Sig.hat.inv %*%         Zi %*% psi.old     Sigb <- psi.old - psi.old %*% t(Zi) %*% Sig.hat.inv %*% Zi %*% psi.old  #estimated covariance matrix of b     Y.minus.X.beta <- t(t(y.m) - c(Xi %*% beta.old))     miu.m <- t(apply(Y.minus.X.beta, MARGIN = 1, function(s,         B = psi.old %*% t(Zi) %*% solve(Sig.hat)) {         B %*% s     }))  ### each row is the miu_i     tmp1 <- permutations(length(gausspar$nodes), 2, repeats.allowed = T) tmp2 <- c(tmp1) a.mat <- matrix(gausspar$nodes[tmp2], nrow = nrow(tmp1))  # a1,a1      # a1,a2   # ...   # a10,a9   # a10,a10   # a.mat are all ordered pairs of gauss hermite nodes     a.mat.list <- as.list(data.frame(t(a.mat)))     tmp1 <- permutations(length(gausspar$weights), 2, repeats.allowed = T) tmp2 <- c(tmp1) weights.mat <- matrix(gausspar$weights[tmp2], nrow = nrow(tmp1))  # w1,w1        # w1,w2       # ...       # w10,w9       # w10,w10       # weights.mat are all ordered pairs of gauss hermite weights     L <- chol(solve(W.hat))     LL <- sqrt(2) * solve(L)     halfb <- t(LL %*% t(a.mat))     # each page of b.array is all values of bi_k and bi_j for b_i     # need to calculate those b's as linear functions of nodes since the original integral needs transformation to use gauss approximation     b.list <- list()     for (i in 1:n) {         b.list[[i]] <- t(t(halfb) + miu.m[i, ])     }     # generate a list, each page contains Xi,yi,Zi, and b's     data.list <- list()     for (i in 1:n) {         data.list[[i]] <- list(Xi = Xi, yi = y.m[i, ], Zi = Zi,             b = b.list[[i]])     }     # update sigma^2     t1 <- proc.time()     tempaa <- c()     tempbb <- c()     for (j in 1:n) {         tempbb[j] <- Eh4newv2(datai = data.list[[j]], weights.m = weights.mat, beta = beta.old)     }     var.new <- mean(tempbb)     if (ECM == T) {         var.old <- var.new     }     sumXiXi <- matrix(rowSums(sapply(data.list, function(s) {         t(s$Xi) %*% (s$Xi)     })), ncol = ncol(Xi))     tempbb=sapply(data.list,Eh2new)     beta.new <- solve(sumXiXi) %*% rowSums(tempbb)     if (ECM == T) {         beta.old <- beta.new     }     # update psi     tempcc <- array(NA, c(2, 2, n))     sumtempcc <- matrix(0, 2, 2)     for (j in 1:n) {         tempcc[, , j] <- Eh1new(data.list[[j]], weights.m = weights.mat)         sumtempcc <- sumtempcc + tempcc[, , j]     }     psi.new <- sumtempcc/n     # stop condition     if (sum(abs(beta.old - beta.new)) + sum(abs(psi.old - psi.new)) +         sum(abs(var.old - var.new)) < 0.01) {         print("converge, stop")         break     }     # update parameters     var.old <- var.new     psi.old <- psi.new     beta.old <- beta.new     loglikeseq[m + 1] <- sum(sapply(data.list, loglike)) }  # end of M loop On Tue, Jul 3, 2012 at 5:06 PM, Paul Johnson <[hidden email]> wrote: > On Tue, Jul 3, 2012 at 12:41 PM, jimmycloud <[hidden email]> wrote: > > I have a general question about coefficients estimation of the mixed > model. > > > > I have 2 ideas for you. > > 1. Fit with lme4 package, using the lmer function. That's what it is for. > > 2. If you really want to write your own EM algorithm, I don't feel > sure that very many R and EM experts are going to want to read through > the code you have because you don't follow some of the minimal > readability guidelines.  I accept the fact that there is no officially > mandated R style guide, except for "indent with 4 spaces, not tabs." > But there are some almost universally accepted basics like > > 1. Use <-, not =, for assignment > 2. put a space before and  after symbols like <-, = , + , * , and so forth. > 3. I'd suggest you get used to putting squiggly braces in the K&R style. > > I have found the formatR package's tool tidy.source can do this > nicely. From tidy.source, here's what I get with your code > > http://pj.freefaculty.org/R/em2.R> > Much more readable! > (I inserted library(MASS) for you at the top, otherwise this doesn't > even survive the syntax check.) > > When I copy from email to Emacs, some line-breaking monkey-business > occurs, but I expect you get the idea. > > Now, looking at your cleaned up code, I can see some things to tidy up > to improve the chances that some expert will look. > > First, R functions don't require "return" at the end, many experts > consider it distracting. (Run "lm" or such and review how they write. > No return at the end of functions). > > Second, about that big for loop at the top, the one that goes from m 1:300 > > I don't know what that loop is doing, but there's some code in it that > I'm suspicious of. For example, this part: > >  W.hat <- psi.old - psi.old %*% t(Zi) %*% solve(Sig.hat) %*%  Zi %*% > psi.old > >     Sigb <- psi.old - psi.old %*% t(Zi) %*% solve(Sig.hat) %*%  Zi %*% > psi.old > > > First, you've caused the possibly slow calculation of solve >  (Sig.hat) to run two times.  If you really need it, run it once and > save the result. > > > Second, a for loop is not necessarily slow, but it may be easier to > read if you re-write this: > >  for (j in 1:n) { > tempaa[j]=Eh4new(datai=data.list[[j]],weights.m=weights.mat,beta=beta.old) >         tempbb[j] <- Eh4newv2(datai = data.list[[j]], weights.m = > weights.mat, beta = beta.old) >  } > > like this: > > tempaa <- lapply(data.list, Eh4new, weights.m, beta.old) > tempbb <- lapply(data.list, Eh4newv2, weights.m, beta.old) > > Third, here is a no-no > > tempbb <- c() > for (j in 1:n) { >         tempbb <- cbind(tempbb, Eh2new(data.list[[j]], weights.m = > weights.mat)) >     } > > That will call cbind over and over, causing a relatively slow memory > re-allocation.  See > (http://pj.freefaculty.org/R/WorkingExamples/stackListItems.R) > > Instead, do this to apply Eh2new to each item in data.list > > tempbbtemp <- lapply(data.list, Eh2new, weights.mat) > > and then smash the results together in one go > > tempbb <- do.call("cbind", tempbbtemp) > > > Fourth, I'm not sure on the matrix algebra. Are you sure you need > solve to get the full inverse of Sig.hat?  Once you start checking > into how estimates are calculated in R, you find that the > paper-and-pencil matrix algebra style of formula is generally frowned > upon. OLS (in lm.fit) doesn't do solve(X'X), it uses a QR > decomposition of matrices.  OR look in MASS package ridge regression > code, where the SVD is used to get the inverse. > > I wish I knew enough about the EM algorithm to gaze at your code and > say "aha, error in line 332", but I'm not.  But if you clean up the > presentation and tighten up the most obvious things, you improve your > chances that somebody who is an expert will exert him/herself to do > it. > > pj > > > >                                                              b follows > > N(0,\psi)  #i.e. bivariate normal > > where b is the latent variable, Z and X are ni*2 design matrices, sigma > is > > the error variance, > > Y are longitudinal data, i.e. there are ni measurements for object i. > > Parameters are \beta, \sigma, \psi; call them \theta. > > > > I wrote a regular EM, the M step is to maximize the log(f(Y,b;\theta)) >  by > > taking first order derivatives, setting to 0 and solving the equation. > > The E step involves the evaluation of E step, using Gauss Hermite > > approximation. (10 points) > > > > All are simulated data. X and Z are naive like cbind(rep(1,m),1:m) > > After 200 iterations, the estimated \beta converges to the true value > while > > \sigma and \psi do not. Even after one iteration, the \sigma goes up from > > about 10^0 to about 10^1. > > I am confused since the \hat{\beta} requires \sigma and \psi from > previous > > iteration. If something wrong then all estimations should be incorrect... > > > > Another question is that I calculated the logf(Y;\theta) to see if it > > increases after updating \theta. > > Seems decreasing..... > > > > I thought the X and Z are linearly dependent would cause some issue but I > > also changed the X and Z to some random numbers from normal distribution. > > > > I also tried ECM, which converges fast but sigma and psi are not close to > > the desired values. > > Is this due to the limitation of some methods that I used? > > > > Can any one give some help? I am stuck for a week. I can send the code to > > you. > > First time to raise a question here. Not sure if it is proper to post all > > code. > > > ########################################################################## > > # the main R script > > n=100 > > beta=c(-0.5,1) > > vvar=2   #sigma^2=2 > > psi=matrix(c(1,0.2,0.2,1),2,2) > > b.m.true=mvrnorm(n=n,mu=c(0,0),Sigma=psi)  #100*2 matrix, each row is the > > b_i > > Xi=cbind(rnorm(7,mean=2,sd=0.5),log(2:8)) #Xi=cbind(rep(1,7),1:7) > > y.m=matrix(NA,nrow=n,ncol=nrow(Xi))   #100*7, each row is a y_i > > Zi=Xi > > > > b.list=as.list(data.frame(t(b.m.true))) > > psi.old=matrix(c(0.5,0.4,0.4,0.5),2,2)      #starting psi, beta and var, > not > > exactly the same as the true value > > beta.old=c(-0.3,0.7) > > var.old=1.7 > > > > gausspar=gauss.quad(10,kind="hermite",alpha=0,beta=0) > > > > data.list.wob=list() > > for (i in 1:n) > > { > > data.list.wob[[i]]=list(Xi=Xi,yi=y.m[i,],Zi=Zi) > > } > > > > #compute true loglikelihood and initial loglikelihood > > truelog=0 > > for (i in 1:length(data.list.wob)) > > { > > truelog=truelog+loglike(data.list.wob[[i]],vvar,beta,psi) > > } > > > > truelog > > > > loglikeseq=c() > > loglikeseq=sum(sapply(data.list.wob,loglike)) > > > > ECM=F > > > > > > for (m in 1:300) > > { > > > > Sig.hat=Zi%*%psi.old%*%t(Zi)+var.old*diag(nrow(Zi)) > > W.hat=psi.old-psi.old%*%t(Zi)%*%solve(Sig.hat)%*%Zi%*%psi.old > > > > Sigb=psi.old-psi.old%*%t(Zi)%*%solve(Sig.hat)%*%Zi%*%psi.old > > det(Sigb)^(-0.5) > > > > Y.minus.X.beta=t(t(y.m)-c(Xi%*%beta.old)) > > > miu.m=t(apply(Y.minus.X.beta,MARGIN=1,function(s,B=psi.old%*%t(Zi)%*%solve(Sig.hat)) > >                                                 { > >                                                 B%*%s > >                                                 } > >                 ))  ### each row is the miu_i > > > > > > tmp1=permutations(length(gausspar$nodes),2,repeats.allowed=T) > > tmp2=c(tmp1) > > a.mat=matrix(gausspar$nodes[tmp2],nrow=nrow(tmp1)) #a1,a1 > >                                                                    #a1,a2 > >                                                                    #... > > >  #a10,a9 > > >  #a10,a10 > > a.mat.list=as.list(data.frame(t(a.mat))) > > tmp1=permutations(length(gausspar$weights),2,repeats.allowed=T) > > tmp2=c(tmp1) > > weights.mat=matrix(gausspar$weights[tmp2],nrow=nrow(tmp1)) #w1,w1 > >                                                                    #w1,w2 > >                                                                    #... > > >  #w10,w9 > > >  #w10,w10 > > L=chol(solve(W.hat)) > > LL=sqrt(2)*solve(L) > > halfb=t(LL%*%t(a.mat)) > > > > # each page of b.array is all values of bi_k and bi_j for b_i > > b.list=list() > > for (i in 1:n) > > { > > b.list[[i]]=t(t(halfb)+miu.m[i,]) > > } > > > > #generate a list, each page contains Xi,yi,Zi, > > data.list=list() > > for (i in 1:n) > > { > > data.list[[i]]=list(Xi=Xi,yi=y.m[i,],Zi=Zi,b=b.list[[i]]) > > } > > > > #update sigma^2 > > t1=proc.time() > > tempaa=c() > > tempbb=c() > > for (j in 1:n) > > { > > > #tempaa[j]=Eh4new(datai=data.list[[j]],weights.m=weights.mat,beta=beta.old) > > > tempbb[j]=Eh4newv2(datai=data.list[[j]],weights.m=weights.mat,beta=beta.old) > > > > } > > var.new=mean(tempbb) > > if (ECM==T){var.old=var.new} > > > > > sumXiXi=matrix(rowSums(sapply(data.list,function(s){t(s$Xi)%*%(s$Xi)})),ncol=ncol(Xi)) > > tempbb=c() > > for (j in 1:n) > > { > > tempbb=cbind(tempbb,Eh2new(data.list[[j]],weights.m=weights.mat)) > > } > > beta.new=solve(sumXiXi)%*%rowSums(tempbb) > > > > if (ECM==T){beta.old=beta.new} > > > > #update psi > > tempcc=array(NA,c(2,2,n)) > > sumtempcc=matrix(0,2,2) > > for (j in 1:n) > > { > > tempcc[,,j]=Eh1new(data.list[[j]],weights.m=weights.mat) > > sumtempcc=sumtempcc+tempcc[,,j] > > } > > psi.new=sumtempcc/n > > > > #stop > > > if(sum(abs(beta.old-beta.new))+sum(abs(psi.old-psi.new))+sum(abs(var.old-var.new))<0.01) > > {print("converge, stop");break;} > > > > #update > > var.old=var.new > > psi.old=psi.new > > beta.old=beta.new > > loglikeseq[m+1]=sum(sapply(data.list,loglike)) > > } # end of M loop > > > > ######################################################################## > > #function to calculate loglikelihood > > > loglike=function(datai=data.list[[i]],vvar=var.old,beta=beta.old,psi=psi.old) > > { > > > temp1=-0.5*log(det(vvar*diag(nrow(datai$Zi))+datai$Zi%*%psi%*%t(datai$Zi))) > > > temp2=-0.5*t(datai$yi-datai$Xi%*%beta)%*%solve(vvar*diag(nrow(datai$Zi))+datai$Zi%*%psi%*%t(datai$Zi))%*%(datai$yi-datai$Xi%*%beta) > > return(temp1+temp2) > > } > > > > ####################################################################### > > #functions to evaluate the conditional expection, saved as Efun v2.R > > #Eh1new=E(bibi') > > #Eh2new=E(X'(y-Zbi)) > > #Eh3new=E(bi'Z'Zbi) > > #Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) > > #Eh5new=E(Xibeta-yi)'Zibi > > #Eh6new=E(bi) > > > > Eh1new=function(datai=data.list[[i]], > >                  weights.m=weights.mat) > > { > > #one way > > #temp=matrix(0,2,2) > > #for (i in 1:nrow(bi)) > > #{ > > #temp=temp+bi[i,]%*%t(bi[i,])*weights.m[i,1]*weights.m[i,2] > > #} > > #print(temp) > > > > #the other way > > bi=datai$b > > tempb=bi*rep(sqrt(weights.m[,1]*weights.m[,2]),2) #quadratic b, so need > > sqrt > > #deno=sum(weights.m[,1]*weights.m[,2]) > > > > return(t(tempb)%*%tempb/pi) > > } # end of Eh1 > > > > > > #Eh2new=E(X'(y-Zbi)) > > Eh2new=function(datai=data.list[[i]], > > weights.m=weights.mat) > > { > > #one way > > #temp=rep(0,2) > > #for (j in 1:nrow(bi)) > > #{ > > > #temp=temp+c(t(datai$Xi)%*%(datai$yi-datai$Zi%*%bi[j,])*weights.m[j,1]*weights.m[j,2]) > > #} > > #deno=sum(weights.m[,1]*weights.m[,2]) > > #print(temp/deno) > > > > #another way > > bi=datai$b > > tempb=bi*rep(weights.m[,1]*weights.m[,2],2) > > > > tt=t(datai$Xi)%*%datai$yi-t(datai$Xi)%*%datai$Zi%*%colSums(tempb)/pi > > return(c(tt)) > > } # end of Eh2 > > > > > > #Eh3new=E(b'Z'Zbi) > > Eh3new=function(datai=data.list[[i]], > > weights.m=weights.mat) > > { > > #one way > > #deno=sum(weights.m[,1]*weights.m[,2]) > > #tempb=bi*rep(sqrt(weights.m[,1]*weights.m[,2]),2) #quadratic b, so > need > > sqrt > > #sum(apply(datai$Zi%*%t(tempb),MARGIN=2,function(s){sum(s^2)}))/deno > > > > #another way > > bi=datai$b > > tempb=bi*rep(sqrt(weights.m[,1]*weights.m[,2]),2) #quadratic b, so need > > sqrt > > > return(sum(sapply(as.list(data.frame(datai$Zi%*%t(tempb))),function(s){sum(s^2)}))/pi) > > }  # end of Eh3 > > > > #Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) > > Eh4new=function(datai=data.list[[i]], > >                  weights.m=weights.mat,beta=beta.old) > > { > > #one way > > #temp=0 > > #bi=datai$b > > #tt=c() > > #for (j in 1:nrow(bi)) > > #{ > > > #tt[j]=sum((datai$yi-datai$Xi%*%beta-datai$Zi%*%bi[j,])^2)*weights.m[j,1]*weights.m[j,2] > > > #temp=temp+sum((datai$yi-datai$Xi%*%beta-datai$Zi%*%bi[j,])^2)*weights.m[j,1]*weights.m[j,2] > > #} > > #temp/deno > > > > # another way > > bi=datai$b > > > > > tt=sapply(as.list(ata.frame(c(datai$yi-datai$Xi%*%beta)-datai$Zi%*%t(bi))), > > function(s){sum(s^2)})*(weights.m[,1]*weights.m[,2]) > > return(sum(tt)/pi) > > } # end of Eh4 > > > > > > Eh4newv2=function(datai=data.list[[i]], > > weights.m=weights.mat,beta=beta.old) > > { > > bi=datai$b > > v=weights.m[,1]*weights.m[,2] > > temp=c() > > for (i in 1:length(v)) > > { > > temp[i]=sum(((datai$yi-datai$Xi%*%beta-datai$Zi%*%bi[i,])*sqrt(v[i]))^2) > > } > > return(sum(temp)/pi) > > } # end of Eh4 > > > > > > #Eh5new=E(Xibeta-yi)'Zib > > Eh5new=function(datai=data.list[[i]], > > weights.m=weights.mat,beta=beta.old) > > { > > bi=datai$b > > tempb=bi*rep(weights.m[,1]*weights.m[,2],2) > > t(datai$Xi%*%beta-datai$yi)%*%(datai$Zi%*%t(tempb)) > > return(sum(t(datai$Xi%*%beta-datai$yi)%*%(datai$Zi%*%t(tempb)))/pi) > > } > > > > > > > > Eh6new=function(datai=data.list[[i]], > >                  weights.m=weights.mat) > > { > > bi=datai$b > > tempw=weights.m[,1]*weights.m[,2] > > for (i in 1:nrow(bi)) > > { > > bi[i,]=bi[i,]*tempw[i] > > } > > return(colMeans(bi)/pi) > > } # end of Eh1 > > > > > > > > > > > > > > > > > > -- > > View this message in context: > http://r.789695.n4.nabble.com/EM-algorithm-to-find-MLE-of-coeff-in-mixed-effects-model-tp4635315.html> > Sent from the R help mailing list archive at Nabble.com. > > > > ______________________________________________ > > [hidden email] mailing list > > https://stat.ethz.ch/mailman/listinfo/r-help> > PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html> > and provide commented, minimal, self-contained, reproducible code. > > > > -- > Paul E. Johnson > Professor, Political Science Assoc. Director > 1541 Lilac Lane, Room 504 Center for Research Methods > University of Kansas University of Kansas > http://pj.freefaculty.org http://quant.ku.edu> [[alternative HTML version deleted]] ______________________________________________ [hidden email] mailing list https://stat.ethz.ch/mailman/listinfo/r-helpPLEASE do read the posting guide http://www.R-project.org/posting-guide.htmland provide commented, minimal, self-contained, reproducible code. Reply | Threaded Open this post in threaded view | ## Re: EM algorithm to find MLE of coeff in mixed effects model  I don't believe that R-help permits pdf files. A useful workaround is to post it to a file hosting site like MediaFire and post the link here. John Kane Kingston ON Canada > -----Original Message----- > From: [hidden email] > Sent: Wed, 4 Jul 2012 22:56:02 -0400 > To: [hidden email] > Subject: Re: [R] EM algorithm to find MLE of coeff in mixed effects model > > Dear Paul, > > Thank you for your suggestion. I was moved by the fact that people are so > nice to help learners and ask for nothing. > With your help, I made some changes to my code and add more comments, try > to make things more clear. > If this R email list allow me to upload a pdf file to illustrate the > formula, it will be great. > The reason I do not use lme4 is that later I plan to use this for a more > complicated model (Y,T), Y is the same response of mixed model and T is a > drop out process. > (Frankly I did it for the more complicated model first and got NaN > hessian > after one iteration, so I turned to the simple model.) > In the code, the m loop is the EM iterations. About efficiency, thank you > again for pointing it out. I compared sapply and for loop, they are tie > and > for loop is even better sometimes. > In a paper by Bates, he mentioned that the asymptotic properties of beta > is > kind of independent of the other two parameters. But it seems that paper > was submitted but I did not find it on google scholar. > Not sure if it is the reason for my results. That is why I hope some > expert > who have done some simulation similar may be willing to give some > suggestion. > I may turn to other methods to calculate the conditional expection of the > latent variable as the same time. > > Modified code is as below: > > # library(PerformanceAnalytics) > # install.packages("gregmisc") > # install.packages("statmod") > library(gregmisc) > library(MASS) > library(statmod) > > ## function to calculate loglikelihood > loglike <- function(datai = data.list[[i]], vvar = var.old, > beta = beta.old, psi = psi.old) { > temp1 <- -0.5 * log(det(vvar * diag(nrow(datai$Zi)) + datai$Zi %*% > psi %*% t(datai$Zi))) >     temp2 <- -0.5 * t(datai$yi - datai$Xi %*% beta) %*% solve(vvar * >         diag(nrow(datai$Zi)) + datai$Zi %*% psi %*% t(datai$Zi)) %*% > (datai$yi - datai$Xi %*% beta) > temp1 + temp2 > } > > ## functions to evaluate the conditional expection, saved as Efun v2.R > ## Eh1new=E(bibi') > ## Eh2new=E(X'(y-Zbi)) > ## Eh3new=E(bi'Z'Zbi) > ## Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) > ## Eh5new=E(Xibeta-yi)'Zibi > ## Eh6new=E(bi) > > Eh1new <- function(datai = data.list[[i]], weights.m = weights.mat) { > bi <- datai$b >     tempb <- bi * rep(sqrt(weights.m[, 1] * weights.m[, 2]), >         2)  #quadratic b, so need    sqrt >     t(tempb) %*% tempb/pi > }  # end of Eh1 > > > # Eh2new=E(X'(y-Zbi)) > Eh2new <- function(datai = data.list[[i]], weights.m = weights.mat) { >     bi <- datai$b > tempb <- bi * rep(weights.m[, 1] * weights.m[, 2], 2) > tt <- t(datai$Xi) %*% datai$yi - t(datai$Xi) %*% datai$Zi %*% > colSums(tempb)/pi > c(tt) > } # end of Eh2 > > > # Eh3new=E(b'Z'Zbi) > Eh3new <- function(datai = data.list[[i]], weights.m = weights.mat) { > bi <- datai$b >     tempb <- bi * rep(sqrt(weights.m[, 1] * weights.m[, 2]), >         2)  #quadratic b, so need sqrt >     sum(sapply(as.list(data.frame(datai$Zi %*% t(tempb))), > function(s) { > sum(s^2) > }))/pi > } # end of Eh3 > > # Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) > Eh4new <- function(datai = data.list[[i]], weights.m = weights.mat, > beta = beta.old) { > bi <- datai$b >     tt <- sapply(as.list(ata.frame(c(datai$yi - datai$Xi %*% >         beta) - datai$Zi %*% t(bi))), function(s) { > sum(s^2) > }) * (weights.m[, 1] * weights.m[, 2]) > sum(tt)/pi > } # end of Eh4 > > Eh4newv2 <- function(datai = data.list[[i]], weights.m = weights.mat, > beta = beta.old) { > bi <- datai$b >     v <- weights.m[, 1] * weights.m[, 2] >     temp <- c() >     for (i in 1:length(v)) { >         temp[i] <- sum(((datai$yi - datai$Xi %*% beta - datai$Zi %*% > bi[i, ]) * sqrt(v[i]))^2) > } > sum(temp)/pi > } # end of Eh4 > > # Eh5new=E(Xibeta-yi)'Zib > Eh5new <- function(datai = data.list[[i]], weights.m = weights.mat, > beta = beta.old) { > bi <- datai$b >     tempb <- bi * rep(weights.m[, 1] * weights.m[, 2], 2) >     t(datai$Xi %*% beta - datai$yi) %*% (datai$Zi %*% t(tempb)) > sum(t(datai$Xi %*% beta - datai$yi) %*% (datai$Zi %*% >         t(tempb)))/pi > } > > Eh6new <- function(datai = data.list[[i]], weights.m = weights.mat) { >     bi <- datai$b > tempw <- weights.m[, 1] * weights.m[, 2] > for (i in 1:nrow(bi)) { > bi[i, ] <- bi[i, ] * tempw[i] > } > colMeans(bi)/pi > } # end of Eh6 > > ## the main R script > ################### initial the values and generate the data > ################## > n <- 100 #number of > observations > beta <- c(-0.5, 1) #true coefficient of > fixed > effects > vvar <- 2 #sigma^2=2 #true error > variance > of epsilon > psi <- matrix(c(1, 0.2, 0.2, 1), 2, 2) #covariance matrix > of random effects b > b.m.true <- mvrnorm(n = n, mu = c(0, 0), Sigma = psi) #100*2 matrix, > each > row is the b_i > Xi <- cbind(rnorm(7, mean = 2, sd = 0.5), log(2:8)) > Zi <- Xi > y.m <- matrix(NA, nrow = n, ncol = nrow(Xi)) #100*7, each row > is > a y_i Zi=Xi > > for( i in 1:n) > { > y.m[i,]=mvrnorm(1,mu=(Xi%*%beta+Zi%*%b.m.true[i,]),vvar*diag(nrow(Xi))) > } > b.list <- as.list(data.frame(t(b.m.true))) > psi.old <- matrix(c(0.5, 0.4, 0.4, 0.5), 2, 2) #starting psi, > beta > and var,not the true values > beta.old <- c(-0.3, 0.7) > var.old <- 1.7 > > gausspar <- gauss.quad(10, kind = "hermite", alpha = 0, beta = 0) > > # data.list.wob contains X,Y and Z, but no b's > data.list.wob <- list() > for (i in 1:n) { > data.list.wob[[i]] <- list(Xi = Xi, yi = y.m[i, ], Zi = Zi) > } > > # compute true loglikelihood and initial loglikelihood > truelog <- 0 > for (i in 1:length(data.list.wob)) { > truelog <- truelog + loglike(data.list.wob[[i]], vvar, beta, psi) > } > truelog > > loglikeseq <- c() > loglikeseq <- sum(sapply(data.list.wob, loglike)) > > ECM <- F > > ############################# E-M steps > ################################################ > # m loop is the EM iteration > for (m in 1:300) { > Sig.hat <- Zi %*% psi.old %*% t(Zi) + var.old * diag(nrow(Zi)) > #estimated covariance matrix of y > Sig.hat.inv <- solve(Sig.hat) > W.hat <- psi.old - psi.old %*% t(Zi) %*% Sig.hat.inv %*% > Zi %*% psi.old > Sigb <- psi.old - psi.old %*% t(Zi) %*% Sig.hat.inv %*% Zi %*% > psi.old > #estimated covariance matrix of b > > Y.minus.X.beta <- t(t(y.m) - c(Xi %*% beta.old)) > miu.m <- t(apply(Y.minus.X.beta, MARGIN = 1, function(s, > B = psi.old %*% t(Zi) %*% solve(Sig.hat)) { > B %*% s > })) ### each row is the miu_i > > tmp1 <- permutations(length(gausspar$nodes), 2, repeats.allowed = T) >     tmp2 <- c(tmp1) >     a.mat <- matrix(gausspar$nodes[tmp2], nrow = nrow(tmp1)) # a1,a1 > # a1,a2 > # ... > # a10,a9 > # a10,a10 > # a.mat are all ordered pairs of gauss hermite nodes > a.mat.list <- as.list(data.frame(t(a.mat))) > tmp1 <- permutations(length(gausspar$weights), 2, repeats.allowed = > T) >     tmp2 <- c(tmp1) >     weights.mat <- matrix(gausspar$weights[tmp2], nrow = nrow(tmp1)) # > w1,w1 > # w1,w2 > # ... > # w10,w9 > # w10,w10 > # weights.mat are all ordered pairs of gauss hermite weights > L <- chol(solve(W.hat)) > LL <- sqrt(2) * solve(L) > halfb <- t(LL %*% t(a.mat)) > # each page of b.array is all values of bi_k and bi_j for b_i > # need to calculate those b's as linear functions of nodes since the > original integral needs transformation to use gauss approximation > b.list <- list() > for (i in 1:n) { > b.list[[i]] <- t(t(halfb) + miu.m[i, ]) > } > > # generate a list, each page contains Xi,yi,Zi, and b's > data.list <- list() > for (i in 1:n) { > data.list[[i]] <- list(Xi = Xi, yi = y.m[i, ], Zi = Zi, > b = b.list[[i]]) > } > > # update sigma^2 > t1 <- proc.time() > tempaa <- c() > tempbb <- c() > for (j in 1:n) { > tempbb[j] <- Eh4newv2(datai = data.list[[j]], weights.m = > weights.mat, beta = beta.old) > } > var.new <- mean(tempbb) > if (ECM == T) { > var.old <- var.new > } > > sumXiXi <- matrix(rowSums(sapply(data.list, function(s) { > t(s$Xi) %*% (s$Xi) > })), ncol = ncol(Xi)) > tempbb=sapply(data.list,Eh2new) > beta.new <- solve(sumXiXi) %*% rowSums(tempbb) > > if (ECM == T) { > beta.old <- beta.new > } > > # update psi > tempcc <- array(NA, c(2, 2, n)) > sumtempcc <- matrix(0, 2, 2) > for (j in 1:n) { > tempcc[, , j] <- Eh1new(data.list[[j]], weights.m = weights.mat) > sumtempcc <- sumtempcc + tempcc[, , j] > } > psi.new <- sumtempcc/n > > # stop condition > if (sum(abs(beta.old - beta.new)) + sum(abs(psi.old - psi.new)) + > sum(abs(var.old - var.new)) < 0.01) { > print("converge, stop") > break > } > > # update parameters > var.old <- var.new > psi.old <- psi.new > beta.old <- beta.new > loglikeseq[m + 1] <- sum(sapply(data.list, loglike)) > } # end of M loop > > > On Tue, Jul 3, 2012 at 5:06 PM, Paul Johnson <[hidden email]> > wrote: > >> On Tue, Jul 3, 2012 at 12:41 PM, jimmycloud <[hidden email]> >> wrote: >>> I have a general question about coefficients estimation of the mixed >> model. >>> >> >> I have 2 ideas for you. >> >> 1. Fit with lme4 package, using the lmer function. That's what it is >> for. >> >> 2. If you really want to write your own EM algorithm, I don't feel >> sure that very many R and EM experts are going to want to read through >> the code you have because you don't follow some of the minimal >> readability guidelines. I accept the fact that there is no officially >> mandated R style guide, except for "indent with 4 spaces, not tabs." >> But there are some almost universally accepted basics like >> >> 1. Use <-, not =, for assignment >> 2. put a space before and after symbols like <-, = , + , * , and so >> forth. >> 3. I'd suggest you get used to putting squiggly braces in the K&R style. >> >> I have found the formatR package's tool tidy.source can do this >> nicely. From tidy.source, here's what I get with your code >> >> http://pj.freefaculty.org/R/em2.R>> >> Much more readable! >> (I inserted library(MASS) for you at the top, otherwise this doesn't >> even survive the syntax check.) >> >> When I copy from email to Emacs, some line-breaking monkey-business >> occurs, but I expect you get the idea. >> >> Now, looking at your cleaned up code, I can see some things to tidy up >> to improve the chances that some expert will look. >> >> First, R functions don't require "return" at the end, many experts >> consider it distracting. (Run "lm" or such and review how they write. >> No return at the end of functions). >> >> Second, about that big for loop at the top, the one that goes from m >> 1:300 >> >> I don't know what that loop is doing, but there's some code in it that >> I'm suspicious of. For example, this part: >> >> W.hat <- psi.old - psi.old %*% t(Zi) %*% solve(Sig.hat) %*% Zi %*% >> psi.old >> >> Sigb <- psi.old - psi.old %*% t(Zi) %*% solve(Sig.hat) %*% Zi %*% >> psi.old >> >> >> First, you've caused the possibly slow calculation of solve >> (Sig.hat) to run two times. If you really need it, run it once and >> save the result. >> >> >> Second, a for loop is not necessarily slow, but it may be easier to >> read if you re-write this: >> >> for (j in 1:n) { >> tempaa[j]=Eh4new(datai=data.list[[j]],weights.m=weights.mat,beta=beta.old) >> tempbb[j] <- Eh4newv2(datai = data.list[[j]], weights.m = >> weights.mat, beta = beta.old) >> } >> >> like this: >> >> tempaa <- lapply(data.list, Eh4new, weights.m, beta.old) >> tempbb <- lapply(data.list, Eh4newv2, weights.m, beta.old) >> >> Third, here is a no-no >> >> tempbb <- c() >> for (j in 1:n) { >> tempbb <- cbind(tempbb, Eh2new(data.list[[j]], weights.m = >> weights.mat)) >> } >> >> That will call cbind over and over, causing a relatively slow memory >> re-allocation. See >> (http://pj.freefaculty.org/R/WorkingExamples/stackListItems.R) >> >> Instead, do this to apply Eh2new to each item in data.list >> >> tempbbtemp <- lapply(data.list, Eh2new, weights.mat) >> >> and then smash the results together in one go >> >> tempbb <- do.call("cbind", tempbbtemp) >> >> >> Fourth, I'm not sure on the matrix algebra. Are you sure you need >> solve to get the full inverse of Sig.hat? Once you start checking >> into how estimates are calculated in R, you find that the >> paper-and-pencil matrix algebra style of formula is generally frowned >> upon. OLS (in lm.fit) doesn't do solve(X'X), it uses a QR >> decomposition of matrices. OR look in MASS package ridge regression >> code, where the SVD is used to get the inverse. >> >> I wish I knew enough about the EM algorithm to gaze at your code and >> say "aha, error in line 332", but I'm not. But if you clean up the >> presentation and tighten up the most obvious things, you improve your >> chances that somebody who is an expert will exert him/herself to do >> it. >> >> pj >> >> >>> b follows >>> N(0,\psi) #i.e. bivariate normal >>> where b is the latent variable, Z and X are ni*2 design matrices, sigma >> is >>> the error variance, >>> Y are longitudinal data, i.e. there are ni measurements for object i. >>> Parameters are \beta, \sigma, \psi; call them \theta. >>> >>> I wrote a regular EM, the M step is to maximize the log(f(Y,b;\theta)) >> by >>> taking first order derivatives, setting to 0 and solving the equation. >>> The E step involves the evaluation of E step, using Gauss Hermite >>> approximation. (10 points) >>> >>> All are simulated data. X and Z are naive like cbind(rep(1,m),1:m) >>> After 200 iterations, the estimated \beta converges to the true value >> while >>> \sigma and \psi do not. Even after one iteration, the \sigma goes up >>> from >>> about 10^0 to about 10^1. >>> I am confused since the \hat{\beta} requires \sigma and \psi from >> previous >>> iteration. If something wrong then all estimations should be >>> incorrect... >>> >>> Another question is that I calculated the logf(Y;\theta) to see if it >>> increases after updating \theta. >>> Seems decreasing..... >>> >>> I thought the X and Z are linearly dependent would cause some issue but >>> I >>> also changed the X and Z to some random numbers from normal >>> distribution. >>> >>> I also tried ECM, which converges fast but sigma and psi are not close >>> to >>> the desired values. >>> Is this due to the limitation of some methods that I used? >>> >>> Can any one give some help? I am stuck for a week. I can send the code >>> to >>> you. >>> First time to raise a question here. Not sure if it is proper to post >>> all >>> code. >>> >> ########################################################################## >>> # the main R script >>> n=100 >>> beta=c(-0.5,1) >>> vvar=2 #sigma^2=2 >>> psi=matrix(c(1,0.2,0.2,1),2,2) >>> b.m.true=mvrnorm(n=n,mu=c(0,0),Sigma=psi) #100*2 matrix, each row is >>> the >>> b_i >>> Xi=cbind(rnorm(7,mean=2,sd=0.5),log(2:8)) #Xi=cbind(rep(1,7),1:7) >>> y.m=matrix(NA,nrow=n,ncol=nrow(Xi)) #100*7, each row is a y_i >>> Zi=Xi >>> >>> b.list=as.list(data.frame(t(b.m.true))) >>> psi.old=matrix(c(0.5,0.4,0.4,0.5),2,2) #starting psi, beta and >>> var, >> not >>> exactly the same as the true value >>> beta.old=c(-0.3,0.7) >>> var.old=1.7 >>> >>> gausspar=gauss.quad(10,kind="hermite",alpha=0,beta=0) >>> >>> data.list.wob=list() >>> for (i in 1:n) >>> { >>> data.list.wob[[i]]=list(Xi=Xi,yi=y.m[i,],Zi=Zi) >>> } >>> >>> #compute true loglikelihood and initial loglikelihood >>> truelog=0 >>> for (i in 1:length(data.list.wob)) >>> { >>> truelog=truelog+loglike(data.list.wob[[i]],vvar,beta,psi) >>> } >>> >>> truelog >>> >>> loglikeseq=c() >>> loglikeseq=sum(sapply(data.list.wob,loglike)) >>> >>> ECM=F >>> >>> >>> for (m in 1:300) >>> { >>> >>> Sig.hat=Zi%*%psi.old%*%t(Zi)+var.old*diag(nrow(Zi)) >>> W.hat=psi.old-psi.old%*%t(Zi)%*%solve(Sig.hat)%*%Zi%*%psi.old >>> >>> Sigb=psi.old-psi.old%*%t(Zi)%*%solve(Sig.hat)%*%Zi%*%psi.old >>> det(Sigb)^(-0.5) >>> >>> Y.minus.X.beta=t(t(y.m)-c(Xi%*%beta.old)) >>> >> miu.m=t(apply(Y.minus.X.beta,MARGIN=1,function(s,B=psi.old%*%t(Zi)%*%solve(Sig.hat)) >>> { >>> B%*%s >>> } >>> )) ### each row is the miu_i >>> >>> >>> tmp1=permutations(length(gausspar$nodes),2,repeats.allowed=T) >>> tmp2=c(tmp1) >>> a.mat=matrix(gausspar$nodes[tmp2],nrow=nrow(tmp1)) #a1,a1 >>> >>> #a1,a2 >>> #... >>> >> #a10,a9 >>> >> #a10,a10 >>> a.mat.list=as.list(data.frame(t(a.mat))) >>> tmp1=permutations(length(gausspar$weights),2,repeats.allowed=T) >>> tmp2=c(tmp1) >>> weights.mat=matrix(gausspar$weights[tmp2],nrow=nrow(tmp1)) #w1,w1 >>> >>> #w1,w2 >>> #... >>> >> #w10,w9 >>> >> #w10,w10 >>> L=chol(solve(W.hat)) >>> LL=sqrt(2)*solve(L) >>> halfb=t(LL%*%t(a.mat)) >>> >>> # each page of b.array is all values of bi_k and bi_j for b_i >>> b.list=list() >>> for (i in 1:n) >>> { >>> b.list[[i]]=t(t(halfb)+miu.m[i,]) >>> } >>> >>> #generate a list, each page contains Xi,yi,Zi, >>> data.list=list() >>> for (i in 1:n) >>> { >>> data.list[[i]]=list(Xi=Xi,yi=y.m[i,],Zi=Zi,b=b.list[[i]]) >>> } >>> >>> #update sigma^2 >>> t1=proc.time() >>> tempaa=c() >>> tempbb=c() >>> for (j in 1:n) >>> { >>> >> #tempaa[j]=Eh4new(datai=data.list[[j]],weights.m=weights.mat,beta=beta.old) >>> >> tempbb[j]=Eh4newv2(datai=data.list[[j]],weights.m=weights.mat,beta=beta.old) >>> >>> } >>> var.new=mean(tempbb) >>> if (ECM==T){var.old=var.new} >>> >>> >> sumXiXi=matrix(rowSums(sapply(data.list,function(s){t(s$Xi)%*%(s$Xi)})),ncol=ncol(Xi)) >>> tempbb=c() >>> for (j in 1:n) >>> { >>> tempbb=cbind(tempbb,Eh2new(data.list[[j]],weights.m=weights.mat)) >>> } >>> beta.new=solve(sumXiXi)%*%rowSums(tempbb) >>> >>> if (ECM==T){beta.old=beta.new} >>> >>> #update psi >>> tempcc=array(NA,c(2,2,n)) >>> sumtempcc=matrix(0,2,2) >>> for (j in 1:n) >>> { >>> tempcc[,,j]=Eh1new(data.list[[j]],weights.m=weights.mat) >>> sumtempcc=sumtempcc+tempcc[,,j] >>> } >>> psi.new=sumtempcc/n >>> >>> #stop >>> >> if(sum(abs(beta.old-beta.new))+sum(abs(psi.old-psi.new))+sum(abs(var.old-var.new))<0.01) >>> {print("converge, stop");break;} >>> >>> #update >>> var.old=var.new >>> psi.old=psi.new >>> beta.old=beta.new >>> loglikeseq[m+1]=sum(sapply(data.list,loglike)) >>> } # end of M loop >>> >>> ######################################################################## >>> #function to calculate loglikelihood >>> >> loglike=function(datai=data.list[[i]],vvar=var.old,beta=beta.old,psi=psi.old) >>> { >>> >> temp1=-0.5*log(det(vvar*diag(nrow(datai$Zi))+datai$Zi%*%psi%*%t(datai$Zi))) >>> >> temp2=-0.5*t(datai$yi-datai$Xi%*%beta)%*%solve(vvar*diag(nrow(datai$Zi))+datai$Zi%*%psi%*%t(datai$Zi))%*%(datai$yi-datai$Xi%*%beta) >>> return(temp1+temp2) >>> } >>> >>> ####################################################################### >>> #functions to evaluate the conditional expection, saved as Efun v2.R >>> #Eh1new=E(bibi') >>> #Eh2new=E(X'(y-Zbi)) >>> #Eh3new=E(bi'Z'Zbi) >>> #Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) >>> #Eh5new=E(Xibeta-yi)'Zibi >>> #Eh6new=E(bi) >>> >>> Eh1new=function(datai=data.list[[i]], >>> weights.m=weights.mat) >>> { >>> #one way >>> #temp=matrix(0,2,2) >>> #for (i in 1:nrow(bi)) >>> #{ >>> #temp=temp+bi[i,]%*%t(bi[i,])*weights.m[i,1]*weights.m[i,2] >>> #} >>> #print(temp) >>> >>> #the other way >>> bi=datai$b >>> tempb=bi*rep(sqrt(weights.m[,1]*weights.m[,2]),2)   #quadratic b, so >>> need >>> sqrt >>> #deno=sum(weights.m[,1]*weights.m[,2]) >>> >>> return(t(tempb)%*%tempb/pi) >>> } # end of Eh1 >>> >>> >>> #Eh2new=E(X'(y-Zbi)) >>> Eh2new=function(datai=data.list[[i]], >>>                  weights.m=weights.mat) >>> { >>> #one way >>> #temp=rep(0,2) >>> #for (j in 1:nrow(bi)) >>> #{ >>> >> #temp=temp+c(t(datai$Xi)%*%(datai$yi-datai$Zi%*%bi[j,])*weights.m[j,1]*weights.m[j,2]) >>> #} >>> #deno=sum(weights.m[,1]*weights.m[,2]) >>> #print(temp/deno) >>> >>> #another way >>> bi=datai$b >>> tempb=bi*rep(weights.m[,1]*weights.m[,2],2) >>> >>> tt=t(datai$Xi)%*%datai$yi-t(datai$Xi)%*%datai$Zi%*%colSums(tempb)/pi >>> return(c(tt)) >>> } # end of Eh2 >>> >>> >>> #Eh3new=E(b'Z'Zbi) >>> Eh3new=function(datai=data.list[[i]], >>>                  weights.m=weights.mat) >>> { >>> #one way >>> #deno=sum(weights.m[,1]*weights.m[,2]) >>> #tempb=bi*rep(sqrt(weights.m[,1]*weights.m[,2]),2)   #quadratic b, so >> need >>> sqrt >>> #sum(apply(datai$Zi%*%t(tempb),MARGIN=2,function(s){sum(s^2)}))/deno >>> >>> #another way >>> bi=datai$b >>> tempb=bi*rep(sqrt(weights.m[,1]*weights.m[,2]),2)   #quadratic b, so >>> need >>> sqrt >>> >> return(sum(sapply(as.list(data.frame(datai$Zi%*%t(tempb))),function(s){sum(s^2)}))/pi) >>> } # end of Eh3 >>> >>> #Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) >>> Eh4new=function(datai=data.list[[i]], >>> weights.m=weights.mat,beta=beta.old) >>> { >>> #one way >>> #temp=0 >>> #bi=datai$b >>> #tt=c() >>> #for (j in 1:nrow(bi)) >>> #{ >>> >> #tt[j]=sum((datai$yi-datai$Xi%*%beta-datai$Zi%*%bi[j,])^2)*weights.m[j,1]*weights.m[j,2] >>> >> #temp=temp+sum((datai$yi-datai$Xi%*%beta-datai$Zi%*%bi[j,])^2)*weights.m[j,1]*weights.m[j,2] >>> #} >>> #temp/deno >>> >>> # another way >>> bi=datai$b >>> >>> >> tt=sapply(as.list(ata.frame(c(datai$yi-datai$Xi%*%beta)-datai$Zi%*%t(bi))), >>>             function(s){sum(s^2)})*(weights.m[,1]*weights.m[,2]) >>> return(sum(tt)/pi) >>> } # end of Eh4 >>> >>> >>> Eh4newv2=function(datai=data.list[[i]], >>>                  weights.m=weights.mat,beta=beta.old) >>> { >>> bi=datai$b >>> v=weights.m[,1]*weights.m[,2] >>> temp=c() >>> for (i in 1:length(v)) >>> { >>> temp[i]=sum(((datai$yi-datai$Xi%*%beta-datai$Zi%*%bi[i,])*sqrt(v[i]))^2) >>> } >>> return(sum(temp)/pi) >>> } # end of Eh4 >>> >>> >>> #Eh5new=E(Xibeta-yi)'Zib >>> Eh5new=function(datai=data.list[[i]], >>>                  weights.m=weights.mat,beta=beta.old) >>> { >>> bi=datai$b >>> tempb=bi*rep(weights.m[,1]*weights.m[,2],2) >>> t(datai$Xi%*%beta-datai$yi)%*%(datai$Zi%*%t(tempb)) >>> return(sum(t(datai$Xi%*%beta-datai$yi)%*%(datai$Zi%*%t(tempb)))/pi) >>> } >>> >>> >>> >>> Eh6new=function(datai=data.list[[i]], >>> weights.m=weights.mat) >>> { >>> bi=datai$b >>> tempw=weights.m[,1]*weights.m[,2] >>> for (i in 1:nrow(bi)) >>> { >>> bi[i,]=bi[i,]*tempw[i] >>> } >>> return(colMeans(bi)/pi) >>> } # end of Eh1 >>> >>> >>> >>> >>> >>> >>> >>> >>> -- >>> View this message in context: >> http://r.789695.n4.nabble.com/EM-algorithm-to-find-MLE-of-coeff-in-mixed-effects-model-tp4635315.html>>> Sent from the R help mailing list archive at Nabble.com. >>> >>> ______________________________________________ >>> [hidden email] mailing list >>> https://stat.ethz.ch/mailman/listinfo/r-help>>> PLEASE do read the posting guide >> http://www.R-project.org/posting-guide.html>>> and provide commented, minimal, self-contained, reproducible code. >> >> >> >> -- >> Paul E. Johnson >> Professor, Political Science    Assoc. Director >> 1541 Lilac Lane, Room 504     Center for Research Methods >> University of Kansas               University of Kansas >> http://pj.freefaculty.org            http://quant.ku.edu>> > > [[alternative HTML version deleted]] > > ______________________________________________ > [hidden email] mailing list > https://stat.ethz.ch/mailman/listinfo/r-help> PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html> and provide commented, minimal, self-contained, reproducible code. ____________________________________________________________ FREE ONLINE PHOTOSHARING - Share your photos online with your friends and family! Visit http://www.inbox.com/photosharing to find out more! ______________________________________________ [hidden email] mailing list https://stat.ethz.ch/mailman/listinfo/r-helpPLEASE do read the posting guide http://www.R-project.org/posting-guide.htmland provide commented, minimal, self-contained, reproducible code.
 Seems that I found the problem. The degree of freedom was missing as a denominator... Thank you for your help. Best wishes, Jie On Thu, Jul 5, 2012 at 9:28 AM, John Kane <[hidden email]> wrote: > I don't believe that R-help permits pdf files.  A useful workaround is to > post it to a file hosting site like MediaFire and post the link here. > John Kane > Kingston ON Canada > > > > -----Original Message----- > > From: [hidden email] > > Sent: Wed, 4 Jul 2012 22:56:02 -0400 > > To: [hidden email] > > Subject: Re: [R] EM algorithm to find MLE of coeff in mixed effects model > > > > Dear Paul, > > > > Thank you for your suggestion. I was moved by the fact that people are so > > nice to help learners and ask for nothing. > > With your help, I made some changes to my code and add more comments, try > > to make things more clear. > > If this R email list allow me to upload a pdf file to illustrate the > > formula, it will be great. > > The reason I do not use lme4 is that later I plan to use this for a more > > complicated model (Y,T), Y is the same response of mixed model and T is a > > drop out process. > > (Frankly I did it for the more complicated model first and got NaN > > hessian > > after one iteration, so I turned to the simple model.) > > In the code, the m loop is the EM iterations. About efficiency, thank you > > again for pointing it out. I compared sapply and for loop, they are tie > > and > > for loop is even better sometimes. > > In a paper by Bates, he mentioned that the asymptotic properties of beta > > is > > kind of independent of the other two parameters. But it seems that paper > > was submitted but I did not find it on google scholar. > > Not sure if it is the reason for my results. That is why I hope some > > expert > > who have done some simulation similar may be willing to give some > > suggestion. > > I may turn to other methods to calculate the conditional expection of the > > latent variable as the same time. > > > > Modified code is as below: > > > > # library(PerformanceAnalytics) > > # install.packages("gregmisc") > > # install.packages("statmod") > > library(gregmisc) > > library(MASS) > > library(statmod) > > > > ## function to calculate loglikelihood > > loglike <- function(datai = data.list[[i]], vvar = var.old, > >     beta = beta.old, psi = psi.old) { > >     temp1 <- -0.5 * log(det(vvar * diag(nrow(datai$Zi)) + datai$Zi %*% > >         psi %*% t(datai$Zi))) > > temp2 <- -0.5 * t(datai$yi - datai$Xi %*% beta) %*% solve(vvar * > > diag(nrow(datai$Zi)) + datai$Zi %*% psi %*% t(datai$Zi)) %*% > >         (datai$yi - datai$Xi %*% beta) > >     temp1 + temp2 > > } > > > > ## functions to evaluate the conditional expection, saved as Efun v2.R > > ## Eh1new=E(bibi') > > ## Eh2new=E(X'(y-Zbi)) > > ## Eh3new=E(bi'Z'Zbi) > > ## Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) > > ## Eh5new=E(Xibeta-yi)'Zibi > > ## Eh6new=E(bi) > > > > Eh1new <- function(datai = data.list[[i]], weights.m = weights.mat) { > >     bi <- datai$b > > tempb <- bi * rep(sqrt(weights.m[, 1] * weights.m[, 2]), > > 2) #quadratic b, so need sqrt > > t(tempb) %*% tempb/pi > > } # end of Eh1 > > > > > > # Eh2new=E(X'(y-Zbi)) > > Eh2new <- function(datai = data.list[[i]], weights.m = weights.mat) { > > bi <- datai$b > >     tempb <- bi * rep(weights.m[, 1] * weights.m[, 2], 2) > >     tt <- t(datai$Xi) %*% datai$yi - t(datai$Xi) %*% datai$Zi %*% > >         colSums(tempb)/pi > >     c(tt) > > }  # end of Eh2 > > > > > > # Eh3new=E(b'Z'Zbi) > > Eh3new <- function(datai = data.list[[i]], weights.m = weights.mat) { > >     bi <- datai$b > > tempb <- bi * rep(sqrt(weights.m[, 1] * weights.m[, 2]), > > 2) #quadratic b, so need sqrt > > sum(sapply(as.list(data.frame(datai$Zi %*% t(tempb))), > >         function(s) { > >             sum(s^2) > >         }))/pi > > }  # end of Eh3 > > > > # Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) > > Eh4new <- function(datai = data.list[[i]], weights.m = weights.mat, > >     beta = beta.old) { > >     bi <- datai$b > > tt <- sapply(as.list(ata.frame(c(datai$yi - datai$Xi %*% > > beta) - datai$Zi %*% t(bi))), function(s) { > >         sum(s^2) > >     }) * (weights.m[, 1] * weights.m[, 2]) > >     sum(tt)/pi > > }  # end of Eh4 > > > > Eh4newv2 <- function(datai = data.list[[i]], weights.m = weights.mat, > >     beta = beta.old) { > >     bi <- datai$b > > v <- weights.m[, 1] * weights.m[, 2] > > temp <- c() > > for (i in 1:length(v)) { > > temp[i] <- sum(((datai$yi - datai$Xi %*% beta - datai$Zi %*% > >             bi[i, ]) * sqrt(v[i]))^2) > >     } > >     sum(temp)/pi > > }  # end of Eh4 > > > > # Eh5new=E(Xibeta-yi)'Zib > > Eh5new <- function(datai = data.list[[i]], weights.m = weights.mat, > >     beta = beta.old) { > >     bi <- datai$b > > tempb <- bi * rep(weights.m[, 1] * weights.m[, 2], 2) > > t(datai$Xi %*% beta - datai$yi) %*% (datai$Zi %*% t(tempb)) > >     sum(t(datai$Xi %*% beta - datai$yi) %*% (datai$Zi %*% > > t(tempb)))/pi > > } > > > > Eh6new <- function(datai = data.list[[i]], weights.m = weights.mat) { > > bi <- datai$b > >     tempw <- weights.m[, 1] * weights.m[, 2] > >     for (i in 1:nrow(bi)) { > >         bi[i, ] <- bi[i, ] * tempw[i] > >     } > >     colMeans(bi)/pi > > }  # end of Eh6 > > > > ## the main R script > > ################### initial the values and generate the data > > ################## > > n <- 100                                               #number of > > observations > > beta <- c(-0.5, 1)                               #true coefficient of > > fixed > > effects > > vvar <- 2  #sigma^2=2                                  #true error > > variance > > of epsilon > > psi <- matrix(c(1, 0.2, 0.2, 1), 2, 2)                 #covariance matrix > > of random effects b > > b.m.true <- mvrnorm(n = n, mu = c(0, 0), Sigma = psi)  #100*2 matrix, > > each > > row is the b_i > > Xi <- cbind(rnorm(7, mean = 2, sd = 0.5), log(2:8)) > > Zi <- Xi > > y.m <- matrix(NA, nrow = n, ncol = nrow(Xi))            #100*7, each row > > is > > a y_i Zi=Xi > > > > for( i in 1:n) > > { > > y.m[i,]=mvrnorm(1,mu=(Xi%*%beta+Zi%*%b.m.true[i,]),vvar*diag(nrow(Xi))) > > } > > b.list <- as.list(data.frame(t(b.m.true))) > > psi.old <- matrix(c(0.5, 0.4, 0.4, 0.5), 2, 2)          #starting psi, > > beta > > and var,not the true values > > beta.old <- c(-0.3, 0.7) > > var.old <- 1.7 > > > > gausspar <- gauss.quad(10, kind = "hermite", alpha = 0, beta = 0) > > > > # data.list.wob contains X,Y and Z, but no b's > > data.list.wob <- list() > > for (i in 1:n) { > >     data.list.wob[[i]] <- list(Xi = Xi, yi = y.m[i, ], Zi = Zi) > > } > > > > # compute true loglikelihood and initial loglikelihood > > truelog <- 0 > > for (i in 1:length(data.list.wob)) { > >     truelog <- truelog + loglike(data.list.wob[[i]], vvar, beta, psi) > > } > > truelog > > > > loglikeseq <- c() > > loglikeseq <- sum(sapply(data.list.wob, loglike)) > > > > ECM <- F > > > > ############################# E-M steps > > ################################################ > > # m loop is the EM iteration > > for (m in 1:300) { > >     Sig.hat <- Zi %*% psi.old %*% t(Zi) + var.old * diag(nrow(Zi)) > > #estimated covariance matrix of y > >     Sig.hat.inv <- solve(Sig.hat) > >     W.hat <- psi.old - psi.old %*% t(Zi) %*% Sig.hat.inv %*% > >         Zi %*% psi.old > >     Sigb <- psi.old - psi.old %*% t(Zi) %*% Sig.hat.inv %*% Zi %*% > > psi.old > >  #estimated covariance matrix of b > > > >     Y.minus.X.beta <- t(t(y.m) - c(Xi %*% beta.old)) > >     miu.m <- t(apply(Y.minus.X.beta, MARGIN = 1, function(s, > >         B = psi.old %*% t(Zi) %*% solve(Sig.hat)) { > >         B %*% s > >     }))  ### each row is the miu_i > > > >     tmp1 <- permutations(length(gausspar$nodes), 2, repeats.allowed = T) > > tmp2 <- c(tmp1) > > a.mat <- matrix(gausspar$nodes[tmp2], nrow = nrow(tmp1))  # a1,a1 > >      # a1,a2 > >   # ... > >   # a10,a9 > >   # a10,a10 > >   # a.mat are all ordered pairs of gauss hermite nodes > >     a.mat.list <- as.list(data.frame(t(a.mat))) > >     tmp1 <- permutations(length(gausspar$weights), 2, repeats.allowed = > > T) > > tmp2 <- c(tmp1) > > weights.mat <- matrix(gausspar$weights[tmp2], nrow = nrow(tmp1))  # > > w1,w1 > >        # w1,w2 > >       # ... > >       # w10,w9 > >       # w10,w10 > >       # weights.mat are all ordered pairs of gauss hermite weights > >     L <- chol(solve(W.hat)) > >     LL <- sqrt(2) * solve(L) > >     halfb <- t(LL %*% t(a.mat)) > >     # each page of b.array is all values of bi_k and bi_j for b_i > >     # need to calculate those b's as linear functions of nodes since the > > original integral needs transformation to use gauss approximation > >     b.list <- list() > >     for (i in 1:n) { > >         b.list[[i]] <- t(t(halfb) + miu.m[i, ]) > >     } > > > >     # generate a list, each page contains Xi,yi,Zi, and b's > >     data.list <- list() > >     for (i in 1:n) { > >         data.list[[i]] <- list(Xi = Xi, yi = y.m[i, ], Zi = Zi, > >             b = b.list[[i]]) > >     } > > > >     # update sigma^2 > >     t1 <- proc.time() > >     tempaa <- c() > >     tempbb <- c() > >     for (j in 1:n) { > >         tempbb[j] <- Eh4newv2(datai = data.list[[j]], weights.m = > > weights.mat, beta = beta.old) > >     } > >     var.new <- mean(tempbb) > >     if (ECM == T) { > >         var.old <- var.new > >     } > > > >     sumXiXi <- matrix(rowSums(sapply(data.list, function(s) { > >         t(s$Xi) %*% (s$Xi) > >     })), ncol = ncol(Xi)) > >     tempbb=sapply(data.list,Eh2new) > >     beta.new <- solve(sumXiXi) %*% rowSums(tempbb) > > > >     if (ECM == T) { > >         beta.old <- beta.new > >     } > > > >     # update psi > >     tempcc <- array(NA, c(2, 2, n)) > >     sumtempcc <- matrix(0, 2, 2) > >     for (j in 1:n) { > >         tempcc[, , j] <- Eh1new(data.list[[j]], weights.m = weights.mat) > >         sumtempcc <- sumtempcc + tempcc[, , j] > >     } > >     psi.new <- sumtempcc/n > > > >     # stop condition > >     if (sum(abs(beta.old - beta.new)) + sum(abs(psi.old - psi.new)) + > >         sum(abs(var.old - var.new)) < 0.01) { > >         print("converge, stop") > >         break > >     } > > > >     # update parameters > >     var.old <- var.new > >     psi.old <- psi.new > >     beta.old <- beta.new > >     loglikeseq[m + 1] <- sum(sapply(data.list, loglike)) > > }  # end of M loop > > > > > > On Tue, Jul 3, 2012 at 5:06 PM, Paul Johnson <[hidden email]> > > wrote: > > > >> On Tue, Jul 3, 2012 at 12:41 PM, jimmycloud <[hidden email]> > >> wrote: > >>> I have a general question about coefficients estimation of the mixed > >> model. > >>> > >> > >> I have 2 ideas for you. > >> > >> 1. Fit with lme4 package, using the lmer function. That's what it is > >> for. > >> > >> 2. If you really want to write your own EM algorithm, I don't feel > >> sure that very many R and EM experts are going to want to read through > >> the code you have because you don't follow some of the minimal > >> readability guidelines.  I accept the fact that there is no officially > >> mandated R style guide, except for "indent with 4 spaces, not tabs." > >> But there are some almost universally accepted basics like > >> > >> 1. Use <-, not =, for assignment > >> 2. put a space before and  after symbols like <-, = , + , * , and so > >> forth. > >> 3. I'd suggest you get used to putting squiggly braces in the K&R style. > >> > >> I have found the formatR package's tool tidy.source can do this > >> nicely. From tidy.source, here's what I get with your code > >> > >> http://pj.freefaculty.org/R/em2.R> >> > >> Much more readable! > >> (I inserted library(MASS) for you at the top, otherwise this doesn't > >> even survive the syntax check.) > >> > >> When I copy from email to Emacs, some line-breaking monkey-business > >> occurs, but I expect you get the idea. > >> > >> Now, looking at your cleaned up code, I can see some things to tidy up > >> to improve the chances that some expert will look. > >> > >> First, R functions don't require "return" at the end, many experts > >> consider it distracting. (Run "lm" or such and review how they write. > >> No return at the end of functions). > >> > >> Second, about that big for loop at the top, the one that goes from m > >> 1:300 > >> > >> I don't know what that loop is doing, but there's some code in it that > >> I'm suspicious of. For example, this part: > >> > >>  W.hat <- psi.old - psi.old %*% t(Zi) %*% solve(Sig.hat) %*%  Zi %*% > >> psi.old > >> > >>     Sigb <- psi.old - psi.old %*% t(Zi) %*% solve(Sig.hat) %*%  Zi %*% > >> psi.old > >> > >> > >> First, you've caused the possibly slow calculation of solve > >>  (Sig.hat) to run two times.  If you really need it, run it once and > >> save the result. > >> > >> > >> Second, a for loop is not necessarily slow, but it may be easier to > >> read if you re-write this: > >> > >>  for (j in 1:n) { > >> > tempaa[j]=Eh4new(datai=data.list[[j]],weights.m=weights.mat,beta=beta.old) > >>         tempbb[j] <- Eh4newv2(datai = data.list[[j]], weights.m = > >> weights.mat, beta = beta.old) > >>  } > >> > >> like this: > >> > >> tempaa <- lapply(data.list, Eh4new, weights.m, beta.old) > >> tempbb <- lapply(data.list, Eh4newv2, weights.m, beta.old) > >> > >> Third, here is a no-no > >> > >> tempbb <- c() > >> for (j in 1:n) { > >>         tempbb <- cbind(tempbb, Eh2new(data.list[[j]], weights.m = > >> weights.mat)) > >>     } > >> > >> That will call cbind over and over, causing a relatively slow memory > >> re-allocation.  See > >> (http://pj.freefaculty.org/R/WorkingExamples/stackListItems.R) > >> > >> Instead, do this to apply Eh2new to each item in data.list > >> > >> tempbbtemp <- lapply(data.list, Eh2new, weights.mat) > >> > >> and then smash the results together in one go > >> > >> tempbb <- do.call("cbind", tempbbtemp) > >> > >> > >> Fourth, I'm not sure on the matrix algebra. Are you sure you need > >> solve to get the full inverse of Sig.hat?  Once you start checking > >> into how estimates are calculated in R, you find that the > >> paper-and-pencil matrix algebra style of formula is generally frowned > >> upon. OLS (in lm.fit) doesn't do solve(X'X), it uses a QR > >> decomposition of matrices.  OR look in MASS package ridge regression > >> code, where the SVD is used to get the inverse. > >> > >> I wish I knew enough about the EM algorithm to gaze at your code and > >> say "aha, error in line 332", but I'm not.  But if you clean up the > >> presentation and tighten up the most obvious things, you improve your > >> chances that somebody who is an expert will exert him/herself to do > >> it. > >> > >> pj > >> > >> > >>>                                                              b follows > >>> N(0,\psi)  #i.e. bivariate normal > >>> where b is the latent variable, Z and X are ni*2 design matrices, sigma > >> is > >>> the error variance, > >>> Y are longitudinal data, i.e. there are ni measurements for object i. > >>> Parameters are \beta, \sigma, \psi; call them \theta. > >>> > >>> I wrote a regular EM, the M step is to maximize the log(f(Y,b;\theta)) > >>  by > >>> taking first order derivatives, setting to 0 and solving the equation. > >>> The E step involves the evaluation of E step, using Gauss Hermite > >>> approximation. (10 points) > >>> > >>> All are simulated data. X and Z are naive like cbind(rep(1,m),1:m) > >>> After 200 iterations, the estimated \beta converges to the true value > >> while > >>> \sigma and \psi do not. Even after one iteration, the \sigma goes up > >>> from > >>> about 10^0 to about 10^1. > >>> I am confused since the \hat{\beta} requires \sigma and \psi from > >> previous > >>> iteration. If something wrong then all estimations should be > >>> incorrect... > >>> > >>> Another question is that I calculated the logf(Y;\theta) to see if it > >>> increases after updating \theta. > >>> Seems decreasing..... > >>> > >>> I thought the X and Z are linearly dependent would cause some issue but > >>> I > >>> also changed the X and Z to some random numbers from normal > >>> distribution. > >>> > >>> I also tried ECM, which converges fast but sigma and psi are not close > >>> to > >>> the desired values. > >>> Is this due to the limitation of some methods that I used? > >>> > >>> Can any one give some help? I am stuck for a week. I can send the code > >>> to > >>> you. > >>> First time to raise a question here. Not sure if it is proper to post > >>> all > >>> code. > >>> > >> > ########################################################################## > >>> # the main R script > >>> n=100 > >>> beta=c(-0.5,1) > >>> vvar=2   #sigma^2=2 > >>> psi=matrix(c(1,0.2,0.2,1),2,2) > >>> b.m.true=mvrnorm(n=n,mu=c(0,0),Sigma=psi)  #100*2 matrix, each row is > >>> the > >>> b_i > >>> Xi=cbind(rnorm(7,mean=2,sd=0.5),log(2:8)) #Xi=cbind(rep(1,7),1:7) > >>> y.m=matrix(NA,nrow=n,ncol=nrow(Xi))   #100*7, each row is a y_i > >>> Zi=Xi > >>> > >>> b.list=as.list(data.frame(t(b.m.true))) > >>> psi.old=matrix(c(0.5,0.4,0.4,0.5),2,2)      #starting psi, beta and > >>> var, > >> not > >>> exactly the same as the true value > >>> beta.old=c(-0.3,0.7) > >>> var.old=1.7 > >>> > >>> gausspar=gauss.quad(10,kind="hermite",alpha=0,beta=0) > >>> > >>> data.list.wob=list() > >>> for (i in 1:n) > >>> { > >>> data.list.wob[[i]]=list(Xi=Xi,yi=y.m[i,],Zi=Zi) > >>> } > >>> > >>> #compute true loglikelihood and initial loglikelihood > >>> truelog=0 > >>> for (i in 1:length(data.list.wob)) > >>> { > >>> truelog=truelog+loglike(data.list.wob[[i]],vvar,beta,psi) > >>> } > >>> > >>> truelog > >>> > >>> loglikeseq=c() > >>> loglikeseq=sum(sapply(data.list.wob,loglike)) > >>> > >>> ECM=F > >>> > >>> > >>> for (m in 1:300) > >>> { > >>> > >>> Sig.hat=Zi%*%psi.old%*%t(Zi)+var.old*diag(nrow(Zi)) > >>> W.hat=psi.old-psi.old%*%t(Zi)%*%solve(Sig.hat)%*%Zi%*%psi.old > >>> > >>> Sigb=psi.old-psi.old%*%t(Zi)%*%solve(Sig.hat)%*%Zi%*%psi.old > >>> det(Sigb)^(-0.5) > >>> > >>> Y.minus.X.beta=t(t(y.m)-c(Xi%*%beta.old)) > >>> > >> > miu.m=t(apply(Y.minus.X.beta,MARGIN=1,function(s,B=psi.old%*%t(Zi)%*%solve(Sig.hat)) > >>>                                                 { > >>>                                                 B%*%s > >>>                                                 } > >>>                 ))  ### each row is the miu_i > >>> > >>> > >>> tmp1=permutations(length(gausspar$nodes),2,repeats.allowed=T) > >>> tmp2=c(tmp1) > >>> a.mat=matrix(gausspar$nodes[tmp2],nrow=nrow(tmp1)) #a1,a1 > >>> > >>> #a1,a2 > >>>                                                                    #... > >>> > >>  #a10,a9 > >>> > >>  #a10,a10 > >>> a.mat.list=as.list(data.frame(t(a.mat))) > >>> tmp1=permutations(length(gausspar$weights),2,repeats.allowed=T) > >>> tmp2=c(tmp1) > >>> weights.mat=matrix(gausspar$weights[tmp2],nrow=nrow(tmp1)) #w1,w1 > >>> > >>> #w1,w2 > >>>                                                                    #... > >>> > >>  #w10,w9 > >>> > >>  #w10,w10 > >>> L=chol(solve(W.hat)) > >>> LL=sqrt(2)*solve(L) > >>> halfb=t(LL%*%t(a.mat)) > >>> > >>> # each page of b.array is all values of bi_k and bi_j for b_i > >>> b.list=list() > >>> for (i in 1:n) > >>> { > >>> b.list[[i]]=t(t(halfb)+miu.m[i,]) > >>> } > >>> > >>> #generate a list, each page contains Xi,yi,Zi, > >>> data.list=list() > >>> for (i in 1:n) > >>> { > >>> data.list[[i]]=list(Xi=Xi,yi=y.m[i,],Zi=Zi,b=b.list[[i]]) > >>> } > >>> > >>> #update sigma^2 > >>> t1=proc.time() > >>> tempaa=c() > >>> tempbb=c() > >>> for (j in 1:n) > >>> { > >>> > >> > #tempaa[j]=Eh4new(datai=data.list[[j]],weights.m=weights.mat,beta=beta.old) > >>> > >> > tempbb[j]=Eh4newv2(datai=data.list[[j]],weights.m=weights.mat,beta=beta.old) > >>> > >>> } > >>> var.new=mean(tempbb) > >>> if (ECM==T){var.old=var.new} > >>> > >>> > >> > sumXiXi=matrix(rowSums(sapply(data.list,function(s){t(s$Xi)%*%(s$Xi)})),ncol=ncol(Xi)) > >>> tempbb=c() > >>> for (j in 1:n) > >>> { > >>> tempbb=cbind(tempbb,Eh2new(data.list[[j]],weights.m=weights.mat)) > >>> } > >>> beta.new=solve(sumXiXi)%*%rowSums(tempbb) > >>> > >>> if (ECM==T){beta.old=beta.new} > >>> > >>> #update psi > >>> tempcc=array(NA,c(2,2,n)) > >>> sumtempcc=matrix(0,2,2) > >>> for (j in 1:n) > >>> { > >>> tempcc[,,j]=Eh1new(data.list[[j]],weights.m=weights.mat) > >>> sumtempcc=sumtempcc+tempcc[,,j] > >>> } > >>> psi.new=sumtempcc/n > >>> > >>> #stop > >>> > >> > if(sum(abs(beta.old-beta.new))+sum(abs(psi.old-psi.new))+sum(abs(var.old-var.new))<0.01) > >>> {print("converge, stop");break;} > >>> > >>> #update > >>> var.old=var.new > >>> psi.old=psi.new > >>> beta.old=beta.new > >>> loglikeseq[m+1]=sum(sapply(data.list,loglike)) > >>> } # end of M loop > >>> > >>> > ######################################################################## > >>> #function to calculate loglikelihood > >>> > >> > loglike=function(datai=data.list[[i]],vvar=var.old,beta=beta.old,psi=psi.old) > >>> { > >>> > >> > temp1=-0.5*log(det(vvar*diag(nrow(datai$Zi))+datai$Zi%*%psi%*%t(datai$Zi))) > >>> > >> > temp2=-0.5*t(datai$yi-datai$Xi%*%beta)%*%solve(vvar*diag(nrow(datai$Zi))+datai$Zi%*%psi%*%t(datai$Zi))%*%(datai$yi-datai$Xi%*%beta) > >>> return(temp1+temp2) > >>> } > >>> > >>> ####################################################################### > >>> #functions to evaluate the conditional expection, saved as Efun v2.R > >>> #Eh1new=E(bibi') > >>> #Eh2new=E(X'(y-Zbi)) > >>> #Eh3new=E(bi'Z'Zbi) > >>> #Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) > >>> #Eh5new=E(Xibeta-yi)'Zibi > >>> #Eh6new=E(bi) > >>> > >>> Eh1new=function(datai=data.list[[i]], > >>>                  weights.m=weights.mat) > >>> { > >>> #one way > >>> #temp=matrix(0,2,2) > >>> #for (i in 1:nrow(bi)) > >>> #{ > >>> #temp=temp+bi[i,]%*%t(bi[i,])*weights.m[i,1]*weights.m[i,2] > >>> #} > >>> #print(temp) > >>> > >>> #the other way > >>> bi=datai$b > >>> tempb=bi*rep(sqrt(weights.m[,1]*weights.m[,2]),2) #quadratic b, so > >>> need > >>> sqrt > >>> #deno=sum(weights.m[,1]*weights.m[,2]) > >>> > >>> return(t(tempb)%*%tempb/pi) > >>> } # end of Eh1 > >>> > >>> > >>> #Eh2new=E(X'(y-Zbi)) > >>> Eh2new=function(datai=data.list[[i]], > >>> weights.m=weights.mat) > >>> { > >>> #one way > >>> #temp=rep(0,2) > >>> #for (j in 1:nrow(bi)) > >>> #{ > >>> > >> > #temp=temp+c(t(datai$Xi)%*%(datai$yi-datai$Zi%*%bi[j,])*weights.m[j,1]*weights.m[j,2]) > >>> #} > >>> #deno=sum(weights.m[,1]*weights.m[,2]) > >>> #print(temp/deno) > >>> > >>> #another way > >>> bi=datai$b > >>> tempb=bi*rep(weights.m[,1]*weights.m[,2],2) > >>> > >>> tt=t(datai$Xi)%*%datai$yi-t(datai$Xi)%*%datai$Zi%*%colSums(tempb)/pi > >>> return(c(tt)) > >>> } # end of Eh2 > >>> > >>> > >>> #Eh3new=E(b'Z'Zbi) > >>> Eh3new=function(datai=data.list[[i]], > >>> weights.m=weights.mat) > >>> { > >>> #one way > >>> #deno=sum(weights.m[,1]*weights.m[,2]) > >>> #tempb=bi*rep(sqrt(weights.m[,1]*weights.m[,2]),2) #quadratic b, so > >> need > >>> sqrt > >>> #sum(apply(datai$Zi%*%t(tempb),MARGIN=2,function(s){sum(s^2)}))/deno > >>> > >>> #another way > >>> bi=datai$b > >>> tempb=bi*rep(sqrt(weights.m[,1]*weights.m[,2]),2) #quadratic b, so > >>> need > >>> sqrt > >>> > >> > return(sum(sapply(as.list(data.frame(datai$Zi%*%t(tempb))),function(s){sum(s^2)}))/pi) > >>> }  # end of Eh3 > >>> > >>> #Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) > >>> Eh4new=function(datai=data.list[[i]], > >>>                  weights.m=weights.mat,beta=beta.old) > >>> { > >>> #one way > >>> #temp=0 > >>> #bi=datai$b > >>> #tt=c() > >>> #for (j in 1:nrow(bi)) > >>> #{ > >>> > >> > #tt[j]=sum((datai$yi-datai$Xi%*%beta-datai$Zi%*%bi[j,])^2)*weights.m[j,1]*weights.m[j,2] > >>> > >> > #temp=temp+sum((datai$yi-datai$Xi%*%beta-datai$Zi%*%bi[j,])^2)*weights.m[j,1]*weights.m[j,2] > >>> #} > >>> #temp/deno > >>> > >>> # another way > >>> bi=datai$b > >>> > >>> > >> > tt=sapply(as.list(ata.frame(c(datai$yi-datai$Xi%*%beta)-datai$Zi%*%t(bi))), > >>> function(s){sum(s^2)})*(weights.m[,1]*weights.m[,2]) > >>> return(sum(tt)/pi) > >>> } # end of Eh4 > >>> > >>> > >>> Eh4newv2=function(datai=data.list[[i]], > >>> weights.m=weights.mat,beta=beta.old) > >>> { > >>> bi=datai$b > >>> v=weights.m[,1]*weights.m[,2] > >>> temp=c() > >>> for (i in 1:length(v)) > >>> { > >>> > temp[i]=sum(((datai$yi-datai$Xi%*%beta-datai$Zi%*%bi[i,])*sqrt(v[i]))^2) > >>> } > >>> return(sum(temp)/pi) > >>> } # end of Eh4 > >>> > >>> > >>> #Eh5new=E(Xibeta-yi)'Zib > >>> Eh5new=function(datai=data.list[[i]], > >>> weights.m=weights.mat,beta=beta.old) > >>> { > >>> bi=datai$b > >>> tempb=bi*rep(weights.m[,1]*weights.m[,2],2) > >>> t(datai$Xi%*%beta-datai$yi)%*%(datai$Zi%*%t(tempb)) > >>> return(sum(t(datai$Xi%*%beta-datai$yi)%*%(datai$Zi%*%t(tempb)))/pi) > >>> } > >>> > >>> > >>> > >>> Eh6new=function(datai=data.list[[i]], > >>>                  weights.m=weights.mat) > >>> { > >>> bi=datai\$b > >>> tempw=weights.m[,1]*weights.m[,2] > >>> for (i in 1:nrow(bi)) > >>> { > >>> bi[i,]=bi[i,]*tempw[i] > >>> } > >>> return(colMeans(bi)/pi) > >>> } # end of Eh1 > >>> > >>> > >>> > >>> > >>> > >>> > >>> > >>> > >>> -- > >>> View this message in context: > >> > http://r.789695.n4.nabble.com/EM-algorithm-to-find-MLE-of-coeff-in-mixed-effects-model-tp4635315.html> >>> Sent from the R help mailing list archive at Nabble.com. > >>> > >>> ______________________________________________ > >>> [hidden email] mailing list > >>> https://stat.ethz.ch/mailman/listinfo/r-help> >>> PLEASE do read the posting guide > >> http://www.R-project.org/posting-guide.html > >>> and provide commented, minimal, self-contained, reproducible code. > >> > >> > >> > >> -- > >> Paul E. Johnson > >> Professor, Political Science    Assoc. Director > >> 1541 Lilac Lane, Room 504     Center for Research Methods > >> University of Kansas               University of Kansas > >> http://pj.freefaculty.org            http://quant.ku.edu> >> > > > >       [[alternative HTML version deleted]] > > > > ______________________________________________ > > [hidden email] mailing list > > https://stat.ethz.ch/mailman/listinfo/r-help> > PLEASE do read the posting guide > > http://www.R-project.org/posting-guide.html > > and provide commented, minimal, self-contained, reproducible code. > > ____________________________________________________________ > FREE ONLINE PHOTOSHARING - Share your photos online with your friends and > family! > Visit http://www.inbox.com/photosharing to find out more! > > >         [[alternative HTML version deleted]] ______________________________________________ [hidden email] mailing list https://stat.ethz.ch/mailman/listinfo/r-helpPLEASE do read the posting guide http://www.R-project.org/posting-guide.htmland provide commented, minimal, self-contained, reproducible code.