# (C) Petteri Pajunen 2006
#
# varmixt2.R
#
# Model 2-D data as a mixture of Normals fitted with Variational Bayes.
# Somewhat optimized code.
# (Earlier versions used library(e1071) / library(MASS); mvrnorm from MASS
#  was replaced by rmvnorm from mvtnorm.)
library(mvtnorm)

# Ask the user for the model order (number of mixture components to fit).
cat("Number of components in the model?")
gclust <- scan(n = 1)   # guessed number of clusters

nclust <- 3             # correct number of clusters used to generate the data
samples <- 20           # data points drawn per true cluster
iter <- 40              # number of VB iterations
initsig <- 10           # initial diagonal value of the esig matrices below
realsig <- 0.2          # scale of the true cluster covariances

# Select cluster means so that (0,0) is not in the middle of the data.
mu <- rnorm(2 * nclust, mean = 2)
mu <- matrix(mu, nclust, 2)   # fixed: was hard-coded matrix(mu, 3, 2)
sig <- runif(nclust, min = 0.1, max = 0.2)

# Keep track of unused clusters: a nonzero value indicates a small precision.
graveyard <- matrix(0, 1, gclust)

# doneyet is a small positive value meant to see if iteration has converged.
doneyet <- 0.01

# Generate the clustered data: 'samples' points from each true cluster.
X <- matrix(1, nclust * samples, 2)
for (i in 1:nclust) {
  # Build a random positive semi-definite covariance matrix R %*% t(R).
  R <- matrix(rnorm(4, sd = realsig), 2, 2) - matrix(realsig, 2, 2)
  R2 <- R %*% t(R)
  z <- rmvnorm(samples, mu[i, ], R2)
  X[(i - 1) * samples + (1:samples), ] <- z
}

# Plot the data and draw a small circle around the origin for reference.
X11(width = 5, height = 5)
plot(X[, 1], X[, 2])
theta <- seq(0, 2 * pi, by = 0.01)   # renamed from 't' (shadowed base::t)
lines(0.1 * sin(theta), 0.1 * cos(theta), pch = 20)

# Initial values for the EM-like (VB) iterations.
I <- diag(2)                                      # 2x2 identity matrix
pi0 <- matrix(1 / gclust, 1, gclust)              # mixing probabilities
lambda <- matrix(1 / gclust, 1, gclust)           # mixing-weight pseudo-counts
lambda0 <- 1                                      # prior for lambda
nu <- matrix(2, 1, gclust)                        # degrees-of-freedom parameters
nu0 <- 1                                          # prior for nu
tempgamma <- matrix(0, gclust, nclust * samples)  # unnormalized responsibilities
gamm <- matrix(0, nclust * samples, gclust)       # normalized responsibilities
hatpi <- matrix(1, 1, gclust)                     # estimated mixing weights

# Posterior cluster centers, initialized at random.
emu <- rnorm(2 * gclust, mean = 0, sd = 1)
emu <- matrix(emu, gclust, 2)
rho <- matrix(emu, gclust, 2)
rho0 <- matrix(0, 1, 2)   # prior mean
Phi0 <- matrix(1, 2, 2)   # prior scale matrix

# Small precision matrices initially: diagonal 'initsig', one row per cluster.
# (A dead rnorm() initialization that was immediately overwritten was removed.)
esig <- t(matrix(c(initsig, 0, 0, initsig), 4, gclust))

tau <- matrix(0, samples * nclust, gclust)
muhist <- matrix(0, iter + 1, 2)
beta <- matrix(1, 1, gclust)
beta0 <- 1
precs <- matrix(0, gclust, 1)
dstring <- matrix(0, gclust, 1)
# The 'for' header below is completed on the following source line.
for
(i in 1:gclust) { dstring[i] <- " " }

# Main VB iteration loop.
# NOTE(review): the loop body is truncated at the end of this chunk — the
# convergence test on 'newrho' continues beyond the visible source.
for (i in 1:iter) {
  # E-step-like pass: compute unnormalized responsibilities for every
  # component not yet retired to the graveyard.
  for (j in which(graveyard == 0)) {
    # Expected mixing weight (exp of digamma differences over lambda).
    pi0[j] <- exp(digamma(lambda[j])-digamma(sum(lambda)))
    # Reshape the j-th stored precision parameters back into a 2x2 matrix.
    Phij <- matrix(esig[j,],2,2)
    # Expected precision matrix of component j.
    Gammaj <- nu[j]*solve(Phij)
    # Expected log-determinant term (exponentiated) for component j.
    TildeGammaj <- exp(digamma((nu[j])/2)+digamma((nu[j]-1)/2)-log(det(Phij))+2*log(2))
    # Center all N data points on the current posterior mean rho[j,].
    Xro <- X-matrix(1,samples*nclust,1) %*% rho[j,]
    # Unnormalized responsibility of component j for each sample; only the
    # diagonal of the N x N quadratic-form matrix is needed.
    tempgamma[j,] <- diag(pi0[j]*sqrt(TildeGammaj)*(exp(-(Xro) %*% Gammaj %*% (t(Xro)/2)))*exp(-1/beta[j]))
  }
  # Now we have computed the responsibilities for each sample n;
  # normalize them so each sample's responsibilities sum to one.
  tsum <- 1/apply(tempgamma,2,sum)
  # tsum should be a row vector of per-sample normalizing constants.
  gamm <- t(tempgamma*(matrix(1,gclust,1) %*% tsum))
  # Color each data point by its responsibilities.
  # NOTE(review): rgb() here uses the first three components only, so this
  # visualization assumes gclust >= 3 (and values within [0,1]).
  for (n in 1:(nclust*samples)) {
    points(X[n,1],X[n,2],col=rgb(gamm[n,1],gamm[n,2],gamm[n,3]),lwd=2)
  }
  finished <- 0 # keep track of whether we are still making progress
  reportunused <- 0
  # M-step-like pass: update the approximation parameters per live component.
  for (j in which(graveyard==0)) {
    # First compute the weighted estimates of weights, means, and covariances.
    hatpi[j] <- sum(gamm[,j])/samples
    # Responsibility-weighted data (gamm column broadcast over both coords).
    den <- X*(gamm[,j] %*% matrix(1,1,2))
    # Effective number of points assigned to component j.
    nom <- (samples*hatpi[j])
    newemu <- apply(den,2,sum)/nom # new cluster means
    emu[j,] <- newemu
    difference <- X-matrix(1,nclust*samples,1) %*% emu[j,] # dimensions N x 2
    # Weight centered data by sqrt of responsibilities so the cross product
    # below yields the responsibility-weighted scatter.
    difference <- difference * (sqrt(gamm[,j]) %*% matrix(1,1,2))
    den <- t(difference) %*% difference # this is a 2 x 2 matrix
    # Weighted covariance estimate for component j.
    Sig <- den/nom
    # Now compute the approximation parameters (posterior updates:
    # prior value plus effective count 'nom').
    lambda[j] <- nom+lambda0
    nu[j] <- nom+nu0
    # New posterior mean: count-weighted blend of the estimated mean
    # and the prior mean rho0.
    newrho <- (nom*emu[j,]+beta0*rho0)/(nom+beta0)
    # Draw the trajectory of the component mean across iterations,
    # colored red/green/blue for components 1/2/3.
    if (i > 1) {
      lines(c(rho[j,1],newrho[1]),c(rho[j,2],newrho[2]),lwd=2,col=rgb((j==1),(j==2),(j==3)))
    }
    # points(newrho[1],newrho[2])
    # Convergence check on the mean update (expression truncated in this
    # chunk; continues beyond the visible source).
    if (max(abs(rho[j,]-newrho))