# (C) Petteri Pajunen 2006
#
#
# emnorm.R
#
# model 2D data using a mixture of Normals
#library(e1071)

library(mvtnorm)
nclust <- 3 # correct number of clusters
cat("Number of components in the model?")
gclust <- scan(n=1)
# gclust <- 5 
samples <- 20
iter <- 15*gclust
initsig <- 50
realsig <- 0.2
mu <- rnorm(2*nclust)
mu <- matrix(mu,3,2)

sig <- runif(nclust,min=0.1,max=0.2)

# generate the clustered data

X <- matrix(1,nclust*samples,2)

for (i in 1:nclust) {
  # generate a correlation matrix
  R <- matrix(rnorm(4,sd=realsig),2,2)
  R2 <- R %*% t(R)
  # x <- rnorm(samples,mean=mu[i,1],sd=sig[i])
  # y <- rnorm(samples,mean=mu[i,2],sd=sig[i])
  z <- rmvnorm(samples,mu[i,],R2)
  # z <- cbind(x,y)
  X[(i-1)*samples+(1:samples),] <- z
}

X11(width=5,height=5)
plot(X[,1],X[,2])

I <- matrix(1,2,2)
I[1,2] <- 0
I[2,1] <- 0
lambda <- matrix(1/gclust,1,gclust)  # mixing probabilities
emu <- rnorm(2*gclust,mean=0,sd=0.1)
emu <- matrix(emu,gclust,2)
# esig <- matrix(100,1,3)    # large inital sd's
esig <- rnorm(gclust*4,sd=100)
esig <- t(matrix(c(initsig,0,0,initsig),4,gclust))
tau <- matrix(0,samples*nclust,gclust)
muhist <- matrix(0,iter+1,2)
for (i in 1:iter) {

  for (j in 1:gclust) {
    # tau[,j] <- lambda[j]*dnorm(X[,1],mean=emu[j,1],sd=esig[j])*dnorm(X[,2],mean=emu[j,2],sd=esig[j])
    tau[,j] <- lambda[j]*dmvnorm(X,emu[j,],matrix(esig[j,],2,2))
  }
  tsum <- 1/apply(tau,1,sum)
  tmat <- tsum %*% matrix(1,1,gclust)
  tau <- tmat*tau  # this tells the mixture probabilites for each sample
  lambda <- apply(tau,2,sum)/(nclust*samples)  # new lambdas
  # plot(X[,1],X[,2])
  for (n in 1:(nclust*samples)) {
    points(X[n,1],X[n,2],col=rgb(tau[n,1],tau[n,2],tau[n,3]),lwd=2)
  }
  for (j in 1:gclust) {
    den <- X*(tau[,j] %*% matrix(1,1,2))
    nom <- 1/sum(tau[,j])
    newemu <- apply(den,2,sum)*nom   # new cluster means
    if ((i > 1) && (j<10)) {
      lines(c(emu[j,1],newemu[1]),c(emu[j,2],newemu[2]),lwd=3,col=rgb((j==1),(j==2),(j==3)))
    }
    # points(newemu[1],newemu[2],lwd=2,col=rgb((j==1),(j==2),(j==3)))
    emu[j,] <- newemu
    difference <- X-matrix(1,nclust*samples,1) %*% emu[j,] # dimensions N,2
    difference <- difference * (sqrt(tau[,j]) %*% matrix(1,1,2))
    den <- t(difference) %*% difference # this is 2 times 2 matrix
    esig[j,] <- matrix(den*nom,1,4)
  }
  
}

  for (j in 1:gclust) {
    points(emu[j,1],emu[j,2],lwd=3,col="red")
  }

cx <- range(X[,1])
cy <- range(X[,2])
x <- seq(cx[1],cx[2],by=0.025)
y <- seq(cy[1],cy[2],by=0.025)
nx <- length(x)
ny <- length(y)
z <- matrix(0,nx,ny)
for (ix in 1:nx) {
  t <- cbind(x[ix]*matrix(1,ny,1),y)
  ztmp <- 0
  cat(".")   # debugging
  for (gi in 1:gclust) {
    ztmp <- ztmp+lambda[gi]*dmvnorm(t,emu[gi,],matrix(esig[gi,],2,2))
  }
  z[ix,] <- ztmp
}
dz <- z-min(z)
dz <- sqrt(dz/max(dz))
image(dz,col=gray(seq(1,0,by=-1/128)),zlim=c(0,1))
for (n in 1:(nclust*samples)) {
  points((X[n,1]-cx[1])/(cx[2]-cx[1]),(X[n,2]-cy[1])/(cy[2]-cy[1]),col=rgb(tau[n,1],tau[n,2],tau[n,3]),lwd=2)
}
scan(quiet=TRUE)
dev.off()