# $Id: lighthouse.R,v 1.9 2007/09/30 10:39:06 kaip Exp $ # # # Copyright (c) 2007 Kai Puolamaki # # Permission to use, copy, modify, and distribute this software for any # purpose with or without fee is hereby granted, provided that the above # copyright notice and this permission notice appear in all copies. # # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. # # # T-61.3050 Machine Learning: Basic Principles # For problems 4/2007 # The lighthouse is at position y=2: #D <- data.frame(list(X=rcauchy(256,location=2))) #write.table(D,file="lighthouse.txt") D <- read.table("http://www.cis.hut.fi/Opinnot/T-61.3050/2007/lighthouse.txt") # prior distribution p(y): prior <- function(y,lower=-1000,upper=1000) dunif(y,min=lower,max=upper) # likelihood p(x|y): likelihood <- function(X,y) dcauchy(X,location=y) # loglikelihood log p(D|y): loglikelihood <- function(X,y) sum(log(likelihood(X,y))) # log-posterior log p(y|D) (given several samples and using the prior). # One should still add normalization factor to the posterior, but it is # a constant with respect to y and therefore it has no effect on the # ML or MAP estimates. logposterior <- function(X,y,lower=-1000,upper=1000) { loglikelihood(X,y)+log(prior(y,lower=lower,upper=upper)) } # Notice that if we would happen to have an observation x<-1000 or # x>1000 the log-posterior would be -Inf due to zero prior probability # to such observations! # Normalization factor for the posterior: logZposterior <- function(X,lower=-1000,upper=1000,maxpost=NA) { if(is.na(maxpost)) { map <- MAP(X,lower=lower,upper=upper) maxpost <- logposterior(X,map,lower=lower,upper=upper) } res <- integrate(function(x) exp(sapply(x,function(z) logposterior(X,z,lower=lower,upper=upper))-maxpost), lower=-Inf,upper=Inf) if(res$message != "OK") cat(sprintf("logZposterior: %s\n",res$message)) log(res$value)+maxpost } # Posterior distribution p(y|X) for y (where y is a vector): posterior <- function(X,y,lower=-1000,upper=1000) { map <- MAP(X,lower=lower,upper=upper) maxpost <- logposterior(X,map,lower=lower,upper=upper) logZ <- logZposterior(X,lower=lower,upper=upper,maxpost=maxpost) exp(maxpost-logZ)*exp(sapply(y,function(z) logposterior(X,z,lower=lower,upper=upper)-maxpost)) } # Find maximum likelihood (ML) estimate, given data X: ML <- function(X,lower=-1000,upper=1000) { optimize(function(y) loglikelihood(X,y), lower=lower,upper=upper, maximum=TRUE)$maximum } # Find maximum a posteriori (MAP) estimate, given data X and prior: MAP <- function(X,lower=-1000,upper=1000) { optimize(function(y) logposterior(X,y,lower=lower,upper=upper), lower=lower,upper=upper, maximum=TRUE)$maximum } ######################################################################### # PLOTS # # # Plot the positions of MAP, mean and median as a function of number # of data points: maps <- rep(NA,dim(D)[1]) means <- rep(NA,dim(D)[1]) medians <- rep(NA,dim(D)[1]) for(i in 1:dim(D)[1]) { maps[i] <- MAP(D[1:i,"X"]) means[i] <- mean(D[1:i,"X"]) medians[i] <- median(D[1:i,"X"]) } minmax <- range(c(maps,means,medians)) #pdf("lighthousemap.pdf") plot(c(1,dim(D)[1]),minmax,type="n", xlab="N",ylab="y") # Also show all observations: rug(D[,"X"],side=2,col="red") points(1:dim(D)[1],D[,"X"],pch=".",col="red") abline(a=2,b=0,lty="longdash",col="red") lines(1:dim(D)[1],maps,lty="solid",col="black") lines(1:dim(D)[1],means,lty="dashed",col="black") lines(1:dim(D)[1],medians,lty="dotted",col="black") legend(x="bottomright",bg="white", legend=c("MAP","mean","median","y=2"), lty=c("solid","dashed","dotted","longdash"), col=c("black","black","black","red")) dev.off() # Plot posterior distributions p(y|X) as a function of number # of observations. # # We use function maColorBar to get a color bar. For that we # need BioConductor which can be installed as follows: # > source("http://bioconductor.org/biocLite.R",echo=TRUE) # > biocLite() # > See http://bioconductor.org/docs/install-howto.html library(marray) pdf("lighthouseposterior.pdf") maxpost <- logposterior(D[,"X"],MAP(D[,"X"])) logZ <- logZposterior(D[,"X"],maxpost=maxpost) maxpost <- exp(maxpost-logZ) layout(matrix(c(1,1,1,1,1,1,1,1,1,2,2),1,11,byrow=TRUE)) plot(c(-3,7),c(0,maxpost),type="n",xlab="y",ylab="p(y|X)") cols <- rainbow(dim(D)[1]) y <- -150:350/50 tts <- c(1:4,seq(6,10,2),seq(20,100,20),seq(120,240,40),256) for(t in tts) { cat(sprintf("N=%d\n",t)) py <- posterior(D[1:t,"X"],y) lines(y,py,col=cols[t]) map <- MAP(D[1:t,"X"]) maxpost <- logposterior(D[1:t,"X"],map) logZ <- logZposterior(D[1:t,"X"],maxpost=maxpost) maxpost <- exp(maxpost-logZ) points(map,maxpost,col=cols[t],pch=20) } maColorBar(1:dim(D)[1],col=cols,horizontal=FALSE) dev.off()