# $Id: regression.R,v 1.1 2007/09/18 04:17:54 kaip Exp $
#
#
# Copyright (c) 2007 Kai Puolamaki
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
#
# T-61.3050 Machine Learning: Basic Principles
# Example for the lecture of 18 September 2007

# f is a stochastic function defined on the interval [-1,1].
# We use f to create the toy data set to be used in the lecture.
f <- function(x,noise=.2) {
  sin(pi*x)+noise*rnorm(length(x))
}

# Plot the fitted regressor against the data and print the training and
# test errors plus the fitted coefficients as a LaTeX table row.
makeplot <- function(data,form,leg="regressor",main="",datate=NA,phi="X^{%d}") {
  plot(c(-1,1),c(-1.5,1.5),type="n",xlab="X",ylab="Y",main=main)
  points(datate,col="red",pch=21)            # test points
  x <- -100:100/100; y <- f(x,noise=0)
  lines(x,y,col="red",lty="dashed")          # noiseless target function
  points(data,pch=19)                        # training points
  data.lm <- lm(form,data)
  y <- predict(data.lm,data.frame(list(X=x,Y=NA)))
  lines(x,y,col="black",lty="solid")         # fitted regressor
  legend(x="topleft",legend=c(expression(plain(sin)(pi*X)),leg),
         col=c("red","black"),lty=c("dashed","solid"))
  # Mean squared errors on the training and test sets.
  errtr <- sum((predict(data.lm,data)-data[,"Y"])^2)/length(data[,"Y"])
  errte <- sum((predict(data.lm,datate)-datate[,"Y"])^2)/length(datate[,"Y"])
  # Leave-one-out crossvalidation. Train on the data leaving each item
  # out in turn and compute the average error over the left-out points.
  #errcr <- 0
  #for(i in rownames(data)) {
  #  data.lmcr <- lm(form,data[rownames(data)!=i,])
  #  errcr <- errcr+(predict(data.lmcr,data[i,])-data[i,"Y"])^2
  #}
  #errcr <- errcr/length(rownames(data))
  # E_TRAIN, E_CRVALID, E_TEST:
  #cat(sprintf("$%.4f$&$%.4f$&$%.4f$&$",errtr,errcr,errte))
  # E_TRAIN, E_TEST:
  cat(sprintf("$%.4f$&$%.4f$&$",errtr,errte))
  # Print the fitted coefficients, one basis function (phi) per term.
  i <- 0
  for(a in data.lm$coefficients) {
    cat(sprintf(sprintf("%%+.2f%s",phi),a,i))
    i <- i+1
  }
  cat("$\n")
  data.lm                                    # return the fitted model
}

# Chebyshev Polynomials of the First Kind can be obtained by using a
# recurrence relation:
Tn <- function(n,x) {
  if(n==0) rep(1,length(x))                  # T_0(x)=1
  else if(n==1) x                            # T_1(x)=x
  else 2*x*Tn(n-1,x)-Tn(n-2,x)               # T_n(x)=2xT_{n-1}(x)-T_{n-2}(x)
}

# Plot Chebyshev Polynomials of the First Kind:
pdf("chebyshev1.pdf")
plot(c(-1,1),c(-1,1),type="n",
     main="Chebyshev Polynomials of the First Kind",
     xlab="X",ylab=expression(T[n](X)))
x <- -100:100/100
cols <- c("yellow","black","red","green","blue","black")
ltys <- c("solid","solid","solid","solid","solid","dashed")
for(n in 0:5) lines(x,Tn(n,x),col=cols[n+1],lty=ltys[n+1])
legend(x="bottomright",col=cols,lty=ltys,bg="white",
       legend=c(expression(T[0](X)),expression(T[1](X)),expression(T[2](X)),
                expression(T[3](X)),expression(T[4](X)),expression(T[5](X))))
dev.off()

# Plot polynomial terms:
pdf("polynomials.pdf")
plot(c(-1,1),c(-1,1),type="n",
     main="Polynomials",
     xlab="X",ylab=expression(X^n))
x <- -100:100/100
cols <- c("yellow","black","red","green","blue","black")
ltys <- c("solid","solid","solid","solid","solid","dashed")
for(n in 0:5) lines(x,x^n,col=cols[n+1],lty=ltys[n+1])
legend(x="bottomright",col=cols,lty=ltys,bg="white",
       legend=c(expression(X^0),expression(X^1),expression(X^2),
                expression(X^3),expression(X^4),expression(X^5)))
dev.off()

# Create data in the interval [-1,1].
N <- 100
DATA <- data.frame(list(X=runif(N,min=-1,max=1),Y=0))
DATA[,"Y"] <- f(DATA[,"X"])

# Divide the data at random into a training set of 7 and a test set of
# N-7 points. (The rows are i.i.d., so taking the first 7 is a random split.)
DATATR <- DATA[1:7,]
DATATE <- DATA[8:N,]

# Formulas for polynomials of various degrees:
polys <- c("Y ~ 1")
for(i in 1:20) polys <- c(polys,sprintf("%s+I(X^%d)",polys[i],i))

# Formulas for Chebyshev polynomials of the first kind of various degrees:
chebs <- c("Y ~ 1")
for(i in 1:20) chebs <- c(chebs,sprintf("%s+I(Tn(%d,X))",chebs[i],i))

# Effect of the training set size on accuracy, using a degree-5 polynomial.
k <- 5
p <- sample(N)
for(n in c(7,10,15,20,25,30,35,40,45,50)) {
  DATATR <- DATA[p[1:n],]
  DATATE <- DATA[p[(n+1):N],]
  DATA.lm <- lm(formula(polys[k+1]),DATATR)
  errtr <- sum((predict(DATA.lm,DATATR)-DATATR[,"Y"])^2)/n
  errte <- sum((predict(DATA.lm,DATATE)-DATATE[,"Y"])^2)/(N-n)
  cat(sprintf("$%d$&$%.4f$&$%.4f$\\\\\n",n,errtr,errte))
}

# Fit and plot polynomials of degree 0 to 6. Note that DATATR and DATATE
# now hold the last (n=50) split from the loop above.
for(k in c(0:6)) {
  cat(sprintf("$%d$&",k))
  pdf(sprintf("reg-poly-%d.pdf",k))
  makeplot(DATATR,formula(polys[k+1]),leg=sprintf("degree %d polynomial",k),datate=DATATE)
  dev.off()
}
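
# The Chebyshev formulas in chebs above are built but never exercised in
# this script. Below is a minimal sketch of how they could be used,
# mirroring the polynomial loop above. The file name pattern
# "reg-cheb-%d.pdf" and the legend text are illustrative assumptions,
# not part of the original lecture code.
for(k in c(0:6)) {
  cat(sprintf("$%d$&",k))
  pdf(sprintf("reg-cheb-%d.pdf",k))
  # phi="T_{%d}(X)" makes makeplot label each coefficient with its
  # Chebyshev basis function instead of the default power of X.
  makeplot(DATATR,formula(chebs[k+1]),
           leg=sprintf("degree %d Chebyshev fit",k),
           datate=DATATE,phi="T_{%d}(X)")
  dev.off()
}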