#########################################################
## Clustering Demo                                     ## 
## Author: Han-Ming Wu (hmwu@stat.sinica.edu.tw)       ## 
## Institute of Statistical Science, Academia Sinica   ## 
## http://www.sinica.edu.tw/~hmwu/                     ## 
## 2006/07/25                                          ## 
#########################################################


## Read Data
## The data directory below is machine-specific. It is only entered when it
## exists; otherwise "testdata.txt" is read from the current working directory.
data.dir <- "C:\\Program Files\\R\\rw2001\\WorkingData"
if (file.exists(data.dir)) {
  setwd(data.dir)
}
library(stats)
test.matrix <- read.table("testdata.txt", header = TRUE)
if (ncol(test.matrix) < 3) {
  stop("testdata.txt must have a name column, a group column and at least ",
       "one measurement column", call. = FALSE)
}
## n = number of rows; p = number of measurement columns (all columns except
## the first two, which hold the row name and the group label).
n <- nrow(test.matrix)
p <- ncol(test.matrix) - 2
## Measurement columns are 3 .. (p + 2). The previous index `3:p+2` parsed as
## `(3:p) + 2` because `:` binds tighter than `+`, which skipped columns 3
## and 4. `drop = FALSE` keeps a data frame even when p == 1.
test.data <- test.matrix[, seq_len(p) + 2, drop = FALSE]
name <- test.matrix[, 1]
group <- test.matrix[, 2]
groupID <- unique(group)
no.group <- length(groupID)
## Fixed list of six display labels; it is not derived from `groupID`.
groupID.name <- c("group1", "group2", "group3", "group4", "group5", "group6")

## Row-wise standardization: every row of test.data is centred by its own
## mean and divided by its own standard deviation. The per-row vectors are
## recycled down each column, so element i is applied to row i.
test.sdata <- local({
  row.center <- apply(test.data, 1, mean)
  row.spread <- apply(test.data, 1, sd)
  (test.data - row.center) / row.spread
})

## k-means clustering of the standardized rows
## (edit no.group to experiment with a different number of clusters)
no.iter <- 200   # maximum number of k-means iterations
no.group <- 5    # number of clusters requested
test.kmeans <- kmeans(x = test.sdata, centers = no.group, iter.max = no.iter)
test.kmeans
## Total within-cluster sum of squares (per-cluster values added up)
sum(test.kmeans[["withinss"]])

## PCA with k-means
## pca.dim1 / pca.dim2 are not computed in the visible part of this script.
## If nothing run earlier in the session has defined them, derive them here as
## the scores on the first two principal components of the standardized data,
## so that this section also works when the script is run on its own.
if (!exists("pca.dim1") || !exists("pca.dim2")) {
  test.pca <- prcomp(test.sdata)
  pca.dim1 <- test.pca$x[, 1]
  pca.dim2 <- test.pca$x[, 2]
}
## Scatter plot of the two PCA coordinates, coloured by k-means cluster id.
plot(pca.dim1, pca.dim2,
     main = "PCA for Test Data with K-means Clustering",
     xlab = "PCA-1", ylab = "PCA-2",
     col = test.kmeans$cluster)

## MDS with k-means
## mds.dim1 / mds.dim2 are not computed in the visible part of this script.
## If nothing run earlier in the session has defined them, derive them here by
## classical multidimensional scaling (two dimensions) of the Euclidean
## distances between the standardized rows.
if (!exists("mds.dim1") || !exists("mds.dim2")) {
  test.mds <- cmdscale(dist(test.sdata), k = 2)
  mds.dim1 <- test.mds[, 1]
  mds.dim2 <- test.mds[, 2]
}
## Scatter plot of the two MDS coordinates, coloured by k-means cluster id.
plot(mds.dim1, mds.dim2,
     main = "MDS for Test Data with K-means Clustering",
     xlab = "MDS-1", ylab = "MDS-2",
     col = test.kmeans$cluster)

## Self-organizing map of the standardized data on a 5 x 4 rectangular grid
## with a Gaussian neighbourhood (vary xdim / ydim to try other map sizes)
library(som)
test.som <- som(
  test.sdata,
  xdim = 5,
  ydim = 4,
  neigh = "gaussian",
  topol = "rect"
)
plot(test.som)

## Average-linkage hierarchical clustering of the rows of test.sdata.
## The dendrogram leaves are labelled with `name`.
## NOTE(review): the original heading called the rows "Samples" while the
## variable name says "gene" -- confirm which one the rows represent.
test.gene.hc.ave <- hclust(d = dist(test.sdata), method = "ave")
plot(test.gene.hc.ave, labels = name, cex = 0.5, hang = -1)

## Average-linkage hierarchical clustering of the columns of test.sdata
## (the data are transposed so that columns become the clustered items).
## NOTE(review): the original heading called the columns "Genes" while the
## variable name says "exp" -- confirm which one the columns represent.
test.exp.hc.ave <- hclust(d = dist(t(test.sdata)), method = "ave")
plot(test.exp.hc.ave, cex = 0.8)