#########################################################
## Clustering Demo                                     ##
## Author: Han-Ming Wu (hmwu@stat.sinica.edu.tw)       ##
## Institute of Statistical Science, Academia Sinica   ##
## http://www.sinica.edu.tw/~hmwu/                     ##
## 2006/07/25                                          ##
#########################################################
##
## Purpose: read a tab/space-delimited table whose first column holds row
## names, second column holds a group label and remaining columns hold the
## numeric measurements; standardize each row; then run k-means, SOM and
## average-linkage hierarchical clustering, with PCA / MDS scatter plots
## coloured by the k-means cluster assignment.

## Read Data ----
## Directory holding the input file. Edit this instead of calling setwd(),
## so the script does not change the session's working directory.
data.dir <- "C:\\Program Files\\R\\rw2001\\WorkingData"

library(stats)

test.matrix <- read.table(file.path(data.dir, "testdata.txt"), header = TRUE)

n <- nrow(test.matrix)
p <- ncol(test.matrix) - 2  # number of measurement columns

## Columns 3 .. (p + 2) are the measurements. The parentheses matter:
## `3:p+2` parses as `(3:p) + 2` and would silently skip columns 3 and 4.
test.data <- test.matrix[, 3:(p + 2)]

name <- test.matrix[, 1]
group <- test.matrix[, 2]
groupID <- unique(group)
no.group <- length(groupID)
groupID.name <- c("group1", "group2", "group3",
                  "group4", "group5", "group6")

## Standardized data ----
## Row-wise standardization: subtract each row's mean and divide by its
## sample standard deviation. Work on an explicit numeric matrix (a vector of
## length nrow recycles down the columns, i.e. is applied per row), then
## convert back to a data frame so `test.sdata` keeps its original type.
## NOTE(review): a row with zero variance yields NaN and will make the
## clustering calls below fail -- confirm the input has no constant rows.
test.data.mat <- as.matrix(test.data)
row.mean <- rowMeans(test.data.mat)
row.sd <- apply(test.data.mat, 1, sd)
test.sdata <- as.data.frame((test.data.mat - row.mean) / row.sd)

## Pairwise Euclidean distances between rows, reused by MDS and hclust.
test.dist <- dist(test.sdata)

## k-means (try different no.group) ----
no.group <- 5
no.iter <- 200
test.kmeans <- kmeans(test.sdata, centers = no.group, iter.max = no.iter)
test.kmeans

## Total within-cluster sum of squares (summed over clusters)
sum(test.kmeans$withinss)

## PCA with k-means ----
## The first two principal-component scores are computed here because the
## plot below needs them and nothing earlier in this script defines them.
test.pca <- prcomp(test.sdata)
pca.dim1 <- test.pca$x[, 1]
pca.dim2 <- test.pca$x[, 2]
plot(pca.dim1, pca.dim2,
     main = "PCA for Test Data with K-means Clustering",
     xlab = "PCA-1", ylab = "PCA-2",
     col = test.kmeans$cluster)

## MDS with k-means ----
## Classical (metric) MDS in two dimensions on the row distances.
test.mds <- cmdscale(test.dist, k = 2)
mds.dim1 <- test.mds[, 1]
mds.dim2 <- test.mds[, 2]
plot(mds.dim1, mds.dim2,
     xlab = "MDS-1", ylab = "MDS-2",
     main = "MDS for Test Data with K-means Clustering",
     col = test.kmeans$cluster)

## SOM (try different xdim and ydim) ----
library(som)
test.som <- som(test.sdata, xdim = 5, ydim = 4,
                topol = "rect", neigh = "gaussian")
plot(test.som)

## Hierarchical Clustering (average-linkage) on the rows ----
## Rows are labelled with `name` (first column of the input file).
## NOTE(review): variable names suggest rows are genes -- confirm.
test.gene.hc.ave <- hclust(test.dist, method = "average")
plot(test.gene.hc.ave, hang = -1, cex = 0.5, labels = as.character(name))

## Hierarchical Clustering (average-linkage) on the columns ----
## NOTE(review): variable names suggest columns are experiments -- confirm.
test.exp.hc.ave <- hclust(dist(t(test.sdata)), method = "average")
plot(test.exp.hc.ave, cex = 0.8)