######################################################### ## PCA, MDS Demo ## ## Author: Han-Ming Wu (hmwu@stat.sinica.edu.tw) ## ## Institute of Statistical Science, Academia Sinica ## ## http://www.sinica.edu.tw/~hmwu/ ## ## 2006/07/25 ## ######################################################### ## Read Data setwd("C:\\Program Files\\R\\rw2001\\WorkingData") library(stats) ## in this test data, rows are samples, and columns are genes test.matrix <- read.table("testdata.txt", header=TRUE) n <- dim(test.matrix)[1] p <- dim(test.matrix)[2]-2 test.data <- test.matrix[,3:(p+2)] name <- test.matrix[,1] ## group can be obtained by kmeans, hierarchical clustering ## and any other clustering algorithms group <- test.matrix[,2] groupID <- unique(group) no.group <- length(groupID) groupID.name <- c("group1", "group2", "group3", "group4", "group5", "group6") ## standardized data (maybe you don't have to do this step!) test.sdata <- (test.data-apply(test.data, 1, mean))/sqrt(apply(test.data, 1, var)) ## PCA on Samples (rows) test.pca <- princomp(test.sdata, cor=TRUE, scores=TRUE) # 2D plot for first two components pca.dim1 <- test.pca$scores[,1] pca.dim2 <- test.pca$scores[,2] plot(pca.dim1, pca.dim2, main="PCA for Test Data on Genes", xlab="1st PCA Componnet", ylab="2nd PCA Componnet") plot(pca.dim1, pca.dim2, main="PCA for Test Data on Genes", xlab="1st PCA Componnet", ylab="2nd PCA Componnet", col=c(group), pch=c(group)) legend(0, 0, groupID.name, pch=c(group), col=c(group)) # shows a screeplot. plot(test.pca, main="screeplot") ## loadings plot plot(loadings(test.pca)[,1], loadings(test.pca)[,2], xlab="1st PCA", ylab="2nd PCA", main="Loadings Plot", type="n") text(loadings(test.pca)[,1], loadings(test.pca)[,2], labels=paste(1:p)) abline(h=0) abline(v=0) # print loadings loadings(test.pca) summary(test.pca) ## ## MDS ## library(stats) #correlation matrix test.cor<- cor(t(test.sdata)) #distance matrix test.dist<- sqrt(2*(1-test.cor)) # or test.dist<- 1-test.cor test.mds<- cmdscale(test.dist) mds.dim1 <- test.mds[,1] mds.dim2 <- test.mds[,2] plot(mds.dim1, mds.dim2, type="n", xlab="MDS-1", ylab="MDS-2", main="MDS for Test Data") number <- 1:n for(i in 0:(no.group-1)){ text(mds.dim1[number[group==i]], mds.dim2[number[group==i]], group[number[group==i]] , cex=0.8, col= i+1) } legend(0.5, 1.0, groupID.name, pch="012346", col=c(1,2,3,4,5,6)) ## or plot(mds.dim1, mds.dim2, xlab="MDS-1", ylab="MDS-2", main="MDS for Test Data", col=group)