################################################# # C01: 維度縮減 # # 吳漢銘 國立臺北大學統計學系 # # http://www.hmwu.idv.tw/ # ################################################# # 9/144 X <- iris[,1:4] (S <- cov(X)) (e <- eigen(S)) D <- diag(e$values) C <- e$vectors C%*%D%*%t(C) S%*%C[,1] D[1]*C[,1] #10/144 eigen(cov(iris[,1:4])) # 13/144 iris.sub <- iris[sample(1:150, 8),1:4] iris.sub M.svd <- svd(iris.sub) M.svd M.svd$u %*% (diag(M.svd$d) %*% t(M.svd$v)) d.sub <- diag(M.svd$d[1:2]) u.sub <- as.matrix(M.svd$u[, 1:2]) v.sub <- as.matrix(M.svd$v[, 1:2]) iris.sub.approx <- u.sub %*% d.sub %*% t(v.sub) iris.sub.approx sum((iris.sub - iris.sub.approx)^2) # 14/144 if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("EBImage") library(EBImage) #(Repositories: BioC Software) lena <- readImage("lena.jpg") dims <- dim(lena) dims plot(c(0, dims[1]), c(0, dims[2]), type='n', xlab="", ylab="") rasterImage(lena, 0, 0, dims[1], dims[2]) library(jpeg) lena <- readJPEG("lena.jpg") # 15/144 lena.flip <- Image(flip(lena)) red.weight <- .2989 green.weight <- .587 blue.weight <- 0.114 lena.gray <- red.weight * imageData(lena.flip)[,,1] + green.weight * imageData(lena.flip)[,,2] + blue.weight * imageData(lena.flip)[,,3] dim(lena.gray) lena.gray[1:5, 1:5] image(lena.gray, col = grey(seq(0, 1, length = 256))) # 16/144 lena.svd <- svd(lena.gray) d <- diag(lena.svd$d) dim(d) u <- lena.svd$u v <- lena.svd$v plot(1:length(lena.svd$d), lena.svd$d, pch=19, xlab="i-th lena.svd$d", ylab="lena.svd$d") used.no <- 20 u.sub <- as.matrix(u[, 1:used.no]) v.sub <- as.matrix(v[, 1:used.no]) d.sub <- as.matrix(d[1:used.no, 1:used.no]) lena.approx <- u.sub %*% d.sub %*% t(v.sub) image(lena.approx, col = grey(seq(0, 1, length = 256))) # 20/144 ?cov x <- iris[, 1:4] cov(x) # 21/144 x <- iris[, 1:4] (covx <- cov(x)) e <- eigen(covx) V <- e$vectors V.inverse <- solve(e$vectors) covx.hat <- V %*% diag(e$values) %*% V.inverse covx.hat # same with covx # 23/144 z <- as.matrix(x) %*% e$vectors[, 1:2] plot(z[, 1], z[, 2], col=iris[, 5]) # 31/144 m1 <- matrix(sample(1:16,16),4,4) m1 m1.scale.svd <- svd(scale(m1)) m1.scale.svd m1.pca <- prcomp(m1, scale=T) m1.pca pca2 <- princomp(m1, cor=T) pca2$scores pca2$loadings # 33/144 cell.matrix <- read.table("YeastCellCycle_alpha.txt", header=TRUE, row.names=1) n <- dim(cell.matrix)[1] p <- dim(cell.matrix)[2]-1 cell.data <- cell.matrix[,2:p+1] gene.phase <- cell.matrix[,1] phase <- unique(gene.phase) phase.name <- c("G1", "S", "S/G2", "G2/M", "M/G1") cell.sdata <- t(scale(t(cell.data))) rc <- rainbow(5)[as.integer(gene.phase)] cc <- rainbow(ncol(cell.sdata)) hv <- heatmap(cell.sdata, col = GBRcol, scale = "column", Colv=NA, Rowv=NA, RowSideColors = rc, ColSideColors = cc, margins = c(5,10), xlab = "Times", ylab = "Genes", main = "Heatmap of Microarray Data") # 34/144 cell.pca <- princomp(cell.sdata, cor=TRUE, scores=TRUE) # 2D plot for first two components pca.dim1 <- cell.pca$scores[,1] pca.dim2 <- cell.pca$scores[,2] plot(pca.dim1, pca.dim2, main="PCA for Cell Cycle Data on Genes", xlab="1st PCA Component", ylab="2nd PCA Component",col=c(phase), pch=c(phase)) legend(3, 4, phase.name, pch=c(phase), col=c(phase)) # shows a screeplot. plot(cell.pca) biplot(cell.pca) # 35/144 # loadings plot plot(loadings(cell.pca)[,1], loadings(cell.pca)[,2], xlab="1st PCA", ylab="2nd PCA", main="Loadings Plot", type="n") text(loadings(cell.pca)[,1], loadings(cell.pca)[,2], labels=paste(1:p)) abline(h=0) abline(v=0) # 36/144 library(MASS) mu <- c(2, -1) Sigma <- matrix(c(2.4, -0.5, -0.5, 1), 2) n <- 250 X <- mvrnorm(n, mu, Sigma) mycol <- terrain.colors(n) sorted.x1 <- sort(X[,1]) order.x1 <- order(X[,1]) id <- 1:n sorted.id <- id[order.x1] x1.col <- mycol[order(sorted.id)] par(mfrow=c(1, 2)) plot(X, col=x1.col, pch=16, main="simulated bivariate normal") abline(h=0, v=0, col="gray") X.pca <- princomp(X, cor = TRUE) X.pca$sdev X.pca$loadings plot(X.pca$scores, col=x1.col, pch=16, main="PCA") abline(h=0, v=0, col="gray") # 37/144 pca.pkg <- c("FactoMineR", "factoextra", "corrplot") install.packages(pca.pkg) lapply(pca.pkg, library, character.only=TRUE) data(decathlon2) head(decathlon2 dim(decathlon2) x <- decathlon2[,1:10] # 38/144 x.pca <- PCA(x, graph = FALSE) class(x.pca) str(x.pca) print(x.pca) # 39/144 eig.val <- get_eigenvalue(x.pca) eig.val fviz_eig(x.pca, addlabels = TRUE, ylim = c(0, 50)) # 40/144 var <- get_pca_var(x.pca) var head(var$coord, 4) fviz_pca_var(x.pca, col.var = "black") # 41/144 head(var$cos2, 4) corrplot(var$cos2, is.corr=FALSE) fviz_cos2(x.pca, choice = "var", axes = 1:2) fviz_pca_var(x.pca, col.var = "cos2", gradient.cols = c("blue", "yellow", "red"), repel = TRUE) # Avoid text overlapping # 42/144 head(var$contrib, 4) corrplot(var$contrib, is.corr=FALSE) fviz_contrib(x.pca, choice = "var", axes = 1, top = 10) fviz_contrib(x.pca, choice = "var", axes = 2, top = 10) fviz_contrib(x.pca, choice = "var", axes = 1:2, top = 10) # 43/144 fviz_pca_var(x.pca, col.var = "contrib", gradient.cols = c("blue", "yellow", "red")) set.seed(123) var.kms <- kmeans(var$coord, centers = 3, nstart = 25) kms.grp <- as.factor(var.kms$cluster) fviz_pca_var(x.pca, col.var = kms.grp, palette = c("blue", "green", "red"), legend.title = "Cluster") # 44/144 ind <- get_pca_ind(x.pca) ind ad(ind$coord, 3) head(ind$cos2, 3) head(ind$contrib, 3) # 45/144 fviz_pca_ind(x.pca, col.ind = "cos2", gradient.cols = c("blue", "black", "red"), repel = TRUE) fviz_pca_ind(x.pca, pointsize = "cos2", pointshape = 21, fill = "lightblue", repel = TRUE) # 46/144 fviz_pca_ind(x.pca, geom.ind = "point", col.ind = decathlon2[,13], palette = c("blue", "red"), legend.title = "Competition") fviz_cos2(x.pca, choice = "ind", top = 5) fviz_contrib(x.pca, choice = "ind", axes = 1:2, top = 5) # 48/144 x <- c(-0.9, 0.6, 0.1) y <- c(-0.5, 0, 0.4) plot(x, y, xlim=c(-1, 1), ylim=c(-1, 1), main="Data Input Space") abline(h=0, v=0, col="blue", lwd=2) text(x+0.05, y+0.05, c("s1", "s2", "s3"), col="red") pca <- princomp(cbind(x, y)); ab <- pca$loadings arrows(-ab[1,1], -ab[2,1], ab[1,1], ab[2,1], col="green", angle = 10, lwd=2) text(-ab[1,1], -ab[2,1], "Comp.1") arrows(-ab[1,2], -ab[2,2], ab[1,2], ab[2,2], col="green", angle = 10, lwd=2) text(-ab[1,2], -ab[2,2], "Comp.2") # 51/144 iris.pca <- princomp(iris[,1:4]) biplot(iris.pca, main="Biplot for iris data") # 54/144 library(devtools) install_github("vqv/ggbiplot") library(ggbiplot) iris.prcomp <- prcomp(iris[,1:4], scale. = TRUE) ggbiplot(iris.prcomp, groups = iris[,5]) ggbiplot(iris.prcomp, obs.scale = 1, var.scale = 1, groups = iris[,5], ellipse = TRUE, circle = TRUE) + scale_color_discrete(name = '') + theme(legend.direction = 'horizontal', legend.position = 'top') # 55/144 library(lattice) state.spending <- read.table("StatePolicySpending.txt", header = T, row.names=1) head(state.spending) spend <- scale(state.spending) spend.svd <- svd(spend, nu=2, nv=2) D <- spend.svd$d[1:2] V <- spend.svd$v[,1:2] U <- spend.svd$u[,1:2] # Create matrices for variables and observations by weighting singular vectors with # the square roots of the first two singular values. These will be used to construct # a symmetric biplot. spend.var <- V * (rep(1, length(V[,1])) %*% t(D^.5)) spend.obs <- U * (rep(1, length(U[,1])) %*% t(D^.5)) row.names(spend.var) <- colnames(spend) row.names(spend.obs) <- row.names(spend) # 56/144 # Within the panel function, "panel.xyplot" draws the observation points and # "panel.segments" draws the variable vectors. The first "panel.text" labels the vectors, # and the second "panel.text" provides labels for relatively extreme observation points. xyplot(spend.obs[,2] ~ spend.obs[,1], aspect = 1, panel = function(x, y) { panel.xyplot(x, y, col = "black") panel.segments(rep(0, length(spend.var[,1])), rep(0, length(spend.var[,1])), spend.var[,1], spend.var[,2], lty = 1, col = "blue") panel.text(spend.var[,1], spend.var[,2], row.names(spend.var), cex = .7, col = "green") panel.text(x[abs(x)>.7 | abs(y)>.7]+.04, y[abs(x)>.7 | abs(y)>.7], row.names(spend.obs)[abs(x)>.7 | abs(y)>.7], cex = .7, adj = 0, col= "red") }, xlim = c(-2.5, 2.5), ylim = c(-2.5, 2.5), xlab = " ", ylab = " ") # Calculate proportion of variance explained by # first two pairs of singular vectors var.explained <- sum(D^2)/sum(spend.svd$d^2) var.explained # 57/144 biplot(princomp(scale(state.spending)), cex=0.6) # 64/144 data(iris3) Iris <- data.frame(rbind(iris3[,,1], iris3[,,2], iris3[,,3]), Species = rep(c("setosa","versicolor","virginica"), rep(50,3))) ## FA data <- Iris[,1:4] class <- Iris[,5] factanal(data, factors=1) fa.dim1 <- as.matrix(data)%*%iris.fa$loadings[,1] # 68/144 x <- as.matrix(iris[, 1:4]) B <- x %*% t(x) d <- 2 eB <- eigen(B) C <- eB$vectors D <- eB$values Z <- C[, 1:d] %*% diag(D[1:d]) plot(z[,1], z[,2], col=iris[,5], main="MDS") # 71/144 # correlation matrix cell.cor <- cor(t(cell.sdata)) # distance matrix cell.dist<- sqrt(2*(1-cell.cor)) cell.mds<- cmdscale(cell.dist) mds.dim1 <- cell.mds[,1] mds.dim2 <- cell.mds[,2] plot(mds.dim1, mds.dim2, type="n", xlab="MDS-1", ylab="MDS-2", main="MDS for Cell Cycle Data") for(i in 0:4){ text(mds.dim1[number[gene.phase==i]], mds.dim2[number[gene.phase==i]], gene.phase[number[gene.phase==i]] , cex=0.8, col= i+1) } legend(0.8, 1.0, phase.name, pch="01234", col=c(1,2,3,4,5)) # 73/144 no.group <- 5 no.iter <- 20 USArrests.kmeans <- kmeans(USArrests, no.group, no.iter) plot(USArrests, col = USArrests.kmeans$cluster, main = "K-means: USArrests data") # PCA USArrests.pca <- princomp(USArrests, cor=TRUE, scores=TRUE) pca.dim1 <- USArrests.pca$scores[,1]; pca.dim2 <- USArrests.pca$scores[,2] plot(pca.dim1, pca.dim2, main="PCA for USArrests Data with K-means", xlab="PCA-1", ylab="PCA-2", col=USArrests.kmeans$cluster) # MDS USArrests.mds<- cmdscale(dist(USArrests)) mds.dim1 <- USArrests.mds[,1]; mds.dim2 <- USArrests.mds[,2] plot(mds.dim1, mds.dim2, xlab="MDS-1", ylab="MDS-2", main="MDS for USArrests Data with K-means", col = USArrests.kmeans$cluster) # 86/144 swissroll <- function(n, sigma=0.05){ angle <- (3*pi/2)*(1+2*runif(n)); height <- runif(n); xdata <- cbind(angle*cos(angle), height, angle*sin(angle)) # angle <- seq((1.5*pi): (4.5*pi), length.out=n); # height <- sample(0:21, n, replace = TRUE); # xdata <- cbind(angle*cos(angle), height, angle*sin(angle)) xdata <- scale(xdata) + matrix(rnorm(n*3, 0, sigma), n, 3) order.angle <- order(angle) sort.angle <- sort(order.angle, index.return=TRUE) col.id <- rainbow130(n) my.color <- col.id[sort.angle$ix] colnames(xdata) <- paste("x", 1:3, sep="") list(xdata=xdata, angle=angle, color=my.color) } source("rainbow130.r") sr <- swissroll(800) library(rgl); open3d() plot3d(sr$xdata[,1], sr$xdata[,2], sr$xdata[,3], col=sr$color, size=3, xlab="", ylab="", zlab="", axes = T) library(vegan) sr.isomap <- isomap(dist(sr$xdata), ndim=2, k=7) # try different k plot(sr.isomap, col=sr$color) # 87/144 Scurve <- function(n){ # upper half S curve theta1 <- runif((n/2), min=-0.9*(pi), max=(pi)/2) z1 <- runif((n/2), min=0, max=5) x1 <- -cos(theta1) y1 <- 2-sin(theta1) side.upper <- matrix(0, ncol=4, nrow=(n/2)) for(i in 1:(n/2)){ side.upper[i,1] <- x1[i] side.upper[i,2] <- y1[i] side.upper[i,3] <- z1[i] side.upper[i,4] <- theta1[i] } order.theta1 <- order(theta1) sort.theta1 <- sort(order.theta1, method="qu", index=TRUE) upper.color <- sort.theta1$ix # lower half S curve theta2 <- runif((n/2), min=(pi)/2, max=1.9*(pi)) z2 <- runif((n/2), min=0, max=5) x2 <- cos((pi)-theta2) y2 <- sin((pi)-theta2) side.lower <- matrix(0, ncol=4, nrow=(n/2)) for(i in 1:(n/2)){ side.lower[i,1] <- x2[i] side.lower[i,2] <- y2[i] side.lower[i,3] <- z2[i] side.lower[i,4] <- theta2[i] } order.theta2 <- order(theta2) sort.theta2 <- sort(order.theta2, method="qu", index=TRUE) lower.color <- sort.theta2$ix # 88/144 xdata <- rbind(side.upper[,c(1,3,2)], side.lower[,c(1,3,2)]) xdata <- scale(xdata) + matrix(rnorm(n*3,0, 0.05), n, 3) angle <- c(side.upper[,4], side.lower[,4]) S.color <- c(upper.color, (n/2 + lower.color)) my.color <- (rainbow130((1.2)*n)[S.color])[1:n] scatterplot3d(xdata, color=my.color, xlab="x", ylab="y", zlab="z", pch=20, angle=30) open3d() plot3d(xdata[,1], xdata[,2], xdata[,3], col=my.color, size=5, xlab="x", ylab="y", zlab="z") return(list(xdata=xdata, angle=angle, color=my.color)) } # 95/144 data(iris3) Iris <- data.frame(rbind(iris3[,,1], iris3[,,2], iris3[,,3]), Species = rep(c("setosa","versicolor","virginica"), rep(50,3))) ## LDA data <- Iris[,1:4] class <- Iris[,5] Iris.lda <- lda(x=data, grouping=class) Iris.lda <- lda(Species ~ ., Iris) lda.dim1 <- as.matrix(data)%*%iris.lda$scaling[,1] lda.dim2 <- as.matrix(data)%*%iris.lda$scaling[,2] ## LDA for classification trainingIndex <- sample(1:150, 75) trainingSample <- Iris[trainingIndex, ] testSample <- Iris[-trainingIndex, ] table(Iris$Species[trainingIndex]) ldaRule <- lda(Species ~ ., Iris, subset = trainingIndex) predict(ldaRule, testSample)$class # 101/144 iris.sir <- dr(as.integer(Species) ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width , data=iris, nslices=3, chi2approx="wood", method="sir") summary(iris.sir) comp.sir <- as.matrix(iris[,1:4]) %*% as.matrix(iris.sir$evectors[,1:2]) plot(comp.sir, col=as.integer(iris$Species)+1, main="SIR to Iris Data") # 102/144 Li6.1 <- function(n){ p <- 5 x <- matrix(0, ncol=p, nrow=n) for(i in 1:p){ x[,i] <- rnorm(n, mean=0, sd=1) } epsilon <- rnorm(n, mean=0, sd=1) y <- x[,1] + x[,2] + x[,3]+ x[,4] + epsilon colnames(x) <- c("x1","x2","x3","x4","x5") as.data.frame(cbind(y, x)) } mydata <- Li6.1(200) attach(mydata) library(dr) my.sir <- dr(y~x1+x2+x3+x4+x5, method="sir", nslices=10) my.sir my.sir dr(formula = y ~ x1 + x2 + x3 + x4 + x5, method = "sir", nslices = 10) # 110/144 x <- matrix(rnorm(100), ncol=20) dim(x) princomp(x) princomp methods(princomp) getAnywhere('princomp.default') # or stats:::princomp.default ?cov.wt # 111/144 e.cor <- eigen(cor(x, use="pairwise.complete.obs")) e.cor$values pca.pkg <- c("FactoMineR", "factoextra", "corrplot") lapply(pca.pkg, library, character.only=TRUE) x.pca <- PCA(x, graph = FALSE) # works, why? PCA # check the code str(x.pca) # 113/144 library(cancerdata) data(VEER1) # Breast cancer gene expression data (van't Veer) VEER1 dim(exprs(VEER1)) str(VEER1) exprs(VEER1)[1:3, 1:5] X1 X2 X3 X4 X5 VEER1@phenoData@data$info 97 Levels: Sample.1..5.yr.survival... table(VEER1@phenoData@data$class) # 114/144 library(FactoMineR); library(factoextra); library(corrplot) x <- t(exprs(VEER1)) x.pca <- PCA(x, graph = FALSE) x.pca eig.val <- get_eigenvalue(x.pca) plot(eig.val[,3], type="l", xlab="components", ylab="variance", + main="cumulative.variance.percent") hist(x.pca$svd$V[,1], xlab="first eigenvector", main="weights of the first eigenvector") # 115/144 x.pc <- rep(1:3, 3:1) y.pc <- unlist(lapply(2:4, function(x) seq(x, 4))) mypca.plot <- function(used.pc){ fviz_pca_ind(x.pca, axes = used.pc, geom.ind = "point", col.ind = VEER1@phenoData@data$class, palette = c("blue", "red"), legend.title = "Groups") } plot.list <- apply(cbind(x.pc, y.pc), 1, mypca.plot) library(gridGraphics) library(grid) grid.newpage() library(gridExtra) grid.arrange(grobs=plot.list, nrow=2, ncol=3) # 116/144 x.pca.scores <- get_pca_ind(x.pca) dim(x.pca.scores$coord) group.col <- c("blue", "red")[as.integer(VEER1@phenoData@data$class)] library("rgl") open3d() plot3d(x.pca.scores$coord[,1:3], col=group.col, type ="p", size=10) play3d(spin3d(), duration=10) # 118/144 library("corpcor") n <- 6 # try 20, 500 p <- 10 # try 100, 10 set.seed(123456) sigma <- matrix(rnorm(p * p), ncol = p) sigma <- crossprod(sigma) + diag(rep(0.1, p)) # t(x) %*% x sigsvd <- svd(sigma) y <- t(sigsvd$v %*% (t(sigsvd$u) * sqrt(sigsvd$d))) x <- matrix(rnorm(n * ncol(sigma)), nrow = n) %*% y # problem here! use mvrnorm {MASS} s1 <- cov(x) s2 <- cov.shrink(x) par(mfrow=c(1,3)) image(t(sigma)[,p:1], main="true cov", xaxt="n", yaxt="n") image(t(s1)[,p:1], main="empirical cov", xaxt="n", yaxt="n") image(t(s2)[,p:1], main="shrinkage cov", xaxt="n", yaxt="n") sum((s1 - sigma) ^ 2) sum((s2 - sigma) ^ 2) mvrnorm {MASS}: mvrnorm(n = 1, mu, Sigma, ...) # 119/144 is.positive.definite(sigma) is.positive.definite(s1) is.positive.definite(s2) rc <- rbind( data.frame(rank.condition(sigma)), data.frame(rank.condition(s1)), data.frame(rank.condition(s2))) rownames(rc) <- c("true", "empirical", "shrinkage") rc rank condition tol true 10 256.35819 6.376444e-14 empirical 5 Inf 1.947290e-13 shrinkage 10 15.31643 1.022819e-13 e0 <- eigen(sigma, symmetric = TRUE)$values e1 <- eigen(s1, symmetric = TRUE)$values e2 <- eigen(s2, symmetric = TRUE)$values matplot(data.frame(e0, e1, e2), type = "l", ylab="eigenvalues", lwd=2) legend("top", legend=c("true", "empirical", "shrinkage"), lwd=2, lty=1:3, col=1:3) # 124/144 head(LifeCycleSavings) pop <- LifeCycleSavings[, 2:3] oec <- LifeCycleSavings[, -(2:3)] cancor(pop, oec) # 128/144 library(CCA) data(nutrimouse) str(nutrimouse) table(nutrimouse$genotype, nutrimouse$diet) # 129/144 X <- as.matrix(nutrimouse$gene) Y <- as.matrix(nutrimouse$lipid) correl <- matcor(X, Y) img.matcor(correl) img.matcor(correl, type = 2) str(correl) # 130/144 x.hm <- heatmap(correl$Xcor) y.hm <- heatmap(correl$Ycor) ordering <- c(x.hm$rowInd, y.hm$rowInd + 120) correl.2 <- list(Xcor=correl$Xcor[x.hm$rowInd, x.hm$rowInd], Ycor=correl$Ycor[y.hm$rowInd, y.hm$rowInd], XYcor=correl$XYcor[ordering, ordering]) img.matcor(correl.2) img.matcor(correl.2, type = 2) # 131/144 X.subset <- as.matrix(nutrimouse$gene[, sample(1:120, size = 10)]) my.cca <- cc(X.subset, Y) barplot(my.cca$cor, xlab="Dimension", ylab="Canonical correlations", names.arg=1:10, ylim=c(0,1)) plt.cc(my.cca) names(my.cca) my.cca$cor # 132/144 regul.par <- estim.regul(X, Y, plt = TRUE, grid1 = seq(0.0001, 0.01, l=5), # l=50 grid2 = seq(0, 0.1, l=5)) # l=50 lambda1 = 0.01 lambda2 = 0.075 CV-score = 0.884716 my.rcca <- rcc(X, Y, regul.par$lambda1, regul.par$lambda2) names(my.rcca) [1] "cor" "names" "xcoef" "ycoef" "scores" barplot(my.rcca$cor, xlab = "Dimension", ylab = "Canonical correlations", names.arg = 1:21, ylim = c(0,1)) plt.cc(my.rcca, var.label = TRUE, ind.names = paste(nutrimouse$genotype, nutrimouse$diet, sep = ".")) names(my.rcca$scores) [1] "xscores" "yscores" "corr.X.xscores" [4] "corr.Y.xscores" "corr.X.yscores" "corr.Y.yscores # 134/144 library("corpcor") options(digits=4) test <- function(x, s){ image(t(s)[,nrow(s):1], main="cov(x)", col=terrain.colors(100)) image(t(x)[,nrow(x):1], main="x", col=terrain.colors(100)) cat("is.positive.definite:", is.positive.definite(s), "\n") cat("eigen:\n") print(eigen(s)) cat("inverse:\n") print(solve(s)) } layout(matrix(1:2, ncol=1), height=c(1,2)) n <- 100 p <- 4 x1 <- matrix(rnorm(n*p), ncol=p) summary(x1) # 135/144 s1 <- cov(x1) test(x1, s1) # 136/144 x2 <- matrix(rnorm(n*p, sd=0.0001), ncol=p) id <- sample(1:p, floor(p/3)) x2[, id] <- x2[, id] + abs(rnorm(n*length(id), m=10000, sd=5000)) summary(x2) s2 <- cov(x2) test(x2, s2) # 137/144 x3 <- scale(x2) s3 <- cov(x3) test(x3, s3) # 138/144 library(fields); library("corpcor"); library(CCA) cor.col <- two.colors(start="blue", middle="white", end="red") # length=255 par(mfrow=c(3,1)) n <- 100 p <- 4 q <- 5 set.seed(12345) X <- matrix(rnorm(n*p), ncol=p); rX <- cor(X) range.col <- floor((1+range(rX))*127+1) image(t(rX)[,p:1], main="cor(X)", col=cor.col[range.col[1]: range.col[2]]) is.positive.definite(cov(X)) Y <- matrix(rnorm(n*q), ncol=q); rY <- cor(Y) range.col <- floor((1+range(rY))*127+1) image(t(rY)[,q:1], main="cor(Y)", col=cor.col[range.col[1]: range.col[2]]) is.positive.definite(cov(Y)) xy.cca <- cc(X, Y) X[,1] <- 3*X[,4] + 1; rX <- cor(X) range.col <- floor((1+range(rX))*127+1) image(t(rX)[,p:1], main="cor(X)", col=cor.col[range.col[1]: range.col[2]]) xy.cca <- cc(X, Y) is.positive.definite(cov(X)) # 140/144 LCMC function (Q, K = 1:nrow(Q)) { ... nQ <- nrow(Q) nK <- length(K) N <- nQ + 1 if (nK < 0.2 * nQ) { lcmc <- numeric(nK) for (i in 1:nK) { k <- K[i] lcmc[i] <- k/(1 - N) + sum(Q[cm.UL_K(k, nQ)])/N/k } } else { lcmc_ <- diag(apply(apply(Q, 2, cumsum), 1, cumsum))/(1:nQ)/N - (1:nQ)/nQ lcmc <- lcmc_[K] } lcmc } my.coranking <- function(X.high, X.low){ # X.high <- iris[1:10,1:4] # X.low <- princomp(X.high)$score[,1:2] D.high <- as.matrix(dist(X.high)) D.low <- as.matrix(dist(X.low)) f <- function(x){ rank(x, ties.method = 'first', na.last = FALSE) } diag(D.high) <- NA # NA is rank 1 diag(D.low) <- NA R.high <- apply(D.high, 1, f) R.low <- apply(D.low, 1, f) table(R.high, R.low)[-1, -1] # 142/144 library(coRanking) # install.packages("coRanking") library(MASS) par(mfrow=c(2, 3)) x <- iris[,1:4] y <- iris[,5] pca <- princomp(x) Q.pca <- coranking(x, pca$score[,1:2], input = "data") imageplot(Q.pca) lcmc.pca <- LCMC(Q.pca, K = 5:10) mds <- cmdscale(dist(x)) Q.mds <- coranking(x, mds[,1:2], input = "data") imageplot(Q.mds) lcmc.mds <- LCMC(Q.pca, K = 5:10) mylda <- lda(x, grouping=y) lda.dim <- as.matrix(x)%*%mylda$scaling[,1:2] Q.lda <- coranking(x, lda.dim, input = "data") imageplot(Q.lda) lcmc.lda <- LCMC(Q.lda, K = 5:10) names(lcmc.pca) <- paste0("K=", 5:10) rbind(lcmc.pca, lcmc.mds, lcmc.lda) plot(pca$score[,1:2], col=as.integer(y)+1, main="PCA") plot(mds[,1:2], col=as.integer(y)+1, main="MDS") plot(lda.dim[,1:2], col=as.integer(y)+1, main="LDA")