# ---------------------------------------------------- # Program: ThreeD2.R # Author: Steven M. Boker # Date: Tue Nov 20 13:01:15 EST 2007 # # Three-D Plots Part 2 # # ---------------------------------------------------- # ---------------------------------------------- # Load required libraries and data library(lattice) library(akima) source("edaStudent.sdd") # --------------------------------------------------------------------- # Scatterplot Matrix pdf("ThreeD2MatrixPlot1.pdf", height=5, width=5) pairs(cbind(edaStudent\$fammedin, edaStudent\$hscgpa, edaStudent\$ztest), labels=c("fammedin", "hscgpa", "ztest"), cex=.75) dev.off() # ---------------------------------------------------- # Create a dataframe with 3 variables and the residuals from # a multiple regression using those variables. tSelect <- !is.na(edaStudent\$fammedin) & !is.na(edaStudent\$hscgpa) & !is.na(edaStudent\$ztest) tData <- data.frame(fammedin=edaStudent\$fammedin[tSelect], hscgpa=edaStudent\$hscgpa[tSelect], ztest=edaStudent\$ztest[tSelect], resid=lm(ztest~hscgpa+fammedin, na.action=na.omit, data=edaStudent)\$resid) # ---------------------------------------------------- # Split into three levels of family median income incomecat <- factor(cut(tData\$fammedin, breaks = c(0,32000,40000,50000,200000)), labels = c("<32k","32k-40k","40k-50k",">50k")) tData2 <- data.frame(incomecat=incomecat, hscgpa=tData\$hscgpa, ztest=tData\$ztest) pdf("ThreeD2XYplot1.pdf", height=5, width=5) print(xyplot(ztest ~ hscgpa | incomecat, data = tData2, panel = function(x, y) { panel.grid(v = 2) panel.xyplot(x, y) panel.loess(x, y) }, aspect=1, xlab=list(label="High School Core GPA",cex=1.5), ylab=list(label="Entrance Exam (Z-Score)",cex=1.5), scales=list(cex=1.25))) dev.off() # ---------------------------------------------------- # Lowess smooth within each income level tSelect <- incomecat=="<32k" t1 <- lowess(tData\$hscgpa[tSelect], tData\$ztest[tSelect]) tSelect <- incomecat=="32k-40k" t2 <- lowess(tData\$hscgpa[tSelect], tData\$ztest[tSelect]) tSelect <- incomecat=="40k-50k" t3 <- lowess(tData\$hscgpa[tSelect], tData\$ztest[tSelect]) tSelect <- incomecat==">50k" t4 <- lowess(tData\$hscgpa[tSelect], tData\$ztest[tSelect]) # ---------------------------------------------------- # Paste the result back together into a dataframe suitable # for a 3-D interpolated surface tIncome <- c(rep(16,length(t1\$x)), rep(36,length(t2\$x)), rep(45,length(t3\$x)), rep(80,length(t4\$x))) tData3 <- data.frame(incomecat=tIncome, hscgpa=c(t1\$x, t2\$x, t3\$x, t4\$x), ztest=c(t1\$y, t2\$y, t3\$y, t4\$y)) # ---------------------------------------------------- # Create a splined interpolation of the lowess smooths tInt1 <- interp(tData3\$incomecat, tData3\$hscgpa, tData3\$ztest, xo=seq(15,85,by=5), yo=seq(1.5, 5, by=.25), duplicate="mean") pdf("ThreeD2Contour1.pdf", height=5, width=5) image(tInt1, col = heat.colors(12), xlab = "Income * 10k", ylab = "High School GPA") contour(tInt1, add=T) dev.off() # ---------------------------------------------------- # Calculate residuals from the lowess smooths and put in # a format suitable for 3-D plotting. tLen <- (length(tInt1\$x)-1) * (length(tInt1\$y)-1) tX <- rep(NA, tLen) tY <- rep(NA, tLen) tZ <- rep(NA, tLen) tR <- rep(NA, tLen) k <- 1 for (i in 1:(length(tInt1\$x)-1)) { xlo <- tInt1\$x[i] * 1000 xhi <- tInt1\$x[i+1] * 1000 for (j in 1:(length(tInt1\$y)-1)) { ylo <- tInt1\$y[i] yhi <- tInt1\$y[i+1] tSelect <- tData\$fammedin >= xlo & tData\$fammedin < xhi & tData\$hscgpa >= ylo & tData\$hscgpa < yhi tX[k] <- tInt1\$x[i] tY[k] <- tInt1\$y[j] tZ[k] <- tInt1\$z[i,j] tR[k] <- mean(tData\$ztest[tSelect] - tInt1\$z[i,j]) k <- k + 1 } } tData4 <- data.frame(fammedin=tX, hscgpa=tY, ztest=tZ, resid=tR) # ----------------------------------------------------- # Plot a 3-d Perspective plot of the interpolation splines pdf("ThreeD2Persp1.pdf", height=6, width=6) persp(unique(tX), unique(tY), matrix(tZ,nrow=14,ncol=14), theta = 340, phi = 25, r = 3, d = 1, shade = .25, col = "lightblue", xlab = "High School Core GPA", ylab = "Family Median Income", zlab = "Standardized Test Score") dev.off() for(i in seq(0,360,by=20)) { tfilename <- paste("ThreeD2Persp1-", i, ".pdf", sep="") pdf(tfilename, height=6, width=6) persp(unique(tX), unique(tY), matrix(tZ,nrow=14,ncol=14), theta = i, phi = 25, r = 3, d = 1, shade = .25, col = "lightblue", xlab = "High School Core GPA", ylab = "Family Median Income", zlab = "Standardized Test Score") dev.off() } # ---------------------------------------------------- # Quit the program