# ---------------------------------------------------- # Program: ThreeD2.R # Author: Steven M. Boker # Date: Tue Nov 9 13:00:29 EST 2004 # # Three-D Plots Part 2 # # ---------------------------------------------------- # ---------------------------------------------- # Load required libraries and data library(lattice) library(akima) source("edaStudent.sdd") # --------------------------------------------------------------------- # Scatterplot Matrix pdf("ThreeD2MatrixPlot1.pdf", height=5, width=5) pairs(cbind(edaStudent$fammedin, edaStudent$hscgpa, edaStudent$ztest), labels=c("fammedin", "hscgpa", "ztest"), cex=.75) dev.off() # ---------------------------------------------------- # Create a dataframe with 3 variables and the residuals from # a multiple regression using those variables. tSelect <- !is.na(edaStudent$fammedin) & !is.na(edaStudent$hscgpa) & !is.na(edaStudent$ztest) tData <- data.frame(fammedin=edaStudent$fammedin[tSelect], hscgpa=edaStudent$hscgpa[tSelect], ztest=edaStudent$ztest[tSelect], resid=lm(ztest~hscgpa+fammedin, na.action=na.omit, data=edaStudent)$resid) # ---------------------------------------------------- # Split into three levels of family median income incomecat <- factor(cut(tData$fammedin, breaks = c(0,32000,40000,50000,200000)), labels = c("<32k","32k-40k","40k-50k",">50k")) tData2 <- data.frame(incomecat=incomecat, hscgpa=tData$hscgpa, ztest=tData$ztest) pdf("ThreeD2XYplot1.pdf", height=5, width=5) print(xyplot(ztest ~ hscgpa | incomecat, data = tData2, panel = function(x, y) { panel.grid(v = 2) panel.xyplot(x, y) panel.loess(x, y) }, aspect=1, xlab=list(label="High School Core GPA",cex=1.5), ylab=list(label="Entrance Exam (Z-Score)",cex=1.5), scales=list(cex=1.25))) dev.off() # ---------------------------------------------------- # Lowess smooth within each income level tSelect <- incomecat=="<32k" t1 <- lowess(tData$hscgpa[tSelect], tData$ztest[tSelect]) tSelect <- incomecat=="32k-40k" t2 <- lowess(tData$hscgpa[tSelect], tData$ztest[tSelect]) tSelect <- incomecat=="40k-50k" t3 <- lowess(tData$hscgpa[tSelect], tData$ztest[tSelect]) tSelect <- incomecat==">50k" t4 <- lowess(tData$hscgpa[tSelect], tData$ztest[tSelect]) # ---------------------------------------------------- # Paste the result back together into a dataframe suitable # for a 3-D interpolated surface tIncome <- c(rep(16,length(t1$x)), rep(36,length(t2$x)), rep(45,length(t3$x)), rep(80,length(t4$x))) tData3 <- data.frame(incomecat=tIncome, hscgpa=c(t1$x, t2$x, t3$x, t4$x), ztest=c(t1$y, t2$y, t3$y, t4$y)) # ---------------------------------------------------- # Create a splined interpolation of the lowess smooths tInt1 <- interp(tData3$incomecat, tData3$hscgpa, tData3$ztest, xo=seq(15,85,by=5), yo=seq(1.5, 5, by=.25), duplicate="mean") pdf("ThreeD2Contour1.pdf", height=5, width=6) image(tInt1, col = heat.colors(12), xlab = "Income * 10k", ylab = "High School GPA") contour(tInt1, add=T) dev.off() # ---------------------------------------------------- # Calculate residuals from the lowess smooths and put in # a format suitable for 3-D plotting. tLen <- (length(tInt1$x)-1) * (length(tInt1$y)-1) tX <- rep(NA, tLen) tY <- rep(NA, tLen) tZ <- rep(NA, tLen) tR <- rep(NA, tLen) k <- 1 for (i in 1:(length(tInt1$x)-1)) { xlo <- tInt1$x[i] * 1000 xhi <- tInt1$x[i+1] * 1000 for (j in 1:(length(tInt1$y)-1)) { ylo <- tInt1$y[i] yhi <- tInt1$y[i+1] tSelect <- tData$fammedin >= xlo & tData$fammedin < xhi & tData$hscgpa >= ylo & tData$hscgpa < yhi tX[k] <- tInt1$x[i] tY[k] <- tInt1$y[j] tZ[k] <- tInt1$z[i,j] tR[k] <- mean(tData$ztest[tSelect] - tInt1$z[i,j]) k <- k + 1 } } tData4 <- data.frame(fammedin=tX, hscgpa=tY, ztest=tZ, resid=tR) # ----------------------------------------------------- # Plot a 3-d Perspective plot of the interpolation splines pdf("ThreeD2Persp1.pdf", height=5, width=6) persp(unique(tX), unique(tY), matrix(tZ,nrow=14,ncol=14), theta = 45, phi = 25, r = 3, d = 1, shade = .25, col = 4, xlab = "High School Core GPA", ylab = "Family Median Income", zlab = "Standardized Test Score") dev.off() for(i in seq(0,360,by=20)) { tfilename <- paste("ThreeD2Persp1-", i, ".pdf", sep="") pdf(tfilename, height=5, width=6) persp(unique(tX), unique(tY), matrix(tZ,nrow=14,ncol=14), theta = i, phi = 25, r = 3, d = 1, shade = .25, col = 4, xlab = "High School Core GPA", ylab = "Family Median Income", zlab = "Standardized Test Score") dev.off() } # ---------------------------------------------------- # Quit the program