# --------------------------------------------------------------------- # Program: ThreeD2.S # Author: Steven M. Boker # Date: April 5, 2000 # # Three-D Plots Part 2 # # --------------------------------------------------------------------- # --------------------------------------------------------------------- # Create a dataframe with 3 variables and the residuals from # a multiple regression using those variables. tSelect <- !is.na(edaStudent$fammedin) & !is.na(edaStudent$hscgpa) & !is.na(edaStudent$ztest) tData <- data.frame(fammedin=edaStudent$fammedin[tSelect], hscgpa=edaStudent$hscgpa[tSelect], ztest=edaStudent$ztest[tSelect], resid=lm(ztest~hscgpa+fammedin, na.action=na.omit, data=edaStudent)$resid) # --------------------------------------------------------------------- # Split into three levels of family median income incomecat <- factor(cut(tData$fammedin, breaks = c(0,32000,40000,50000,200000)), labels = c("<32k","32k-40k","40k-50k",">50k")) tData2 <- data.frame(incomecat=incomecat, hscgpa=tData$hscgpa, ztest=tData$ztest) graphsheet(height=6.4,width=6.4) xyplot(ztest ~ hscgpa | incomecat, data = tData2, panel = function(x, y) { panel.grid(v = 2) panel.xyplot(x, y) panel.loess(x, y) }, aspect=1, xlab="High School Core GPA", ylab="Entrance Exam (Z-Score)") # --------------------------------------------------------------------- # Lowess smooth within each income level tSelect <- incomecat=="<32k" t1 <- lowess(tData$hscgpa[tSelect], tData$ztest[tSelect]) tSelect <- incomecat=="32k-40k" t2 <- lowess(tData$hscgpa[tSelect], tData$ztest[tSelect]) tSelect <- incomecat=="40k-50k" t3 <- lowess(tData$hscgpa[tSelect], tData$ztest[tSelect]) tSelect <- incomecat==">50k" t4 <- lowess(tData$hscgpa[tSelect], tData$ztest[tSelect]) # --------------------------------------------------------------------- # Paste the result back together into a dataframe suitable # for a 3-D interpolated surface tIncome <- c(rep(16,length(t1$x)), rep(36,length(t2$x)), rep(45,length(t3$x)), rep(80,length(t4$x))) tData3 <- data.frame(incomecat=tIncome, hscgpa=c(t1$x, t2$x, t3$x, t4$x), ztest=c(t1$y, t2$y, t3$y, t4$y)) # --------------------------------------------------------------------- # Create a splined interpolation of the lowess smooths tInt1 <- interp(tData3$incomecat, tData3$hscgpa, tData3$ztest, xo=seq(15,85,by=5), yo=seq(1.5, 5, by=.25)) # --------------------------------------------------------------------- # Calculate residuals from the lowess smooths and put in # a format suitable for 3-D plotting. tLen <- (length(tInt1$x)-1) * (length(tInt1$y)-1) tX <- rep(NA, tLen) tY <- rep(NA, tLen) tZ <- rep(NA, tLen) tR <- rep(NA, tLen) k <- 1 for (i in 1:(length(tInt1$x)-1)) { xlo <- tInt1$x[i] * 1000 xhi <- tInt1$x[i+1] * 1000 for (j in 1:(length(tInt1$y)-1)) { ylo <- tInt1$y[i] yhi <- tInt1$y[i+1] tSelect <- tData$fammedin >= xlo & tData$fammedin < xhi & tData$hscgpa >= ylo & tData$hscgpa < yhi tX[k] <- tInt1$x[i] tY[k] <- tInt1$y[j] tZ[k] <- tInt1$z[i,j] tR[k] <- mean(tData$ztest[tSelect] - tInt1$z[i,j]) k <- k + 1 } } tData4 <- data.frame(fammedin=tX, hscgpa=tY, ztest=tZ, resid=tR) # --------------------------------------------------------------------- # Quit the program q()