# --------------------------------------------------------------------- # Program: Bivariate2.R # Author: Steven M. Boker # Date: Mon Oct 22 18:04:50 EDT 2007 # # Bivariate data part deux # # --------------------------------------------------------------------- # ---------------------------------------------- # Load required libraries library(lattice) # ------------------------------------------------ # Read the student data. source("edaStudent.sdd") summary(edaStudent) # --------------------------------------------------------------------- # Estimating a linear model tLM <- lm(hscgpa ~ ztest, data=edaStudent) summary(tLM) # --------------------------------------------------------------------- # Plotting a regression line pdf("Biv2ZtestGPA1.pdf", height=5, width=5) print(xyplot(hscgpa ~ ztest, data = edaStudent, panel = function(x, y) { panel.xyplot(x, y) panel.abline(lm(hscgpa ~ ztest, data = edaStudent)) }, aspect=1, xlab = list(label="Entrance Exam (Z-Score)",cex=1.25), ylab=list(label="High School Core GPA",cex=1.25), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # Another way of plotting a regression line pdf("Biv2ZtestGPA2.pdf", height=5, width=5) plot(c(-3,3), c(1,5), type="n" , xlab="Entrance Exam (Z-Score)", ylab="High School Core GPA") lines(edaStudent\$ztest, edaStudent\$hscgpa, type="p", col="blue", pch=1) abline(lm(hscgpa ~ ztest, data = edaStudent)) dev.off() # --------------------------------------------------------------------- # Another way of plotting a regression line conditioned on a Factor pdf("Biv2ZtestGPASex1a.pdf", height=5, width=5) tSelect <- !is.na(edaStudent\$sex) & edaStudent\$sex=="Female" plot(c(-3,3), c(1,5), type="n" , xlab="Entrance Exam (Z-Score)", ylab="High School Core GPA") lines(edaStudent\$ztest[tSelect], edaStudent\$hscgpa[tSelect], type="p", col="blue", pch=1) abline(lm(hscgpa ~ ztest, data = edaStudent, subset=tSelect), col="blue") lines(1.4, 2.25, col="blue", pch=1, type="p") text(1.6, 2.25, "Female", col="blue", adj=0) dev.off() pdf("Biv2ZtestGPASex1b.pdf", height=5, width=5) tSelect <- !is.na(edaStudent\$sex) & edaStudent\$sex=="Female" plot(c(-3,3), c(1,5), type="n" , xlab="Entrance Exam (Z-Score)", ylab="High School Core GPA") lines(edaStudent\$ztest[tSelect], edaStudent\$hscgpa[tSelect], type="p", col="blue", pch=1) abline(lm(hscgpa ~ ztest, data = edaStudent, subset=tSelect), col="blue") tSelect <- !is.na(edaStudent\$sex) & edaStudent\$sex=="Male" lines(edaStudent\$ztest[tSelect], edaStudent\$hscgpa[tSelect], type="p", col="green", pch=2) abline(lm(hscgpa ~ ztest, data = edaStudent, subset=tSelect), col="green") lines(1.4, 2.25, col="blue", pch=1, type="p") text(1.6, 2.25, "Female", col="blue", adj=0) lines(1.4, 1.75, col="green", pch=2, type="p") text(1.6, 1.75, "Male", col="green", adj=0) dev.off() pdf("Biv2ZtestGPASex1.pdf", height=5, width=5) tSelect <- !is.na(edaStudent\$sex) & edaStudent\$sex=="Female" plot(c(-3,3), c(1,5), type="n" , xlab="Entrance Exam (Z-Score)", ylab="High School Core GPA") lines(edaStudent\$ztest[tSelect], edaStudent\$hscgpa[tSelect], type="p", col="blue", pch=1) abline(lm(hscgpa ~ ztest, data = edaStudent, subset=tSelect), col="blue") tSelect <- !is.na(edaStudent\$sex) & edaStudent\$sex=="Male" lines(edaStudent\$ztest[tSelect], edaStudent\$hscgpa[tSelect], type="p", col="green", pch=2) abline(lm(hscgpa ~ ztest, data = edaStudent, subset=tSelect), col="green") tSelect <- is.na(edaStudent\$sex) lines(edaStudent\$ztest[tSelect], edaStudent\$hscgpa[tSelect], type="p", col="red", pch=3) abline(lm(hscgpa ~ ztest, data = edaStudent, subset=tSelect), col="red") lines(1.4, 2.25, col="blue", pch=1, type="p") text(1.6, 2.25, "Female", col="blue", adj=0) lines(1.4, 1.75, col="green", pch=2, type="p") text(1.6, 1.75, "Male", col="green", adj=0) lines(1.4, 1.25, col="red", pch=2, type="p") text(1.6, 1.25, "Missing", col="red", adj=0) dev.off() # --------------------------------------------------------------------- # Residuals from a linear model tRes <- lm(hscgpa ~ ztest, data=edaStudent)\$residuals summary(tRes) pdf("Biv2QQResidZtestGPA1.pdf", height=5, width=5) print(qqmath(~ tRes, distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.25), ylab=list(label="Residuals",cex=1.25), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # Residuals from a multiple regression tLM <- lm(hscgpa ~ ztest + totunits, data=edaStudent) summary(tLM) pdf("Biv2QQResidZtestGPA2.pdf", height=5, width=5) print(qqmath(~ tLM\$residuals, distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.25), ylab=list(label="Residuals",cex=1.25), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # Scatterplot Matrix pdf("Biv2MatrixPlot.pdf", height=5, width=5) tFrame <- data.frame(hscgpa=edaStudent\$hscgpa, ztest=edaStudent\$ztest, totunits=edaStudent\$totunits, sex=edaStudent\$sex) pairs(tFrame, cex=.5, col="seagreen") dev.off() pdf("Biv2MatrixPlotb.pdf", height=5, width=5) tFrame <- data.frame(hscgpa=edaStudent\$hscgpa, ztest=edaStudent\$ztest, totunits=edaStudent\$totunits, sex=as.numeric(edaStudent\$sex)+runif(length(edaStudent\$sex),-.5,+.5)) pairs(tFrame, cex=.5, col="seagreen") dev.off() # --------------------------------------------------------------------- # Create a data matrix and plot residuals from a principal components tMatrix <- cbind(edaStudent\$hscgpa, edaStudent\$ztest, edaStudent\$totunits) cor(tMatrix) # --------------------------------------------------------------------- # The principal roots and vectors of the data matrix tMatrix tSVD <- svd(tMatrix) tSVD # --------------------------------------------------------------------- # Create the first principal component matrix of the data matrix pc1tMatrix <- tSVD\$u[,1] %*% as.matrix(tSVD\$d[1]) %*% t(tSVD\$v[,1]) # --------------------------------------------------------------------- # Pairs Scatterplot pdf("Biv2PCAResidPlot0.pdf", height=5, width=5) pairs(tMatrix, labels=c("hscgpa", "ztest", "totunits"), cex=.75, col="skyblue") dev.off() pdf("Biv2PCAResidPlot1.pdf", height=5, width=5) pairs(tMatrix - pc1tMatrix, labels=c("hscgpa", "ztest", "totunits"), cex=.75, col="skyblue") dev.off() # --------------------------------------------------------------------- # Create the second principal component matrix of the data matrix pc2tMatrix <- tSVD\$u[,2] %*% as.matrix(tSVD\$d[2]) %*% t(tSVD\$v[,2]) # --------------------------------------------------------------------- # Pairs Scatterplot pdf("Biv2PCAResidPlot2.pdf", height=5, width=5) pairs(tMatrix - (pc1tMatrix + pc2tMatrix), labels=c("hscgpa", "ztest", "totunits"), cex=.75, col="skyblue") dev.off()