# --------------------------------------------------------------------- # Program: Bivariate1.R # Author: Steven M. Boker # Date: Tue Oct 3 08:10:45 EDT 2006 # # Bivariate data # # --------------------------------------------------------------------- # ---------------------------------------------- # Load required libraries library(lattice) # --------------------------------------------------------------------- # Estimating a linear model tLM <- lm(hscgpa ~ ztest, data=edaStudent) summary(tLM) source("edaStudent.sdd") summary(edaStudent) # --------------------------------------------------------------------- # Plotting a regression line pdf("Biv2ZtestGPA1.pdf", height=5, width=5) print(xyplot(hscgpa ~ ztest, data = edaStudent, panel = function(x, y) { panel.xyplot(x, y) panel.abline(lm(hscgpa ~ ztest, data = edaStudent)) }, aspect=1, xlab = list(label="Entrance Exam (Z-Score)",cex=1.25), ylab=list(label="High School Core GPA",cex=1.25), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # Another way of plotting a regression line pdf("Biv2ZtestGPA2.pdf", height=5, width=5) plot(edaStudent$ztest, edaStudent$hscgpa, xlab="Entrance Exam (Z-Score)", ylab="High School Core GPA") abline(lm(hscgpa ~ ztest, data = edaStudent)) dev.off() # --------------------------------------------------------------------- # Another way of plotting a regression line conditioned on a Factor pdf("Biv2ZtestGPASex1.pdf", height=5, width=5) tSelect <- !is.na(edaStudent$sex) & edaStudent$sex=="Female" plot(c(-3,3), c(1,5), type="n" , xlab="Entrance Exam (Z-Score)", ylab="High School Core GPA") lines(edaStudent$ztest[tSelect], edaStudent$hscgpa[tSelect], type="p", col=1, pch=1) abline(lm(hscgpa ~ ztest, data = edaStudent, subset=tSelect), col=1) tSelect <- !is.na(edaStudent$sex) & edaStudent$sex=="Male" lines(edaStudent$ztest[tSelect], edaStudent$hscgpa[tSelect], type="p", col=2, pch=2) abline(lm(hscgpa ~ ztest, data = edaStudent, subset=tSelect), col=2) lines(1.4, 2, col=1, pch=1, type="p") text(1.6, 2, "Female", col=1, adj=0) lines(1.4, 1.5, col=2, pch=2, type="p") text(1.6, 1.5, "Male", col=2, adj=0) dev.off() # --------------------------------------------------------------------- # Residuals from a linear model tRes <- lm(hscgpa ~ ztest, data=edaStudent)$residuals summary(tRes) pdf("Biv2QQResidZtestGPA1.pdf", height=5, width=5) print(qqmath(~ tRes, distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.25), ylab=list(label="Residuals",cex=1.25), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # Residuals from a multiple regression tLM <- lm(hscgpa ~ ztest + totunits, data=edaStudent) summary(tLM) pdf("Biv2QQResidZtestGPA2.pdf", height=5, width=5) print(qqmath(~ tLM$residuals, distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.25), ylab=list(label="Residuals",cex=1.25), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # Scatterplot Matrix pdf("Biv2MatrixPlot.pdf", height=5, width=5) pairs(cbind(edaStudent$hscgpa, edaStudent$ztest, edaStudent$totunits), labels=c("hscgpa", "ztest", "totunits"), cex=.75) dev.off() # --------------------------------------------------------------------- # Create a data matrix and plot residuals from a principal components tMatrix <- cbind(edaStudent$hscgpa, edaStudent$ztest, edaStudent$totunits) cor(tMatrix) # --------------------------------------------------------------------- # The principal roots and vectors of the data matrix tMatrix tSVD <- svd(tMatrix) tSVD # --------------------------------------------------------------------- # Create the first principal component matrix of the data matrix pc1tMatrix <- tSVD$u[,1] %*% as.matrix(tSVD$d[1]) %*% t(tSVD$v[,1]) # --------------------------------------------------------------------- # Pairs Scatterplot pdf("Biv2PCAResidPlot.pdf", height=5, width=5) pairs(tMatrix - pc1tMatrix, labels=c("hscgpa", "ztest", "totunits"), cex=.75) dev.off()