# ---------------------------------------------------------------------
# Program: Bivariate1.S
#  Author: Steven M. Boker
#    Date: Tue Oct 12 13:12:35 EST 2004
#
# Bivariate data
#
# This script expects a data frame named edaStudent to exist already,
# containing the columns hscgpa, ztest, totunits and sex used below.
# graphsheet() is called before each figure to start a fresh 6.4 x 6.4
# graphics sheet.
# ---------------------------------------------------------------------

# ---------------------------------------------------------------------
# Estimating a linear model

tLM <- lm(hscgpa ~ ztest, data = edaStudent)
summary(tLM)

# ---------------------------------------------------------------------
# Plotting a regression line

graphsheet(height = 6.4, width = 6.4)
xyplot(hscgpa ~ ztest, data = edaStudent,
       panel = function(x, y) {
         # Draw the points, then overlay the fitted regression line.
         panel.xyplot(x, y)
         panel.abline(lm(hscgpa ~ ztest, data = edaStudent))
       },
       aspect = 1,
       xlab = "Entrance Exam (Z-Score)",
       ylab = "High School Core GPA")

# ---------------------------------------------------------------------
# Another way of plotting a regression line

graphsheet(height = 6.4, width = 6.4)
plot(edaStudent$ztest, edaStudent$hscgpa,
     xlab = "Entrance Exam (Z-Score)",
     ylab = "High School Core GPA")
abline(lm(hscgpa ~ ztest, data = edaStudent))

# ---------------------------------------------------------------------
# Another way of plotting a regression line conditioned on a Factor

graphsheet(height = 6.4, width = 6.4)

# Set up empty axes first so both groups share the same plotting region.
plot(c(-3, 3), c(1, 5), type = "n",
     xlab = "Entrance Exam (Z-Score)",
     ylab = "High School Core GPA")

# Female students: points and fitted line in colour 1.
# The !is.na() guard keeps missing values of sex out of the selection.
tSelect <- !is.na(edaStudent$sex) & edaStudent$sex == "Female"
lines(edaStudent$ztest[tSelect], edaStudent$hscgpa[tSelect],
      type = "p", col = 1, pch = 1)
abline(lm(hscgpa ~ ztest, data = edaStudent, subset = tSelect), col = 1)

# Male students: points and fitted line in colour 5.
tSelect <- !is.na(edaStudent$sex) & edaStudent$sex == "Male"
lines(edaStudent$ztest[tSelect], edaStudent$hscgpa[tSelect],
      type = "p", col = 5, pch = 2)
abline(lm(hscgpa ~ ztest, data = edaStudent, subset = tSelect), col = 5)

# Hand-built legend: one marker plus a label per group.
lines(1.4, 2, col = 1, pch = 1, type = "p")
text(1.6, 2, "Female", col = 1, adj = 0)
lines(1.4, 1.5, col = 5, pch = 2, type = "p")
text(1.6, 1.5, "Male", col = 5, adj = 0)

# ---------------------------------------------------------------------
# Residuals from a linear model

tRes <- lm(hscgpa ~ ztest, data = edaStudent)$residuals
summary(tRes)

graphsheet(height = 6.4, width = 6.4)
qqmath(~ tRes,
       distribution = qnorm,
       prepanel = prepanel.qqmathline,
       panel = function(x, y) {
         # Reference line first, then the quantile-quantile points.
         panel.qqmathline(y, distribution = qnorm)
         panel.qqmath(x, y)
       },
       aspect = 1,
       xlab = "Normal Distribution",
       ylab = "Residuals")

# ---------------------------------------------------------------------
# Residuals from a multiple regression

# NOTE: this reassigns tLM, replacing the simple regression fitted above.
tLM <- lm(hscgpa ~ ztest + totunits, data = edaStudent)
summary(tLM)

graphsheet(height = 6.4, width = 6.4)
qqmath(~ tLM$residuals,
       distribution = qnorm,
       prepanel = prepanel.qqmathline,
       panel = function(x, y) {
         panel.qqmathline(y, distribution = qnorm)
         panel.qqmath(x, y)
       },
       aspect = 1,
       xlab = "Normal Distribution",
       ylab = "Residuals")

# ---------------------------------------------------------------------
# Pairs Scatterplot

graphsheet(height = 6.4, width = 6.4)
pairs(cbind(edaStudent$hscgpa, edaStudent$ztest, edaStudent$totunits),
      labels = c("hscgpa", "ztest", "totunits"),
      cex = 0.5)

# ---------------------------------------------------------------------
# Create a data matrix and plot residuals from a principal components
# analysis

# NOTE(review): the columns are used as-is (no centring or scaling, no
# removal of missing values) -- confirm that is intended for this data.
tMatrix <- cbind(edaStudent$hscgpa, edaStudent$ztest, edaStudent$totunits)
cor(tMatrix)

# ---------------------------------------------------------------------
# The principal roots and vectors of the data matrix tMatrix

# Decompose tMatrix itself; the previous argument, svdtMatrix, is not
# defined anywhere in this script.
tSVD <- svd(tMatrix)
tSVD

# ---------------------------------------------------------------------
# Create the first principal component matrix of the data matrix

# Rank-one reconstruction: first left vector * first singular value *
# transposed first right vector, giving a matrix shaped like tMatrix.
pc1tMatrix <- tSVD$u[, 1] %*% as.matrix(tSVD$d[1]) %*% t(tSVD$v[, 1])

# ---------------------------------------------------------------------
# Pairs Scatterplot

# What remains of the data after removing the first component.
graphsheet(height = 6.4, width = 6.4)
pairs(tMatrix - pc1tMatrix,
      labels = c("hscgpa", "ztest", "totunits"),
      cex = 0.5)

dev.off()