# ---------------------------------------------------------------------
# Program: Bivariate1.S
#  Author: Steven M. Boker
#    Date: Tue Oct 3 08:10:36 EDT 2006
#
# Bivariate data
#
# Reads the iris and student data sets, then writes a series of
# scatterplot PDFs (plain, jittered, conditioned, and with regression
# lines) to the working directory.
#
# ---------------------------------------------------------------------

# ----------------------------------------------
# Load required libraries

library(lattice)

# ----------------------------------------------
# Plotting helpers

# Open a PDF device on `file`, evaluate the plotting expression `code`,
# and close the device again -- even when `code` raises an error, so a
# failed plot does not leave a device open.
#   file:   output PDF file name
#   code:   plotting expression; evaluated lazily, after the device opens
#   height: page height passed to pdf()
#   width:  page width passed to pdf()
# Returns `file` invisibly.
with_pdf <- function(file, code, height = 5, width = 5) {
  pdf(file, height = height, width = width)
  on.exit(dev.off(), add = TRUE)
  force(code)
  invisible(file)
}

# Build a lattice axis-label list with the enlarged text size used in
# every figure of this script.
big_label <- function(text) {
  list(label = text, cex = 1.25)
}

# Tick-label size shared by every lattice figure below.
big_scales <- list(cex = 1.25)

# ------------------------------------------------
# Read the iris data.
# NOTE: this assignment shadows the built-in `iris` data set for the rest
# of the session; the name is kept because later code refers to it.

iris <- read.table("iris.dat", header = TRUE)
summary(iris)

# ------------------------------------------------
# Read the student data.
# Sourcing the dump file is expected to create `edaStudent`, which is
# used by the calls below.

source("edaStudent.sdd")
summary(edaStudent)

# ---------------------------------------------------------------------
# Scatterplots

# Base-graphics scatterplot of GPA against the entrance exam score.
with_pdf(
  "Biv1ZtestPlot1.pdf",
  plot(
    edaStudent$ztest, edaStudent$hscgpa,
    xlab = "Entrance Exam (Z-Score)",
    ylab = "High School Core GPA",
    type = "p",
    pch = 2
  )
)

# The same relationship drawn with lattice.
with_pdf(
  "Biv1HSCGPAXYPlot1.pdf",
  print(xyplot(
    hscgpa ~ ztest,
    data = edaStudent,
    aspect = 1,
    xlab = big_label("Entrance Exam (Z-Score)"),
    ylab = big_label("High School Core GPA"),
    scales = big_scales
  ))
)

# Exam score by sex, plotted without jitter.
with_pdf(
  "Biv1ZtestXYPlot0.pdf",
  print(xyplot(
    ztest ~ sex,
    data = edaStudent,
    aspect = 1,
    xlab = big_label("Sex"),
    ylab = big_label("Entrance Exam (Z-Score)"),
    scales = big_scales
  ))
)

# Exam score by sex, with horizontal jitter added to the numeric sex
# values to spread the points out.
with_pdf(
  "Biv1ZtestXYPlot1.pdf",
  print(xyplot(
    ztest ~ jitter(as.numeric(sex), factor = 0.5),
    data = edaStudent,
    aspect = 1,
    xlab = big_label("Sex"),
    ylab = big_label("Entrance Exam (Z-Score)"),
    scales = big_scales
  ))
)

# ---------------------------------------------------------------------
# Scatterplots with Likert style data

# Simulate two correlated variables and clamp both to the range [0, 7].
x <- rnorm(1000, mean = 4, sd = 1.5)
y <- 0.5 * x + rnorm(1000, mean = 2, sd = 2)
x[x < 0] <- 0
x[x > 7] <- 7
y[y < 0] <- 0
y[y > 7] <- 7

# Continuous (clamped) values.
with_pdf(
  "Biv1LikertPlot1.pdf",
  print(xyplot(
    y ~ x,
    aspect = 1,
    xlab = big_label("X"),
    ylab = big_label("Y"),
    scales = big_scales
  ))
)

# Round to whole numbers to mimic Likert-style responses.
likertX <- round(x)
likertY <- round(y)

# Rounded values: points pile up on the integer grid.
with_pdf(
  "Biv1LikertPlot2.pdf",
  print(xyplot(
    likertY ~ likertX,
    aspect = 1,
    xlab = big_label("X"),
    ylab = big_label("Y"),
    scales = big_scales
  ))
)

tLen <- length(likertX)

# Rounded values with uniform noise in [-0.5, 0.5] added to both axes.
with_pdf(
  "Biv1LikertPlot3.pdf",
  print(xyplot(
    (likertY + runif(tLen, min = -0.5, max = 0.5)) ~
      (likertX + runif(tLen, min = -0.5, max = 0.5)),
    aspect = 1,
    xlab = big_label("X"),
    ylab = big_label("Y"),
    scales = big_scales
  ))
)

# ---------------------------------------------------------------------
# Scatterplots conditioned on a factor

with_pdf(
  "Biv1HSCGPAXYPlot2.pdf",
  print(xyplot(
    hscgpa ~ ztest | sex,
    data = edaStudent,
    aspect = 1,
    xlab = big_label("Entrance Exam (Z-Score)"),
    ylab = big_label("High School Core GPA"),
    scales = big_scales
  ))
)

# ---------------------------------------------------------------------
# Scatterplots conditioned on a factor

# In `y ~ x` the left-hand side is drawn on the vertical axis, so length
# is the y variable and width the x variable; the labels follow that.
with_pdf(
  "Biv1IrisPlot1.pdf",
  print(xyplot(
    petal.length ~ petal.width | variety,
    data = iris,
    aspect = 1,
    xlab = big_label("Petal Width"),
    ylab = big_label("Petal Length"),
    scales = big_scales
  )),
  height = 7, width = 7
)

with_pdf(
  "Biv1IrisPlot2.pdf",
  print(xyplot(
    sepal.length ~ sepal.width | variety,
    data = iris,
    aspect = 1,
    xlab = big_label("Sepal Width"),
    ylab = big_label("Sepal Length"),
    scales = big_scales
  )),
  height = 7, width = 7
)

# ---------------------------------------------------------------------
# Scatterplots conditioned on a continuous variable

# cut(pposths, 6) bins the continuous variable into six intervals, one
# panel per interval. The title text is kept exactly as found, including
# the line break after "vs.".
with_pdf(
  "Biv1HSCGPAXYPlot3.pdf",
  print(xyplot(
    hscgpa ~ ztest | cut(pposths, 6),
    data = edaStudent,
    aspect = 1,
    main = list(
      "HSCGPA vs. \nZTEST \n Cut by Probability of Post HS in Neighborhood",
      cex = 1.25
    ),
    xlab = big_label("Entrance Exam (Z-Score)"),
    ylab = big_label("High School Core GPA"),
    scales = big_scales
  )),
  height = 6, width = 8
)

# ---------------------------------------------------------------------
# Estimating a linear model

tLM <- lm(hscgpa ~ ztest, data = edaStudent)
summary(tLM)

# ---------------------------------------------------------------------
# Plotting a regression line

# panel.lmline() fits y ~ x on the points handed to the panel; with a
# single panel that is the whole data set.
with_pdf(
  "Biv1HSCGPAXYPlot4.pdf",
  print(xyplot(
    hscgpa ~ ztest,
    data = edaStudent,
    panel = function(x, y) {
      panel.xyplot(x, y)
      panel.lmline(x, y)
    },
    aspect = 1,
    xlab = big_label("Entrance Exam (Z-Score)"),
    ylab = big_label("High School Core GPA"),
    scales = big_scales
  ))
)

# ---------------------------------------------------------------------
# Plotting a regression line conditioned on another variable

# Each panel gets the regression line fitted to its own subset of points.
# Refitting on the full `edaStudent` data inside the panel function would
# draw the same global line in every panel. panel.lmline() also skips
# panels with fewer than two points instead of failing.
with_pdf(
  "Biv1HSCGPAXYPlot5.pdf",
  print(xyplot(
    hscgpa ~ ztest | cut(pposths, 6),
    data = edaStudent,
    panel = function(x, y) {
      panel.xyplot(x, y)
      panel.lmline(x, y)
    },
    aspect = 1,
    xlab = big_label("Entrance Exam (Z-Score)"),
    ylab = big_label("High School Core GPA"),
    scales = big_scales
  )),
  height = 6, width = 8
)