# ------------------------------------------------------ # Program: VectorFields2.R # Author: Steven M. Boker # Date: Thu Dec 2 09:49:03 EST 2004 # # Vector Field plotting part 2 # # ------------------------------------------------------ # ------------------------------------------------------ # Create and plot two autoregressive time series tX1 <- rep(NA, 201) tX1[1] <- 90 for (i in 2:201) { tX1[i] <- .98 * tX1[i-1] } tX1 <- 100 - tX1 + rnorm(201, mean=0, sd=10) tX2 <- rep(NA, 201) tX2[1] <- 90 for (i in 2:201) { tX2[i] <- .99 * tX2[i-1] } tX2 <- 100 - tX2 + rnorm(201, mean=0, sd=10) tX3 <- rep(NA, 201) tX3[1] <- 90 for (i in 2:201) { tX3[i] <- .995 * tX3[i-1] } tX3 <- 100 - tX3 + rnorm(201, mean=0, sd=10) pdf("VF2ARLongPlot1.pdf", height=6.4,width=7.5) plot(c(0,70), c(0,120), type="n" , xlab="Age", ylab="Total Score", cex.lab=1.75, cex.axis=1.5, cex.main=1.75, cex=1.75, mgp=c(2.5,.75,0)) lines(c(0:200)/3, tX1, type="l", lty=1, col=1) lines(c(0:200)/3, tX2, type="l", lty=1, col=2) lines(c(0:200)/3, tX3, type="l", lty=1, col=4) dev.off() # ------------------------------------------------------ # Create a timelagged dataframe (3-D Embedding) tau <- 15 tX <- c(tX1, rep(NA,2*tau+1), tX2, rep(NA,2*tau+1), tX3) tGroup <- c(rep(1, length(tX1)), rep(NA,2*tau+1), rep(2, length(tX2)), rep(NA,2*tau+1), rep(4, length(tX3)), rep(NA,2*tau+1)) tAge <- c(1:length(tX1), rep(NA,2*tau+1), 1:length(tX2), rep(NA,2*tau+1), 1:length(tX3))/3 tLen <- length(tX) ageData1 <- data.frame(x1=tX[1:(tLen-(2*tau))], x2=tX[(1+tau):(tLen-tau)], x3=tX[(1+(2*tau)):tLen], age=tAge[1:(tLen-(2*tau))], group=tGroup[1:(tLen-(2*tau))]) # ------------------------------------------------------ # Plot the first two columns as if they were a panel study with two # waves of measurement spaced 15 days apart pdf("VF2ARLagged1.pdf", height=6.4,width=7.5) plot(c(0,201)/3, c(0,120), type="n" , xlab="Age", ylab="Total Score", cex.lab=1.75, cex.axis=1.5, cex.main=1.75, cex=1.75, mgp=c(2.5,.75,0)) for (i in seq(1,length(ageData1$x1),by=2)) { if (is.na(ageData1$age[i]) | ageData1$age[i] > (68-tau)) { next } lines(c(ageData1$age[i],ageData1$age[i]+tau), c(ageData1$x1[i],ageData1$x2[i]), type="l", lty=1) } dev.off() pdf("VF2ARLagged1b.pdf", height=6.4,width=7.5) plot(c(0,201)/3, c(0,120), type="n" , xlab="Age", ylab="Total Score", cex.lab=1.75, cex.axis=1.5, cex.main=1.75, cex=1.75, mgp=c(2.5,.75,0)) for (i in seq(1,length(ageData1$x1),by=2)) { if (is.na(ageData1$age[i]) | ageData1$age[i] > (68-tau)) { next } lines(c(ageData1$age[i],ageData1$age[i]+tau), c(ageData1$x1[i],ageData1$x2[i]), type="l", lty=1, col=ageData1$group[i]) } dev.off() # ------------------------------------------------------ # Plot the three columns as if they were a panel study with three # waves of measurement spaced 15 days apart pdf("VF2ARLong2.pdf", height=6.4,width=7.5) plot(c(0,201)/3, c(0,120), type="n" , xlab="Age", ylab="Total Score", cex.lab=1.75, cex.axis=1.5, cex.main=1.75, cex=1.75, mgp=c(2.5,.75,0)) for (i in seq(1,length(ageData1$x1),by=2)) { if (is.na(ageData1$age[i]) | ageData1$age[i] > (68-(2*tau))) { next } lines(c(ageData1$age[i], ageData1$age[i]+tau, ageData1$age[i]+(2*tau)), c(ageData1$x1[i], ageData1$x2[i], ageData1$x3[i]), type="l", lty=1) } dev.off() pdf("VF2ARLong2b.pdf", height=6.4,width=7.5) plot(c(0,201)/3, c(0,120), type="n" , xlab="Age", ylab="Total Score", cex.lab=1.75, cex.axis=1.5, cex.main=1.75, cex=1.75, mgp=c(2.5,.75,0)) for (i in seq(1,length(ageData1$x1),by=2)) { if (is.na(ageData1$age[i]) | ageData1$age[i] > (68-(2*tau))) { next } lines(c(ageData1$age[i], ageData1$age[i]+tau, ageData1$age[i]+(2*tau)), c(ageData1$x1[i], ageData1$x2[i], ageData1$x3[i]), type="l", lty=1, col=ageData1$group[i]) } dev.off() # ------------------------------------------------------ # Calculate the slope and midpoint for each pair from the # first two columns of ageData1 ageMid <- ageData1$age + (tau/2) scoreMid <- (ageData1$x1 + ageData1$x2) / 2 slopeMid <- (ageData1$x2 - ageData1$x1) / tau tSelect <- !is.na(ageMid) & !is.na(scoreMid) & !is.na(slopeMid) ageSlopeFrame <- data.frame( age = ageMid[tSelect], score = scoreMid[tSelect], slope = slopeMid[tSelect]) # ------------------------------------------------------ # Calculate a loess smoothed slope surface slopeLoess <- loess(slope ~ age * score, data=ageSlopeFrame) ageSlopeFrame2 <- data.frame(age=ageSlopeFrame$age, score=ageSlopeFrame$score, slope=ageSlopeFrame$slope, fitted=predict(slopeLoess)) # ------------------------------------------------------ # Calculate a slope field of the fitted slopes smoothX <- seq(0, 75, by=6) smoothY <- seq(0, 120, by=8) smoothP <- matrix(NA, length(smoothX), length(smoothY)) smoothZ <- matrix(NA, length(smoothX), length(smoothY)) h <- 10 for (i in 1:length(smoothX)) { x <- smoothX[i] t1x <- abs(x - ageSlopeFrame2$age)/h for (j in 1:length(smoothY)) { y <- smoothY[j] t1y <- abs(y - ageSlopeFrame2$score)/h t2 <- rep(0, length(t1x)) t2[t1x < 1 & t1y < 1] <- 1/2 smoothP[i,j] <- sum(t2)/(length(ageSlopeFrame2$age)*h) smoothZ[i,j] <- mean(ageSlopeFrame2$fitted[t1x < 1 & t1y < 1]) } } smoothP <- 100 * (smoothP/sum(smoothP)) smoothSlopeFrame <- data.frame( age=rep(smoothX,length(smoothY)), score=rep(smoothY,each=length(smoothX)), density=c(smoothP), slope=c(smoothZ)) # ------------------------------------------------------ # Plot a slope field of the fitted slopes pdf("VF2ARSlope1.pdf", height=6.4,width=7.5) plot(c(0,220)/3, c(0,121), type="n", xlab="Age", ylab="Total Score", cex.lab=1.75, cex.axis=1.5, cex.main=1.75, cex=1.75, mgp=c(2.5,.75,0)) scale <- 2.5 for (i in 1:length(smoothSlopeFrame$age)) { tx <- smoothSlopeFrame$age[i] ty <- smoothSlopeFrame$score[i] ta <- smoothSlopeFrame$slope[i] td <- smoothSlopeFrame$density[i] txinc <- (scale * td) / sqrt( 1 + (ta^2)) tyinc <- ta * txinc lines(c(tx+txinc, tx-txinc), c(ty+tyinc, ty-tyinc)) } dev.off() # ------------------------------------------------------ # Plot a vector field of the fitted slopes pdf("VF2ARVector1.pdf", height=6.4,width=7.5) plot(c(0,220)/3, c(0,101), type="n", xlab="Age", ylab="Total Score", cex.lab=1.75, cex.axis=1.5, cex.main=1.75, cex=1.75, mgp=c(2.5,.75,0)) scale <- 2.5 for (i in 1:length(smoothSlopeFrame$age)) { tx <- smoothSlopeFrame$age[i] ty <- smoothSlopeFrame$score[i] ta <- smoothSlopeFrame$slope[i] td <- smoothSlopeFrame$density[i] txinc <- (scale * td) / sqrt( 1 + (ta^2)) tyinc <- ta * txinc tarrowlen <- sqrt(txinc^2 + tyinc^2) * .05 if (is.na(tarrowlen) | tarrowlen < .001) next arrows(tx-txinc, ty-tyinc, tx+txinc, ty+tyinc, length = tarrowlen) } dev.off() # ------------------------------------------------------ # Plot a vector field of the fitted slopes using the menus smoothSlopeFrame2 <- data.frame(age=smoothSlopeFrame$age, score=smoothSlopeFrame$score, slope=atan(smoothSlopeFrame$slope)*(180/3.14), density=smoothSlopeFrame$density*4) # ------------------------------------------------------ # Write a file suitable for use with svf tSelect <- !is.na(ageData1$x1) & !is.na(ageData1$x2) & !is.na(ageData1$age) tMatrix <- cbind(ageData1$age[tSelect], ageData1$x1[tSelect], ageData1$age[tSelect] + tau, ageData1$x2[tSelect]) write(t(round(tMatrix,4)), "ageData.dat", ncolumns=4) # ------------------------------------------------------ # Use Multidimensional vectors to visualize the edaStudent data source("edaStudent.sdd") smoothX <- seq(0, 14, by=2) smoothY <- seq(0.125, 1, by=.125) smoothP <- matrix(NA, length(smoothX), length(smoothY)) smoothZ <- matrix(NA, length(smoothX), length(smoothY)) smoothW <- matrix(NA, length(smoothX), length(smoothY)) smoothV <- matrix(NA, length(smoothX), length(smoothY)) hx <- 5/14 hy <- 5/1 for (i in 1:length(smoothX)) { x <- smoothX[i] t1x <- abs(x - (edaStudent$income/10000))*hx for (j in 1:length(smoothY)) { y <- smoothY[j] t1y <- abs(y - edaStudent$pposths)*hy t2 <- rep(0, length(t1x)) t2[t1x < 1 & t1y < 1 & !is.na(t1x) & !is.na(t1y)] <- 1/2 smoothP[i,j] <- sum(t2)/(length(edaStudent$income)) smoothZ[i,j] <- mean(edaStudent$ztest[t1x < 1 & t1y < 1 & !is.na(edaStudent$ztest) & !is.na(t1x) & !is.na(t1y)]) smoothW[i,j] <- mean(edaStudent$hscgpa[t1x < 1 & t1y < 1 & !is.na(edaStudent$hscgpa) & !is.na(t1x) & !is.na(t1y)]) smoothV[i,j] <- mean(edaStudent$totunits[t1x < 1 & t1y < 1 & !is.na(edaStudent$totunits) & !is.na(t1x) & !is.na(t1y)]) } } smoothP <- 100 * (smoothP/sum(smoothP)) smoothStudentFrame <- data.frame( income=rep(smoothX,length(smoothY)), pposths=rep(smoothY,each=length(smoothX)), density=c(smoothP), ztest=c(smoothZ), hscgpa=(c(smoothW)-3.03)/.52, totunits=(c(smoothV)-17.6)/2.05) # ------------------------------------------------------ # Plot a whisker field of the fitted slopes pdf("VF2StudentMVF.pdf", height=6.4,width=6.4) plot(c(0,14), c(0,1.0), type="n" , xlab="Income (in 10K)", ylab="Probability of Post HS", cex.lab=1.75, cex.axis=1.5, cex.main=1.75, cex=1.75, mgp=c(2.5,.75,0)) scale <- .1 for (i in 1:length(smoothSlopeFrame$age)) { tx <- smoothStudentFrame$income[i] ty <- smoothStudentFrame$pposths[i] td <- smoothStudentFrame$density[i] ta <- smoothStudentFrame$ztest[i] txinc <- (scale * td) / sqrt( 1 + (ta^2)) tyinc <- ta * txinc lines(c(tx), c(ty), type="p") lines(c(tx, tx+txinc), c(ty, ty+tyinc), type="l") tx1 <- tx+txinc ty1 <- ty+tyinc ta <- smoothStudentFrame$hscgpa[i] txinc <- (scale * td) / sqrt( 1 + (ta^2)) tyinc <- ta * txinc lines(c(tx1, tx1+txinc), c(ty1, ty1+tyinc), type="l") tx2 <- tx1+txinc ty2 <- ty1+tyinc ta <- smoothStudentFrame$totunits[i] txinc <- (scale * td) / sqrt( 1 + (ta^2)) tyinc <- ta * txinc lines(c(tx2, tx2+txinc), c(ty2, ty2+tyinc), type="l") } dev.off()