# --------------------------------------------------------------------- # Program: Transformations2.R # Author: Steven M. Boker # Date: Tue Oct 16 10:59:14 EDT 2007 # # More transformations and dealing with Missing Values # # --------------------------------------------------------------------- # ---------------------------------------------- # Load required libraries library(lattice) # --------------------------------------------------------------------- # Missing values (NA) # x5 is "right censored" x5 <- rnorm(300, mean=0, sd=1) round(summary(x5),3) x5NA <- x5 x5NA[x5 > .5] <- NA round(summary(x5NA),3) # x6 is "left censored" x6 <- rnorm(300, mean=0, sd=1) round(summary(x6),3) x6NA <- x6 x6NA[x6 < -.5] <- NA round(summary(x6NA),3) # x7 is "missing completely at random" x7 <- rnorm(300, mean=0, sd=1) round(summary(x7),3) tIndex <- floor(runif(200, min=1, max=300)) x7NA <- x7 x7NA[tIndex] <- NA round(summary(x7NA),3) # --------------------------------------------------------------------- # Boxplots of Missing Completely At Random pdf("Trans2BoxX7.pdf", height=5 ,width=6) boxplot(x7, x7NA, names=c("x7", "x7NA"), col="lightblue") dev.off() # --------------------------------------------------------------------- # Quantile-Normal Missing Completely At Random pdf("Trans2QQx7.pdf", height=5 ,width=9) print(qqmath(~ x | xtype, data=data.frame(x=c(x7,x7NA),xtype=as.factor(c(rep("Complete",300),rep("MCAR",300)))), distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.75), ylab=list(label="X Score",cex=1.75), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # Boxplots of Right Censored pdf("Trans2BoxX5.pdf", height=5 ,width=6) boxplot(x5, x5NA, names=c("x5", "x5NA"), col="lightblue") dev.off() # --------------------------------------------------------------------- # Quantile-Normal Right Censored pdf("Trans2QQx5.pdf", height=5 ,width=9) print(qqmath(~ x | xtype, data=data.frame(x=c(x5,x5NA),xtype=as.factor(c(rep("Complete",300),rep("Right Censored",300)))), distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.75), ylab=list(label="X Score",cex=1.75), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # Boxplots of Left Censored pdf("Trans2BoxX6.pdf", height=5 ,width=6) boxplot(x6, x6NA, names=c("x6", "x6NA"), col="lightblue") dev.off() # --------------------------------------------------------------------- # Quantile-Normal Left Censored pdf("Trans2QQx6.pdf", height=5 ,width=9) print(qqmath(~ x | xtype, data=data.frame(x=c(x6,x6NA),xtype=as.factor(c(rep("Complete",300),rep("Left Censored",300)))), distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.75), ylab=list(label="X Score",cex=1.75), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # Mean and variance of x7 mean(x7) var(x7) mean(x7NA) var(x7NA) # --------------------------------------------------------------------- # Oh-Oh. What do we do now? # R requires you to think about what to do about missing data. mean(x7, na.rm=TRUE) var(x7, na.rm=TRUE) mean(x7NA, na.rm=TRUE) var(x7NA, na.rm=TRUE) # --------------------------------------------------------------------- # But this only gives an unbiased answer if the data are MCAR. mean(x6, na.rm=TRUE) var(x6, na.rm=TRUE) mean(x6NA, na.rm=TRUE) var(x6NA, na.rm=TRUE) # --------------------------------------------------------------------- # What do is.na(x7) and !is.na(x7) do? Select missing and nonmissing. tSelect <- is.na(x7NA) tSelect tSelect <- !is.na(x7NA) tSelect # --------------------------------------------------------------------- # What is the percentage of missing values? round((length(x7NA[is.na(x7NA)]) / length(x7NA)), 3) # --------------------------------------------------------------------- # Right censoring is different than a ceiling effect # x5NA is "right censored" x5b <- x5 x5b[x5 > .5] <- .5 round(summary(x5b),3) pdf("Trans2QQx5b.pdf", height=5 ,width=15) print(qqmath(~ x | xtype, data=data.frame(x=c(x5,x5NA,x5b), xtype=as.factor(c(rep("Complete",300), rep("Right Censored",300), rep("Ceiling",300)))), distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, layout=c(3,1), xlab = list(label="Normal Distribution",cex=1.75), ylab=list(label="X Score",cex=1.75), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # Create Platykurtic and Leptokurtotic Data platyData <- rnorm(300, mean=0, sd=.25) + runif(300, min=-2, max=2) round(summary(platyData),3) leptoData <- rnorm(300, mean=0, sd=2)^3 / 100 round(summary(leptoData),3) # --------------------------------------------------------------------- # Create Histograms and Boxplots of Platykurtic and Leptokurtotic Data pdf("Trans2HistPlatLept.pdf", height=5 ,width=5) par(mfrow=c(2,1)) theBreaks <- seq(-4, 4, by=.25) hist(platyData, breaks=theBreaks, xlab="Platykurtosis") hist(leptoData, breaks=theBreaks, xlab="Leptokurtosis") dev.off() pdf("Trans2HistPlatLept2.pdf", height=5 ,width=9) print(histogram(~ x | xtype, data=data.frame(x=c(platyData,leptoData), xtype=as.factor(c(rep("Platykurtic",300),rep("Leptokurtic",300)))), layout=c(2,1), aspect=1, nint=20 )) dev.off() pdf("Trans2BoxPlatLept.pdf", height=5 ,width=5) boxplot(platyData, leptoData, names=c("Platykurtosis","Leptokurtosis"), col="lightblue") dev.off() # --------------------------------------------------------------------- # Create Quantile Normal Plots of Platykurtic and Leptokurtotic Data pdf("Trans2QQPlat.pdf", height=5 ,width=5) print(qqmath(~ platyData, distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.75), ylab=list(label="Platykurtotic Data",cex=1.75), scales=list(cex=1.25) ) ) dev.off() pdf("Trans2QQLept.pdf", height=5 ,width=5) print(qqmath(~ leptoData, distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.75), ylab=list(label="Leptokurtotic Data",cex=1.75), scales=list(cex=1.25) ) ) dev.off() pdf("Trans2QQPlatLept.pdf", height=5 ,width=5) print(qqmath(~ x | xtype, data=data.frame(x=c(platyData,leptoData), xtype=as.factor(c(rep("Platykurtic",300),rep("Leptokurtic",300)))), layout=c(2,1), distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution"), ylab=list(label="Data") ) ) dev.off()