# ------------------------------------------------ # Program: Univariate4.R # Author: Steven M. Boker # Date: Tue Sep 25 10:48:54 EDT 2007 # # Provides examples of the use of QQ plots for the # visualization of univariate distributions # # ------------------------------------------------ library(lattice) # ------------------------------------------------ # Create a sample from each of two Normal distributions # with different means and compare them using a QQ plot. x <- c(rnorm(300, mean=0, sd=1), rnorm(300, mean=2, sd=1)) # Create a matching length category vector xtype <- as.factor(c(rep(0,300), rep(1,300))) # Make the QQ plot pdf("Uni4QQNormSample1.pdf", height=5, width=5) print(qq(xtype ~ x, aspect=1, xlab = list(label="Mean = 0",cex=1.25), ylab=list(label="Mean = 2",cex=1.25), scales=list(cex=1.25) )) dev.off() # Same data as two histograms pdf("Uni4HistNormSample1.pdf", height=5, width=5) print(histogram(~ x | xtype, data=data.frame(x,xtype), layout=c(2,1), aspect=1, nint=20 )) dev.off() # ------------------------------------------------ # Create a sample from each of two Normal distributions # with different variance and compare them using a QQ plot. x <- c(rnorm(300, mean=0, sd=1), rnorm(300, mean=0, sd=2)) # Create a category vector xtype <- as.factor(c(rep(0,300), rep(1,300))) # Make the QQ plot pdf("Uni4QQNormSample2.pdf", height=5, width=5) print(qq(xtype ~ x, aspect=1, xlab = list(label="Variance = 1",cex=1.25), ylab=list(label="Variance = 4",cex=1.25), scales=list(cex=1.25) )) dev.off() # Same data as two histograms pdf("Uni4HistNormSample2.pdf", height=5, width=5) print(histogram(~ x | xtype, data=data.frame(x,xtype), layout=c(2,1), aspect=1, nint=20 )) dev.off() # ------------------------------------------------ # Create a sample from each of two Normal distributions # with different mean & variance and compare them # using a QQ plot. x <- c(rnorm(300, mean=0, sd=1), rnorm(300, mean=2, sd=2)) # Create a category vector xtype <- as.factor(c(rep(0,300), rep(1,300))) # Make the QQ plot pdf("Uni4QQNormSample3.pdf", height=5, width=5) print(qq(xtype ~ x, aspect=1, xlab = list(label="Mean = 0, Variance = 1",cex=1.25), ylab=list(label="Mean = 2, Variance = 4",cex=1.25), scales=list(cex=1.25) )) dev.off() # Same data as two histograms pdf("Uni4HistNormSample3.pdf", height=5, width=5) print(histogram(~ x | xtype, data=data.frame(x,xtype), layout=c(2,1), aspect=1, nint=20 )) dev.off() # ------------------------------------------------ # Create an unknown mixture distribution sample # from each of two Normal distributions # with different means and compare them # to the normal distribution using a QQ plot. x <- c(rnorm(300, mean=0, sd=1), rnorm(300, mean=6, sd=1)) # Create a category vector xtype <- as.factor(c(rep(0,300), rep(1,300))) # Make the QQ plot pdf("Uni4QQNormMixture1.pdf", height=5, width=5) print(qqmath(~ x, distribution=qnorm, data=data.frame(x, xtype), prepanel = prepanel.qqmathline, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.25), ylab=list(label="Mixture with Differing Means",cex=1.25), scales=list(cex=1.25) )) dev.off() # Same data as a histogram pdf("Uni4HistNormMixture1.pdf", height=5, width=5) print(histogram(~ x , data=data.frame(x,xtype), aspect=1, nint=20 )) dev.off() # ------------------------------------------------ # Create an unknown mixture distribution sample # with a smaller mean difference x <- c(rnorm(300, mean=0, sd=1), rnorm(300, mean=3, sd=1)) # Create a category vector xtype <- as.factor(c(rep(0,300), rep(1,300))) # Make the QQ plot pdf("Uni4QQNormMixture1b.pdf", height=5, width=5) print(qqmath(~ x, distribution=qnorm, data=data.frame(x, xtype), prepanel = prepanel.qqmathline, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.25), ylab=list(label="Mixture with Differing Means",cex=1.25), scales=list(cex=1.25) )) dev.off() # Same data as a histogram pdf("Uni4HistNormMixture1b.pdf", height=5, width=5) print(histogram(~ x , data=data.frame(x,xtype), aspect=1, nint=20 )) dev.off() # ------------------------------------------------ # Create an unknown mixture distribution sample # from each of two Normal distributions # with different mean & variance and compare them # to the normal distribution using a QQ plot. x <- c(rnorm(300, mean=0, sd=1), rnorm(300, mean=0, sd=4)) # Create a category vector xtype <- as.factor(c(rep(0,300), rep(1,300))) # Make the QQ plot pdf("Uni4QQNormMixture2.pdf", height=5, width=5) print(qqmath(~ x, distribution=qnorm, data=data.frame(x, xtype), prepanel = prepanel.qqmathline, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.25), ylab=list(label="Mixture with Differing Variances",cex=1.25), scales=list(cex=1.25))) dev.off() # Same data as a histogram pdf("Uni4HistNormMixture2.pdf", height=5, width=5) print(histogram(~ x , data=data.frame(x,xtype), aspect=1, nint=20 )) dev.off() # ------------------------------------------------ # Create a sample from each of two different types # of distributions and compare them using a QQ plot. x <- c(rnorm(300, mean=0, sd=1), rexp(300)) # Create a category vector xtype <- as.factor(c(rep(0,300), rep(1,300))) # Make the QQ plot pdf("Uni4QQNormExpSample1.pdf", height=5, width=5) print(qq(xtype ~ x, aspect=1, data=data.frame(x,xtype), xlab = list(label="Normal Distribution",cex=1.25), ylab=list(label="Exponential Distribution",cex=1.25), scales=list(cex=1.25) )) dev.off() # Same data as two histograms pdf("Uni4HistNormExpSample1.pdf", height=5, width=5) print(histogram(~ x | xtype, data=data.frame(x,xtype), layout=c(2,1), aspect=1, nint=20 )) dev.off() # Treat the exponential and normal as a single mixture distribution pdf("Uni4QQNormExpSample2.pdf", height=5, width=5) print(qqmath(~ x, distribution=qnorm, data=data.frame(x, xtype), prepanel = prepanel.qqmathline, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.25), ylab=list(label="Mixture of Normal and Exponential",cex=1.25), scales=list(cex=1.25))) dev.off() # Same data as a histogram pdf("Uni4HistNormExpSample2.pdf", height=5, width=5) print(histogram(~ x , data=data.frame(x,xtype), aspect=1, nint=20 )) dev.off() # ------------------------------------------------ # ------------------------------------------------ # # Residual analysis # # # ------------------------------------------------ # Read the iris data. iris <- data.frame(read.table("iris.dat", header=T)) summary(iris) dim(iris) # ------------------------------------------------ # Subtract a grand mean and create a boxplot. pdf("Uni4BoxIris1.pdf", height=5, width=5) print(bwplot(iris$variety ~ iris$sepal.length, aspect=1, xlab = list(label="Sepal Length (cm)",cex=1.25), scales=list(cex=1.25) )) dev.off() pdf("Uni4BoxIris2.pdf", height=5, width=5) newSepalLength <- iris$sepal.length - mean(iris$sepal.length) print(bwplot(iris$variety ~ newSepalLength, aspect=1, xlab = list(label="Sepal Length (cm)",cex=1.25), scales=list(cex=1.25) )) dev.off() # ------------------------------------------------ # Subtract marginal means and create a boxplot. oneway(sepal.length~variety, spread = 1, location=mean, data=iris) pdf("Uni4BoxIris3.pdf", height=5, width=5) print(bwplot(variety ~ oneway(sepal.length~variety)$residuals, data=iris, aspect=1, xlab = list(label="Sepal Length (cm)",cex=1.25), scales=list(cex=1.25) )) dev.off() # ------------------------------------------------ # Checking for homogeneity of residuals and pooled residuals. resSepLen <- oneway(sepal.length~variety, data=iris)$residuals pdf("Uni4QQIrisResid1.pdf", height=5, width=5) print(qqmath(~ resSepLen | iris$variety, distribution = substitute(function(p) quantile(resSepLen, p)), panel=function(x, ...){ panel.grid() panel.abline(0, 1) panel.qqmath(x, ...) }, aspect=1, layout=c(2,2), xlab = list(label="Pooled Residual Sepal Length (cm)",cex=1.25), ylab=list(label="Residual Sepal Length (cm)",cex=1.25), scales=list(cex=1.25))) dev.off() # ------------------------------------------------ # Checking for normality of pooled residuals. pdf("Uni4QQIrisResid2.pdf", height=5, width=5) print(qqmath(~ oneway(sepal.length~variety)$residuals, data = iris, prepanel = prepanel.qqmathline, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Unit Normal Quantiles",cex=1.25), ylab=list(label="Residual Sepal Length (cm)",cex=1.25), scales=list(cex=1.25))) dev.off() # ------------------------------------------------ # Checking for normality of residuals. pdf("Uni4QQIrisResid3.pdf", height=5, width=5) print(qqmath(~ oneway(sepal.length~variety)$residuals | variety, data = iris, prepanel = prepanel.qqmathline, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, layout=c(2,2), xlab = list(label="Unit Normal Quantiles",cex=1.25), ylab=list(label="Residual Sepal Length (cm)",cex=1.25), scales=list(cex=1.25))) dev.off()