# --------------------------------------------------------------------- # Program: Univariate1.R # Author: Steven M. Boker # Date: Tue Sep 18 11:11:32 EDT 2007 # # Exploring univariate distributions part 1. # Histograms, boxplots, qqnormal plots # # This version is for the R software. # # --------------------------------------------------------------------- # --------------------------------------------------------------------- # Load required libraries library(MASS) # Resistant regression library contains "lmsreg" # --------------------------------------------------------------------- # Create a sample (N=201) from the normal distribution x <- rnorm(201, mean=0, sd=1) summary(x) # --------------------------------------------------------------------- # Sort the sample xSorted <- sort(x) # --------------------------------------------------------------------- # Calculate median, hinges, interquartile range and adjacent values xMedian <- xSorted[101] xLowerHinge <- xSorted[51] xUpperHinge <- xSorted[151] xInterQuartile <- xUpperHinge - xLowerHinge xUpperAdjacent <- xUpperHinge + (1.5 * xInterQuartile) xLowerAdjacent <- xLowerHinge - (1.5 * xInterQuartile) # --------------------------------------------------------------------- # Plot a histogram as a pdf file pdf("NormalHist1.pdf", height=5, width=6) hist(x, nclass=10, main = "Sample from Normal Distribution (N=201)", xlab = "Z-Score", ylab = "Count", col = "light blue") dev.off() # --------------------------------------------------------------------- # Plot a histogram as a jpg file (only works if you have X11 installed) #jpeg("NormalHist1.jpg", height=480, width=640) #hist(x, nclass=10, # main = "Sample from Normal Distribution (N=201)", # xlab = "Z-Score", # ylab = "Count", # cex.lab=1.75, cex.axis=1.5, cex.main=1.75, # cex=1.75, mgp=c(2.5,.75,0)) #dev.off() # --------------------------------------------------------------------- # Plot a histogram and add Median, Hinges, and Adjacent Values. pdf("NormalHist2.pdf", height=5,width=6) hist(x, nclass=10, main = "Sample from Normal Distribution (N=201)", xlab = "Z-Score", ylab = "Count", col = "light blue") lines(c(xMedian, xMedian), c(0,50)) lines(c(xLowerHinge, xLowerHinge), c(0,50)) lines(c(xUpperHinge, xUpperHinge), c(0,50)) lines(c(xUpperAdjacent, xUpperAdjacent), c(0,50)) lines(c(xLowerAdjacent, xLowerAdjacent), c(0,50)) dev.off() # --------------------------------------------------------------------- # Plot a boxplot on pdf("NormalBoxplot1.pdf", height=5,width=6) boxplot(x, main = "Sample from Normal Distribution (N=201)", xlab = "X", ylab = "Z-Score", col = "light blue") dev.off() # --------------------------------------------------------------------- # Plot a cumulative proportion graph. pdf("NormalCumplot1.pdf", height=5,width=5) plot(c(-3,3), c(0,1), main = "Cumulative Proportions of Sample \n (N=201)", xlab = "Z-Score", ylab = "Cumulative Proportion", type="n") lines(xSorted, c(1:201)/201, type="p") dev.off() # --------------------------------------------------------------------- # Plot a QQ-Normal graph pdf("NormalQQNormal1.pdf", height=5,width=5) qqnorm(x, main = "QQNORM plot of Normal Sample \n (N=201)", xlab = "Quantiles from Normal Distribution", ylab = "Quantiles from Sample") dev.off() # --------------------------------------------------------------------- # Plot a QQ-Normal graph with a regression line. tQQ <- qqnorm(x, plot=F) pdf("NormalQQNormal2.pdf", height=5,width=5) plot(tQQ$x, tQQ$y, main = "QQNORM plot of Normal Sample \n (N=201)", xlab = "Quantiles from Normal Distribution", ylab = "Quantiles from Sample") abline(lmsreg(tQQ$x, tQQ$y)) dev.off() # --------------------------------------------------------------------- # Read the galaxy velocity data from Cleveland. galaxy <- data.frame(read.table("galaxy.dat", header=T)) summary(galaxy) length(galaxy$velocity) # --------------------------------------------------------------------- # Plot a histogram of galaxy velocity data. pdf("GalaxyHistogram1.pdf", height=5,width=6) hist(galaxy$velocity, nclass=10, main = "Galaxy Velocities", xlab = "Velocity", ylab = "Number of Galaxies", col = "light green") dev.off() # --------------------------------------------------------------------- # Plot a histogram of galaxy velocity data. pdf("GalaxyHistogram2.pdf", height=5,width=6) hist(galaxy$velocity, nclass=20, main = "Galaxy Velocities", xlab = "Velocity", ylab = "Number of Galaxies", col = "light green") dev.off() # --------------------------------------------------------------------- # Plot a boxplot of the galaxy velocity data. pdf("GalaxyBoxplot1.pdf", height=5,width=6) boxplot(galaxy$velocity, main = "Galaxy Velocities", xlab = "galaxy$velocity", ylab = "Velocity", col = "light green") dev.off() # --------------------------------------------------------------------- # Plot cumulative proportions of the galaxy velocity data. pdf("GalaxyCumplot1.pdf", height=5,width=5) plot(c(1400,1800), c(0,1), main = "Cumulative Proportions \n of Galaxy Velocities", xlab = "Velocity", ylab = "Cumulative Proportion", type="n") tLength <- length(galaxy$velocity) lines(sort(galaxy$velocity), c(1:tLength)/tLength, type="p") dev.off() # --------------------------------------------------------------------- # Plot QQ-Normal of the galaxy velocity data. pdf("GalaxyQQNormal1.pdf", height=5,width=5) qqnorm(galaxy$velocity, main = "QQNORM plot of Galaxy Velocities", xlab = "Quantiles from Normal Distribution", ylab = "Quantiles from Galaxy Velocities") dev.off() # --------------------------------------------------------------------- # Plot QQ-Normal with a regression line of the galaxy velocity data. tQQ <- qqnorm(galaxy$velocity, plot=F) pdf("GalaxyQQNormal2.pdf", height=5,width=5) plot(tQQ$x, tQQ$y, main = "QQNORM plot of Galaxy Velocities", xlab = "Quantiles from Normal Distribution", ylab = "Quantiles from Galaxy Velocities") abline(lmsreg(tQQ$x, tQQ$y)) dev.off()