# ---------------------------------------------------------------------
# Program: Univariate1.S
#  Author: Steven M. Boker
#    Date: Thu Sep 16 09:36:41 EST 2004
#
# Exploring univariate distributions part 1.
#   Histograms, boxplots, qqnormal plots
#
# The script is written so that it can be sourced in S-Plus or in R:
# the two platform-specific calls (graphsheet and lmsreg) are guarded
# in the portability section directly below.
#
# ---------------------------------------------------------------------

# ---------------------------------------------------------------------
# Portability helpers.

# Open a new on-screen graphics window with the given height and width.
# graphsheet() is only defined in S-Plus; when it is not visible the
# request is forwarded to dev.new() so the script also runs in R.
openGraphWindow <- function(height, width) {
  if (exists("graphsheet")) {
    graphsheet(height = height, width = width)
  } else {
    dev.new(height = height, width = width)
  }
  invisible(NULL)
}

# lmsreg() is built into S-Plus; in R it is provided by the MASS
# package, so attach MASS only when lmsreg is not already visible.
if (!exists("lmsreg")) {
  library(MASS)
}

# ---------------------------------------------------------------------
# Create a sample (N=201) from the normal distribution

x <- rnorm(201, mean = 0, sd = 1)
summary(x)

# ---------------------------------------------------------------------
# Sort the sample

xSorted <- sort(x)

# ---------------------------------------------------------------------
# Calculate median, hinges, interquartile range, fences and adjacent
#   values.
#
# With N=201 the median is the 101st ordered value and the hinges (the
#   medians of each half, the overall median included in both halves)
#   are the 51st and 151st ordered values.  These indices are only
#   correct for N=201.

xMedian <- xSorted[101]
xLowerHinge <- xSorted[51]
xUpperHinge <- xSorted[151]
xInterQuartile <- xUpperHinge - xLowerHinge

# The fences lie 1.5 interquartile ranges beyond the hinges.
xUpperFence <- xUpperHinge + (1.5 * xInterQuartile)
xLowerFence <- xLowerHinge - (1.5 * xInterQuartile)

# The adjacent values are the most extreme observations that are still
#   inside the fences (the whisker ends drawn by boxplot), not the
#   fences themselves.
xUpperAdjacent <- max(xSorted[xSorted <= xUpperFence])
xLowerAdjacent <- min(xSorted[xSorted >= xLowerFence])

# ---------------------------------------------------------------------
# Plot a histogram on the screen

openGraphWindow(height = 6.4, width = 7.5)
hist(x, nclass = 10,
     main = "Sample from Normal Distribution (N=201)",
     xlab = "Z-Score",
     ylab = "Count")

# ---------------------------------------------------------------------
# Plot the same histogram as a pdf file from R (not available in Splus)
#
# pdf("NormalHist1.pdf", height=6.4, width=7.5)
# hist(x, nclass=10,
#      main = "Sample from Normal Distribution (N=201)",
#      xlab = "Z-Score",
#      ylab = "Count")
# dev.off()

# ---------------------------------------------------------------------
# Plot a histogram on the screen and add Median, Hinges, and Adjacent
#   Values.

openGraphWindow(height = 6.4, width = 7.5)
hist(x, nclass = 10,
     main = "Sample from Normal Distribution (N=201)",
     xlab = "Z-Score",
     ylab = "Count")

# Draw one vertical reference line (from count 0 up to count 50) at
#   each summary value.
for (tMark in c(xMedian, xLowerHinge, xUpperHinge,
                xUpperAdjacent, xLowerAdjacent)) {
  lines(c(tMark, tMark), c(0, 50))
}

# ---------------------------------------------------------------------
# Plot a boxplot on screen

openGraphWindow(height = 6.4, width = 7.5)
boxplot(x,
        main = "Sample from Normal Distribution (N=201)",
        xlab = "X",
        ylab = "Z-Score")

# ---------------------------------------------------------------------
# Plot a cumulative proportion graph.

openGraphWindow(height = 6.4, width = 6.4)
# Set up empty axes first (type="n"), then add the points.
plot(c(-3, 3), c(0, 1),
     main = "Cumulative Proportions of Sample \n (N=201)",
     xlab = "Z-Score",
     ylab = "Cumulative Proportion",
     type = "n")
points(xSorted, (1:201) / 201)

# ---------------------------------------------------------------------
# Plot a QQ-Normal graph on screen

openGraphWindow(height = 6.4, width = 6.4)
qqnorm(x,
       main = "QQNORM plot of Normal Sample \n (N=201)",
       xlab = "Quantiles from Normal Distribution",
       ylab = "Quantiles from Sample")

# ---------------------------------------------------------------------
# Plot a QQ-Normal graph with a regression line.

# Compute the QQ coordinates without drawing.  The original S-Plus
#   spelling of the argument (plot) is kept; in R it reaches qqnorm's
#   plot.it argument through partial argument matching.
tQQ <- qqnorm(x, plot = FALSE)

openGraphWindow(height = 6.4, width = 6.4)
plot(tQQ$x, tQQ$y,
     main = "QQNORM plot of Normal Sample \n (N=201)",
     xlab = "Quantiles from Normal Distribution",
     ylab = "Quantiles from Sample")
abline(lmsreg(tQQ$x, tQQ$y))

# ---------------------------------------------------------------------
# Read the galaxy velocity data from Cleveland.
#   The file galaxy.dat must be in the current working directory and
#   must have a header row naming a "velocity" column.

galaxy <- read.table("galaxy.dat", header = TRUE)
tVelocity <- galaxy$velocity

# ---------------------------------------------------------------------
# Plot a histogram of galaxy velocity data.

openGraphWindow(height = 6.4, width = 7.5)
hist(tVelocity, nclass = 10,
     main = "Galaxy Velocities",
     xlab = "Velocity",
     ylab = "Number of Galaxies")

# ---------------------------------------------------------------------
# Plot a boxplot of the galaxy velocity data.

openGraphWindow(height = 6.4, width = 7.5)
boxplot(tVelocity,
        main = "Galaxy Velocities",
        xlab = "galaxy$velocity",
        ylab = "Velocity")

# ---------------------------------------------------------------------
# Plot cumulative proportions of the galaxy velocity data.

openGraphWindow(height = 6.4, width = 6.4)
plot(c(1400, 1800), c(0, 1),
     main = "Cumulative Proportions \n of Galaxy Velocities",
     xlab = "Velocity",
     ylab = "Cumulative Proportion",
     type = "n")
tLength <- length(tVelocity)
points(sort(tVelocity), (1:tLength) / tLength)

# ---------------------------------------------------------------------
# Plot QQ-Normal of the galaxy velocity data.

openGraphWindow(height = 6.4, width = 6.4)
qqnorm(tVelocity,
       main = "QQNORM plot of Galaxy Velocities",
       xlab = "Quantiles from Normal Distribution",
       ylab = "Quantiles from Galaxy Velocities")

# ---------------------------------------------------------------------
# Plot QQ-Normal with a regression line of the galaxy velocity data.

tQQ <- qqnorm(tVelocity, plot = FALSE)

openGraphWindow(height = 6.4, width = 6.4)
plot(tQQ$x, tQQ$y,
     main = "QQNORM plot of Galaxy Velocities",
     xlab = "Quantiles from Normal Distribution",
     ylab = "Quantiles from Galaxy Velocities")
abline(lmsreg(tQQ$x, tQQ$y))