# ---------------------------------------------------------------------
# Program: Transformations2.S  
#  Author: Steven M. Boker
#    Date: Tue Oct 5 13:11:53 EST 2004
#
#  More transformations and dealing with Missing Values
#
# ---------------------------------------------------------------------

# ---------------------------------------------------------------------
#  Missing values (NA)

#  x5 is "right censored"

x5 <- rnorm(300, mean=0, sd=1)
x5[x5 > .5] <- NA
summary(x5)

#  x6 is "left censored"

x6 <- rnorm(300, mean=0, sd=1)
x6[x6 < -.5] <- NA
summary(x6)

#  x7 is "missing completely at random"

x7 <- rnorm(300, mean=0, sd=1)
tIndex <- floor(runif(100, min=1, max=300))
x7[tIndex] <- NA
summary(x7)

# ---------------------------------------------------------------------
#  Boxplots of the three distributions

graphsheet(height=6.4,width=6.4)
boxplot(x5, x6, x7, names=c("x5", "x6", "x7"))

# ---------------------------------------------------------------------
#  Quantile-Normal plots for the distributions

graphsheet(height=6.4,width=6.4)
qqmath(~ x7, 
         distribution=qnorm,
         prepanel = prepanel.qqmathline, 
	 panel = function(x, y) {
	   panel.qqmathline(y, distribution = qnorm)
	   panel.qqmath(x, y)
	 },
         aspect=1,
         xlab = "Normal Distribution",
         ylab="x7")

graphsheet(height=6.4,width=6.4)
qqmath(~ x6, 
         distribution=qnorm,
         prepanel = prepanel.qqmathline, 
	 panel = function(x, y) {
	   panel.qqmathline(y, distribution = qnorm)
	   panel.qqmath(x, y)
	 },
         aspect=1,
         xlab = "Normal Distribution",
         ylab="x6")

graphsheet(height=6.4,width=6.4)
qqmath(~ x5, 
         distribution=qnorm,
         prepanel = prepanel.qqmathline, 
	 panel = function(x, y) {
	   panel.qqmathline(y, distribution = qnorm)
	   panel.qqmath(x, y)
	 },
         aspect=1,
         xlab = "Normal Distribution",
         ylab="x5")

# ---------------------------------------------------------------------
#  Mean and variance of x7

mean(x7)
var(x7)

# ---------------------------------------------------------------------
#  Oh-Oh.  What do we do now?  Use the is.na() function to deselect NAs

tSelect <- !is.na(x7)
mean(x7[tSelect])
var(x7[tSelect])

# ---------------------------------------------------------------------
#  What do is.na(x7) and !is.na(x7) do?  Select missing and nonmissing.

tSelect <- is.na(x7)
tSelect

tSelect <- !is.na(x7)
tSelect

# ---------------------------------------------------------------------
#  What is the percentage of missing values?

round(100 * (length(x7[is.na(x7)]) / length(x7)), 1)

# ---------------------------------------------------------------------
#  Right censoring is different than a ceiling effect

#  x5 is "right censored"

x5b <- rnorm(300, mean=0, sd=1)
x5b[x5b > .5] <- .5
summary(x5b)

graphsheet(height=6.4,width=6.4)
qqmath(~ x5b, 
         distribution=qnorm,
         prepanel = prepanel.qqmathline, 
	 panel = function(x, y) {
	   panel.qqmathline(y, distribution = qnorm)
	   panel.qqmath(x, y)
	 },
         aspect=1,
         xlab = "Normal Distribution",
         ylab="x5b")

# ---------------------------------------------------------------------
#  Create Platykurtic and Leptokurtotic Data

platyData <- rnorm(300, mean=0, sd=.25) + runif(300, min=-2, max=2)
summary(platyData)

leptoData <- rnorm(300, mean=0, sd=2)^3 / 100
summary(leptoData)

# ---------------------------------------------------------------------
#  Create Histograms and Boxplots of Platykurtic and Leptokurtotic Data

graphsheet(height=6.4,width=6.4)
par(mfrow=c(2,1))
theBreaks <- seq(-2.5, 2.5, by=.25)
hist(platyData, breaks=theBreaks, xlab="Platykurtosis")
hist(leptoData, breaks=theBreaks, xlab="Leptokurtosis")

graphsheet(height=6.4,width=6.4)
boxplot(platyData, leptoData, names=c("Platykurtosis","Leptokurtosis"))

# ---------------------------------------------------------------------
#  Create Quantile Normal Plots of Platykurtic and Leptokurtotic Data

graphsheet(height=6.4,width=6.4)
qqmath(~ platyData, 
         distribution=qnorm,
         prepanel = prepanel.qqmathline, 
	 panel = function(x, y) {
	   panel.qqmathline(y, distribution = qnorm)
	   panel.qqmath(x, y)
	 },
         aspect=1,
         xlab = "Normal Distribution",
         ylab="Platykurtotic Data")

graphsheet(height=6.4,width=6.4)
qqmath(~ leptoData, 
         distribution=qnorm,
         prepanel = prepanel.qqmathline, 
	 panel = function(x, y) {
	   panel.qqmathline(y, distribution = qnorm)
	   panel.qqmath(x, y)
	 },
         aspect=1,
         xlab = "Normal Distribution",
         ylab="Leptokurtotic Data")

# ---------------------------------------------------------------------
#  A function for a power transformation for data centered at zero

centeredPower <- function(dataVector, exponent) {
	tData <- dataVector
	tData[dataVector < 0] <- tData[dataVector < 0] * -1
	tData <- tData^(exponent)
	tData[dataVector < 0] <- tData[dataVector < 0] * -1
	return(tData)
}


# ---------------------------------------------------------------------
#  Create two variables x and x^2 and look at their histograms


x <- rnorm(300, mean=0, sd=1)
xSquared <- x^2

graphsheet(height=6.4,width=6.4)
par(mfrow=c(2,1))
hist(x)
hist(xSquared)

graphsheet(height=6.4,width=6.4)
qqmath(~ xSquared, 
         distribution=qnorm,
         prepanel = prepanel.qqmathline, 
	 panel = function(x, y) {
	   panel.qqmathline(y, distribution = qnorm)
	   panel.qqmath(x, y)
	 },
         aspect=1,
         xlab = "Normal Distribution",
         ylab="xSquared")

# ---------------------------------------------------------------------
#  Use the centeredPower transformation with a power of 2

x <- rnorm(300, mean=0, sd=1)
xSquared <- centeredPower(x, 2)

graphsheet(height=6.4,width=6.4)
par(mfrow=c(2,1))
hist(x)
hist(xSquared)

graphsheet(height=6.4,width=6.4)
qqmath(~ xSquared, 
         distribution=qnorm,
         prepanel = prepanel.qqmathline, 
	 panel = function(x, y) {
	   panel.qqmathline(y, distribution = qnorm)
	   panel.qqmath(x, y)
	 },
         aspect=1,
         xlab = "Normal Distribution",
         ylab="xSquared")


