### From Wiki1

###
### Very Basic Regression in R
### MATH 6627
### September 5, 2012
###
library(spidadev) # spida package
# Artificial data
# Index of Heart Damage, Coffee Consumption and Stress in standardized units
# Read a table of data pasted into the string literal below
cat("
### Paste some data in here
", file = "coffee.dat") # writes coffee.dat into the current working directory
# Spell out TRUE: the shorthand T is an ordinary variable that can be reassigned
cf <- read.table("coffee.dat", header = TRUE)
cf # show the data set
head(cf) # first 6 lines
some(cf) # a random sample of lines
xqplot(cf) # uniform quantile plots
xqplot(cf, ptype = "normal") # normal quantile plots
# Example: deriving a categorical variable from a numeric one with 'cut'
# Stress is binned at 75 and 150 into three labelled classes
cf$Stress.class <- cut(
  cf$Stress,
  breaks = c(-Inf, 75, 150, Inf),
  labels = c("Low", "Medium", "High")
)
cf
xqplot(cf) # quick look at the data set --- not for formal reports
# Three ways to draw roughly the same scatterplot matrix
pairs(cf)
scatterplotMatrix(cf)
splom(~ cf)
# Note:
# we will see that even the complete set of 2-dimensional views does not
# necessarily capture the essential structure of a 3+ dimensional data set
# Regressions: simple and multiple

# Simple regression of Heart on Coffee
fit1 <- lm(Heart ~ Coffee, data = cf)
fit1
str(fit1)
summary(fit1) # usual output
plot(fit1) # basic diagnostics

# Simple regression of Heart on Stress
fit2 <- lm(Heart ~ Stress, data = cf)
summary(fit2)

# Regression on a factor (categorical variable) with more than 2 levels
fit2c <- lm(Heart ~ Stress.class, data = cf)
summary(fit2c)

# Test of the effect of "Stress.class"
wald(fit2c, "Stress")
# Pairwise comparisons
# One contrast per row, filled row by row; rows are named, columns are not
L <- matrix(
  c(0,  1, 0,
    0,  0, 1,
    0, -1, 1),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(c("Med - Low", "High - Low", "High - Med"), NULL)
)
L
# Wald tests for the rows of L applied to the Stress.class model
wald(fit2c, L)

# Multiple regression: Heart on Stress and Coffee together
fit3 <- lm(Heart ~ Stress + Coffee, data = cf)
summary(fit3)
plot(fit3) # some diagnostics
avPlots(fit3) # added-variable plots
shapiro.test(resid(fit3)) # formal test of normality of residuals
#
# Note that the simple regression of Heart on Coffee
# shows that there is a statistically significant
# positive association of heart damage with coffee consumption.
#
# However, when controlling for 'Stress' the association
# is negative, although not statistically significant.
#
# Does this provide evidence that Coffee consumption may be harmful?
# Why would the sign of the coefficient change when we include Stress in the model?
#