# MATH 6627 2012-13 Practicum in Statistical Consulting/Very Basic Regression in R

```###
###  Very Basic Regression in R
###  MATH 6627
###  September 5, 2012
###

# Artificial data
# Index of Heart Damage, Coffee Consumption and Stress in standardized units

# Packages supplying the non-base helpers used in this script.
library(car)      # some(), scatterplotMatrix()
library(lattice)  # splom()
# NOTE(review): xqplot() and wald() are presumably from the course's 'spida'
# package -- confirm, and adjust this line if they live elsewhere.
library(spida)

# Reading a table of data pasted into the script:
# the pasted text is written to a file and then read back as a data frame.
cat("

### Paste some data in here

", file = "coffee.dat")           # writes coffee.dat in the working directory

# The pasted text must begin with a header row naming the columns
# (Heart, Coffee and Stress are used below). Lines starting with '#'
# are treated as comments and skipped by read.table().
cf <- read.table("coffee.dat", header = TRUE)

cf          # show the data set
some(cf)    # random 10 lines

xqplot(cf)  # uniform quantile plots
xqplot(cf, ptype = 'normal')  # normal quantile plots

# Example: Creating a categorical variable with 'cut'
# Stress is split at 75 and 150 into three bands. cut() intervals are
# right-closed by default, so the bands are (-Inf,75], (75,150] and (150,Inf).

cf$Stress.class <- cut(cf$Stress, c(-Inf, 75, 150, Inf),
                       labels = c("Low", "Medium", "High"))
cf
xqplot(cf)  # quick look at data set --- not for formal reports

# 3 ways to produce roughly the same thing
pairs(cf)
scatterplotMatrix(cf)
splom(~cf)

# Note:
# we will see that even the complete set of 2-dimensional views does not
# necessarily capture the essential structure of a 3+ dimensional data set

# Regressions: Simple and Multiple

# Simple regression of Heart on Coffee
fit1 <- lm(Heart ~ Coffee, data = cf)
fit1           # short print: the call and the estimated coefficients
str(fit1)      # internal structure of the fitted-model object
summary(fit1)  # usual output
plot(fit1)     # basic diagnostics

# Simple regression of Heart on Stress
fit2 <- lm(Heart ~ Stress, data = cf)
summary(fit2)

# Regression on a factor (categorical variable) with more than 2 levels

fit2c <- lm(Heart ~ Stress.class, data = cf)
summary(fit2c)

# Testing the effect of "Stress.class"
# (presumably "Stress" is matched against the coefficient names -- see ?wald)

wald(fit2c, "Stress")

# Pairwise comparisons
# One named row per comparison; the three columns are taken to line up with
# the three coefficients of fit2c (intercept, then the two non-reference
# levels) -- confirm against coef(fit2c).

L <- matrix(c(0,  1, 0,
              0,  0, 1,
              0, -1, 1),
            nrow = 3, byrow = TRUE,
            dimnames = list(c("Med - Low", "High - Low", "High - Med"), NULL))
L
wald(fit2c, L)

# Multiple regression: Stress and Coffee in the same model

fit3 <- lm(Heart ~ Stress + Coffee, data = cf)
summary(fit3)

plot(fit3)  # some diagnostics

shapiro.test(resid(fit3))  # formal test of normality of residuals

#
# Note that the simple regression of Heart on Coffee
# shows that there is a statistically significant
# positive association of heart damage with coffee consumption.
#
# However, when controlling for 'Stress' the association
# is negative, although not statistically significant.
#
# Does this provide evidence that Coffee consumption may be harmful?
# Why would the sign of the coefficient change when we include Stress in the model?
#```