# Mixed Models with R/Lab 2

Jump to: navigation, search
``` ###
###
###   Exercises for Mixed Models with R
###   Lab Session 2
###

```

'

## Schizophrenia within-group factor example

'

``` #  The following example illustrates the use of contextual
#  mean dummy variables for a within-group factor with 3 levels
#
#  Schizophrenia patients were assessed at 6 annual checkups.
#
#  At each checkup, the type of drug currently prescribed by
#  the treating physician was recorded.  There were 3 types of
#  drugs: Typical, Atypical and Clozapine.
#
#  Researchers are interested in using
#  this observational data to study
#  the effectiveness of the newest drug, Clozapine, particularly
#  comparing it with 'Typical' drugs, the established standard.
#
#  There are a number of 'outcome' measures (i.e. measures of the
#  severity of the illness) but only one, 'neg', is of interest
#  for now.
#

library( spida )
library( p3d )

?Drugs
some( Drugs )   # print a random sample of rows to get a feel for the data

# Which drug seems best to reduce 'neg' symptoms?
windows()

xyplot( neg ~  year | Subj, Drugs)

# Reorder subject panels by overall level of 'neg' so the trellis
# display reads from least to most severe
# (fix: the original had wiki-escaped '\$' which is not valid R)
dd <- Drugs
dd$id <- reorder( dd$Subj, dd$neg)
xyplot( neg ~ year | id, dd )
xyplot( neg ~ year | id, dd, groups = drug, auto.key = TRUE)

#
# QUESTIONS:
#     1) Can we perform OLS fits on each cluster?
#        Note that the data are balanced with respect to time
#        but not with respect to drugs.
#
#        Also note that Clozapine is more frequently given later in the study
#

# compare drugs:

#
# LME model
#

# Random-intercept model: drug effects pooled over within- and
# between-subject information
fit0 <- lme( neg ~ drug, dd, random = ~ 1 | id )
summary(fit0)
# Richer model: random intercept plus random within-subject drug effects.
# The optimizer needs more iterations to converge for this RE structure,
# hence the raised msMaxIter; msVerbose shows the iteration trace.
fit <- lme( neg ~ drug, dd, random = ~ 1 + drug | id,
            control = list(msMaxIter = 200, msVerbose = TRUE))
summary(fit)
```

"

### Comparing random effects model

"

``` anova(fit0,fit)
```

"

#### Simulating to calibrate p-values

"

``` sim <- simulate(fit0, m2 = fit, nsim =1000)
# Parametric-bootstrap simulation of the LRT comparing fit0 (null) with
# fit (alternative), used to calibrate the p-value: the tested variance
# parameters lie on a boundary, so the naive chi-square reference is
# conservative.

# Clozapine looks best

Ld <- Ldiff ( fit, "drug")   # hypothesis matrix to test differences between drugs
Ld
wald( fit, Ld )

# Same pairwise differences, expressed relative to 'Atypical'
Ld <- Ldiff ( fit,  "drug", ref = "Atypical")
wald( fit, Ld )

# Note: overall significance although individual p-values were not

# QUESTIONS:
#       1) Can you explain why 'drug' is significant overall although
#          neither p-value is significant in the summary output. Note that
#          this illustrates the danger in dropping multiple terms on the
#          basis of individual p-values.
#
#       2) How is clozapine doing ?
#          Is it better than the others as expected?
#          What explanation can you think of for the results?
#

#
# Hausman test: test whether contextual variables have a significant effect.
#
# cvar() builds the contextual (within-subject mean) variables for the
# drug dummies, one column per non-reference level
with( dd, cvar(drug,id))
fit2 <- update( fit, . ~ . + cvar( drug, id))
summary( fit2 )
wald( fit2, 'cvar')   # joint test of the contextual terms

#
# QUESTIONS:
#   1) What is the interpretation of the contextual variable for a
#      categorical effect:

head( cbind( dd['id'], model.matrix( fit2) ), 18 )

#
#   2) Compare results with and without cvar's
#
#   3) Compare cvar effect with corresponding raw var: what does this suggest??
#      E.g. 'Typical' and cvar(Typical)?
#

# Overall tests dropping the intercept term (index -1) in each model
wald( fit2, -1)
wald( fit , -1)

Ld <- Ldiff( fit2, "drug")

wald( fit2, Ld )  # compare Clozapine with Typical

#
# QUESTIONS:
#
#  1) Would it ever be appropriate to include a contextual variable even
#        though it isn't significant?  e.g. In this case if the p-value were
#        0.06, what would be the consequence of including it or excluding it?
#  2) Compare the SEs for the raw variables and for the contextual variable.
#        What could explain the difference?
#
# === EXERCISE: ===
#
#  1) Estimate the between-patient differences (compositional effects)
#     in these three drugs. Note that there are at least two ways of
#     doing this.
#
#  2) Carry out a similar analysis for general symptoms: 'gen'
#
```

'

## Controlling for time

'

```   #
# Taking time into account
#
# When the range of time is compact and similar for all subjects
# and when time is not expected to have a very different effect
# as time progresses, simple models for time are generally adequate
#
# In the next session, we will see much richer functions of
# time: splines, asymptotic models, Fourier analysis, etc.
#

# Add a linear year (time) trend to the contextual-variable model.
# Iteration limits are raised because the richer model converges slowly.
fit2l <- update( fit2, . ~ . + year,
                 control = list(msMaxIter = 2000, msMaxEval = 2000))
?lmeControl
summary( fit2l )
ww <- wald( fit2l )
wald( fit2l, 'cvar' )
wald( fit2l, 'drug' )
Ld <- Ldiff( fit2l, "drug")
wald( fit2l, Ld )    # What does this say about Clozapine ?

# Q: How do you explain the differences in the estimation of the
#    Typical - Clozapine comparison across the 3 models?
lapply(
  list("no ctx" = fit, "ctx" = fit2, "ctx+year" = fit2l),
  function( fit ) wald( fit, Ldiff( fit, 'drug'))
)
# Extract just the Typical - Clozapine row (row 3 of the coefficient
# table) from each wald object so the three fits can be compared side
# by side.
# NOTE(review): the original line was garbled by extraction
# ("...'drug'))12[3,,drop=FALSE]"); the extraction below is the most
# plausible intent -- confirm against the original lecture source.
clist <- lapply(
  list("no ctx" = fit, "ctx" = fit2, "ctx+year" = fit2l),
  function( fit ) wald( fit, Ldiff( fit, 'drug'))[[1]]$estimate[3, , drop = FALSE]
)
clist
do.call( rbind, clist)

#
#  EXERCISE:
#     Is there evidence that there is curvature in the effect of time?
#

#
# Level 1 and Level 2 diagnostics
#
```

'

## Level 1 diagnostics

'

```   #
# Level 1
#
windows()
plot( fit2l )                  # standardized residuals vs fitted values
plot( fit2l, id = .02)         # label residuals beyond the 2% normal tail
plot( fit2l, resid(.) ~ fitted(.) | drug, id = .02)
plot( fit2l, resid(.) ~ fitted(.) | drug * Sex, id = .02)

#
# Diagnostic for heteroskedasticity
#

plot( fit2l, sqrt( abs( resid(.))) ~ fitted(.), id = .02) # exploratory version

# fancy version: scale-location plot with a loess smooth to show any
# trend in residual spread
plot( fit2l, sqrt( abs( resid(.))) ~ fitted(.), id = .02, sub = "scale-location plot",
      ylab = expression( sqrt(abs(" resid "))),
      panel = function(x, y, ...){
        panel.xyplot( x, y, ...)
        panel.loess( x, y, ...)
      })
# id didn't work so we do it manually

trellis.focus()            # trellis = lattice ?!?
#   (the archaeology should be less visible)
# (fix: the original had wiki-escaped '\$' which is not valid R)
panel.identify(labels = dd$id)  # in RStudio quit by clicking 'finish' at upper right
trellis.unfocus()

# no strong pattern here so we wait until later to see what to do about it.

qqnorm( fit2l, id = .02 )
qqnorm( fit2l, ~ resid(.) | drug, id = .02 )

# We note 'M47', 'F36', 'F41' as suspicious cases

xyplot( neg ~ year | id, dd, groups = drug, auto.key = TRUE)   # not clear for lecture

show.settings()
td( pch = 15:17, col = c('blue','green4','red'))   # set plot symbols/colours per group
show.settings()

xyplot( neg ~ year | id, dd, groups = drug, auto.key = TRUE) # look for M47, F36 and F41
```

'

## Using the variogram as a diagnostic for autocorrelation

'

```   #
#  Variogram: diagnostics for autocorrelation
#  New level 1 diagnostic for longitudinal data
#

# Sample semivariogram of the within-subject residuals: for each pair of
# residuals on the same subject, half the variance of their difference
# is tabulated against their separation in time.
vg <- Variogram( fit2l, form = ~ year | id )
print( vg )   # semivariogram value by time distance
plot( vg )
# Interpretation: differences between pairs of residuals that are close
# in time have smaller variance than those that are far apart.

```

'

## Level 2 diagnostics

'

```  windows()
ranef(fit2l)   # Level 2 residuals (predicted random effects)
class( ranef (fit2l))
methods( class = 'ranef.lme' )
plot( ranef( fit2l ))
plot( ranef( fit2l ), ~ qqnorm(.))    # ERROR ???

# Merge the level-2 residuals with the subject-level data (one row per id)
RE <- cbind( ranef( fit2l), up( dd, ~ id))
# note ranef( fit2l, aug = TRUE) has a 'bug' and doesn't work if there's a matrix in data frame
head( RE )
xyplot( `(Intercept)` ~ age1 | Sex , RE)

# NOTE(review): the original read 'qqnorm( RE1)' but 'RE1' is undefined;
# the random-intercept column of RE is the likely target -- confirm.
qqnorm( RE[["(Intercept)"]] )

```

'

## Influential cases

'

``` dpo <- dropone(fit2l, ~ id)   # refit, dropping each subject in turn; check to see if original fit is used for start values
dpo
names(dpo)
# (fix: the original had wiki-escaped '\$' which is not valid R)
spm( dpo[, 8:16], labels = dpo$.drop, id.n = 4 )   # error! Can you guess why?
spm( dpo[, 8:15], labels = dpo$.drop, id.n = 4 )   # error! Can you guess why?

#
# Acting on diagnostics
# Should we drop some observations
#

fit2l.d <- update( fit2l, data = subset(dd, !(id %in% c("M47","F36","F41")) ))

# or, equivalently, via the subset argument

fit2l.d <- update( fit2l, subset = !(id %in% c("M47","F36","F41")) )
summary (fit2l.d)

#
# EXERCISE:
#     Compare results with fit2l.d and fit2l. Any substantial differences?

# In the following analysis, we keep the original data but this does not
# imply that this is the best course. If some outliers prove to have
# uncorrectable measurement errors, or if they are not in scope (some other
# diagnosis) then it would be appropriate to drop them.
#
```

'

## Autocorrelation

'

```   #
#  Incorporation of possible autocorrelation
#

#
#  Add a correlation structure to the R side (the within-group
#  residual covariance).
#  Most common:
#      corAR1:    autoregressive of order 1 for evenly spaced data
#      corCAR1:   continuous autoregressive of order 1 for
#                 unevenly spaced data
#      corARMA:   autoregressive moving average of variable order
#      -- other classes are mainly for spatial correlation
#      -- you can write your own but it's challenging
#  See: ?corClasses for a complete list
#

# Auto-regressive process of order 1, indexed by year within subject

fit2lc <- update( fit2l, correlation = corAR1( form = ~ year | id))
summary( fit2lc)

# do we need the autocorrelation?

intervals( fit2lc )   # look at CI for Phi (correlation)

# or

anova( fit2l, fit2lc ) # this test can be ok because there is no boundary at 0 for Phi
# but there is when using corCAR1.
# Parametric bootstrap of the LRT as a check on the asymptotic test
plot( simulate( fit2l, nsim = 1000, m2 = fit2lc))  # out of luck with fancier model
wald( fit2l )
wald( fit2lc )

Ld

# Compare the drug contrasts with and without the AR(1) structure
wald( fit2l, Ld )
wald( fit2lc, Ld )

#
# QUESTION: When would you expect results to be affected by
#        including AR in the model?
#
#    Note: AR (with positive autocorrelation) implies we expect obs.
#    close in time to be closer than expected under the assumption
#    of independence and observations that are far in time
#    to be farther.
#
#    Therefore a treatment difference between
#    adjoining times gets more weight
#    than one between distant times.
#
#    That explains why including autocorrelation can change estimated
#    differences and p-values.
#

#
#  EXERCISE:
#    Look at diagnostics for models predicting 'gen' instead of 'neg'
#
#
```

'

## Heteroscedasticity

'

``` #
# Heteroscedasticity revisited
#

# Model for 'gen' symptoms: drug, its contextual mean, random intercept
fitg <- lme( gen ~ drug + cvar(drug,id) , dd, random = ~ 1 | id)
summary( fitg )

# Add a linear year trend
fitgl <- lme( gen ~ drug + cvar(drug,id) + year, dd, random = ~ 1 | id)
summary( fitgl )

wald( fitgl, Ld)

# ... many diagnostics later ....
# Note: I believe there is an error in documentation and the default
# for resid is type = 'r' for 'raw' or 'response'. We prefer the
# standardized, 'pearson' residuals here.

# Scale-location plot: sqrt(|pearson residual|) vs fitted values
plot( fitgl, sqrt( abs( resid(., type = 'p'))) ~ fitted(.), id = .03)

# Same plot with a loess smooth to reveal any trend in spread
plot( fitgl, sqrt( abs( resid(., type = 'p'))) ~ fitted(.), id = .03,
panel = function(x, y, ...) {
panel.xyplot( x, y, ...)
panel.loess( x, y,...)
})

# Conditioning on Sex
plot( fitgl, sqrt( abs( resid(., type = 'p'))) ~ fitted(.) | Sex, id = .03,
panel = function(x, y, ...) {
panel.xyplot( x, y, ...)
panel.loess( x, y,...)
})

# Conditioning on drug
plot( fitgl, sqrt( abs( resid(., type = 'p'))) ~ fitted(.) | drug, id = .03,
panel = function(x, y, ...) {
panel.xyplot( x, y, ...)
panel.loess( x, y,...)
})
#
# Perhaps: increased variability with increase in predicted value
#    In practice, I wouldn't be too disturbed by this plot but
#    for pedagogical purposes, let's see what we could do to address this.
#
```

'

### Modelling heteroscedasticity

'

``` #
# Accounting for heteroskedasticity
#

#
#  We assume that the SD of residuals changes with the fitted value (variance covariate)
#
#  Common assumptions are that:
#     1) variance is proportional to an unknown power of the variance covariate
#         or
#     2) a unknown constant plus an unknown power of the covariate
#

# To see the full set of methods, available (you can also create your own):

?varClasses

# Here we illustrate varConstPower: residual SD modelled as a constant
# plus a power of the variance covariate (here the fitted value),
# stratified by drug

fitglh <- update( fitgl, weights = varConstPower( form =  ~fitted(.)| drug) )

summary(fitglh)

# Likelihood-ratio comparison of homoscedastic vs heteroscedastic models
anova( fitgl, fitglh )

plot( fitglh, sqrt( abs( resid(.,type = 'p'))) ~ fitted(.) | drug, id = .03,
panel = function(x, y, ...) {
panel.xyplot( x, y, ...)
panel.loess( x, y,...)
})
# note that 'pearson residuals' are plotted so that
# high variance residuals will be shrunk.
# with raw residuals we expect heteroskedasticity to look
# even worse because the high variance observations have
# less influence on the fit
# raw residuals:
plot( fitglh, sqrt( abs( resid(., type = 'r'))) ~ fitted(.) | drug, id = .03,
panel = function(x, y, ...) {
panel.xyplot( x, y, ...)
panel.loess( x, y,...)
})

# Same raw-residual plot for the homoscedastic model, for comparison
plot( fitgl, sqrt( abs( resid(., type = 'r'))) ~ fitted(.) | drug, id = .03,
panel = function(x, y, ...) {
panel.xyplot( x, y, ...)
panel.loess( x, y,...)
})

wald( fitgl, Ld)
wald( fitglh, Ld)

#
# Note that the impact of accounting for heteroskedasticity is not negligible.
#
# As usual, seeking higher efficiency has an impact on what we're estimating.
# Differences between drugs in patients at lower levels of 'gen' receive
# more weight than those at higher levels. The estimate has lower variance
# and is more precise but it might estimate something slightly different if
# there are differences in drug effects at different levels of severity.
# If this is an important question to address we could reformulate the model
# to attempt to take it into account although we might not have much power
# to detect such an effect.
#

# EXERCISE:
#    Refit 'fitglh' and 'fitgl' dropping a few residual outliers.
#    Retest the difference between the two models.
#    Does accounting for heteroskedasticity still improve the fit?
#    Might it be that heteroskedasticity was just accounting for a
#        few outliers instead of capturing a general phenomenon?
#

#  EXERCISE:
#    Enlarge the RE model. How far can you go and what is the impact
#    of doing so.

```

'

## Visualizing models

'

``` #
#  Visualizing different models
#

#
# The following code was used to generate graphs for a lecture
# It shows how the different models above fit the data and
# attempts to explain how and why they differ.
#

# Set lattice symbols/colours: one pch/colour per drug level
td( pch = 15:17, col = c('blue','green4','red'))

xyplot( neg ~ year | Subj, dd, groups = drug )

# Order panels by sex, then by overall severity within sex
# (fix: the original had wiki-escaped '\$' which is not valid R)
dd$id <- dd$Subj
dd$id <- reorder( dd$id, 1000*(dd$Sex == "M") + dd$neg )

td( pch = 15:17, col = c('blue','green4','red'))
xyplot( neg ~ year | id, dd, groups = drug,
        between = list( y = c(0,.5,0,0,0,0,0)) ,
        skip = c( rep(FALSE,15), TRUE, rep(FALSE,30)),
        layout = c( 8,7),
        auto.key = list( columns = 3))

# select a 'representative' sample of 3 from each sex

sel <- c( "F4","F25","F45","M12","M40","M51")

td( pch = 15:17, col = c('blue','green4','red'), cex = 1.2)
xyplot( neg ~ year | id, dd, groups = drug,
        between = list( y = c(0,.5,0,0,0,0,0)) ,
        skip = c( rep(FALSE,15), TRUE, rep(FALSE,30)),
        layout = c( 3,2),
        auto.key = list( columns = 3),
        subset = id %in% sel)

# Pooled OLS fit ignoring clustering, for comparison with mixed models
fit.ols <- lm( neg ~ drug, dd )
summary( fit.ols )

# Hypothesis matrices: predicted value per drug, and pairwise differences
L <- list(
  'predicted' = cbind( 1, contrasts( dd$drug)),
  'differences' = Ldiff ( fit.ols, 'drug', ref = 'Atypical'))
L
wald( fit.ols, L)
# Clozapine is worst but not significantly so

#
#  Displaying data and fitted values
#

# Prediction frame: every year x drug x subject combination
pred <- expand.grid( year = 1:6, drug = levels( dd$drug), id = levels(dd$id))
head( pred )
some( pred )

pred$neg <- predict( fit.ols, newdata = pred )

# Combine with data for plotting

dd$what <- factor("data")
pred$what <- factor("fit")

td( pch = 15:17, col = c('blue','green4','red'), cex = 1.2,
    lty = c(1,1,1,1,1,1,2,2,2), lwd = 2)
xyplot( neg ~ year | id, Rbind( dd, pred ),  groups = what:drug,
        panel = panel.superpose.2,
        type = c('p','p','p','l','l','l'),
        subset = id %in% sel,
        between = list( y =1 ),
        auto.key = list( text = c("Atypical","Clozapine","Typical"),
                         points = TRUE, lines = TRUE, columns = 3))
# overplotting problem

# make later lines less dense:

td( pch = 15:17, col = c('blue','green4','red'), cex = 1.2,
    lty = c(1,2,3,1,2,3,1,2,3), lwd = 2)
xyplot( neg ~ year | id, Rbind( dd, pred ),  groups = what:drug,
        panel = panel.superpose.2,
        type = c('p','p','p','l','l','l'),
        subset = id %in% sel,
        between = list( y =1 ),
        auto.key = list( text = c("Atypical","Clozapine","Typical"),
                         points = TRUE, lines = TRUE, columns = 3))

#
# Mixed model: first version
#

# Random-intercept mixed model for 'neg'
fit.mix <- lme( neg ~ drug, dd, random = ~ 1 | id )
summary(fit.mix)
wald( fit.mix, L )  # same L as above because same FE model
# Clozapine is best and sig. diff from Typical

#
# Can use same pred for Popn pred and for BLUPS
#

# (fix: the original had wiki-escaped '\$' which is not valid R)
pred$neg <- predict( fit.mix, pred, level = 0)   # population-level predictions

# Make a separate data frame for BLUPs (level-1 predictions)

pred2 <- pred
pred2$neg <- predict( fit.mix, pred2, level = 1)
pred2$what <- factor("fit2")

# Cut and paste from last, then add 'pred2'

td( pch = 15:17, col = c('blue','green4','red'), cex = 1.2,
    lty = c(1,1,1,2,2,2,1,1,1), lwd = 2)
xyplot( neg ~ year | id, Rbind( dd, pred, pred2 ),  groups = what:drug,
        panel = panel.superpose.2,
        lwd = 2,
        type = c(rep('p',3),rep('l',6)),
        subset = id %in% sel,
        between = list( y =1 ),
        auto.key = list( text = c("Atypical","Clozapine","Typical"),
                         points = TRUE, lines = TRUE, columns = 3))

#
# Hausman test and mixed model with contextual categorical variable
#

# Contextual variable: subject-level mean of the drug dummies.
# (fix: the original had wiki-escaped '\$' which is not valid R)
dd$drug.m <- cvar( dd$drug, dd$id)          # note that drug.m is a matrix column
dd$drug.m
head( dd, 18)
some( dd )

# QUESTION:
#  What is the interpretation of drug.m?
#  Why not three columns, one for each drug?

# Mixed model including the contextual drug variable
fit.m <- lme( neg ~ drug + drug.m , dd, random = ~ 1 | id )
summary( fit.m )

wald( fit.m , 'drug.m')   # Should we keep drug.m?
wald( fit.m , 'drug')

L
# Pad L with zero columns for the two drug.m coefficients so the same
# hypotheses apply to the extended model
Lm <- lapply( L, function(x) cbind( x, 0, 0))
Lm
wald( fit.m , Lm )

# compare with

wald( fit.mix, L)

#
# QUESTION:
#    With drug.m in the model, the difference between Typical and Clozapine
#    is larger and more significant. Why?
#

#
#  Plotting fit with Level 2 variables (here the contextual variables)
#

#
#  If you want to show how the response depends on FE variables
#  you need to create a prediction data frame with all the FE variables
#  needed for prediction. (Level 1 or Level 2)
#
#
#  If you want to show predictions for individual clusters ( popn average
#  of BLUPS)
#           AND
#  you have Level 2 variables in your model
#  you need to construct a data frame with
#      ids and Level 1 variables
#  then you need to merge the data.frame with the matching
#  Level 2 variables.
#
#

# (fix: the original had wiki-escaped '\$' which is not valid R)
pred <- expand.grid( year = 1:6, drug = levels( dd$drug), id = levels(dd$id))
head( pred)
dim( pred )
# merge pred with specific Level 2 variable corresponding to id

ddu <- up( dd, ~ id)   # one row per subject: the Level 2 variables
ddu

predc <- merge( pred, ddu)
dim( predc )
head( predc )
names( predc ) # note that drug.m matrix is preserved as a matrix

# ready to predict popn average:

predc$neg <- predict( fit.m , predc, level = 0)

# predict BLUP

predc.blup <- predc
predc.blup$neg <- predict( fit.m , predc.blup, level = 1)

# Identify data frames so they can be combined and each plot differently

dd$what <- factor("data")
predc$what <- factor("fit0")
predc.blup$what <- factor('fit1')

# Note: this is all cut and paste except for the first line

td( pch = 15:17, col = c('blue','green4','red'), cex = 1.2,
    lty = c(1,1,1,2,2,2,1,1,1), lwd = 2)
xyplot( neg ~ year | id , Rbind( dd, predc, predc.blup ),  groups = what:drug,
        panel = panel.superpose.2,
        lwd = 2,
        type = c(rep('p',3),rep('l',6)),
        subset = id %in% sel,
        between = list( y =1 ),
        #auto.key = list( text = c("Atypical","Clozapine","Typical"),
        auto.key = list(
          points = TRUE, lines = TRUE, columns = 3))

#
#  Taking time into account
#

# Add a linear year trend to the contextual model
fit.my <- update( fit.m, . ~ . + year)

summary( fit.my )   # very significant drop with time

# Previous hypothesis matrix

Lm
wald( fit.my )

# We only need to add a (zero) year column to each matrix in Lm

Lmy <- lapply( Lm , function( x ) cbind(x, 0) )

# let's use the average year for predicted values
# (fix: the original read 'Lmy1[,6] <- 3.5' but 'Lmy1' is undefined;
# the 'predicted' matrix is the first element of the list Lmy)
Lmy[[1]][,6] <- 3.5

Lmy

#
# QUESTION: Should we do the same thing for the second matrix?
#
#

wald ( fit.my, Lmy )

#
# QUESTION: What does this say about Clozapine?
#

# We don't need new pred data.frames
# (fix: the original had wiki-escaped '\$' which is not valid R)

predc$neg <- predict( fit.my, predc, level = 0)
predc.blup$neg <- predict( fit.my, predc.blup, level = 1)

dd$what
predc$what <- factor( 'fit0')
predc.blup$what <- factor( 'fit1')
rb <- Rbind( dd, predc, predc.blup)
# Note: this plot call is identical to the previous one
xyplot( neg ~ year | id , Rbind( dd, predc, predc.blup ),  groups = what:drug,
        panel = panel.superpose.2,
        lwd = 2,
        type = c(rep('p',3),rep('l',6)),
        subset = id %in% sel,
        between = list( y =1 ),
        auto.key = list( text = c("Atypical","Clozapine","Typical"),
                         points = TRUE, lines = TRUE, columns = 3))

#
# Adding a possible autocorrelation
#

# Refit the year model allowing AR(1) serial correlation within subject
fit.myc <- update(
  fit.my,
  correlation = corAR1( form = ~ year | id )
)
summary( fit.myc )        # very significant drop with time
wald( fit.myc, 'drug.m')  # what happened to the contextual effect? Why?
# The fixed-effect design is unchanged, so the same hypothesis matrix applies
wald( fit.myc, Lmy )

#
#  Adding random Drug effects
#

# Add random (within-subject) drug effects via dummy variables
fit.mycr <- update( fit.myc, random = ~ 1 + dvar( drug, id) | id)

# Refit with a raised iteration limit and verbose tracing in case the
# default optimizer settings do not converge
fit.mycr <- update( fit.myc, random = ~ 1 + dvar( drug, id) | id,
                    control = list( msMaxIter = 200, msVerbose = TRUE   ))

summary( fit.mycr )

summary( fit.myc )   # very significant drop with time
intervals( fit.myc ) # Beware of interpreting intervals for SDs
wald( fit.myc, 'drug.m')   # what happened to the contextual effect? Why?

# Hypothesis matrix does not change

wald ( fit.mycr, Lmy )

getVarCov( fit.mycr)          # estimated RE covariance matrix
cond( getVarCov( fit.mycr) )  # condition number: is G nearly singular?

coef( fit.mycr )
zz <- ranef( fit.mycr )
names(zz) <- c("Int",'Cloz', "Typical")
names(zz)
# library() rather than require(): fail immediately (with an error, not
# a FALSE return value) if p3ddev is not installed
library( p3ddev )
Init3d()
Plot3d(Int ~ Cloz +Typical , zz)
Id3d(pad = 2)
Ell3d()

#
# some of the potential RE variance is accounted for by within-cluster
# variance.  This is analogous to having an F at or below 1 for blocks
# in a blocked randomized design.
#

# Sensitivity check: drop the suspicious subjects and refit
fit.mycrd <- update( fit.mycr, subset = !(id %in% c("F41", "F55", "F36", "M53")))
summary(fit.mycrd)
wald( fit.mycrd, "drug")
wald( fit.mycrd, Ldiff( fit.mycrd, "drug[^\\.]", ref ="Atypical"))

# Finally: random year

fit.mycry <- update( fit.myc, random = ~ 1 + dvar( drug, id) + year | id)

fit.mycry <- update( fit.myc, random = ~ 1 + dvar( drug, id) + year| id,
                     control = list( msMaxIter = 200, msVerbose = TRUE   ))
summary(fit.mycry)

wald( fit.mycry )

#
#  Please contact Georges Monette <georges@yorku.ca>
#  with any questions or comments.
```