SCS Reads 2012/Multiple Imputation: The analysis is the easiest part.R

From Wiki1

Jump to: navigation, search
###
###  SCS Reads 2012
###  Multiple Imputation
###  The analysis is the easiest part!
###  See the last few lines: we just use Wald tests on the fixed effects
###       coefficients of the pooled object
###



library(spidade)
library(nlme)
library(mice)

#  To illustrate what can be gained from MI even when 
#  missingness is MCAR in many variable, we use the 
#  familiar 'hs' data set

head(hs)

#  Consider the model mathach ~ ses * Sex * Sector 
#  where mathach, ses or Sex can be missing

dim(hs)
hsm <- hs
set.seed(1247)

hsm$mathach [ sample(nrow(hs), 200)] <- NA
hsm$ses[ sample(nrow(hs), 200)] <- NA
hsm$Sex [ sample(nrow(hs), 200)] <- NA

require(vmv)
tablemissing(hsm)
xqplot(hsm)
hsm.mc <- misscode(hsm[,c('mathach','ses','Sex')])
head(hsm.mc)
require(p3ddev)
Plot3d( mathach ~ ses + as.numeric(Sex) | .nmiss, hsm.mc )

# full data

fit.full <- lme( mathach ~ (ses + Sex + Sector)^2, hs, random = ~ 1 |school)
summary(fit.full)
wald(fit.full,'Sex')
wald(fit.full,':Sex|Male:')

fit.cca <- lme( mathach ~ (ses + Sex + Sector)^2, hsm, random = ~ 1 |school,
                na.action = na.omit)

summary(fit.cca)
summary(fit.full)

'
################ Imputation Model
'

# Impute

# dry run
imp <- mice( hsm, maxit = 0)
imp

# For level 1 normally distributed variables we can use 2L.norm
?mice.impute.2L.norm
# The cluster variable is identified with a -2
# Level 1 variables random related to the imputed variable are identified with a 2

pred <- imp$pred
meth <- imp$meth

pred
meth

meth["mathach"] <- "2l.norm"

pred["mathach",]
pred["mathach","school"] <- -2
pred["mathach","ses"] <- 2


meth["ses"] <- '2l.norm'
pred["ses",]
pred["ses","school"] <- -2
pred["ses","mathach"] <- 2

pred

imp <- mice( hsm, pred = pred, meth = meth) 
plot(imp, c('mathach','ses','Sex')) # didn't look as if it had mixed

imp <- mice( hsm, pred = pred, meth = meth, maxit = 30) 
plot(imp, c('mathach','ses','Sex'))
stripplot(imp, pch=20, cex=1.2)



#### To save imp: save(imp, file = 'data/imp.rda')
# To restore:
# data(imp)


'
################ Analysis
 '  
##
## Analysis
##

fit <- with ( imp, lme( mathach ~ (ses+Sex+Sector)^2, random = ~ 1 | school))
lapply( fit$an , summary)
'
[edit] Pooling
 '
pl <- pool(fit)
summary(pl)

'
[edit] Post-pooling analysis
 '
#
# Pooling produces a Wald-type object with estimated coefficients, 
# their estimated variance-covariance matrix and estimated degrees
# of freedom.
#
# This is ideally suited to Wald-type estimation and testing based on
# linear combinations of coefficients. The wald method for 'mipo' objects
# currently uses the minimum degrees of freedom for a component in a
# linear combination. An improvement would be to explore using a better
# method. 
#
# Also, the between-imputation contribution to the pooled variance may
# lead to less stable tests of hypotheses with multiple degrees of 
# freedom. Increasing m, the number of imputations, in these cases is
# desirable.
wald(pl, ":")
wald(fit.full, ":")
Personal tools