# SCS Reads 2012/Multiple Imputation: The analysis is the easiest part.R

### From Wiki1

### ### SCS Reads 2012 ### Multiple Imputation ### The analysis is the easiest part! ### See the last few lines: we just use Wald tests on the fixed effects ### coefficients of the pooled object ### library(spidade) library(nlme) library(mice) # To illustrate what can be gained from MI even when # missingness is MCAR in many variable, we use the # familiar 'hs' data set head(hs) # Consider the model mathach ~ ses * Sex * Sector # where mathach, ses or Sex can be missing dim(hs) hsm <- hs set.seed(1247) hsm$mathach [ sample(nrow(hs), 200)] <- NA hsm$ses[ sample(nrow(hs), 200)] <- NA hsm$Sex [ sample(nrow(hs), 200)] <- NA require(vmv) tablemissing(hsm) xqplot(hsm) hsm.mc <- misscode(hsm[,c('mathach','ses','Sex')]) head(hsm.mc) require(p3ddev) Plot3d( mathach ~ ses + as.numeric(Sex) | .nmiss, hsm.mc ) # full data fit.full <- lme( mathach ~ (ses + Sex + Sector)^2, hs, random = ~ 1 |school) summary(fit.full) wald(fit.full,'Sex') wald(fit.full,':Sex|Male:') fit.cca <- lme( mathach ~ (ses + Sex + Sector)^2, hsm, random = ~ 1 |school, na.action = na.omit) summary(fit.cca) summary(fit.full) ' ################ Imputation Model ' # Impute # dry run imp <- mice( hsm, maxit = 0) imp # For level 1 normally distributed variables we can use 2L.norm ?mice.impute.2L.norm # The cluster variable is identified with a -2 # Level 1 variables random related to the imputed variable are identified with a 2 pred <- imp$pred meth <- imp$meth pred meth meth["mathach"] <- "2l.norm" pred["mathach",] pred["mathach","school"] <- -2 pred["mathach","ses"] <- 2 meth["ses"] <- '2l.norm' pred["ses",] pred["ses","school"] <- -2 pred["ses","mathach"] <- 2 pred imp <- mice( hsm, pred = pred, meth = meth) plot(imp, c('mathach','ses','Sex')) # didn't look as if it had mixed imp <- mice( hsm, pred = pred, meth = meth, maxit = 30) plot(imp, c('mathach','ses','Sex')) stripplot(imp, pch=20, cex=1.2) #### To save imp: save(imp, file = 'data/imp.rda') # To restore: # data(imp) ' ################ Analysis ' ## ## Analysis ## fit <- with ( imp, lme( mathach ~ (ses+Sex+Sector)^2, random = ~ 1 | school)) lapply( fit$an , summary) ' [edit] Pooling ' pl <- pool(fit) summary(pl) ' [edit] Post-pooling analysis ' # # Pooling produces a Wald-type object with estimated coefficients, # their estimated variance-covariance matrix and estimated degrees # of freedom. # # This is ideally suited to Wald-type estimation and testing based on # linear combinations of coefficients. The wald method for 'mipo' objects # currently uses the minimum degrees of freedom for a component in a # linear combination. An improvement would be to explore using a better # method. # # Also, the between-imputation contribution to the pooled variance may # lead to less stable tests of hypotheses with multiple degrees of # freedom. Increasing m, the number of imputations, in these cases is # desirable. wald(pl, ":") wald(fit.full, ":")