# Statistics/Clustering Optimal Sample Sizes/opt3.R

### From Wiki1

# Optimal 3-level cluster sampling # Pupils in Classes in Schools in Board = Population # # Pupils in Classes in Schools in Board = Population # How many # 1) schools # 2) classes per school # 3) pupils per class opt3 <- function(K, kS, kC, kP, ICC.pcs, ICC.csb, CL = .95){ # 3-level Clustered design calculator # see http://scs.math.yorku.ca/index.php/Statistics/Clustering_Optimal_Sample_Sizes # K capital # School: S schools at kS # Classes: C classes at kC # Pupils: P pupils at kP # INPUT: # ICC.pcs: ICC for pupils in same class within school # ICC.csb: ICC for class averages within same school relative to board # DERIVED: # ICC.pcb: ICC for pupils in same class relative to board # ICC.psb: ICC for pupils in same school relative to board # ICC.pcb <- (tau.s + tau.c)/(tau.s + tau.c + sigma2) # student in class rel to board # ICC.psb <- (tau.s)/(tau.s + tau.c + sigma2) # # INPUT ICCs in term of phi.s and phi.c # phi.s <- tau.s / sigma2 # phi.c <- tau.c / sigma2 # ICC.pcs <- tau.c /(tau.c + sigma2) = phi.s /(phi.s + 1) # ICC.csb <- tau.s / (tau.s + tau.c) = phi.s / (phi.s + phi.c) # phi.s <- ICC.pcs / (1 - ICC.pcs) phi.c <- phi.s/ICC.csb - phi.s ICC.pcb <- (phi.s + phi.c)/(phi.s + phi.c + 1) ICC.psb <- phi.s / (phi.s + phi.c + 1) rlam <- (sqrt(phi.s*kS) + sqrt(phi.c*kC) + sqrt(kP))/K S <- sqrt(phi.s/kS)/rlam C <- sqrt(phi.c/kC)/(rlam*S) P <- sqrt(1/kP)/(rlam*S*C) sd.in.class <- 1 sd.in.school <- sqrt( 1 + phi.c) sd.in.popn <- sqrt( 1 + phi.c + phi.s) var.est <- phi.s/S + phi.c/(S*C) + 1/(S*C*P) var.srs <- 1/(S*C*P) design.eff <- var.est / var.srs SRSeq <- 1/var.est ME <- qnorm(1-(1-CL)/2)/(2*sqrt(SRSeq)) # Margin of error list( Schools = S, Classes = C, Pupils = P, des.eff = design.eff, N = S*C*P, SRSeq = SRSeq, ME = ME, CLevel = CL, ICC.pcs = ICC.pcs, ICC.csb = ICC.csb, ICC.pcb = ICC.pcb, ICC.psb = ICC.psb, TotalCost = K, CostPerSch = kS , CostPerClass = kC, CostPerPupil = kP, sd.in.class = sd.in.class, sd.in.school = sd.in.school, sd.in.popn = sd.in.popn) } # # From Snijders and Bosker 3,792 pupils in 280 classes in 57 schools # (Opdenakker and Van Damme, 1997) # Var = 11.686 = tau.s + tau.c + s2 = 2.124 + 1.746 + 7.816 # 18% of total var at school level: 18% x 11.686 = 2.124 # 33% at class and school level: (2.124 + 1.746)/11.686 = 33% # # Here: 18% = ICC.psb = (2.124)/(2.124 + 1.746 + 7.816) # 33% = ICC.pcb = (2.124 + 1.746) / (2.124 + 1.746 + 7.816) # 18% = ICC.pcs = (1.746) / (1.746 + 7.816) # 55% = ICC.csb = (2.124)/(2.124 + 1.746) round(rbind( as.data.frame(opt3(100000, 500, 100, 5, .18, .55)), as.data.frame(opt3(100000, 1000, 100, 10, .18, .55)), as.data.frame(opt3(100000, 1000, 200, 5, .18, .55)) ),3)