# Statistics/Clustering Optimal Sample Sizes/opt3.R

Jump to: navigation, search
```#   Optimal 3-level cluster sampling
#   Pupils in Classes in Schools in Board = Population
#
#   Pupils in Classes in Schools in Board = Population
#   How many
#      1) schools
#      2) classes per school
#      3) pupils per class

opt3 <- function(K, kS, kC, kP, ICC.pcs, ICC.csb, CL = .95){
# 3-level Clustered design calculator
# see http://scs.math.yorku.ca/index.php/Statistics/Clustering_Optimal_Sample_Sizes
# K capital
# School: S schools at kS
# Classes: C classes at kC
# Pupils:  P pupils at kP
# INPUT:
# ICC.pcs: ICC for pupils in same class within school
# ICC.csb: ICC for class averages within same school relative to board
# DERIVED:
# ICC.pcb: ICC for pupils in same class relative to board
# ICC.psb: ICC for pupils in same school relative to board
#   ICC.pcb <- (tau.s + tau.c)/(tau.s + tau.c + sigma2)
# student in class rel to board
#   ICC.psb <- (tau.s)/(tau.s + tau.c + sigma2)
#
# INPUT ICCs in term of phi.s and phi.c
#   phi.s <- tau.s / sigma2
#   phi.c <- tau.c / sigma2
#   ICC.pcs <- tau.c /(tau.c + sigma2) = phi.s /(phi.s + 1)
#   ICC.csb <- tau.s / (tau.s + tau.c) = phi.s / (phi.s + phi.c)

#
phi.s <- ICC.pcs / (1 - ICC.pcs)
phi.c <- phi.s/ICC.csb - phi.s
ICC.pcb <- (phi.s + phi.c)/(phi.s + phi.c + 1)
ICC.psb <- phi.s / (phi.s + phi.c + 1)

rlam <- (sqrt(phi.s*kS) + sqrt(phi.c*kC) + sqrt(kP))/K

S <- sqrt(phi.s/kS)/rlam
C <- sqrt(phi.c/kC)/(rlam*S)
P <- sqrt(1/kP)/(rlam*S*C)

sd.in.class <- 1
sd.in.school <- sqrt( 1 + phi.c)
sd.in.popn <- sqrt( 1 + phi.c + phi.s)

var.est <- phi.s/S + phi.c/(S*C) + 1/(S*C*P)
var.srs <- 1/(S*C*P)
design.eff <- var.est / var.srs
SRSeq <- 1/var.est
ME <- qnorm(1-(1-CL)/2)/(2*sqrt(SRSeq)) # Margin of error
list( Schools = S, Classes = C, Pupils = P, des.eff = design.eff,
N = S*C*P, SRSeq = SRSeq, ME = ME, CLevel = CL,
ICC.pcs = ICC.pcs, ICC.csb = ICC.csb, ICC.pcb = ICC.pcb,
ICC.psb = ICC.psb,   TotalCost = K, CostPerSch = kS ,
CostPerClass = kC, CostPerPupil = kP, sd.in.class = sd.in.class,
sd.in.school = sd.in.school, sd.in.popn = sd.in.popn)
}

# # From Snijders and Bosker 3,792 pupils in 280 classes in 57 schools
# (Opdenakker and Van Damme, 1997)
# Var = 11.686 = tau.s + tau.c + s2 = 2.124 + 1.746 + 7.816
# 18% of total var at school level: 18% x 11.686 = 2.124
# 33% at class and school level: (2.124 + 1.746)/11.686 = 33%
#
# Here: 18% = ICC.psb = (2.124)/(2.124 + 1.746 + 7.816)
#       33% = ICC.pcb = (2.124 + 1.746) / (2.124 + 1.746 + 7.816)
#       18% = ICC.pcs = (1.746) / (1.746 + 7.816)
#       55% = ICC.csb = (2.124)/(2.124 + 1.746)

round(rbind(
as.data.frame(opt3(100000, 500, 100, 5, .18, .55)),
as.data.frame(opt3(100000, 1000, 100, 10, .18, .55)),
as.data.frame(opt3(100000, 1000, 200, 5, .18, .55))
),3)```