# Some trimmed-mean functions by Rand Wilcox # # http://www-rcf.usc.edu/~rwilcox/ # winvar<-function(x,tr=.2,na.rm=F){ # # Compute the gamma Winsorized variance for the data in the vector x. # tr is the amount of Winsorization which defaults to .2. # if(na.rm)x<-x[!is.na(x)] y<-sort(x) n<-length(x) ibot<-floor(tr*n)+1 itop<-length(x)-ibot+1 xbot<-y[ibot] xtop<-y[itop] y<-ifelse(y<=xbot,xbot,y) y<-ifelse(y>=xtop,xtop,y) winvar<-var(y) winvar } yuen<-function(x,y,tr=.2,alpha=.05){ # # Perform Yuen's test for trimmed means on the data in x and y. # The default amount of trimming is 20% # Missing values (values stored as NA) are automatically removed. # # A confidence interval for the trimmed mean of x minus the # the trimmed mean of y is computed and returned in yuen\$ci. # The p-value is returned in yuen\$p.value # # For an omnibus test with more than two independent groups, # use t1way. # This function uses winvar from chapter 2. # if(tr==.5)stop("Using tr=.5 is not allowed; use a method designed for medians") if(tr>.25)print("Warning: with tr>.25 type I error control might be poor") x<-x[!is.na(x)] # Remove any missing values in x y<-y[!is.na(y)] # Remove any missing values in y h1<-length(x)-2*floor(tr*length(x)) h2<-length(y)-2*floor(tr*length(y)) q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1)) q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1)) df<-(q1+q2)^2/((q1^2/(h1-1))+(q2^2/(h2-1))) crit<-qt(1-alpha/2,df) dif<-mean(x,tr)-mean(y,tr) low<-dif-crit*sqrt(q1+q2) up<-dif+crit*sqrt(q1+q2) test<-abs(dif/sqrt(q1+q2)) yuen<-2*(1-pt(test,df)) list(ci=c(low,up),p.value=yuen,dif=dif,se=sqrt(q1+q2),teststat=test,crit=crit,df=df) } t2way<-function(J,K,data,tr=.2,grp=c(1:p),p=J*K,MAT=F, lev.col=c(1:2),var.col=3,pr=T){ # Perform a J by K (two-way) anova on trimmed means where # all groups are independent. # # The R variable data is assumed to contain the raw # data stored in list mode, or a matrix with columns # corresponding to groups. If stored in list mode, data[[1]] contains the data # for the first level of both factors: level 1,1. # data][2]] is assumed to contain the data for level 1 of the # first factor and level 2 of the second factor: level 1,2 # # The default amount of trimming is tr=.2 # # It is assumed that data has length JK, the total number of # groups being tested. # # MAT=T, assumes data are stored in matrix with 3 columns # with two of the columns indicated by the argument # lev.col # specifying the columns of x containing the values of the # levels of the two factors. # The outcome variable is in column # var.col # which defaults to column 3 # That is, this function calls selby2 for you. # if(is.data.frame(data))data=as.matrix(data) if(tr==.5){ print("For medians, use med2way if there are no ties") print("With ties, use linear contrasts in conjunction with medpb") stop("") } if(MAT){ if(!is.matrix(data))stop("With MAT=T, data must be a matrix") if(length(lev.col)!=2)stop("Argument lev.col should have 3 values") temp=selby2(data,lev.col,var.col) lev1=length(unique(temp\$grpn[,1])) lev2=length(unique(temp\$grpn[,2])) gv=apply(temp\$grpn,2,rank) gvad=10*gv[,1]+gv[,2] grp=rank(gvad) if(pr){ print(paste("Factor 1 has", lev1, "levels")) print(paste("Factor 2 has", lev2, "levels")) } if(J!=lev1)warning("J is being reset to the number of levels found") if(K!=lev2)warning("K is being reset to the number of levels found") J=lev1 K=lev2 data=temp\$x } if(is.matrix(data))data=listm(data) if(!is.list(data))stop("Data are not stored in list mode") if(p!=length(data)){ print("The total number of groups, based on the specified levels, is") print(p) print("The number of groups in data is") print(length(data)) print("Warning: These two values are not equal") } tmeans<-0 h<-0 v<-0 for (i in 1:p){ data[[grp[i]]]=elimna(data[[grp[i]]]) tmeans[i]<-mean(data[[grp[i]]],tr) h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]])) # h is the effective sample size v[i]<-(length(data[[grp[i]]])-1)*winvar(data[[grp[i]]],tr)/(h[i]*(h[i]-1)) # v contains the squared standard errors } v<-diag(v,p,p) # Put squared standard errors in a diag matrix. ij<-matrix(c(rep(1,J)),1,J) ik<-matrix(c(rep(1,K)),1,K) jm1<-J-1 cj<-diag(1,jm1,J) for (i in 1:jm1)cj[i,i+1]<-0-1 km1<-K-1 ck<-diag(1,km1,K) for (i in 1:km1)ck[i,i+1]<-0-1 # Do test for factor A #cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A cmat<-kron(cj,ik) # Contrast matrix for factor A #alval<-c(999:1)/1000 alval<-c(1:999)/1000 for(i in 1:999){ irem<-i Qa<-johan(cmat,tmeans,v,h,alval[i]) if(Qa\$teststat>Qa\$crit)break } A.p.value=irem/1000 # Do test for factor B cmat<-kron(ij,ck) # Contrast matrix for factor B for(i in 1:999){ irem<-i Qb<-johan(cmat,tmeans,v,h,alval[i]) if(Qb\$teststat>Qb\$crit)break } B.p.value=irem/1000 # Do test for factor A by B interaction cmat<-kron(cj,ck) # Contrast matrix for factor A by B for(i in 1:999){ irem<-i Qab<-johan(cmat,tmeans,v,h,alval[i]) if(Qab\$teststat>Qab\$crit)break } AB.p.value=irem/1000 tmeans=matrix(tmeans,J,K,byrow=T) list(Qa=Qa\$teststat,A.p.value=A.p.value, Qb=Qb\$teststat,B.p.value=B.p.value, Qab=Qab\$teststat,AB.p.value=AB.p.value,means=tmeans) } selby2<-function(m,grpc,coln=NA){ # Create categories according to the grpc[1] and grpc[2] columns # of the matrix m. The function puts the values in column coln into # a vector having list mode. # if(is.na(coln))stop("The argument coln is not specified") if(length(grpc)>4)stop("The argument grpc must have length less than or equal to 4") x<-vector("list") ic<-0 if(length(grpc)==2){ cat1<-selby(m,grpc[1],coln)\$grpn cat2<-selby(m,grpc[2],coln)\$grpn for (i1 in 1:length(cat1)){ for (i2 in 1:length(cat2)){ temp<-NA it<-0 for (i in 1:nrow(m)){ if(sum(m[i,c(grpc[1],grpc[2])]==c(cat1[i1],cat2[i2]))==2){ it<-it+1 temp[it]<-m[i,coln] } } if(!is.na(temp[1])){ ic<-ic+1 x[[ic]]<-temp if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2]),1,2) if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2])) } }} } if(length(grpc)==3){ cat1<-selby(m,grpc[1],coln)\$grpn cat2<-selby(m,grpc[2],coln)\$grpn cat3<-selby(m,grpc[3],coln)\$grpn x<-vector("list") ic<-0 for (i1 in 1:length(cat1)){ for (i2 in 1:length(cat2)){ for (i3 in 1:length(cat3)){ temp<-NA it<-0 for (i in 1:nrow(m)){ if(sum(m[i,c(grpc[1],grpc[2],grpc[3])]==c(cat1[i1],cat2[i2],cat3[i3]))==3){ it<-it+1 temp[it]<-m[i,coln] }} if(!is.na(temp[1])){ ic<-ic+1 x[[ic]]<-temp if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2],cat3[i3]),1,3) if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2],cat3[i3])) }}}} } if(length(grpc)==4){ cat1<-selby(m,grpc[1],coln)\$grpn cat2<-selby(m,grpc[2],coln)\$grpn cat3<-selby(m,grpc[3],coln)\$grpn cat4<-selby(m,grpc[4],coln)\$grpn x<-vector("list") ic<-0 for (i1 in 1:length(cat1)){ for (i2 in 1:length(cat2)){ for (i3 in 1:length(cat3)){ for (i4 in 1:length(cat4)){ temp<-NA it<-0 for (i in 1:nrow(m)){ if(sum(m[i,c(grpc[1],grpc[2],grpc[3],grpc[4])]==c(cat1[i1],cat2[i2],cat3[i3],cat4[i4]))==4){ it<-it+1 temp[it]<-m[i,coln] }} if(!is.na(temp[1])){ ic<-ic+1 x[[ic]]<-temp if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2],cat3[i3],cat4[i4]),1,4) if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2],cat3[i3],cat4[i4])) }}}}} } list(x=x,grpn=grpn) } johan<-function(cmat,vmean,vsqse,h,alpha=.05){ # # This function is used by other functions that come with this book, # and it can be used to test hypothesis not covered in the text. # # The function performs Johansen's test of C mu = 0 for p independent groups, # where C is a k by p matrix of rank k and mu is a p by 1 matrix of # of unknown trimmed means. # The argument cmat contains the matrix C. # vmean is a vector of length p containing the p trimmed means # vsqe is a diagonal matrix containing the squared standard errors of the # the trimmed means in vmean. # h is a vector containing the effective sample sizes # yvec<-matrix(vmean,length(vmean),1) if(!is.matrix(vsqse))vsqse<-diag(vsqse) test<-cmat%*%vsqse%*%t(cmat) invc<-solve(test) test<-t(yvec)%*%t(cmat)%*%invc%*%cmat%*%yvec R<-vsqse%*%t(cmat)%*%invc%*%cmat A<-sum(diag((diag(R))^2/diag(h-1))) df<-nrow(cmat) crit<-qchisq(1-alpha,df) crit<-crit+(crit/(2*df))*A*(1+3*crit/(df+2)) list(teststat=test[1],crit=crit[1]) } selby<-function(m,grpc,coln){ # # # A commmon situation is to have data stored in an n by p matrix where # one or more of the columns are group identification numbers. # This function groups all values in column coln according to the # group numbers in column grpc and stores the results in list mode. # # More than one column of data can sorted # # grpc indicates the column of the matrix containing group id number # if(is.null(dim(m)))stop("Data must be stored in a matrix or data frame") if(is.na(grpc[1]))stop("The argument grpc is not specified") if(is.na(coln[1]))stop("The argument coln is not specified") if(length(grpc)!=1)stop("The argument grpc must have length 1") x<-vector("list") grpn<-sort(unique(m[,grpc])) it<-0 for (ig in 1:length(grpn)){ for (ic in 1:length(coln)){ it<-it+1 flag<-(m[,grpc]==grpn[ig]) x[[it]]<-m[flag,coln[ic]] }} list(x=x,grpn=grpn) } elimna<-function(m){ # # remove any rows of data having missing values # if(is.null(dim(m)))m<-as.matrix(m) ikeep<-c(1:nrow(m)) for(i in 1:nrow(m))if(sum(is.na(m[i,])>=1))ikeep[i]<-0 elimna<-m[ikeep[ikeep>=1],] elimna } kron<-function(m1,m2){ # compute the Kronecker product of the two matrices m1 and m2. # m1<-as.matrix(m1) # Vectors of length p are converted to a p by 1 matrix m2<-as.matrix(m2) kron<-vector(mode="numeric",length=0) for(i in 1:nrow(m1)){ m3<-m1[i,1]*m2 for(j in 2:ncol(m1))m3<-cbind(m3,m1[i,j]*m2) if(i==1)kron<-m3 if(i>=2)kron<-rbind(kron,m3) } kron } t1way<-function(x,tr=.2,grp=NA){ # # A heteroscedastic one-way ANOVA for trimmed means # using a generalization of Welch's method. # # The data are assumed to be stored in \$x\$ in list mode. # Length(x) is assumed to correspond to the total number of groups. # By default, the null hypothesis is that all groups have a common mean. # To compare a subset of the groups, use grp to indicate which # groups are to be compared. For example, if you type the # command grp<-c(1,3,4), and then execute this function, groups # 1, 3, and 4 will be compared with the remaining groups ignored. # # Missing values are automatically removed. # if(is.matrix(x)){ y<-list() for(j in 1:ncol(x))y[[j]]<-x[,j] x<-y } if(is.na(grp[1]))grp<-c(1:length(x)) if(!is.list(x))stop("Data are not stored in a matrix or in list mode.") J<-length(grp) # The number of groups to be compared h<-vector("numeric",J) w<-vector("numeric",J) xbar<-vector("numeric",J) for(j in 1:J){ xx<-!is.na(x[[j]]) val<-x[[j]] x[[j]]<-val[xx] # Remove missing values h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]])) # h is the number of observations in the jth group after trimming. w[j]<-h[j]*(h[j]-1)/((length(x[[grp[j]]])-1)*winvar(x[[grp[j]]],tr)) xbar[j]<-mean(x[[grp[j]]],tr) } u<-sum(w) xtil<-sum(w*xbar)/u A<-sum(w*(xbar-xtil)^2)/(J-1) B<-2*(J-2)*sum((1-w/u)^2/(h-1))/(J^2-1) TEST<-A/(B+1) nu1<-J-1 nu2<-1./(3*sum((1-w/u)^2/(h-1))/(J^2-1)) sig<-1-pf(TEST,nu1,nu2) list(TEST=TEST,nu1=nu1,nu2=nu2,p.value=sig) }