stratify_cluswilcox.sas

/************************************************************************************************** ** PROGRAM: stratify.cluswilcox.sas ** ** PURPOSE: Incorporating clustering effects for the Wilcoxon Rank Sum Test ** for stratified balanced or unbalanced designs. In addition, ** one can control for confounding by forming strata which are ** defined based on cross-classification of one or more categorical ** covariates which are defined in terms of a single categorical ** variable denoted by strata. ** ** REQUIREMENT: An ASCII data file with 4 variables per record. The data file ** does not have to be sorted in ID order. ** The 3 variables need to be space delimited and arranged in the ** following order: ** 1. id ** 2. score ** 3. group (X, Y) indicators : need to be 1 and 2. ** 4. stratum ** ** MACRO VARIABLE: ** 1. name of data set. ** 2. number of stratum. ** ** USAGE: Enter "%stratify_cluswilcox(data=sample.dat, ** num_strata=total number of strata);" ** where sample.dat has the format described above. ***************************************************************************************************/ *************************************************************** ** read in data and rank all subunits ** ***************************************************************; %macro stratify_cluswilcox(data=, num_strata=); options ls=110 ps=58; filename crd "&data"; data samp2; infile crd; input id z group stratum @@; if z ne .; one=1; proc sort; by id; run; proc rank out=rnk; var z; ranks zrank; run; *********************************************************************** ** calculate ranksum within each cluster within each stratum ** ***********************************************************************; data ranksum; set rnk ; by id; retain g sumrnk 0; %do j=1 %to &num_strata; if stratum=&j then do; if (last.id le 1) then do; sumrnk=sumrnk+zrank; g=g+1; end; if (last.id eq 1) then do; output; sumrnk=0; g=0; end; end; %end; run; ***************************************************** ** compute maximum cluster size (gmax) ** *****************************************************; data ranksum ; set ranksum end=eof; retain gmax 0; if (g>gmax) then gmax=g; if eof then call symput("gmax",gmax); run; ******************************************************************************** ** count number of subunits within cluster size group within each stratum ** ********************************************************************************; data unitcnt; set ranksum end=eof; /*i: cluster size groups. j: stratum */ %do i=1 %to &gmax; %do j=1 %to &num_strata; retain ng&i&j ngx&i&j ngy&i&j 0; if g=&i and stratum=&j then ng&i&j=ng&i&j+1; if g=&i and stratum=&j and group=1 then ngx&i&j=ngx&i&j+1; if g=&i and stratum=&j and group=2 then ngy&i&j=ngy&i&j+1; retain psumrnk&i&j 0; if g=&i and stratum= &j then psumrnk&i&j=psumrnk&i&j+sumrnk; %end; %end; drop z zrank group stratum sumrnk g id; if eof then output unitcnt; run; data unitcnt; set unitcnt; %do i=1 %to &gmax; %do j=1 %to &num_strata; psumrnk&i&j=psumrnk&i&j/(ng&i&j); %end; %end; run; ***************************************************************************************** ** calculate part of variance within each cluster size group within each stratum ** *****************************************************************************************; data partvar; merge ranksum(keep=id sumrnk one g stratum) unitcnt end=eof; by one; %do i=1 %to &gmax; %do j=1 %to &num_strata; retain var&i&j 0; if psumrnk&i&j=. then do; psumrnk&i&j=0; var&i&j=0; end; else if g=&i and stratum=&j then var&i&j=var&i&j+(sumrnk-psumrnk&i&j)**2; %end; %end; if eof then output; run; data posa; set ranksum end=eof; retain wcplus 0; if group=1 then wcplus=wcplus+sumrnk; %do j=1 %to &num_strata; retain wcplus wcplus&j 0; if group=1 and stratum=&j then wcplus&j=wcplus&j+sumrnk; %end; if eof then output posa; run; ********************************************************** ** calculate large sample approximation statistics ** **********************************************************; data allstat; merge posa partvar; by one; label wcplus = 'Clustered Wilcoxon RankSum Statistic' expwcp = 'Expected Clustered Wilcoxon RankSum Statistic' varwc = 'Variance of Clustered Wilcoxon RankSum Statistic' zc = 'Z statistic for Clustered Wilcoxon RankSum Statistic' pval = 'P-value for Clustered Wilcoxon RankSum Z Statisic'; retain expwcp varwc 0; %do i=1 %to &gmax; %do j=1 %to &num_strata; retain expwcp&i&j var&i&j 0; if ng&i&j gt 1 then var&i&j = var&i&j*(ngx&i&j*ngy&i&j)/(ng&i&j*(ng&i&j-1)); expwcp=expwcp+psumrnk&i&j*ngx&i&j; varwc = varwc + var&i&j; %end; %end; zc=(wcplus - expwcp)/sqrt(varwc); pval=2*(1-probnorm(abs(zc))); proc print label noobs; var wcplus expwcp; run; proc print label noobs; var varwc; run; proc print label noobs; var zc pval; run; %mend; %*stratify_cluswilcox(data=stratum.dat, num_strata=2);