sas program for cluswilcox

/**************************************************************************************************

** PROGRAM: cluswilcox.sas

**

** PURPOSE: Incorporating clustering effects for the Wilcoxon Rank Sum Test

** for either balanced or unbalanced designs. i.e., clusters may have

** the same or different number of subunits. Group assignment is at

** the cluster level.

**

** REQUIREMENT: An ASCII data file with 3 variables per record. The data file does

** not have to be sorted in ID order.

** The 3 variables need to be space delimited and arranged in the

** following order:

** 1. id

** 2. score

** 3. group (X, Y) indicators : need to be 1 and 2.

**

** MACRO VARIABLE:

** 1. name of data set.

**

** USAGE: Enter "%cluswilcox(data=sample.dat);" where sample.dat has the

** format described above

***************************************************************************************************/

%macro cluswilcox(data=);

filename crd "&data";

***************************************************************

** read in data and rank all subunits **

***************************************************************;

data samp;

infile crd;

input id z group @@;

if z ne .;

one=1;

proc sort;

by id;

run;

proc rank out=rnk;

var z;

ranks zrank;

run;

******************************************************

** calculate ranksum within each cluster **

******************************************************;

data ranksum;

set rnk ;

by id;

retain g sumrnk 0;

if (last.id le 1) then do;

sumrnk=sumrnk+zrank;

g=g+1;

end;

if (last.id eq 1) then do;

output;

sumrnk=0;

g=0;

end;

run;

***************************************************

** compute maximum cluster size (gmax) **

***************************************************;

data ranksum ;

set ranksum end=eof;

retain gmax 0;

if (g>gmax) then gmax=g;

if eof then call symput("gmax",gmax);

run;

*******************************************************************

** count number of subunits within each cluster size group **

*******************************************************************;

data unitcnt;

set ranksum end=eof;

%do i=1 %to &gmax;

retain ng&i ngx&i ngy&i 0;

if g=&i then ng&i=ng&i+1;

if g=&i and group=1 then ngx&i=ngx&i+1;

if g=&i and group=2 then ngy&i=ngy&i+1;

retain psumrnk&i 0;

if g=&i then psumrnk&i=psumrnk&i+sumrnk;

%end;

drop id sumrnk g;

if eof then output unitcnt;

run;

data unitcnt;

set unitcnt;

%do i=1 %to &gmax;

retain fsumrnk&i 0;

fsumrnk&i=psumrnk&i/ng&i;

%end;

run;

**************************************************

** calculate part of variance **

**************************************************;

data partvar;

merge ranksum(keep=id sumrnk one g) unitcnt end=eof;

by one;

%do i=1 %to &gmax;

retain var&i 0;

if g=&i then var&i=var&i+(sumrnk-fsumrnk&i)**2;

if fsumrnk&i=. then do;

fsumrnk&i=0;

var&i=0;

end;

%end;

if eof then output;

run;

data posa;

set ranksum end=eof;

retain wcplus 0;

if group=1 then wcplus=wcplus+sumrnk;

if eof then output posa;

run;

**********************************************************

** calculate large sample approximation statistics **

**********************************************************;

data allstat;

merge posa partvar;

by one;

label wcplus = 'Clustered Wilcoxon RankSum Statistic'

expwcp = 'Expected Value Clustered Wilcoxon RankSum Statistic'

varwc = 'Variance of Clustered Wilcoxon RankSum Statistic'

zc = 'Z statistic for Clustered Wilcoxon RankSum Statistic'

pval = 'P-value for Clustered Wilcoxon RankSum Z Statisic';

retain varwc expwcp 0;

%do i=1 %to &gmax;

if ng&i gt 1 then var&i = var&i*(ngx&i*ngy&i)/(ng&i*(ng&i-1));

expwcp = expwcp + fsumrnk&i*ngx&i;

varwc = varwc + var&i;

%end;

zc=(wcplus - expwcp)/sqrt(varwc);

pval=2*(1-probnorm(abs(zc)));

proc print label noobs;

var wcplus expwcp varwc zc pval;

title "Clustered Wilcoxon Rank Sum Output";

run;

%mend;

%*cluswilcox(data=unbalanced.dat);