sas program for cluswilcox
/**************************************************************************************************
** PROGRAM: cluswilcox.sas
**
** PURPOSE: Incorporating clustering effects for the Wilcoxon Rank Sum Test
** for either balanced or unbalanced designs. i.e., clusters may have
** the same or different number of subunits. Group assignment is at
** the cluster level.
**
** REQUIREMENT: An ASCII data file with 3 variables per record. The data file does
** not have to be sorted in ID order.
** The 3 variables need to be space delimited and arranged in the
** following order:
** 1. id
** 2. score
** 3. group (X, Y) indicators : need to be 1 and 2.
**
** MACRO VARIABLE:
** 1. name of data set.
**
** USAGE: Enter "%cluswilcox(data=sample.dat);" where sample.dat has the
** format described above
***************************************************************************************************/
%macro cluswilcox(data=);
filename crd "&data";
***************************************************************
** read in data and rank all subunits **
***************************************************************;
data samp;
infile crd;
input id z group @@;
if z ne .;
one=1;
proc sort;
by id;
run;
proc rank out=rnk;
var z;
ranks zrank;
run;
******************************************************
** calculate ranksum within each cluster **
******************************************************;
data ranksum;
set rnk ;
by id;
retain g sumrnk 0;
if (last.id le 1) then do;
sumrnk=sumrnk+zrank;
g=g+1;
end;
if (last.id eq 1) then do;
output;
sumrnk=0;
g=0;
end;
run;
***************************************************
** compute maximum cluster size (gmax) **
***************************************************;
data ranksum ;
set ranksum end=eof;
retain gmax 0;
if (g>gmax) then gmax=g;
if eof then call symput("gmax",gmax);
run;
*******************************************************************
** count number of subunits within each cluster size group **
*******************************************************************;
data unitcnt;
set ranksum end=eof;
%do i=1 %to &gmax;
retain ng&i ngx&i ngy&i 0;
if g=&i then ng&i=ng&i+1;
if g=&i and group=1 then ngx&i=ngx&i+1;
if g=&i and group=2 then ngy&i=ngy&i+1;
retain psumrnk&i 0;
if g=&i then psumrnk&i=psumrnk&i+sumrnk;
%end;
drop id sumrnk g;
if eof then output unitcnt;
run;
data unitcnt;
set unitcnt;
%do i=1 %to &gmax;
retain fsumrnk&i 0;
fsumrnk&i=psumrnk&i/ng&i;
%end;
run;
**************************************************
** calculate part of variance **
**************************************************;
data partvar;
merge ranksum(keep=id sumrnk one g) unitcnt end=eof;
by one;
%do i=1 %to &gmax;
retain var&i 0;
if g=&i then var&i=var&i+(sumrnk-fsumrnk&i)**2;
if fsumrnk&i=. then do;
fsumrnk&i=0;
var&i=0;
end;
%end;
if eof then output;
run;
data posa;
set ranksum end=eof;
retain wcplus 0;
if group=1 then wcplus=wcplus+sumrnk;
if eof then output posa;
run;
**********************************************************
** calculate large sample approximation statistics **
**********************************************************;
data allstat;
merge posa partvar;
by one;
label wcplus = 'Clustered Wilcoxon RankSum Statistic'
expwcp = 'Expected Value Clustered Wilcoxon RankSum Statistic'
varwc = 'Variance of Clustered Wilcoxon RankSum Statistic'
zc = 'Z statistic for Clustered Wilcoxon RankSum Statistic'
pval = 'P-value for Clustered Wilcoxon RankSum Z Statisic';
retain varwc expwcp 0;
%do i=1 %to &gmax;
if ng&i gt 1 then var&i = var&i*(ngx&i*ngy&i)/(ng&i*(ng&i-1));
expwcp = expwcp + fsumrnk&i*ngx&i;
varwc = varwc + var&i;
%end;
zc=(wcplus - expwcp)/sqrt(varwc);
pval=2*(1-probnorm(abs(zc)));
proc print label noobs;
var wcplus expwcp varwc zc pval;
title "Clustered Wilcoxon Rank Sum Output";
run;
%mend;
%*cluswilcox(data=unbalanced.dat);