/********************************************************************************************************* ** PROGRAM: clusignrank.sas ** ** PURPOSE: Signed Rank Test for balanced/unbalanced designs ** ** REQUIREMENT: An ASCII data file with 2 variables per record. The data file does not have to ** be sorted in ID order. ** The 2 variables need to be space delimited and arranged in the following order: ** 1. id ** 2. score ** ** MACRO VARIABLE: ** 1. name of data set. ** ** USAGE: Enter "%clusignrank(data=sample.dat);" where sample.dat has the format described above ** ** REMARKS: 1. The program will determine if the dataset is balanced or unbalanced and will call ** the macro balanced for a balanced design and unbalanced for an unbalanced design. ** 2. Zero values can be included in a dataset but are not used in the analysis. *********************************************************************************************************/ %macro clusignrank(data=); option nosource; ******************************************************** ** read in data ** ** calculate number of observations per cluster ** ********************************************************; filename ppp "&data"; data dtset; infile ppp; input id z ; if z ne 0; proc sort; by id; run; data countg; set dtset; by id; retain g 0; if (last.id le 1) then do; g=g+1; end; if (last.id eq 1) then do; output; g=0; end; run; ************************************************************ **compute if each cluster has same number of observations ** ************************************************************; data count; set countg end=eof; retain balance 0; lagg=lag(g); diff=lagg-g; if _N_=1 then diff=0; if diff ne 0 then balance=balance+1; if eof then output; run; data _NULL_; set count; if balance=0 then call execute('%balanced'); else call execute('%unbalanced'); run; %mend; ******************************************************** ******************************************************** ** balanced design macro ** ******************************************************** ********************************************************; %macro balanced; ******************************************************** ** rank data ** ********************************************************; data dtset; set dtset; absz=abs(z); proc rank data=dtset out=rank; var absz; ranks zrank; run; ********************************************************** ** calculate signed ranks and contribution to ** ** test statistic ** **********************************************************; data sign; set rank end=eof; by id; retain n 0; if z lt 0 then signrank=-1.0*zrank; if z gt 0 then signrank=zrank; n=n+1; output sign; proc sort data=sign; by id; run; *********************************************************** ** calculate sum of signed ranks within each cluster ** ***********************************************************; data ranksum; set sign; by id; retain sumrank 0; if (last.id le 1) then do; sumrank=sumrank+signrank; end; if (last.id eq 1) then do; output; sumrank=0; end; run; data final; set ranksum end=eof; retain m sumsq T_c 0; T_c=T_c+sumrank; sumsq=sumsq+sumrank*sumrank; m=m+1; Var_t = sumsq; W_c=T_c/(sqrt(Var_t)); P_val=2*(1-probnorm(abs(W_c))); if eof then output; label P_val = 'P-value' n = 'Total Number of Observation' m = 'Total Number of Cluster'; proc print label; var T_c Var_t W_c P_val n m; run; %mend balanced; ******************************************************* ******************************************************* ** unbalanced design macro ** ******************************************************* *******************************************************; %macro unbalanced; data dtset; set dtset; absz=abs(z); proc rank data=dtset out=rank; var absz; ranks zrank; run; ******************************************************* **calculate sum of signed ranks for all observations ** *******************************************************; data sign sign2; set rank end=eof; by id; retain sumrank n 0; if z lt 0 then signrank=-1.0*zrank; if z gt 0 then signrank=zrank; sumrank=sumrank+signrank; one=1; n=n+1; output sign; if eof then output sign2; run; proc sort data=sign; by id; run; ******************************************************* ** calculate sum of signed ranks within each cluster ** *******************************************************; data ranksum; set sign; by id; retain g sumidrank 0; if (last.id le 1) then do; sumidrank=sumidrank+signrank; g=g+1; end; if (last.id eq 1) then do; output; sumidrank=0; g=0; end; run; data sum sum2; set ranksum end=eof; by id; retain sumsq sumi sumsqi m 0; sumsq=sumsq+sumidrank*sumidrank; meansumrank=sumidrank/g; sumi=sumi+g; sumsqi=sumsqi+g**2; m=m+1; output sum; if eof then output sum2; run; ******************************************************* ** calculate intraclass correlation between signed ** ** ranks within the same cluster ** *******************************************************; proc glm data=sign outstat=getro noprint; class id; model signrank=id; run; data glmout; set getro end=eof; retain errordf modeldf errorss modelss; if _TYPE_='ERROR' then do; errordf=df; errorss=ss; end; if _TYPE_='SS1' then do; modeldf=df; modelss=ss; end; if eof then output; run; data ro; merge sign2 sum2 end=eof; by one; if _n_=1 then set glmout; retain errordf errorss modeldf modelss; m0=(sumi-(sumsqi/sumi))/(m-1); totalss=errorss+modelss; totaldf=errordf+modeldf; wthms=errorss/errordf; betms=modelss/modeldf; vars=totalss/totaldf; s2b=(betms-wthms)/m0; s2w=wthms; rosglm=s2b/(s2b+s2w); if rosglm lt 0 then rosglm=0; ros=rosglm; roscor=ros*(1+(1-ros**2)/(m-2.5)); if roscor gt 1.0 then roscor=1.0; keep one vars roscor; run; ******************************************************* ** calculate weight (wi) for each cluster ** *******************************************************; data finala finalb; merge sum ro end=eof; by one; retain T_cs sqweightsum 0; wi=g/(vars*(1+(g-1)*roscor)); T_cs=T_cs+meansumrank*wi; sqweightsum=sqweightsum+(wi**2*meansumrank**2); output finala; Var_t=sqweightsum; W_cs=T_cs/(sqrt(Var_t)); P_val=2*(1-probnorm(abs(W_cs))); if eof then output finalb; label P_val = 'P-value' n = 'Total Number of Observations' m = 'Total Number of Clusters'; proc print label data=finalb; var T_cs Var_t W_cs P_val m n; run; %mend unbalanced; %*clusignrank(data=balanced.dat);