cluswilcox_subunit.sas

/***************************************************************************** ** PROGRAM: cluswilcox_subunitspec.sas ** ** PURPOSE: Incorporating clustering effects for the Wilcoxon Rank Sum ** Test where the cluster is the unit of randomization and the ** subunit is the unit of analysis and where group assignment ** is at the subunit level. The methods are for either balanced ** or unbalanced designs. i.e., clusters may have the same or ** different number of subunits. ** ** REQUIREMENT: An ASCII data file with 3 variables per record. The data file ** does not have to be sorted in ID order. ** The 3 variables need to be space delimited and arranged in ** the following order: ** 1. id ** 2. score ** 3. exposed or not : 1 for yes 0 for no. ** ** MACRO VARIABLE: ** 1. name of data set. ** ** USAGE: Enter "%cluswilcox_subunitspec(data=sample.dat);" where ** sample.dat has the format described above. ** ** REMARKS: The program will determine if the dataset is balanced or ** unbalanced and will call the macro balanced for a balanced ** design and unbalanced for an unbalanced design. ********************************************************************************/ %macro cluswilcox_subunitspec(data=); options nosource nocenter nodate nonumber formdlim=' '; options ls=96; filename readin "&data"; data dtset; infile readin; input id score exposed; if score ne .; proc sort; by id; run; data count; set dtset; by id; retain g 0; if (last.id le 1) then do; g=g+1; end; if (last.id eq 1) then do; output; g=0; end; run; data _N_; set count end=eof; by id; retain gmax n 0; if (g>gmax) then gmax=g; n = n + 1; if eof then call symput("gmax",trim(left(gmax))); if eof then call symput("n",trim(left(n))); run; ************************************************************** **determine if each cluster has same number of observations ** **************************************************************; data exec; set count end=eof; retain balance 0; lagg=lag(g); diff=lagg-g; if _N_=1 then diff=0; if diff ne 0 then balance=balance+1; if eof; if balance=0 then call execute('%balanced'); else call execute('%unbalanced'); run; %mend; ******************************************************** ******************************************************** ** balanced design macro ** ******************************************************** ********************************************************; %macro balanced; ******************************************************** ** rank data ** ********************************************************; data dtset; set dtset; proc rank out=rank; var score; ranks rank; run; proc sort data=rank; by id; run; data rank; set rank; by id; retain wc 0; wc = wc + rank*exposed; run; data rank(drop=meanRi) sumgq; set rank; by id; one=1; retain g q Ri meanRi 0; if first.id then do; g=0; q=0; Ri=0; meanRi=0; end; if (last.id le 1) then do; g = g + 1; if exposed = 1 then q = q + 1; Ri=Ri+rank; meanRi=Ri/g; end; output rank; if last.id then output sumgq; run; data getnq; set sumgq end=eof; by id; %do i=0 %to &gmax; retain n&i g&i 0; if g=&i then g&i = g&i + 1; if q=&i then n&i = n&i + 1; %end; one=1; if eof; drop Ri exposed g q id meanRi rank score wc; run; data getnq; set getnq; retain sumqNq sumq2Nq 0; %do j=1 %to &gmax; sumqNq = sumqNq + &j*n&j; %end; %do j=1 %to &gmax; sumq2Nq = sumq2Nq + &j**2*n&j; %end; run; data getsb; merge getnq sumgq end=eof; by one; EWc = ( (g*&n+1)/2 ) * sumqNq; varQ = ( sumq2Nq - sumqNq**2/&n ) /&n; retain sumssb 0; sumssb=sumssb + ( Ri - g*(g*&n+1)/2 )**2 ; ssb = sumssb / &n; if eof; run; data meanRi(keep=id meanRi); set sumgq; run; data getsw; merge rank meanRi sumgq end=eof; by id; retain sumssw ssw 0; sumssw=sumssw + (rank - meanRi)**2; ssw= sumssw / (&n*(g-1)); if eof; run; data getsw; merge getsw getsb getnq; by one; retain varb 0; %do j=1 %to &gmax; varb = varb + &j*(g-&j)*n&j*ssw/g; %end; label wc = 'Subunit-specific Clustered Wilcoxon RankSum Statistic' EWc = 'Expected Value of Subunit-specific Clustered Wilcoxon RankSum Statistic' stwc = 'Standard Deviation of Subunit-specific Clustered Wilcoxon Ranksum Statistic' zstat = 'Z statistic for Subunit-specific Clustered Wilcoxon RankSum Statistic' pvalue = 'P-value for Clustered Wilcoxon RankSum Z Statisic'; esswg = varb/&n; varwc = &n * ( &n/(&n-1)*varQ*ssb/g**2 + esswg ); stwc = sqrt(varwc); zstat = (wc - EWc)/stwc; pvalue =2*(1-probnorm(abs(zstat))); n=&n; proc print data=getsw noobs; title "Total number of clusters"; var n; run; proc print data=getsw noobs; title1 " "; title2 "Numbers of clusters of size 1,2,3..."; title3 "g1=clusters with 1 subunit 1, g2=clusters with 2 subunits..."; var g1-g&gmax; run; proc print data=getsw noobs; title1 " "; title2 "Number of clusters of exposed subunit Size 1,2 3..."; title3 "n0=clusters with 0 exposed subnits,"; title4 "n1=clusters with 1 exposed subunit 1,"; title5 "n2=clusters with 2 exposed subunits..."; var n0-n&gmax; run; proc print label data=getsw noobs; title1 " "; title2 "Subunitspecific clustered Wilcoxon RankSum Test"; title3 "Balanced design"; title4 "==============="; var wc EWc stwc; run; proc print label data=getsw noobs; title1 " "; title2 "Subunitspecific clustered Wilcoxon RankSum Test"; title3 "Balanced design"; title4 "==============="; var zstat pvalue; run; %mend balanced; ******************************************************** ******************************************************** ** unbalanced design macro ** ******************************************************** ********************************************************; %macro unbalanced; data gq sumgq(rename=(g=gsum q=qsum) drop=exposed score); set dtset; by id; one=1; retain g q 0; if first.id then do; g=0; q=0; end; if (last.id le 1) then do; g = g + 1; if exposed = 1 then q = q + 1; end; output gq; if last.id then output sumgq; run; data getng(drop=id gsum qsum); set sumgq end=eof; by id; %do i=1 %to &gmax; retain n&i qplus&i 0; if gsum=&i then do; n&i = n&i + 1; qplus&i = qplus&i + qsum; end; %end; %do i=1 %to &gmax; retain gNg&i deno&i 0; if n&i ne 0 then do; gNg&i = &i*n&i; deno&i = &i*n&i + 1; end; %end; if eof; run; /*numbers calculated in prtnq are for printing out a matrix of exposed subunits by cluster size*/ data prtnq; set sumgq end=eof; by id; label cluster='Cluster Size'; %do i=0 %to &gmax; %do j=0 %to &gmax; if gsum=&i then do; retain n&i.q&j nq&i 0; if qsum=&j then n&i.q&j=n&i.q&j + 1; end; nq&j=n&i.q&j; cluster=&i; keep cluster nq&j; %end; if eof then output; %end; run; data gq; merge sumgq gq; by id; proc sort data=gq; by gsum; run; proc rank out=rank; by gsum; var score; ranks rank; run; data rank; set rank; proc sort data=rank; by gsum; run; proc means noprint; var rank; by gsum; output out=meanrank mean=meanrank; run; data rank; merge rank meanrank; by gsum; run; data varr; merge rank getng end=eof; by one; %do i=1 %to &gmax; retain sumvarr&i varr&i 0; if gsum=&i then sumvarr&i= sumvarr&i + (rank-meanrank)**2; if gNg&i ne 0 then varr&i = sumvarr&i / gNg&i; %end; if eof; run; data rank; merge rank getng; by one; %do i=1 %to &gmax; retain wc&i 0; if gsum=&i then wc&i=wc&i+rank*exposed; %end; %do i=1 %to &gmax; if gsum=&i then pscore=probit(rank/deno&i); %end; proc glm data=rank outstat=intra noprint; class id; model pscore=id; run; data glmout; set intra end=eof; retain modelms errorms modeldf modelss errordf errorss; if _TYPE_='ERROR' then do; errorss=ss; errordf=df; errorms=errorss/errordf; end; if _TYPE_='SS1' then do; modeldf=df; modelss=ss; modelms=modelss/modeldf; end; one=1; if eof; run; data gettheta; set rank end=eof; if eof; %do i=1 %to &gmax; retain uc&i theta&i 0; if wc&i ne 0 then do; uc&i=wc&i - qplus&i*(qplus&i+1)/2; theta&i=uc&i / (qplus&i*(gNg&i - qplus&i) ); end; %end; run; data getng; merge getng glmout; by one; retain sumn sumnsq 0; %do i=1 %to &gmax; sumn = sumn + n&i*&i; sumnsq = sumnsq + n&i*&i**2; %end; g0 = (sumn-sumnsq/sumn) / (&n-1); sigsqa=(modelms-errorms)/g0; rhoh = sigsqa / ( sigsqa+errorms ); if rhoh < 0 then rhoh=0.0; rhoh = rhoh*( 1 + (1-rhoh**2)/(2*(&n-4)) ); pi=3.1415926; rhor = (6/pi) * arsin(rhoh/2); run; data getss; merge getng varr(drop=_TYPE_); by one; %do i=1 %to &gmax; retain ssb&i ssw&i 0; ssb&i = &i*varr&i*( 1+(&i-1)*rhor ); ssw&i = varr&i*(1-rhor); %end; run; data gete; merge sumgq getng end=eof; by one; %do i=1 %to &gmax; retain sume&i e&i 0; if gsum=&i then do; sume&i = sume&i + qsum*(gsum-qsum); e&i = sume&i/n&i; end; %end; if eof; run; data varq; merge sumgq getng end=eof; by one; %do i=1 %to &gmax; retain sumq&i sumqsq&i 0; if gsum=&i then sumq&i = sumq&i+qsum; if n&i ne 0 then sumqsq&i = sumq&i**2 / n&i; %end; %do i=1 %to &gmax; retain sumqq&i varq&i 0; if gsum=&i then sumqq&i = sumqq&i+qsum**2; if n&i ne 0 then varq&i = ( sumqq&i - sumqsq&i) / n&i; %end; if eof; run; data vartheta; merge varq getss gete; by one; %do i=1 %to &gmax; retain vartheta&i 0; if n&i ge 2 then vartheta&i = n&i * ( ( n&i/(n&i - 1) )* varq&i * ssb&i / &i**2 + e&i * ssw&i/&i ) / ( qplus&i*(gNg&i - qplus&i) ) **2; if n&i eq 1 then vartheta&i = n&i * ( e&i * ssw&i/&i ) / ( qplus&i*(gNg&i - qplus&i) ) **2; %end; run; data final; merge gettheta vartheta(drop=_TYPE_); by one; %do i=1 %to &gmax; retain wt&i 0; if vartheta&i ne 0 then wt&i = 1/vartheta&i; %end; retain sumwt sumwtheta 0; %do i=1 %to &gmax; if n&i ne 0 then do; sumwtheta = sumwtheta + wt&i*theta&i; sumwt =sumwt + wt&i; end; %end; label theta = 'Estimated Probability That a Random Exposed Subunit Will Have a Higher Score Than a Random Unexposed Subunit' sdtheta = 'Standard Deviation of Theta' zstat = 'Z statistic for Subunit-specific Clustered Wilcoxon RankSum Statistic' pvalue = 'P-value for Subunit-specific Clustered Wilcoxon RankSum Z Statisic'; theta = sumwtheta / sumwt; sdtheta = sqrt(1/sumwt); zstat = (theta - 1/2) / sdtheta; pvalue = 2*(1-probnorm(abs(zstat))); n=&n; proc print data=final noobs; title "Total number of clusters"; var n; run; proc print data=final noobs; title1 " "; title2 "Number of clusters of size 1,2,3..."; title3 "n1=clusters of size 1,n2=clusters of size 2..."; var n1-n&gmax; run; proc print data=final noobs; title1 " "; title2 "Number of exposed subunits over clusters of size 1,2,3..."; title3 "qplus1=Total # of exposed subunits for cluster of size 1, qplus2=Total # of exposed subunits for cluster of size 2..."; var qplus1-qplus&gmax; run; proc print label data=prtnq noobs; title1 " "; title2 "Number of exposed sbunits by cluster size"; title3 "nq0=Number of cluster with 0 exposed subunit, nq1=Number of cluster 1 exposed subnit..,among clusters of a specific size"; var cluster nq0-nq&gmax; run; proc print label data=final noobs; title1 "Subunitspecific clustered Wilcoxon RankSum Test"; title2 "Unbalanced design"; title3 "================="; var theta sdtheta zstat pvalue; run; %mend unbalanced; %*cluswilcox_subunitspec(data=bal.dat);

%*cluswilcox_subunitspec(data=unbal.dat);