CluswilcoxPower_sas

/************************************************************************* * Program Name: CluswilcxPowerNtiesSubUnit.sas * * Program dir : /proj/stross/stros0c/cluster_wilcoxRankSumTest/paper2010* * Program Date: June, 2010 * * * * Note : This program is to estimate the power * * for the clustered Wilcoxon test * * (Both balanced and unbalanced design) * * * * Base on paper "Power and Sample Size Estimation for the * * Clustered Wilcoxon Rank Sum Test" by B. Rosner and R. Glynn * *************************************************************************/ options formdlim=']'; %macro CluswilcoxPower( ipmlst =, /*input sample size of control group in new study */ ipnlst =, /*input sample size of treatment group in new study */ est_mu =y, /*y: use input data to estimate theta to compute power */ /*n: use input mu to compute power */ /*combine with mu, if est_mu=n, mu should not be empty */ est_rho =y, /* y: use input data to estimate rho */ /* n: use input rho */ /* combine with rho, if est_rho=n, rho should not be empty */ dsn =, /* enter data set name for estimating rho_hat */ grp =, /* variable name for groups; assuming: */ /* value 1: control group; 2: treatment group */ varn =, /* variable name for measurement */ alpha =0.05, /*significance level 0.05 by default */ mu = , /* effect size */ rho = , /* rho for computing power */ igmin =, /* input min size of a cluster */ /* required */ igmax = /* input max size of a cluster */ /* required */ ); ********************************************* *If there is no input data then go to * *power calculation part directly * *********************************************; %if &dsn=%str() %then %do; %if &mu^=%str() and &rho^=%str() %then %do; %if &igmin^=%str() and &igmax^=%str() %then %do; %let ngmin=&igmin; %let ngmax=&igmax; data new; %let count=1; %do i=&ngmin %to &ngmax; %let ipn&i=%scan(&ipnlst,&count,%str( )); %let ipm&i=%scan(&ipmlst,&count,%str( )); ipn&i=&&ipn&i; ipm&i=&&ipm&i; %let count=%eval(&count+1); %end; run; %goto cmptpower; %end; %else %goto out4; %end; %else %goto out3; %end; ************************************************************ * determine if pilot data has 2 groups * ***********************************************************; proc sql noprint; select count(distinct &grp) into :ngrp from &dsn; quit; %if &ngrp^=2 %then %goto out2; proc sort data=&dsn; by id; run; data person; set &dsn;; by id; retain g 0; if first.id then g=0; g=g+1; if last.id; run; proc sql noprint; select max(g), min(g) into :gmax, :gmin from person; quit; %if &igmin^=%str() and &igmax^=%str() %then %do; %let ngmin=&igmin; %let ngmax=&igmax; %let diff=%eval(&igmax-&igmin+1); %end; %else %do; %goto out6; %end; %numvar(&ipnlst); %if &diff^=&nvar %then %goto out5; ******************************************** * check the input data for new study * ********************************************; data new; %let count=1; %do i=&ngmin %to &ngmax; %let ipn&i=%scan(&ipnlst,&count,%str( )); %let ipm&i=%scan(&ipmlst,&count,%str( )); ipn&i=&&ipn&i; ipm&i=&&ipm&i; %let count=%eval(&count+1); %end; run; /* read in input m and n for each subunit size */ proc sort data=person; by &grp id; run; data getmn; set person end=eof;; by &grp id; %do i=&gmin %to &gmax; retain m&i n&i 0; %end; %do i=&gmin %to &gmax; if &grp=1 and g=&i then m&i=m&i+1; else if &grp=2 and g=&i then n&i=n&i+1; if eof then do; call symput("m&i", trim(left(m&i))); call symput("n&i", trim(left(n&i))); end; %end; one=1; if eof; run; proc sort data=person out=person(keep=id g); by id; run; data temp; merge &dsn(in=a) person; by id; if a; run; proc sort data=temp; by g; run; proc rank data=temp out=rank; by g; var &varn; ranks rscore; run; proc sort data=rank; by id; run; /* estimate of theta_i */ %if %upcase(&est_mu)=Y %then %do; data gettheta_i sumw; set rank end=eof; by id; %let keep=; %do i=&gmin %to &gmax; retain u&i r&i clust&i 0; if &grp=1 and g=&i then u&i=u&i+rscore; if first.id then r&i=0; if g=&i then do; if last.id then clust&i=clust&i+1; r&i=r&i+rscore; end; %let keep=&keep r&i u&i clust&i; %end; one=1; if last.id then output sumw; keep &keep one g; if eof then output gettheta_i; run; proc sort data=sumw; by g; run; /* get wi */ data getwi; set sumw end=eof; by g; %do i=&gmin %to &gmax; %let Ng&i=%eval(&&m&i+&&n&i); wi_part1=1/(&&Ng&i*(&&Ng&i-1)*(&i**4*&&m&i*&&n&i)); retain wi_part2 0; if first.g then wi_part2=0; %do j=1 %to &&Ng&i; if clust&i=&j and r&i^=0 then do; x&i=(r&i-&i*(1+&i*(&&Ng&i))/2)**2; wi_part2=wi_part2+x&i; end; %end; if last.g and g=&i then invw&i=wi_part1*wi_part2; %end; if last.g; run; data getwi; set getwi end=eof; one=1; %do i=&gmin %to &gmax; lginvw&i=lag(invw&i); if invw&i=. then invw&i=lginvw&i; %end; if eof; run; data gettheta_i; merge gettheta_i getwi getmn; retain sumw sumwtheta 0; %do i=&gmin %to &gmax; deno&i=n&i*&i*m&i*&i; theta&i=(n&i*m&i*&i**2-(u&i-m&i*&i*(m&i*&i+1)/2))/deno&i; w&i=1/invw&i; sumw=sumw+w&i; sumwtheta=sumwtheta+(w&i*theta&i); %end; theta_combine=sumwtheta/sumw; call symput('theta_est', trim(left(theta_combine))); run; %end; %else %do; %if &mu=%str() %then %goto out1; %end; /* estimation of rho_x and rho_y */ %if %upcase(&est_rho)=Y %then %do; data subgrp; set temp; if g>1; run; proc sort data=subgrp; by &grp; run; proc rank data=subgrp out=rank; by &grp; var &varn; ranks rscore; run; %est_rho(grpv=1,sunit=m,sub=x); %est_rho(grpv=2,sunit=n,sub=y); data overall; merge getrhox getrhoy; z_overall=((w_x*zx)+(w_y*zy))/(w_x+w_y); rho_overall=(exp(2*z_overall)-1)/(exp(2*z_overall)+1); call symput('rho_est', trim(left(rho_overall))); run; /* get var0theta */ data getvar0theta; set overall(keep=rho_overall); pi=3.14159; totw0g=0; %do i=&gmin %to &gmax; iw0g&i.1=2*(&i-1)*arsin((1+rho_overall)/2)/(2*pi); iw0g&i.2=(&i-1)**2*arsin(rho_overall)/(2*pi); iw0g&i.3=(&&ipn&i+&&ipm&i-2)*&i*(1/12+(&i-1)*arsin(rho_overall/2)/(2*pi)); iw0g&i=(1/4+iw0g&i.1+iw0g&i.2+iw0g&i.3)/(&&ipn&i*&&ipm&i*&i**2); w0g&i=1/iw0g&i; w0g&i.sq=w0g&i**2; totw0g=totw0g+w0g&i; drop iw0g&i.1 iw0g&i.2 iw0g&i.3 iw0g&i;; %end; var0theta=1/totw0g; run; %end; %else %do; %if &rho=%str() %then %goto out7; %end; /* get var1theta */ /* use pilot data to estimate theta (theta_combine) */ /* and compute var1theta */ %if %upcase(&est_mu)=Y & %upcase(&est_rho)=Y %then %do; data getvar1theta; merge overall(keep=rho_overall) gettheta_i(keep=theta_combine) getvar0theta; totwg=0; c1=probit(theta_combine); c2=probbnrm(c1,c1,0.5); %do i=&gmin %to &gmax; iw1g&i.1=2*(&i-1)*(probbnrm(c1,c1,(1+rho_overall)/2)-theta_combine**2); iw1g&i.2=(&i-1)**2*(probbnrm(c1,c1,rho_overall)-theta_combine**2); iw1g&i.3=&i*(probbnrm(c1,c1,0.5)-theta_combine**2+(&i-1)*(probbnrm(c1,c1,rho_overall/2)-theta_combine**2)); iw1g&i=(theta_combine*(1-theta_combine)+iw1g&i.1+iw1g&i.2+(&&ipm&i+&&ipn&i-2)*iw1g&i.3)/(&&ipn&i*&&ipm&i*&i**2); w1g&i=1/iw1g&i; totwg=totwg+(w0g&i.sq/w1g&i); %end; var1theta=totwg/(totw0g**2); run; %end; %else %if %upcase(&est_mu)=N and %upcase(&est_rho)=Y %then %do; data getvar1theta; merge overall(keep=rho_overall) getvar0theta; mu = μ theta_combine = probnorm(&mu/sqrt(2)); totwg=0; c1=probit(theta_combine); c2=probbnrm(c1,c1,0.5); %do i=&gmin %to &gmax; iw1g&i.1=2*(&i-1)*(probbnrm(c1,c1,(1+rho_overall)/2)-theta_combine**2); iw1g&i.2=(&i-1)**2*(probbnrm(c1,c1,rho_overall)-theta_combine**2); iw1g&i.3=&i*(probbnrm(c1,c1,0.5)-theta_combine**2+(&i-1)*(probbnrm(c1,c1,rho_overall/2)-theta_combine**2)); iw1g&i=(theta_combine*(1-theta_combine)+iw1g&i.1+iw1g&i.2+(&&ipm&i+&&ipn&i-2)*iw1g&i.3)/(&&ipn&i*&&ipm&i*&i**2); w1g&i=1/iw1g&i; totwg=totwg+(w0g&i.sq/w1g&i); %end; var1theta=totwg/(totw0g**2); run; %end; %goto power; %cmptpower: %gettheta; %goto power; %power: data power; set getvar1theta; z=probit(1-&alpha/2); sd0=sqrt(var0theta); sd1=sqrt(var1theta); power1=probnorm(abs(theta_combine-0.5)/sd1-z*(sd0/sd1)); power2=probnorm(-1*abs(theta_combine-0.5)/sd1-z*(sd0/sd1)); power =power1+power2; run; /* make a output data */ data power; merge power new; run; /* output the results */ %let tngmin=%trim(%left(&ngmin)); %let tngmax=%trim(%left(&ngmax)); %if &dsn^=%str() %then %do; %let tgmin=%trim(%left(&gmin)); %let tgmax=%trim(%left(&gmax)); title1 "Previous study "; title2 "Number of clusters of size &tgmin - &tgmax..."; title3 "m&tgmin=clusters of size &tgmin (control group)..."; proc print data=getmn noobs; var m&tgmin-m&tgmax; run; title1 "Previous study "; title2 "Number of clusters of size &tgmin - &tgmax..."; title3 "n&tgmin=clusters of size &tgmin (active treatment group)..."; proc print data=getmn noobs; var n&tgmin-n&tgmax; run; title1 "Estimations from Previous study "; proc print data=getvar1theta noobs; %if %upcase(&est_mu)=Y and %upcase(&est_rho)=Y %then %do; var theta_combine rho_overall; %end; %else %if %upcase(&est_mu)=Y and %upcase(&est_rho)=N %then %do;; var theta_combine ; %end; %else %if %upcase(&est_mu)=N and %upcase(&est_rho)=Y %then %do; var rho_overall; %end; run; %end; title1 "New study "; title2 "Number of clusters of size 1,2,3..."; title3 "ipm1=clusters of size 1 (control group), ipm2=clusters of size 2 (control group)..."; title4 "ipn1=clusters of size 1 (active treatment group)..."; proc print data=new noobs; var ipm&tngmin-ipm&tngmax ipn&tngmin-ipn&tngmax; run; title1 "New study"; proc print data=power; var w0g&tngmin-w0g&tngmax var0theta w1g&tngmin-w1g&tngmax var1theta power; run; %goto out; **** Error Message ***; %out1: %err_msg(1); %goto out; %out2: %err_msg(2); %goto out; %out3: %err_msg(3); %goto out; %out4: %err_msg(4); %goto out; %out5: %err_msg(5); %goto out; %out6: %err_msg(6); %goto out; %out7: %err_msg(7); %goto out; %out: %mend CluswilcoxPower; %macro err_msg(num); %if &num=1 %then %do; %put ****************ERROR IN SAS MACRO CluswilcxPower *****************; %put ****************Please set value for macro variable mu *****************; %end; %else %if &num=2 %then %do; %put ****************ERROR IN SAS MACRO CluswilcxPower *****************; %put ****************Please check # of groups in input dataset &dsn *****************; %put ****************The macro requires 2 groups *****************; %end; %else %if &num=3 %then %do; %put ****************ERROR IN SAS MACRO CluswilcxPower *****************; %put ****************There is no input data *****************; %put ****************Please specify mu and rho *****************; %end; %else %if &num=4 %then %do; %put ****************ERROR IN SAS MACRO CluswilcxPower *****************; %put ****************please specify igmin and igmax *****************; %end; %else %if &num=5 %then %do; %put ****************ERROR IN SAS MACRO CluswilcxPower *****************; %put ****************please check input variables ipmlst,ipnlst,igmin,igmax *****************; %put ****************# of cluster size does not match *****************; %end; %else %if &num=6 %then %do; %put ****************ERROR IN SAS MACRO CluswilcxPower *****************; %put ****************please input values for igmin,igmax *****************; %end; %else %if &num=7 %then %do; %put ****************ERROR IN SAS MACRO CluswilcxPower *****************; %put ****************please input values for rho when est_rho=n *****************; %end; %mend err_msg; %macro numvar(list); %global nvar; %let k=1; %let nvar=1; %let cvar = %scan(&list, &k); %do %while(&cvar ne); %let nvar=%eval(&nvar+1); %let k=%eval(&k+1); %let cvar=%scan(&list, &k); %end; %let nvar=%eval(&nvar-1); %mend numvar; %macro gettheta; data getvar0theta; pi=3.14159; totw0g=0; %if &rho^=%str() %then %do; rho_overall=ρ %end; %else %do; rho_overall=&rho_est; %end; %do i=&ngmin %to &ngmax; iw0g&i.1=2*(&i-1)*arsin((1+rho_overall)/2)/(2*pi); iw0g&i.2=(&i-1)**2*arsin(rho_overall)/(2*pi); iw0g&i.3=(&&ipn&i+&&ipm&i-2)*&i*(1/12+(&i-1)*arsin(rho_overall/2)/(2*pi)); iw0g&i=(1/4+iw0g&i.1+iw0g&i.2+iw0g&i.3)/(&&ipn&i*&&ipm&i*&i**2); w0g&i=1/iw0g&i; w0g&i.sq=w0g&i**2; totw0g=totw0g+w0g&i; drop iw0g&i.1 iw0g&i.2 iw0g&i.3 iw0g&i;; %end; var0theta=1/totw0g; run; /* get var1theta */ /* use mu and rho */ /* and compute var1theta */ data getvar1theta; set getvar0theta; %if &mu ^= %str() %then %do; mu = μ theta_combine = probnorm(&mu/sqrt(2)); %end; %else %do; theta_combine=&theta_est; %end; totwg=0; c1=probit(theta_combine); c2=probbnrm(c1,c1,0.5); %do i=&ngmin %to &ngmax; iw1g&i.1=2*(&i-1)*(probbnrm(c1,c1,(1+rho_overall)/2)-theta_combine**2); iw1g&i.2=(&i-1)**2*(probbnrm(c1,c1,rho_overall)-theta_combine**2); iw1g&i.3=&i*(probbnrm(c1,c1,0.5)-theta_combine**2+(&i-1)*(probbnrm(c1,c1,rho_overall/2)-theta_combine**2)); iw1g&i=(theta_combine*(1-theta_combine)+iw1g&i.1+iw1g&i.2+(&&ipm&i+&&ipn&i-2)*iw1g&i.3)/(&&ipn&i*&&ipm&i*&i**2); w1g&i=1/iw1g&i; totwg=totwg+(w0g&i.sq/w1g&i); %end; var1theta=totwg/(totw0g**2); run; %mend gettheta; %macro est_rho(grpv,sunit,sub); data gettot; retain totobs 0; %do i=2 %to &gmax; totobs=totobs+&&&sunit&i*&i; %end; run; data tempn; if _n_=1 then set gettot; set rank; if &grp=&grpv; if &grp=&grpv then pscore=probit(rscore/(totobs+1)); run; proc glm data=tempn noprint outstat=glm_out(keep=_TYPE_ DF SS); class id; model pscore=id; run; data glmout; set glm_out end=eof; retain modelms errms modeldf errdf modelss errss; if _TYPE_='ERROR' then do; errss=ss; errdf=df; errms=errss/errdf; end; if _TYPE_='SS1' then do; modelss=ss; modeldf=df; modelms=modelss/modeldf; end; if eof; run; data getn0; nobs =0; nobs2=0; tot =0; %do i=2 %to &gmax; nobs=nobs+&&&sunit&i*&i; nobs2=nobs2+(&&&sunit&i)*(&i**2); tot=tot+&&&sunit&i; %end; n0=(nobs-nobs2/nobs)/(tot-1); run; data getrho⊂ set glmout; if _n_=1 then set getn0; sigsqa&sub=(modelms-errms)/n0; rho_hat&sub=sigsqa&sub/(sigsqa&sub+errms); var_rho&sub=(2*(1-rho_hat&sub)**2*(1+(n0-1)*rho_hat&sub)**2)/(n0*(n0-1)*(tot-1)); z&sub =0.5*log((1+rho_hat&sub)/(1-rho_hat&sub)); var_z&sub=(1/(1-rho_hat&sub**2))**2*var_rho⊂ w_&sub=1/var_z⊂ one=1; run; %mend est_rho;