WilcxPowerWties_RawData.sas

/***********************************************************

* Program Name: WilcxPowerWties_RawData.sas *

* Program dir : /udd/nhron/macrodevelopment/WilcxRank/ *

* Program Date: Dec., 2007 *

* *

* Note : This program is to estimate the power *

* for grouped continuous random variables *

* with ties *

* Base on paper "Power and Sample Size Estimation for the *

* Wilcoxon Rank Sum Test" by B. Rosner and R. Glynn*

* Programmed by: Rong Chen and Marion McPhee *

***********************************************************/

%macro WilcxPowerWties_RawData (dsn=, /* input data */

mainvar=, /*variable of interest */

refgrp =, /* reference group; 1: reference group; 2: comparison group */

m=, /* sample size of one group */

n=, /* sample size of comparison group */

alpha=, /* significance level */

mu= /* effect size */);

/* if mu is given then use mu to compute theta and theta */

%if &mu ne %then %do;

data _null_;

t=probnorm(&mu/sqrt(2));

call symput("theta", trim(left(t)));

run;

%end;

/* estimate theta_star from the observed data */

%if &dsn=%str() %then %goto out1 ;

/* use observed data to estimate theta_star */

proc sort data=&dsn;

by &mainvar;

run;

data test(keep=&mainvar) ;

set &dsn;;

by &mainvar;

if last.&mainvar;

run;

/* get total # of reference group and comparison group */

proc freq data=&dsn noprint;

tables &refgrp/out=d;

run;

data _null_;

set d;

if &refgrp=1 then do;

call symput("refn", trim(left(count)));

end;

if &refgrp=2 then do;

call symput("campn", trim(left(count)));

end;

run;

%put &refn;

%put &campn;

data trt1 trt2;

set &dsn;

if &refgrp=1 then output trt1;

if &refgrp=2 then output trt2;

run;

proc sort data=trt1;

by &mainvar;

run;

data trt1;

merge test(in=a) trt1(in=b);

by &mainvar;

if b;

run;

proc freq data=trt1 noprint;

tables &mainvar/out=d1(keep=&mainvar

percent);

run;

proc sort data=trt2;

by &mainvar;

run;

data trt2;

merge test(in=a) trt2(in=b);

by &mainvar;

if b;

run;

proc freq data=trt2 noprint;

tables &mainvar/out=d2(keep=&mainvar

percent);

run;

data final;

merge test(in=a) d1(rename=(percent=p1)) d2(rename=(percent=p2));

by &mainvar;

if a;

if p1=. then p1=0;

else if p2=. then p2=0;

run;

data _null_;

set final end=eof;

if eof then do;

call symput('tot', trim(left(_n_)));

end;

run;

%let count=1;

%do %while(&count <=&tot);

data refgrp(keep=p x);

set final;

if _n_=&count;

p=p1;

x=&mainvar;

run;

data temp;

set final end=eof;

if _n_=1 then set refgrp;

retain usum usum&count 0;

/* calculation of u function */

uf=0;

if x>&mainvar then uf=0;

else if x=&mainvar then uf=0.5*(p/100)*(p2/100);

else if x<=&mainvar then uf=1*(p/100)*(p2/100);

usum=usum+uf;

if eof then do;

usum&count=usum;

end;

if eof;

run;

%if &count=1 %then %do;

data finl(keep=usum&count);

set temp;

run;

%end;

%else %do;

data finl;

merge finl temp(keep=usum&count);

run;

%end;

%let count=%eval(&count+1);

%end;

data finl;

set finl;

theta_star=sum(of usum1 - usum&tot);

call symput('theta_star', trim(left(theta_star)));

run;

/* read in data for estimating hx*/

proc transpose data=trt1 out=temp1;

var &mainvar;

run;

data temp;

set temp1;

total =round(sum(of col1- col&refn),1);

hx0=-10.0;

hx%eval(&refn+1)=10.0;

run;

data temp;

set temp;

/* get Hx */

array x{*} col1 - col&refn;

array px{*} px1 - px&refn;

array cdf{*} cdf1- cdf&refn;

array hx{*} hx0 - hx%eval(&refn+1);

do j=1 to dim(px);

px{j}=x{j}/total;

end;

do j=1 to %eval(&refn);

cdf{j}=0;

do l=1 to j;

cdf{j}=cdf{j}+px{l};

end;

jj=j+1;

if cdf{j} >1 then cdf{j}=0.99;

hx{jj}=probit(abs(cdf{j}));

end;

/* use the user defiend mu */

%if &mu^=%str() %then %do;

suma=0;

sumb=0;

do j=1 to &refn;

jj=j+1;

a=hx(jj-1)-sqrt(2)*probit(&theta);

b=hx(jj)-sqrt(2)*probit(&theta);

suma=sum(suma,probnorm(a)*probnorm(hx(jj)));

sumb=sum(sumb,probnorm(b)*probnorm(hx(jj-1)));

end;

theta_star=(1.0-suma+sumb)/2;

theta=&theta;

/* in this situation mu_est=mu */

mu_est=&mu;

%let mu_est=&mu;

%end;

/* calculation of theta_star */

/* use observed data to estimate theta_star */

%if &dsn^=%str() and &mu=%str() %then %do;

theta_star=&theta_star;

/* make it an macro for estimating theta */

theta0=theta_star;

diff=0.1;

count=1;

do while(diff > 0.00001);

suma=0;

sumb=0;

do j=1 to &refn;

jj=j+1;

a=hx(jj-1)-sqrt(2)*probit(theta0);

b=hx(jj)-sqrt(2)*probit(theta0);

suma=suma+probnorm(a)*probnorm(hx(jj));

sumb=sumb+probnorm(b)*probnorm(hx(jj-1));

end;

ftheta0=1.0-suma+sumb-2*theta_star;

/*calculation of dftheta0 */

z=sqrt(2)*probit(theta0);

yt=sqrt(2)*exp(0.5*(probit(theta0)**2));

sumc=0;

sumd=0;

do j=1 to &refn;

jj=j+1;

c=exp((-0.5)*((hx(jj-1)-z)**2));

d=exp((-0.5)*((hx(jj)-z)**2));

sumc=sumc+c*probnorm(hx(jj));

sumd=sumd+d*probnorm(hx(jj-1));

end;

dftheta0=yt*(sumc-sumd);

theta=theta0-(ftheta0/dftheta0);

diff=abs(theta-theta0);

/* iteration */

if diff>0.00001 then do;

theta0=theta;

end;

count+1;

if count>20 then do;

%err_msg(2);

end;

end;

/* get muest from theta */

mu_est=sqrt(2)*probit(theta);

call symput('muest',trim(left(mu_est)));

%end;

/*compute var(Uik_star), cov(uik1_star,uik2_star), cov(ui1k_star, ui2k_star) */

euik_star=0;

vuik_star1=0;

euik1uik2=0;

covuik1uik2=0;

eui1kui2k=0;

covui1kui2k=0;

part1=0;

part2=0;

part3=0;

do j=1 to &refn;

jj=j+1;

euik_star1=probnorm(hx(jj))-probnorm(hx(jj-1));

euik_star2=1-probnorm(hx(jj)-mu_est)+0.25*(probnorm(hx(jj)-mu_est)-probnorm(hx(jj-1)-mu_est));

euik_star=euik_star+euik_star1*euik_star2;

vuik_star1=vuik_star1+(euik_star1)*(1-0.5*(probnorm(hx(jj)-mu_est)+probnorm(hx(jj-1)-mu_est)));

euik1uik2=euik1uik2+euik_star1*(1-0.5*(probnorm(hx(jj)-mu_est)+probnorm(hx(jj-1)-mu_est)))**2;

eui1kui2k=eui1kui2k+(probnorm(hx(jj)-mu_est)-probnorm(hx(jj-1)-mu_est))*((probnorm(hx(jj))+probnorm(hx(jj-1)))/2)**2;

part1=part1+(euik_star1)*(1-probnorm(hx(jj)));

part2=part2+(probnorm(hx(jj))-probnorm(hx(jj-1)))**2;

part3=part3+(euik_star1)*((probnorm(hx(jj))+ probnorm(hx(jj-1)))**2-1)/4;

end;

/* var(Uik) */

vuik_star=euik_star-vuik_star1**2;

/* cov(Uik1,Uik2) */

covuik1uik2=euik1uik2-vuik_star1**2;

/* cov(Ui1k, Ui2k) */

covui1kui2k=eui1kui2k-vuik_star1**2;

/* var1thta_star */

var1_thetastar=(vuik_star+(&n-1)*covuik1uik2+(&m-1)*covui1kui2k)/(&m*&n);

/* var0theta_star */

var0_thetastar=(part1+(part2-1)/4+(&m+&n-2)*part3)/(&m*&n);

power1a=abs(theta_star-0.5)/sqrt(var1_thetastar)-1.96*sqrt(var0_thetastar/var1_thetastar);

power1b=-1.0*abs(theta_star-0.5)/sqrt(var1_thetastar)-1.96*sqrt(var0_thetastar/var1_thetastar);

power=probnorm(power1a)+probnorm(power1b);;

output;

run;

title;

%if &mu^=%str() %then %do;

title "Power Estimation for Sample1 (&m) and Sample2 (&n) with mu=&mu";

proc print noobs;

var theta_star theta var1_thetastar var0_thetastar power;

run;

%end;

%else %if &dsn^=%str() and &mu=%str() %then %do;

title "Power Estimation for Sample1 (&m) and Sample2 (&n) with mu_est=&muest";

proc print noobs;

var theta_star ftheta0 dftheta0 theta count var1_thetastar var0_thetastar power;

run;

%end;

%goto out;

**** Error Message ***;

%out1: %err_msg(1);

%goto out;

%out:

%mend WilcxPowerWties_RawData;

%macro err_msg(num);

%if &num=1 %then %do;

%put ****************ERROR IN SAS MACRO WilcxPowerContinuousNTies ****************;

%put ****************Please define macro variables observed data set name *****************;

%end;

%else %if &num=2 %then %do;

%put ****************ERROR IN SAS MACRO WilcxPowerContinuousNTies****************;

%put ****************program goes through 20 iterations data does not converge ****************;

%end;

%mend err_msg;

1