WilcxPowerWties_RawData.sas
/***********************************************************
* Program Name: WilcxPowerWties_RawData.sas *
* Program dir : /udd/nhron/macrodevelopment/WilcxRank/ *
* Program Date: Dec., 2007 *
* *
* Note : This program is to estimate the power *
* for grouped continuous random variables *
* with ties *
* Base on paper "Power and Sample Size Estimation for the *
* Wilcoxon Rank Sum Test" by B. Rosner and R. Glynn*
* Programmed by: Rong Chen and Marion McPhee *
***********************************************************/
%macro WilcxPowerWties_RawData (dsn=, /* input data */
mainvar=, /*variable of interest */
refgrp =, /* reference group; 1: reference group; 2: comparison group */
m=, /* sample size of one group */
n=, /* sample size of comparison group */
alpha=, /* significance level */
mu= /* effect size */);
/* if mu is given then use mu to compute theta and theta */
%if &mu ne %then %do;
data _null_;
t=probnorm(&mu/sqrt(2));
call symput("theta", trim(left(t)));
run;
%end;
/* estimate theta_star from the observed data */
%if &dsn=%str() %then %goto out1 ;
/* use observed data to estimate theta_star */
proc sort data=&dsn;
by &mainvar;
run;
data test(keep=&mainvar) ;
set &dsn;;
by &mainvar;
if last.&mainvar;
run;
/* get total # of reference group and comparison group */
proc freq data=&dsn noprint;
tables &refgrp/out=d;
run;
data _null_;
set d;
if &refgrp=1 then do;
call symput("refn", trim(left(count)));
end;
if &refgrp=2 then do;
call symput("campn", trim(left(count)));
end;
run;
%put &refn;
%put &campn;
data trt1 trt2;
set &dsn;
if &refgrp=1 then output trt1;
if &refgrp=2 then output trt2;
run;
proc sort data=trt1;
by &mainvar;
run;
data trt1;
merge test(in=a) trt1(in=b);
by &mainvar;
if b;
run;
proc freq data=trt1 noprint;
tables &mainvar/out=d1(keep=&mainvar
percent);
run;
proc sort data=trt2;
by &mainvar;
run;
data trt2;
merge test(in=a) trt2(in=b);
by &mainvar;
if b;
run;
proc freq data=trt2 noprint;
tables &mainvar/out=d2(keep=&mainvar
percent);
run;
data final;
merge test(in=a) d1(rename=(percent=p1)) d2(rename=(percent=p2));
by &mainvar;
if a;
if p1=. then p1=0;
else if p2=. then p2=0;
run;
data _null_;
set final end=eof;
if eof then do;
call symput('tot', trim(left(_n_)));
end;
run;
%let count=1;
%do %while(&count <=&tot);
data refgrp(keep=p x);
set final;
if _n_=&count;
p=p1;
x=&mainvar;
run;
data temp;
set final end=eof;
if _n_=1 then set refgrp;
retain usum usum&count 0;
/* calculation of u function */
uf=0;
if x>&mainvar then uf=0;
else if x=&mainvar then uf=0.5*(p/100)*(p2/100);
else if x<=&mainvar then uf=1*(p/100)*(p2/100);
usum=usum+uf;
if eof then do;
usum&count=usum;
end;
if eof;
run;
%if &count=1 %then %do;
data finl(keep=usum&count);
set temp;
run;
%end;
%else %do;
data finl;
merge finl temp(keep=usum&count);
run;
%end;
%let count=%eval(&count+1);
%end;
data finl;
set finl;
theta_star=sum(of usum1 - usum&tot);
call symput('theta_star', trim(left(theta_star)));
run;
/* read in data for estimating hx*/
proc transpose data=trt1 out=temp1;
var &mainvar;
run;
data temp;
set temp1;
total =round(sum(of col1- col&refn),1);
hx0=-10.0;
hx%eval(&refn+1)=10.0;
run;
data temp;
set temp;
/* get Hx */
array x{*} col1 - col&refn;
array px{*} px1 - px&refn;
array cdf{*} cdf1- cdf&refn;
array hx{*} hx0 - hx%eval(&refn+1);
do j=1 to dim(px);
px{j}=x{j}/total;
end;
do j=1 to %eval(&refn);
cdf{j}=0;
do l=1 to j;
cdf{j}=cdf{j}+px{l};
end;
jj=j+1;
if cdf{j} >1 then cdf{j}=0.99;
hx{jj}=probit(abs(cdf{j}));
end;
/* use the user defiend mu */
%if &mu^=%str() %then %do;
suma=0;
sumb=0;
do j=1 to &refn;
jj=j+1;
a=hx(jj-1)-sqrt(2)*probit(&theta);
b=hx(jj)-sqrt(2)*probit(&theta);
suma=sum(suma,probnorm(a)*probnorm(hx(jj)));
sumb=sum(sumb,probnorm(b)*probnorm(hx(jj-1)));
end;
theta_star=(1.0-suma+sumb)/2;
theta=θ
/* in this situation mu_est=mu */
mu_est=μ
%let mu_est=μ
%end;
/* calculation of theta_star */
/* use observed data to estimate theta_star */
%if &dsn^=%str() and &mu=%str() %then %do;
theta_star=&theta_star;
/* make it an macro for estimating theta */
theta0=theta_star;
diff=0.1;
count=1;
do while(diff > 0.00001);
suma=0;
sumb=0;
do j=1 to &refn;
jj=j+1;
a=hx(jj-1)-sqrt(2)*probit(theta0);
b=hx(jj)-sqrt(2)*probit(theta0);
suma=suma+probnorm(a)*probnorm(hx(jj));
sumb=sumb+probnorm(b)*probnorm(hx(jj-1));
end;
ftheta0=1.0-suma+sumb-2*theta_star;
/*calculation of dftheta0 */
z=sqrt(2)*probit(theta0);
yt=sqrt(2)*exp(0.5*(probit(theta0)**2));
sumc=0;
sumd=0;
do j=1 to &refn;
jj=j+1;
c=exp((-0.5)*((hx(jj-1)-z)**2));
d=exp((-0.5)*((hx(jj)-z)**2));
sumc=sumc+c*probnorm(hx(jj));
sumd=sumd+d*probnorm(hx(jj-1));
end;
dftheta0=yt*(sumc-sumd);
theta=theta0-(ftheta0/dftheta0);
diff=abs(theta-theta0);
/* iteration */
if diff>0.00001 then do;
theta0=theta;
end;
count+1;
if count>20 then do;
%err_msg(2);
end;
end;
/* get muest from theta */
mu_est=sqrt(2)*probit(theta);
call symput('muest',trim(left(mu_est)));
%end;
/*compute var(Uik_star), cov(uik1_star,uik2_star), cov(ui1k_star, ui2k_star) */
euik_star=0;
vuik_star1=0;
euik1uik2=0;
covuik1uik2=0;
eui1kui2k=0;
covui1kui2k=0;
part1=0;
part2=0;
part3=0;
do j=1 to &refn;
jj=j+1;
euik_star1=probnorm(hx(jj))-probnorm(hx(jj-1));
euik_star2=1-probnorm(hx(jj)-mu_est)+0.25*(probnorm(hx(jj)-mu_est)-probnorm(hx(jj-1)-mu_est));
euik_star=euik_star+euik_star1*euik_star2;
vuik_star1=vuik_star1+(euik_star1)*(1-0.5*(probnorm(hx(jj)-mu_est)+probnorm(hx(jj-1)-mu_est)));
euik1uik2=euik1uik2+euik_star1*(1-0.5*(probnorm(hx(jj)-mu_est)+probnorm(hx(jj-1)-mu_est)))**2;
eui1kui2k=eui1kui2k+(probnorm(hx(jj)-mu_est)-probnorm(hx(jj-1)-mu_est))*((probnorm(hx(jj))+probnorm(hx(jj-1)))/2)**2;
part1=part1+(euik_star1)*(1-probnorm(hx(jj)));
part2=part2+(probnorm(hx(jj))-probnorm(hx(jj-1)))**2;
part3=part3+(euik_star1)*((probnorm(hx(jj))+ probnorm(hx(jj-1)))**2-1)/4;
end;
/* var(Uik) */
vuik_star=euik_star-vuik_star1**2;
/* cov(Uik1,Uik2) */
covuik1uik2=euik1uik2-vuik_star1**2;
/* cov(Ui1k, Ui2k) */
covui1kui2k=eui1kui2k-vuik_star1**2;
/* var1thta_star */
var1_thetastar=(vuik_star+(&n-1)*covuik1uik2+(&m-1)*covui1kui2k)/(&m*&n);
/* var0theta_star */
var0_thetastar=(part1+(part2-1)/4+(&m+&n-2)*part3)/(&m*&n);
power1a=abs(theta_star-0.5)/sqrt(var1_thetastar)-1.96*sqrt(var0_thetastar/var1_thetastar);
power1b=-1.0*abs(theta_star-0.5)/sqrt(var1_thetastar)-1.96*sqrt(var0_thetastar/var1_thetastar);
power=probnorm(power1a)+probnorm(power1b);;
output;
run;
title;
%if &mu^=%str() %then %do;
title "Power Estimation for Sample1 (&m) and Sample2 (&n) with mu=&mu";
proc print noobs;
var theta_star theta var1_thetastar var0_thetastar power;
run;
%end;
%else %if &dsn^=%str() and &mu=%str() %then %do;
title "Power Estimation for Sample1 (&m) and Sample2 (&n) with mu_est=&muest";
proc print noobs;
var theta_star ftheta0 dftheta0 theta count var1_thetastar var0_thetastar power;
run;
%end;
%goto out;
**** Error Message ***;
%out1: %err_msg(1);
%goto out;
%out:
%mend WilcxPowerWties_RawData;
%macro err_msg(num);
%if &num=1 %then %do;
%put ****************ERROR IN SAS MACRO WilcxPowerContinuousNTies ****************;
%put ****************Please define macro variables observed data set name *****************;
%end;
%else %if &num=2 %then %do;
%put ****************ERROR IN SAS MACRO WilcxPowerContinuousNTies****************;
%put ****************program goes through 20 iterations data does not converge ****************;
%end;
%mend err_msg;