wsibmean_corrected.sas

/********************************************************************* This macro computes the interclass correlation estimators using the weighted sib mean approach introduced by I. Perisic and B. Rosner in their paper: "Comparisons of measures of interclass correlation, the general case of unequal group size". Note: this macro extends weighted sib mean method to ranked data. The arguments to the macro are: dataset : name of SAS data set which must contain: (1): the variable named in the "singlvar" argument (2): the variables named in "multvars" argument singlvar: Name of the variable which is observed exactly once for each individual. varname : Name of the variable for printing. multvars: List of names of the variables containing the repeated measurements (names must be separated only by blanks) num : macro creates output dataset as final&num For inexperienced macro users: (1) example usage when data are in SAS dataset DS: %wsibmean_s(dataset=DS, singlvar=M,varname=food, multvars= A B C D); (2) You must include the macro code in the program in which you wish to use it. You do this via an "%include" statement which gives the full path name of the file in which the macro code is stored e.g. %include "/udd/nhron/macroproduction/intercorr/wsibmean_corrected.sas"; *********************************************************************/ %macro wsibmean_s(dataset=, singlvar=, varname=, multvars=,num=); options ls=100 ps=60; %let nvar=%numargs(&multvars); %put "&nvar:" &nvar; /* create data set _glm_in to get msa and mse */ /* create data set _for_iml to keep FFQ score and # of DRs for each subject*/ data _glm_in(keep=group_no _D _K _Q) _for_iml(keep=_K _Q _Dbar tmp1-tmp&nvar tp1-tp&nvar _sumH2 _sumH _sumHH &singlvar &multvars); set &dataset(keep= &singlvar &multvars); array multv &multvars; array temp2 tmp1-tmp&nvar; array temp3 tp1-tp&nvar; retain group_no 0; group_no+1; do i =1 to dim(multv); temp2{i}=multv{i}*multv{i}; temp3{i}=multv{i}*&singlvar; end; _Q = &singlvar; _Dbar=mean(of multv{*}); _sumH2=sum(of temp2{*}); _sumH =sum(of multv{*}); _sumHH=sum(of temp3{*}); _K = dim(multv) - nmiss(of multv{*}); do i=1 to dim(multv); if multv{i} ^= . then do; _D = multv{i}; output _glm_in; end; end; output _for_iml; run; proc glm data=_glm_in noprint outstat=_glm_out(drop=_name_ f prob); class group_no; model _D=group_no; run; /* excluding the 0s */ proc sort data=_glm_in; by _K _D; run; data excl_d; set _glm_in; by _K _D ; if first._K or last._K; run; *proc print data=excl_d; data excl_d(keep=_K); set excl_d; by _K _D; lagD=lag(_D); if last._K; if (round(lagD,.01)=round(_D,.01)=0.00) or (round(lagD,.01)=round(_D,.01)=-0.00); run; proc sort data=_glm_in; by _K _Q; run; data excl_k; set _glm_in; by _K _Q ; if first._K or last._K; run; *proc print data=excl_k; data excl_k(keep=_K); set excl_k; by _K _Q; lagQ=lag(_Q); if last._K; if (round(lagQ,.01)=round(_Q,.01)=0.00) or (round(lagQ,.01)=round(_Q,.01)=-0.00); run; data excl; merge excl_k excl_d; by _K; run; proc sort; by _K; run; proc sort data=_for_iml; by _K; run; data _for_iml; merge _for_iml(in=a) excl (in=b); by _K; if a and not b; run; proc datasets nolist; delete _glm_in excl_d excl_k excl; run; proc iml workspace=999; options linesize=80 pagesize=78 nodate nonumber; reset noname nocenter; reset nocenter; use _glm_out; read all var {DF SS} where(_TYPE_= "ERROR") into EM; read all var {DF SS} where(_TYPE_= "SS1") into AM; close _glm_out; msa = am[2]/am[1]; /* between group MS */ mse = em[2]/em[1]; /* also within group sigma: sigma_i in the program */ /* K: family size Q: score for FFQ Dbar: average of Q for each subject */ use _for_iml; read all var {_K} into K; read all var {_Q} into Q; read all var {_Dbar} into Dbar; read all var {_sumH} into SumH; read all var {_sumH2} into SumH2; read all var {_sumHH} into SumHH; close _for_iml; /* N: # of groups */ N = nrow(k); sumk = sum(k); /* total # of subjects */ sumk2= sum(k#k); sumk3= sum(k#k#k); n0 = (sumk-sumk2/sumk)/(N-1); /* p is used to denote greek letter rho else var names get to long. */ /* p_DD is ANOVA based estimator; p_DD is intraclass correlation */ p_DD = (msa-mse)/(msa+(n0-1)*mse); var_p_dd= (2*((1-p_dd)/n0)**2)*( ((1+p_dd*(n0-1))**2)/(sumk-N) + (1-p_dd)*(1+p_dd*(2*n0-1))/(N-1) + (p_dd**2)*(sumk2-2*sumk3/sumk+(sumk2**2)/(sumk)**2)/(N-1)**2 ); /* print 'sumk= ' sumk, 'N = ' N ; */ /* simplified variance of p_DD */ part1 =((2*(sumk-1)*(1-p_dd)**2)*(1+(n0-1)*p_dd)**2); part2 =n0**2*(sumk-N)*(N-1); var_p_dd1 =part1/part2; kvals = unique(K); ** returns a row vector of unique vals of K; /* count # of subjects for each DR size (family size) */ /* nk is # of different sizes */ nk = ncol(kvals); N_k = j(1,nk); do l=1 to nk; N_k[l]=sum((k=kvals[l])); end; if sum(N_k=1)>0 then do; /* if there is only 1 subject in certain size group, that */ /* group will be ignored */ kl= kvals[loc(N_k=1)]; print "*** There is only one cluster of each of the", "*** following sizes present:" kl; print "*** There must be two or more clusters of each", "*** size present in order to compute the estimator.", "*** The clusters having the listed sizes cannot be", "*** used and will be ignored."; kvals = kvals[loc(N_k^=1)]; nk = ncol(kvals); N_k = N_k[loc(N_k^=1)]; end; z_true_k = j(1,nk); z_th_k = j(1,nk); rho_th_k = j(1,nk); var_zth_k = j(1,nk); z1_th_k = j(1,nk); z2_th_k = j(1,nk); z_obs = j(1,nk); cov = j(1,1,0); cntse = j(1,1,0); cnt1 = j(1,1,0); cnt2 = j(1,1,0); bias = j(1,1,0); p_orig = j(1,1,0); /* sigma_i sigma_ai */ sigma_i=mse; sigma_ai=(msa-mse)/n0; rih =sigma_ai/(sigma_ai+sigma_i); lamdah =sigma_i/sigma_ai; /* print "sigma_i= " sigma_i, "sigma_ai=" sigma_ai, "p_dd =" p_dd, "var_p_dd=" var_p_dd ; */ if sigma_i>=0 & sigma_ai>=0 then do; do j = 1 to nk; i=kvals[j]; loc_i = loc((k=i)); k_i =k[loc_i]; /* samples with certain group size */ db_i = Dbar[loc_i]; q_i = Q[loc_i]; sumh2_i =SumH2[loc_i]; sumhh_i =SumHH[loc_i]; sumh_i =SumH[loc_i]; dbbar_i= db_i[:]; ** average of db_i values; qbar_i = q_i[:]; ** average of q_i values; /* estimate correlation of FFQ score with average DR scores */ p_obs_i= t(q_i - qbar_i)*(db_i - dbbar_i)/ sqrt((db_i - dbbar_i)[##]*(q_i - qbar_i)[##]); var_pobs=(1-p_obs_i**2)**2/N_k[j]; p_oh_i =sum(sumhh_i/i)/sqrt(sum((sumh_i/i)##2)*(sum(q_i##2))); /* variance of log(p_oh_i) */ var_pohi=(1-p_oh_i**2)**2/N_k[j]; var_ln_pohi=(1/(p_oh_i**2))*((1-p_oh_i**2)**2/N_k[j]); /* variance of p_th_i */ var_ln_pthi =var_ln_pohi+(1/4*(1/(1-1/i+1/(i*p_dd))**2))*1/(i**2)*(1/p_dd**4)*var_p_dd; z_obs[j]=.5*log((1+p_obs_i)/(1-p_obs_i)); /* rho_th */ p_th_i = (p_oh_i)*sqrt(1+sigma_i/(i*sigma_ai)); if p_th_i >=0.99 then do; p_orig =p_th_i; p_th_i=0.99; *j=nk; pgt1=1; end; else if p_th_i <= -0.99 then do; p_orig =p_th_i; p_th_i=-0.99; *j=nk; pgt1=1; end; /* varinace of z_th_i */ var_zth_i=(p_th_i)**2*var_ln_pthi*(1/(1-p_th_i**2))**2; p_true_i = (p_obs_i)*sqrt(1+(1/p_dd-1)/i); delta_i = (p_th_i**2)*(var_pohi/p_oh_i**2+var_p_dd/((4*p_dd**2)*(1+(i-1)*p_dd)**2)); /* rho_th_k */ rho_th_k[j]=p_th_i; z_th_k[j]=0.5*log((1+p_th_i)/(1-p_th_i)); var_zth_k[j]=var_zth_i; z1_th_k[j]=z_th_k[j]-1.96*sqrt(var_zth_k[j]); z2_th_k[j]=z_th_k[j]+1.96*sqrt(var_zth_k[j]); *print "j =" j, * "N_k[j] =" (N_k[j]), * "p_orig =" p_orig, * "p_th_i =" p_th_i, * "p_oh_i =" p_oh_i, * "p_obs_i =" p_obs_i, * "z_th =" (z_th_k[j]), * "var_zth_i=" var_zth_i, * "z_obs =" (z_obs[j]); end; end; else do; print "***sigma_i or sigma_ai <0"; p_th_i=1.00; end; /************************************/ /* try to calculate zth1,2 */ /************************************/ if p_th_i^=1.00 then do; pi=3.1415926; /* caculation of observed z */ z_obs =sum(z_obs*t(n_k-3))/sum(n_k-3); rho_obs =(exp(2*z_obs)-1)/(exp(2*z_obs)+1); var_z_obs=1/(sum(n_k-3)); se_z_obs =sqrt(var_z_obs); z1_obs =z_obs-1.96*se_z_obs; z2_obs =z_obs+1.96*se_z_obs; rho1_obs =(exp(2*z1_obs)-1)/(exp(2*z1_obs)+1); rho2_obs =(exp(2*z2_obs)-1)/(exp(2*z2_obs)+1); p_obs =(exp(2*z_obs)-1)/(exp(2*z_obs)+1); /* compute rhos_obs */ rhos_obs =6/pi*arsin(p_obs/2); rhos1_obs =6/pi*arsin(rho1_obs/2); rhos2_obs =6/pi*arsin(rho2_obs/2); ci_obs =rhos1_obs || rhos2_obs; /* calculation in JC's program */ z_th=sum(z_th_k*t(n_k-3))/sum(n_k-3); var_zth=((n_k-3)##2*t(var_zth_k))/(sum(n_k-3)##2); se_zth =sqrt(var_zth); z1_th=z_th-1.96*se_zth; z2_th=z_th+1.96*se_zth; rho_th=(exp(2*z_th)-1)/(exp(2*z_th)+1); rho1_th=(exp(2*z1_th)-1)/(exp(2*z1_th)+1); rho2_th=(exp(2*z2_th)-1)/(exp(2*z2_th)+1); rho_ts=6/pi*arsin(rho_th/2); rho1_ts=6/pi*arsin(rho1_th/2); rho2_ts=6/pi*arsin(rho2_th/2); icc_spear=round(6/pi*arsin(p_dd/2), 0.01); ci_th =rho1_ts || rho2_ts; /* print "z_th =" z_th, "var_zth=" var_zth, "z1_th =" z1_th, "z2_th =" z2_th, "rho_th =" rho_th, "rho1_th=" rho1_th, "rho2_th=" rho2_th; ; print " ", "Variable " "&varname"; print "Obseved (95% CI) =" rhos_obs "(" ci_obs ")",; print "Corrected (95% CI) =" rho_ts "(" ci_th ")",; print "ICC =" icc_spear; */ rhos_all=rho_th||rho_ts||rho1_ts||rho2_ts||se_zth||z_th||rhos_obs||rhos1_obs||rhos2_obs||icc_spear; tmpcolname={"rho_th","rho_ts","rho1_ts","rho2_ts","se_zth","z_th","rhos_obs","rhos1_obs","rhos2_obs","icc_spear"}; create final from rhos_all[colname=tmpcolname]; append from rhos_all; end; else do; dummy=j(1,8,.); rhos_all=rho_th_k||dummy; end; quit; data final&num; set final; length food $20.; food="&varname"; run; %mend wsibmean_s; %macro numargs(arg); %let n=1; %do %until (%scan(&arg,%eval(&n),%str( ))=%str()); %let n=%eval(&n+1); %end; %eval(&n-1) %mend numargs;