wsibmean_pearson.sas

%macro wsibmean(dataset=, singlvar=,varname=, multvars=, num=); /********************************************************************* This macro computes the interclass correlation estimators using the weighted sib mean approach introduced by I. Perisic and B. Rosner in their paper: "Comparisons of measures of interclass correlation, the general case of unequal group size". The arguments to the macro are: dataset : name of SAS data set which must contain: (1): the variable named in the "singlvar" argument (2): the variables named in "multvars" argument singlvar: Name of the variable which is observed exactly once for each individual. varname: Name of the food or nutrient. multvars: List of names of the variables containing the repeated measurements (names must be separated only by blanks) num: for output dataset. Assign any number output data set name is as final1, final2 and etc. For inexperienced macro users: (1) example usage when data are in SAS dataset DS: %wsibmean(dataset=DS, singlvar=M,varname=nutrient,multvars= A B C D,num=1); (2) You must include the macro code in the program in which you wish to use it. You do this via an "%include" statement which gives the full path name of the file in which the macro code is stored e.g. %include "/udd/nhron/macroproduction/intercorr/wsibmean.sas". Note: This program uses "/udd/stdig/ROSNER/DISTRIBUTE/wsibmean.sas", added p_obs and 95%CI, output a data set with ICC, P_true, 95%CI, *********************************************************************/ data _glm_in(keep=group_no _D) _for_iml(keep=_K _Q _Dbar); set &dataset(keep= &singlvar &multvars); array multv &multvars; retain group_no 0; group_no+1; _Q = &singlvar; _Dbar=mean(of multv{*}); _K = dim(multv) - nmiss(of multv{*}); do over multv; if multv ^= . then do; _D = multv; output _glm_in; end; end; output _for_iml; run; proc glm data=_glm_in outstat=_glm_out(drop=_name_ f prob) noprint ; class group_no; model _D=group_no; run; proc means data=_glm_in noprint; var _D; output out=formean mean=xbar; run; proc datasets nolist; delete _glm_in; run; proc iml workspace=999; options linesize=80 pagesize=78 nodate nonumber; reset noname nocenter; use _glm_out; read all var {DF SS} where(_TYPE_= "ERROR") into EM; read all var {DF SS} where(_TYPE_= "SS1") into AM; close _glm_out; use formean; read all var {xbar} into xbar; close formean; msa = am[2]/am[1]; mse = em[2]/em[1]; use _for_iml; read all var {_K} into K; read all var {_Q} into Q; read all var {_Dbar} into Dbar; close _for_iml; N = nrow(k); sumk = sum(k); sumk2= sum(k#k); sumk3= sum(k#k#k); n0 = (sumk-sumk2/sumk)/(N-1); /* I'm using p to denote greek letter rho else var names get to long. */ p_DD = (msa-mse)/(msa+(n0-1)*mse); var_p_dd= (2*((1-p_dd)/n0)**2)*( ((1+p_dd*(n0-1))**2)/(sumk-N) + (1-p_dd)*(1+p_dd*(2*n0-1))/(N-1) + (p_dd**2)*(sumk2-2*sumk3/sumk+(sumk2**2)/(sumk)**2)/(N-1)**2 ); kvals = unique(K); ** returns a row vector of unique vals of K; nk = ncol(kvals); N_k = j(1,nk); do l=1 to nk; N_k[l]=sum((k=kvals[l])); end; if sum(N_k=1)>0 then do; kl= kvals[loc(N_k=1)]; print "*** There is only one cluster of each of the", "*** following sizes present:" kl; print "*** There must be two or more clusters of each", "*** size present in order to compute the estimator.", "*** The clusters having the listed sizes cannot be", "*** used and will be ignored."; kvals = kvals[loc(N_k^=1)]; nk = ncol(kvals); N_k = N_k[loc(N_k^=1)]; end; z_true_k = j(1,nk); w_k = j(1,nk); z_obs_k=j(1,nk); w_k_obs=j(1,nk); p_obs_k=j(1,nk); p_true_k=j(1,nk); do j = 1 to nk; i=kvals[j]; loc_i = loc((k=i)); db_i = Dbar[loc_i]; q_i = Q[loc_i]; dbbar_i = db_i[:]; ** average of db_i values; qbar_i = q_i[:]; ** average of q_i values; p_obs_i = t(q_i - qbar_i)*(db_i - dbbar_i)/ sqrt((db_i - dbbar_i)[##]*(q_i - qbar_i)[##]); p_obs_k[j]=p_obs_i; var_pobs= (1-p_obs_i**2)**2/N_k[j]; if i>1 then do; z_obs_k[j]=.5*log((1+p_obs_i)/(1-p_obs_i)); p_true_i= (p_obs_i)*sqrt(1+(1/p_DD-1)/i); p_true_k[j]=p_true_i; delta_i = (p_true_i**2)*( var_pobs/p_obs_i**2 + var_p_dd/((4*p_dd**2)*(1+(i-1)*p_dd)**2) ); w_k[j] = delta_i/(1-p_true_i**2)**2; ** w_k is asym. var of z_true_k; w_k_obs[j]= (1-z_obs_k[j]**2)**2/N_k[j]; end; else do; p_true_i= p_obs_i; p_true_k[j]=p_true_i; ** delta_i = var_pobs; z_obs_k[j]=.5*log((1+p_obs_i)/(1-p_obs_i)); w_k[j]= 1/(N_k[1]-1) + (4 - p_true_i**2)/(2*(N_k[1]-1)**2); w_k_obs[j]= (1-z_obs_k[j]**2)**2/N_k[j]; end; z_true_k[j]= .5*log((1+p_true_i)/(1-p_true_i)); end; z_true = sum(z_true_k*t(n_k-3))/sum(n_k-3); se_zt = sqrt(1/sum(n_k-3)); p_true = (exp(2*z_true)-1)/(exp(2*z_true)+1); /* 95% C.I. for z_true */ zt_95ci = z_true + se_zt*probit(.975)*{-1 1}; /* lower and upper bounds, and full 95% C.I. for p_true */ lbci_pt = (exp(2*zt_95ci[1])-1)/(exp(2*zt_95ci[1])+1); ubci_pt = (exp(2*zt_95ci[2])-1)/(exp(2*zt_95ci[2])+1); ci_ptrue = lbci_pt || ubci_pt ; /* observed p_obs */ z_obs =sum(z_obs_k*t(n_k-3))/sum(n_k-3); se_zobs=sqrt(1/sum(n_k-3)); p_obs =(exp(2*z_obs)-1)/(exp(2*z_obs)+1); /* 95% C.I. for p_obs */ zobs_95ci = z_obs + se_zobs*probit(.975)*{-1 1}; lbci_pobs = (exp(2*zobs_95ci[1])-1)/(exp(2*zobs_95ci[1])+1); ubci_pobs = (exp(2*zobs_95ci[2])-1)/(exp(2*zobs_95ci[2])+1); ci_pobs = lbci_pobs || ubci_pobs ; rhos_all=p_dd||p_obs||lbci_pobs||ubci_pobs||p_true||lbci_pt||ubci_pt||p_obs_k||p_true_k||msa||mse||n0||xbar; tmpcolname={"icc","p_obs","lbci_pobs","ubci_pobs","p_true","lbci_pt","ubci_pt","p_obs1","p_obs2","p_obs3","p_true1","p_true2","p_true3", "msa","mse","n0","mean"}; create final from rhos_all[colname=tmpcolname]; append from rhos_all; *print " ", "Variables: " "&varname, &singlvar , &multvars"; *print "Intraclass correlation =" p_dd; *print "Interclass correlation =" p_true, "95% CI for Interclass : (" ci_ptrue")",; *print "observed Interclass correlation =" p_obs, "95% CI for Interclass : (" ci_pobs")",; quit; data final&num; set final; length food $20.; food="&varname"; run; %mend wsibmean;