sUpdate.sas
/*************************************************************************************
*
* Macro sUpdate - Measurement Error Correction Methods Based on the Simple Update method of Intake
* for Nutritional Data
* This macro calculates corrected OR using simple update methods.
*
* This macro calls
* %blinplus92.
* %blinplus92 is a modified version of %blinplus by Weiliang
* Location:
*
* Program: sUpdate.sas
* Created by: Weiliang Qiu (stwxq) on 5/21/07
* Modified by: Rong Chen (nhron) on 07/06/07
***************************************************************************************/
%global c1 ndat c2 ctot errblin trueblin c1 yr cmblist;
%macro sUpdate(
overall=, /* one data set that includes all the diet info. */
/* that user needs for a particular analysis */
/* for example if user wants to analyze diet info. */
/* from 1980 - 1998, this data set must has the diet info. */
/* from 1980 -1998 */
eventim =,
diagdate=diagdate, /* variable name for dxdt, by default, it is diagdate */
maindat=, /* a list of data sets' names. To use this macro, user needs */
/* create a data set for each year that the user is interested in */
/* for example, main80 is for 1980 diet info. and etc. */
valid80=, /*validation data set */
valid86=, /* if the year of FFQ is 86 and later, use 86 validat data */
period=, /* total # of years of incidence. For example if 1980-2002 is the time */
/* span of interest, then period=22 */
yr =, /*list of questionnare years that correspond to maindat */
wts =, /*data set for weights */
event =, /* event */
cmblist=, /* prefix of nutrien variables and other covariates */
err_term=, /*surrogate values of variables measured with error (Z) */
true_term=, /*goldstandard values of variables measured with error (X) */
u_term =, /* variables measured without error (U) */
outUn=, /* output data with uncorrected results */
outC=, /* output data with corrected results */
long_prt=F, /* T: print RR for each year; F: no; default is F */
short_prt=T /* T: print combined results; F: no; default is T */
);
options linesize=100 pagesize=60 formdlim='_';
/* count # of main data set */
%let ndat=%numargs(&maindat);
%let c1=%numargs(&err_term);
%let c2=%numargs(&u_term);
%let ctot=%eval(&c1+&c2);
%do nn=1 %to &ndat;
%let dsn=%scan(&maindat, %eval(&nn), %str( ));
%let year=%scan(&yr, %eval(&nn), %str( ));
%if &nn=1 %then %do;
%let yr1=&year;
%end;
%if &year =80 or &year=82 or &year=84 %then %do;
%let validdat=&valid80;
%end;
%else %if &year >84 %then %do;
%let validdat=&valid86;
%end;
%if &year=80 %then %do;
%sUpdate80(maindat=&dsn, validdat=&validdat,
yr=80, wts=&wts, event=&event, diagdate=&diagdate,
err_term=&err_term,
true_term=&true_term,
u_term=&u_term,
t=&nn,
pd=&period,
outUn=outUn&nn, outC=outc&nn);
data resUn;
set outUn&nn;
yr=&year;
run;
data resC;
set outc&nn;
yr=&year;
run;
/* prepare combine data set for future use */
/* if first data set is from 1980 */
data combndat;
set &dsn;
run;
%end;
%else %do;
data &dsn;
set &dsn;
eventtime=&diagdate-(&yr1*12+6);
if &diagdate=. then eventtime=(%eval(&yr1+&period)*12+6)-(&yr1*12+6);
run;
title "Cox proportional hazards for surrogate variables (&year)";
proc phreg data=&dsn covout outest=bvarbm&year;
model eventtime*&event(0) = &err_term &u_term;
ods output ParameterEstimates=lgtEst&year;
run;
title "get covariance matrix for 19&year main study data";
proc corr data=&dsn COV outp=covm&year noprob nosimple noprint;
var &err_term &u_term;
run;
title "get covariance matrix for &year validation study data";
proc corr data=&validdat COV outp=covv noprob nosimple noprint;
var &true_term &err_term &u_term;
run;
proc iml;
use lgtEst&year;
read all into lgtEst;
close lgtEst&year;
use covm&year;
read all into covmMat;
close covm&year;
use covv;
read all into covvMat;
close covv;
/* get row name */
use &wts;
read all var _char_ into NAM;
close &wts;
/* get beta_c */
row =nrow(lgtest);
betac=lgtest[1:row,2];
betac=t(betac);
betacMat=lgtest[,{2 3 5}];
*print betacMat [rowname=NAM colname={'coef', 'se', 'pval'} format=best8.5];
/* get matrix B */
/* [Cov(Zt) Cov(Zt, Ut)]
[Cov(Ut, Zt) Cov(Ut) ]
*/
brow=row;
B=covmMat[1:brow, 1:brow];
/* get matrix A = [Cov(Xt, Zt), Cov(Xt, Ut)] */
lev=loc({&true_term}^={&err_term}); /*obtain a row vector of the true variables */
/* obtain # of true variables: k1 */
/* obtain # of error variables: k2 */
k1=ncol(lev);
tot=ncol(covvMat);
A=covvMat[1:k1, k1+1:tot];
/* get matrix Lambda_c */
iB=inv(B);
Lambdac1=A*iB;
k2=&c2;
k=k1+k2;
tmp=J(k2, k1, 0) || I(k2);
Lambdac=Lambdac1 // tmp;
/* create a data set */
create LambDat from Lambdac;
append from Lambdac;
title "********** blinplus &year ************";
%blinplus92(
main = &dsn,
Lambda = LambDat,
valid = &validdat,
main_est = bvarbm&year,
weights = &wts,
err_var = &err_term &u_term,
true_var = &true_term &u_term,
t=&nn,
outUn=outUn&nn,
outC=outC&nn
);
quit;
data outUn&nn;
set outUn&nn;
yr=&year;
run;
data outC&nn;
set outC&nn;
yr=&year;
run;
%if &nn=1 %then %do;
data resUn;
set outUn&nn;
run;
data resC;
set outC&nn;
run;
data combndat;
set &dsn;
run;
%end;
%else %if &nn>=2 %then %do;
proc append base=resUn data=outUn&nn;
run;
proc append base=resC data=outC&nn;
run;
data combndat;
set combndat &dsn;
run;
%end;
%end;
%end;
/* output resutls data */
data &outUn;
set resUn;
run;
data &outC;
set resC;
run;
/* prepare data set for %combine */
data temp;
do i=1 to &ctot;
x=i;
output;
end;
run;
/* produce combined beta */
%if %upcase(&short_prt)=T %then %do;
title;
%combine(outset=&overall, inset=,wt=&wts);
title;
title "Combined simple updated Corrected";
%getCovVecE8086(valid80=&valid80, valid86=&valid86,err_var=&err_term &u_term,
true_var=&true_term &u_term);
%getBetaStarAll(yrTotal=&ndat, k1=&c1, k2=&c2, wts=&wts);
%end;
%mend;
%macro sUpdate80(
maindat=, /*main study data set */
validdat=, /*validation data set */
diagdate=diagdate, /* dxdt, default is diagdate */
yr =, /*questionnare year */
wts =, /*data set for weights */
event =, /* event */
err_term=, /* surrogate values of variables measured with error (Z) */
true_term=, /* golds-tandard values of variables measured with error (X)*/
u_term =, /* variables measured without error (U) */
t =, /* time point */
pd =, /* total # of years of incidence */
outUn=, /* output data with uncorrected results */
outC= /* output data with corrected results */
);
options linesize=100 pagesize=60 formdlim='_';
data &maindat;
set &maindat;
eventtime=&diagdate-(&yr*12+6);
if &diagdate=. then eventtime=(%eval(&yr+&pd)*12+6)-(&yr*12+6);
run;
title "**** multivariate regression for 1980 data";
proc reg data=&validdat covout outest=est80 noprint;
model &true_term=&err_term &u_term;
run;
title "Cox proportional Hazards for surrogate variables (&yr)";
proc phreg data=&maindat covout outest=bvarbm&yr;
model eventtime*&event(0)= &err_term &u_term;
*ods output ParameterEstimates=lgtEst&yr;
run;
proc corr data=&maindat COV outp=covm&yr noprob nosimple noprint;
var &err_term &u_term;
run;
title "********** blinplus &yr ************";
%let weights=&wts;
%let main_est=bvarbm&yr;
%let valid=&validdat;
%let err_var=&err_term &u_term;
%let true_var=&true_term &u_term;
proc iml workspace=999;
reset center noname;
file print;
*** Read in variable names and their "weights";
use &weights;
read all var _char_ into NAM;
read all var _num_ into WT;
close &weights;
*** Read in validation data;
use &valid;
read all var { &err_var } into X;
read all var { &true_var } into Y;
close &valid;
/* number of observations */
n=nrow(X);
/* number of predictors */
k=ncol(X);
*** Find the location of the variables that are measured with error;
lev = loc({&true_var}^={&err_var}); /*vector of locations of the error variables*/
/* variables with measurement errors */
Y2=Y[,lev];
/* number of variables with measurement errors */
k1=ncol(Y2);
/* number of variables without measurement errors */
k2=k-k1;
%let m_est=&main_est;
use &m_est(where=(_name_^="Intercept"));
read all var {&err_var} into BG;
/* estimate of kx1 vector beta */
B = t(BG[1,]); /* kx1 vector of main study regression param est.s */
create betaDat&t from B;
append from B;
VB = BG[2:k+1,]; /* VB is the Cov matrix of B; dim kxk */
/* create data of beta for future use */
create covBetaDat&t from VB;
append from VB;
seb= sqrt(vecdiag(VB)); /* seb[i] is the std error of B[i]; seb has dim kx1 */
free BG;
/* ____________________________________________ */
***add column of ones to the matrix of covariates (for model intercept term);
X=J(n,1,1) || X;
create designWDat&t from X;
append from X;
*** Weighted Multivariate Regression (validation study regression);
F=inv(t(X)*X); /* used to shorten following expressions */
/* it was ginv() */
Theta=F*t(X)*Y2; /* Est. of parameter matrix, incl. intercept; dim: (k+1)xk1 */
GEV=t(Theta);
/* hat matrix X(X^TX)^{-1}X^T */
H=X*F*t(X);
/* residuals */
ERR = Y2 - H*Y2;
/* estimate of variance-covariance matrix */
p=ncol(X);
Sigma = t(ERR)*ERR/(n-p);
D=J(k,1,0) || I(k);
Theta1=D*Theta;
/* cov(Vec(Theta1)) */
covVecTheta&t=Sigma@(D*F*t(D));
create covVecTheta1Dat&t from covVecTheta&t;
append from covVecTheta&t;
C=J(k2,k1,0) || I(k2);
Lambda=t(Theta1||t(C));
A=ginv(Lambda);
create ADat&t from A;
append from A;
/* correctd beta */
/* B=t(beta), Bstar=t(beta)^* */
Bstar=t(A)*B;
create betaStarDat&t from Bstar;
append from Bstar;
A1=A[,lev];
create A1Dat&t from A1;
append from A1;
tmpMat1=t(B)*A1*Sigma*t(A1)*B;
tmpMat2=t(A)*D*F*t(D)*A;
tmpMat3=t(A)*VB*A;
tmpMat4=tmpMat1@tmpMat2;
/* Cov(B^*) */
covBstar=tmpMat3+tmpMat4;
se_est=sqrt(vecdiag(covBstar));
free X Y Y2 tmpMat tmpMat1 tmpMat2 F ERR A H;
/* ____________________________________________ */
***-----------------------------------------------------;
*** BEGIN FORMATTING AND OUTPUT ***;
***-----------------------------------------------------;
%let col_nam=" WT B SE HR 95% CI p";
%let bfunc=exp;
%let head1 = Logistic Regression:;
titlu="Main study regression coeffiecients: Uncorrected";
%pr_table(est=b, se_est=seb, tab_tit=titlu, row_lab=NAM, outset=&outUn);
titlc="Main study regression coefficients: Corrected";
%pr_table(est=Bstar, se_est=se_est, tab_tit=titlc, row_lab=NAM, outset=&outC);
%mend;
/* ===================================================================================
Macro BLINPLUS - measurement error correction for SAS version 8.
*** ALL DATASETS MUST BE SAS DATASETS - Put more info on macro here ***
Modified by Weiliang Qiu on Nov. 20, 2006
===================================================================================*/
%macro blinplus92(
main =, /* dataset with main study data */
Lambda =,
valid =, /* Dataset with Validation Study data */
main_est =, /* Dataset containing est.s from main study regress. */
/* on the var.s given in err_var (below). */
weights =, /* Dataset with weights */
err_var =, /* List of the variables measured with error */
true_var =, /* List of the variables measured without error */
t=,
outUn=, /* dat set records uncorrected results */
outC=, /* dat set records corrected results */
);
/* __________________________________________________________________________________ */
/* Check some arguments */;
%if %numargs(&err_var) ^= %numargs(&true_var) %then %goto out1;
proc sort data=&main; by id; run;
proc sort data=&valid; by id; run;
data combine;
merge &main (in=a) &valid (in=b);
by id;
if a and b;
run;
proc iml workspace=999;
reset center noname;
file print;
*** Read in variable names and their "weights";
use &weights;
read all var _char_ into NAM;
read all var _num_ into WT;
close &weights;
*** Read in validation data;
use &valid;
read all var { &err_var } into X;
read all var { &true_var } into Y;
close &valid;
use combine;
read all var {&err_var} into X2;
close combine;
/* number of observations */
n=nrow(X);
/* number of predictors */
k=ncol(X);
*** Find the location of the variables that are measured with error;
lev = loc({&true_var}^={&err_var}); /*vector of locations of the error variables*/
/* variables with measurement errors */
Y2=Y[,lev];
/* number of variables with measurement errors */
k1=ncol(Y2);
/* number of variables without measurement errors */
k2=k-k1;
%let m_est=&main_est;
use &m_est(where=(_name_^="Intercept"));
read all var {&err_var} into BG;
/* estimate of kx1 vector beta */
B = t(BG[1,]); /* kx1 vector of main study regression param est.s */
create betaDat&t from B;
append from B;
VB = BG[2:k+1,]; /* VB is the Cov matrix of B; dim kxk */
create covBetaDat&t from VB;
append from VB;
seb= sqrt(vecdiag(VB)); /* seb[i] is the std error of B[i]; seb has dim kx1 */
close;
free BG;
/* ____________________________________________ */
***add column of ones to the matrix of covariates (for model intercept term);
X=J(n,1,1) || X;
create designWDat&t from X;
append from X;
n2=nrow(X2);
X2=J(n2,1,1) || X2;
*** Weighted Multivariate Regression (validation study regression);
F=inv(t(X)*X); /* used to shorten following expressions */
F2=inv(t(X2)*X2); /* it was inv() here because of error message */
/* change to ginv() */
Theta=F*t(X)*Y2; /* Est. of parameter matrix, incl. intercept; dim: (k+1)xk1 */
GEV=t(Theta);
/* hat matrix X(X^TX)^{-1}X^T */
H=X*F*t(X);
/* residuals */
ERR = Y2 - H*Y2;
/* estimate of variance-covariance matrix */
p=ncol(X);
Sigma = t(ERR)*ERR/(n-p);
D=J(k,1,0) || I(k);
/* cov(Vec(Theta1)) */
covVecTheta&t=Sigma@(D*F*t(D));
create covVecTheta1Dat&t from covVecTheta&t;
append from covVecTheta&t;
use Λ
read all into Lam;
close Λ
A=inv(Lam);
create ADat&t from A;
append from A;
/* correctd beta */
/* B=t(beta), Bstar=t(beta)^* */
Bstar=t(A)*B;
create betaStarDat&t from Bstar;
append from Bstar;
A1=A[,lev];
create A1Dat&t from A1;
append from A1;
tmpMat1=t(B)*A1*Sigma*t(A1)*B;
tmpMat2=t(A)*D*F2*t(D)*A;
tmpMat3=t(A)*VB*A;
tmpMat4=tmpMat1@tmpMat2;
/* Cov(B^*) */
covBstar=tmpMat3+tmpMat4;
se_est=sqrt(vecdiag(covBstar));
free X X2 Y Y2 tmpMat tmpMat1 tmpMat2 F F2 ERR A H;
/* ____________________________________________ */
***-----------------------------------------------------;
*** BEGIN FORMATTING AND OUTPUT ***;
***-----------------------------------------------------;
%let col_nam=" WT B SE HR 95% CI p";
%let bfunc=exp;
%let head1 = Logistic Regression:;
options nocenter;
titlu="Main study regression coeffiecients: Uncorrected";
%pr_table(est=b, se_est=seb, tab_tit=titlu, row_lab=NAM, outset=&outUn);
titlc="Main study regression coefficients: Corrected";
%pr_table(est=Bstar, se_est=se_est, tab_tit=titlc, row_lab=NAM, outset=&outC);
%goto out;
%out:;
quit;
run;
%mend blinplus92;
%macro err_msg(err_num);
%put; %put;
%put ********************** ERROR IN SAS MACRO BLINPLUS: **********************;
%put ****** ******;
%if &err_num = 1 %then %do;
%put ****** The number of variables given in the ERR_VAR argument must ******;
%put ****** be equal to the number given in the TRUE_VAR argument. ******;
%end;
%else %if &err_num = 2 %then %do;
%put ****** Invalid option chosen for argument TYPE ******;
%put ****** Possible values are: LS (or REG), COX (or PHREG), LOGISTIC ******;
%end;
%else %if &err_num = 3 %then %do;
%put ****** Invalid argument combination: INTERNAL and VAL_EST ******;
%put ****** When Internal=TRUE, a value for VAL_EST must be provided ******;
%end;
%else %if &err_num = 4 %then %do;
%put ****** Invalid argument specification: When HETER_ME=TRUE the ******;
%put ****** arg.s HET_REG, HET_VAR and HET_FUNC must also be provided ******;
%end;
%else %if &err_num = 5 %then %do;
%put ****** Invalid argument specification. When ROBUST=TRUE ******;
%put ****** the arguments PREDS and PRED must also be provided. ******;
%end;
%else %if &err_num = 6 %then %do;
%put ****** Invalid argument combination: HETER_ME=T, HETER_CV=T ******;
%put ****** Cannot run with both these options specified ******;
%end;
%put ****** ******;
%put ******************************************************************************;
%put; %put;
%mend err_msg;
/* for printing detail */
%macro pr_table(
est=,
se_est=,
tab_tit=,
row_lab=,
outset=,
wt=wt,
func = &bfunc,
col_lab=&col_nam
);
oddr= &func(&wt#&est);
ub = &func(&wt#(&est + 1.96*&se_est));
lb = &func(&wt#(&est - 1.96*&se_est));
pval= 1 - probchi((&est/&se_est)#(&est/&se_est),1);
mat=&wt || &est || &se_est || oddr || lb || ub ||pval;
tmpcolname={"WT","B","SE","HR","Low", "Upp", "Pvalue"};
create &outset from mat [colname=tmpcolname];
append from mat;
obj1 = char(&wt,10,2)||char(&est||&se_est||oddr||lb,9,5);
obj2 = left(concat("- ",right(char(ub,8,5))))||char(pval,9,5);
out_obj= obj1||obj2;
%if %upcase(&long_prt)=T %then %do;
divider=" ________ _______________________________________________________________";
print , &tab_tit;
print mat[rowname=&row_lab colname=tmpcolname format=8.5];
print divider,,;
%end;
%mend pr_table;
/* print summary */
%macro pr_table_s(
est=,
se_est=,
tab_tit=,
row_lab=,
outset=,
wt=wt,
func = &bfunc,
col_lab=&col_nam
);
oddr= &func(&wt#&est);
ub = &func(&wt#(&est + 1.96*&se_est));
lb = &func(&wt#(&est - 1.96*&se_est));
pval= 1 - probchi((&est/&se_est)#(&est/&se_est),1);
mat=&wt || &est || &se_est || oddr || lb || ub ||pval;
tmpcolname={"WT","B","SE","HR","Low", "Upp", "Pvalue"};
create &outset from mat [colname=tmpcolname];
append from mat;
obj1 = char(&wt,10,2)||char(&est||&se_est||oddr||lb,9,5);
obj2 = left(concat("- ",right(char(ub,8,5))))||char(pval,9,5);
out_obj= obj1||obj2;
divider=" ________ _______________________________________________________________";
print , &tab_tit;
print mat[rowname=&row_lab colname=tmpcolname format=8.5];
print divider,,;
%mend pr_table_s;
%macro numargs(arg);
%let n=1;
%do %until (%scan(&arg,%eval(&n),%str( ))=%str());
%let n=%eval(&n+1);
%end;
%eval(&n-1)
%mend numargs;
%macro robust( cov = , /* Variance Covariance dataset */
preds = , /* Predicted Variable dataset */
xvar = , /* x variable names */
pred = , /* Predicted Variable name */
dep = , /* Dependent Variable name */
out = , /* Output dataset */
);
data _temp_;
set &cov(where=(_name_="ESTIMATE"));
data cov;
set &cov(where=(_name_^="ESTIMATE"));
run;
proc iml;
use cov;
read all var{intercep &xvar } into cov;
use &preds;
read all var{ &xvar } into x;
x = j(nrow(x),1,1) || x;
read all var { &pred } into p;
read all var { &dep } into d;
rsq = (d - p)#(d - p);
gg = j(ncol(x),ncol(x),0);
do i = 1 to nrow(x);
tmp = rsq[i] # t(x[i,])*(x[i,]);
gg = gg + tmp;
end;
rob = cov*gg*cov;
create &out from rob[colname={INTERCEP &xvar }];
append from rob;
quit;
run;
data &out;
merge cov &out;
data &out;
set _temp_ &out;
run;
%mend;
/* The final desicion is to use proc phreg on the overall */
/* data instead of combine all the results */
%macro combine(outset=, inset=, wt=);
data _null_;
%do i=1 %to &ctot;
%let arrn&i=;
%let arrel = %scan(&cmblist,%eval(&i), %str( ));
%do j=1 %to &ndat;
%let year=%scan(&yr,%eval(&j), %str( ));
%let arrn&i=&&arrn&i &arrel&year;
%end;
%end;
run;
ods output ParameterEstimates=paraEstSimple CensoredSummary=mycensorsumm;
proc phreg data=&outset;
model &eventim*&event(0)= &err_term &u_term / RL;
%do ii=1 %to &c1;
%let err&ii=%scan(&err_term, %eval(&ii), %str( ));
array arr&ii{*} &&arrnⅈ
%end;
%do jj=1 %to &c2;
%let u&jj=%scan(&u_term, %eval(&jj), %str( ));
%let k=%eval(&jj+&c1);
array arr&k{*} &&arrn&k;
%end;
%do kk=1 %to &ndat;
%if &kk=1 %then %do;
%let year=%scan(&yr, %eval(&kk+1), %str( ));
if &eventim <= %eval(&year-80)*12 then do;
%do ii=1 %to &c1;
&&err&ii=arr&ii{&kk};
%end;
%do h=1 %to &c2;
%let l=%eval(&h+&c1);
&&u&h =arr&l{&kk};
%end;
end;
%end;
%else %if 1<&kk and &kk<&ndat %then %do;
%let yr2=%scan(&yr, %eval(&kk+1), %str( ));
else if &eventim <= %eval(&yr2-80)*12 then do;
%do ii=1 %to &c1;
&&err&ii=arr&ii{&kk};
%end;
%do h=1 %to &c2;
%let l=%eval(&h+&c1);
&&u&h =arr&l{&kk};
%end;
end;
%end;
%else %if &kk=&ndat %then %do;
else do;
%do ii=1 %to &c1;
&&err&ii=arr&ii{&kk};
%end;
%do h=1 %to &c2;
%let l=%eval(&h+&c1);
&&u&h =arr&l{&kk};
%end;
end;
%end;
%end;
run;
ods output close;
data paraEstSimple;
set paraEstSimple;
beta=Estimate;
se =StdErr;
OR =HazardRatio;
low =HRLowerCL;
upp =HRUpperCL;
pval=ProbChisq;
run;
proc iml;
reset center noname;
use paraEstSimple;
read all var {beta} into b;
read all var {se} into seb;
close paraEstSimple;
use &wt;
read all var _char_ into NAM;
read all var _num_ into Wts;
close &wt;
%let col_nam=" WT B SE HR 95% CI p";
%let bfunc=exp;
options nocenter;
titlu="Combined simple updated Uncorrected";
%pr_table_s(est=b, se_est=seb, tab_tit=titlu, row_lab=NAM, outset=outCombine, wt=wts);
quit;
%mend combine;
/* new method for combining the results from all time point */
/* ===================================================================================
Macro getCovVecE8086 - .
===================================================================================*/
%macro getCovVecE8086(
valid80 =, /* Dataset with 1980 Validation Study data */
valid86 =, /* Dataset with 1980 Validation Study data */
err_var =, /* List of the variables measured with error */
true_var = /* List of the variables measured without error*/
);
data v80;
set &valid80;
*if dtsatf = . or dtcalor = . or dtalco = . or ffqsatf = . or ffqcalor = . then delete;
run;
data v86;
set &valid86;
*if dtsatf = . or dtcalor = . or dtalco = . or ffqsatf = . or ffqcalor = . then delete;
run;
proc sort data=v80; by id; run;
proc sort data=v86; by id; run;
data combine1;
merge v86 (in=a) v80 (in=b);
by id;
if a and b;
run;
data combine2;
merge v80 (in=a) v86 (in=b);
by id;
if a and b;
run;
proc sort data=combine1; by id; run;
proc sort data=combine2; by id; run;
proc iml;
reset center noname;
file print;
use combine1;
read all var{ &err_var } into Mat80;
close combine1;
use combine2;
read all var{ &err_var } into Mat86;
close combine2;
*** Read in validation data;
use v80;
read all var { &err_var } into X80;
read all var { &true_var } into Y80;
close v80;
use v86;
read all var { &err_var } into X86;
read all var { &true_var } into Y86;
close v86;
use v80;
read all var {id} into id80;
close v80;
use v86;
read all var {id} into id86;
close v86;
/* number of observations */
n80=nrow(X80);
n86=nrow(X86);
/* number of predictors */
k80=ncol(X80);
k86=ncol(X86);
*** Find the location of the variables that are measured with error;
lev = loc({&true_var}^={&err_var}); /*vector of locations of the error variables*/
/* variables with measurement errors */
Y280=Y80[,lev];
Y286=Y86[,lev];
/* number of variables with measurement errors */
k180=ncol(Y280);
k186=ncol(Y286);
/* number of variables without measurement errors */
k280=k80-k180;
k286=k86-k186;
***add column of ones to the matrix of covariates (for model intercept term);
X80=J(n80,1,1) || X80;
X86=J(n86,1,1) || X86;
create designWDat80 from X80 ;
append from X80;
create designWDat86 from X86 ;
append from X86;
*** Weighted Multivariate Regression (validation study regression);
tMat80=t(X80)*X80;
tMat86=t(X86)*X86;
F80=inv(tMat80); /* used to shorten following expressions */
F86=inv(tMat86); /* used to shorten following expressions */
Theta80=F80*t(X80)*Y280; /* Est. of parameter matrix, incl. intercept; dim: (k+1)xk1 */
Theta86=F86*t(X86)*Y286; /* Est. of parameter matrix, incl. intercept; dim: (k+1)xk1 */
GEV80=t(Theta80);
GEV86=t(Theta86);
/* hat matrix X(X^TX)^{-1}X^T */
H80=X80*F80*t(X80);
H86=X86*F86*t(X86);
/* residuals */
ERR80 = Y280 - H80*Y280;
ERR86 = Y286 - H86*Y286;
/* estimate of variance-covariance matrix */
p80=ncol(X80);
p86=ncol(X86);
Sigma80 = t(ERR80)*ERR80/(n80-p80);
Sigma86 = t(ERR86)*ERR86/(n86-p86);
create sigma80 from Sigma80;
append from Sigma80;
create sigma86 from Sigma86;
append from Sigma86;
ERR802=id80 || ERR80;
create resi80 from ERR802;
append from ERR802;
ERR862=id86 || ERR86;
create resi86 from ERR862;
append from ERR862;
quit;
data resi80;
set resi80;
rename col1=id;
%do i=2 %to &c1+1;
rename col&i=col&i.80;
%end;
run;
data resi86;
set resi86;
rename col1=id;
%do i=2 %to &c1+1;
rename col&i=col&i.86;
%end;
run;
proc sort data=resi80; by id; run;
proc sort data=resi86; by id; run;
data resi8086;
merge resi80 (in=a) resi86 (in=b);
by id;
if a and b;
run;
data resi8086;
set resi8086;
drop id;
run;
proc iml;
use resi8086;
read all into e8086;
close resi8086;
use sigma80;
read all into sigma80;
close sigma80;
use sigma86;
read all into sigma86;
close sigma86;
e80=e8086[,1:&c1];
e86=e8086[,%eval(&c1+1):%eval(2*&c1)];
n=nrow(e80);
sigma8086 = t(e80)*e86/(n-8);
sigma8086=(sigma8086+t(sigma8086))/2;
use v80;
read all var { &err_var } into X80;
read all var { &true_var } into Y80;
read all var {id} into id80;
close v80;
n80 = nrow(X80);
k = ncol(X80);
k1=nrow(Sigma80);
k2=k-k1;
*print 'k=', k, ' k1=', k1, ' k2=', k2;
use v86;
read all var { &err_var } into X86;
read all var { &true_var } into Y86;
read all var {id} into id86;
close v86;
n86 = nrow(X86);
Hmat=J(n80, n86, 0);
do i = 1 to n80;
do j =1 to n86;
if id80[i]=id86[j] then
do;
Hmat[i,j]=1;
end;
else
do;
Hmat[i,j]=0;
end;
end;
end;
create Sigma8086Dat from Sigma8086;
append from Sigma8086;
covVecE8086=Sigma8086 @ Hmat;
use designWDat80;
read all into designWMat80;
close designWDat80;
use designWDat86;
read all into designWMat86;
close designWDat86;
Dmat=J(k, 1, 0) || I(k);
/* it was ginv() */
tmpt1 = inv(t(designWMat80)*(designWMat80)) * t(designWMat80);
tmpt2 = inv(t(designWMat86)*(designWMat86)) * t(designWMat86);
tmpt1D1=Dmat*tmpt1;
tmpt2D1=Dmat*tmpt2;
tmp=tmpt1D1*Hmat*t(tmpt2D1);
*print 'D*(Wt1^T*Wt1)^{-1}*Wt1^T*H*Wt2*(Wt2^T*Wt2)^{-1}D^T>>>';
*print tmp;
covVecTheta1t1t2=Sigma8086 @ tmp;
*print 'Sigma8086=', Sigma8086;
*print 'covVecTheta1t1t2=', covVecTheta1t1t2;
create covVecTheta1t1t2Dat from covVecTheta1t1t2;
append from covVecTheta1t1t2;
quit;
%mend getCovVecE8086;
/*******************************************************************/
/* get overall estimate betastar over 'yrTotal' time perionds */
%macro getBetaStarAll(yrTotal=, k1=, k2=, wts=);
%let k = %eval(&k1 + &k2);
%getCovXi(yrTotal=&yrTotal, k1=&k1, k2=&k2, covXiDat=covXiDat);
%do ell=1 %to &k;
%getOmegaEll(ell=&ell, yrTotal=&yrTotal, k1=&k1, k2=&k2, covXiDat=covXiDat,
OmegaDat=OmegaDat);
%getBetaStarAllEll(ell=&ell, yrTotal=&yrTotal, OmegaDat=OmegaDat,
betaStarAllDat=tmpDat&ell, varBetaStarAllDat=vartmpDat&ell);
%end;
proc iml;
betaStarAll=J(&k, 1, 0);
varBetaStarAll=J(&k, 1, 0);
%do ell = 1 %to &k;
use tmpDatℓ
read all into tmpBetaStarAll;
close tmpDatℓ
use vartmpDatℓ
read all into tmpVarBetaStarAll;
close vartmpDatℓ
betaStarAll[&ell]=tmpBetaStarAll;
varBetaStarAll[&ell]=tmpVarBetaStarAll;
%end;
use &wts;
read all var _char_ into NAM;
read all var _num_ into WT;
close &wts;
seb=sqrt(varBetaStarAll);
myOR=exp(WT#betaStarAll);
low=exp(WT#(betaStarAll-1.96*seb));
upp=exp(WT#(betaStarAll+1.96*seb));
pval=1-probchi((betaStarAll/seb)##2,1);
resMat=WT||betaStarAll||seb||myOR||low||upp||pval;
options nocenter;
print resMat[rowname=NAM colname={"WT","beta", "se", "HR", "low", "upp", "pval"} format=8.5];
quit;
%mend getBetaStarAll;
/* get overall estimate of the ell-th effect over 'yrTotal' time periods */
%macro getBetaStarAllEll(ell=, yrTotal=, OmegaDat=, betaStarAllDat=, varBetaStarAllDat=);
proc iml;
use &OmegaDat;
read all into Omega;
close &OmegaDat;
oneVec=J(&yrTotal, 1, 1);
iOmega=inv(Omega); /* it was ginv() */
denom=t(oneVec)*iOmega*oneVec;
wVec=iOmega*oneVec/denom;
tmpBetaStarAll = 0;
%do i=1 %to &yrTotal;
use betaStarDat&i;
read all into betaStarVeci;
close betaStarDat&i;
tmpBetaStarAll=tmpBetaStarAll+wVec[&i]*betaStarVeci[&ell];
%end;
tmpVarBetaStarAll=1/denom;
create &betaStarAllDat from tmpBetaStarAll;
append from tmpBetaStarAll;
create &varBetaStarAllDat from tmpVarBetaStarAll;
append from tmpVarBetaStarAll;
quit;
%mend getBetaStarAllEll;
%macro getOmegaEll(ell=, yrTotal=, k1=, k2=, covXiDat=, OmegaDat=);
proc iml;
use &covXiDat;
read all into covXi;
close &covXiDat;
/* get partial h(xi) / partial xi */
%let k = %eval(&k1 + &k2);
%let kk1 = %eval((&k1)*(&k));
%let tmpnrUp = %eval((&kk1)*(&yrTotal));
%let tmpnrDown = %eval((&k)*(&yrTotal));
upMat= J(&tmpnrUp, &yrTotal,0);
downMat= J(&tmpnrDown, &yrTotal,0);
%do i=1 %to &yrTotal;
/* A=Lambda^{-1} */
/* A1=first k1 columns of A */
use ADat&i;
read all into Ai;
close ADat&i;
use A1Dat&i;
read all into A1i;
close A1Dat&i;
/* read eta=beta^T */
use betaDat&i;
read all into etaVeci;
close betaDat&i;
%let startUp=%eval((&i-1)*(&kk1)+1);
%let endUp=%eval((&i)*(&kk1));
upMat[&startUp:&endUp, &i]= - (t(A1i) * etaVeci) @ (Ai[,&ell]);
%let startDown=%eval((&i-1)*(&k)+1);
%let endDown=%eval((&i)*(&k));
downMat[&startDown:&endDown, &i]=(Ai[,&ell]);
%end;
/* stack upMat and downMat */
dhdxi=upMat//downMat;
Omega=t(dhdxi) * (covXi) * dhdxi;
create &OmegaDat from Omega;
append from Omega;
quit;
%mend getOmegaEll;
/* get cov(Xi) */
%macro getCovXi(yrTotal=, k1=, k2=, covXiDat=);
proc iml;
%let k = %eval(&k1 + &k2);
%let kk1 = %eval((&k) * (&k1));
%let tmpnrUp = %eval((&kk1)*(&yrTotal));
%let tmpnrDown = %eval((&k)*(&yrTotal));
%let nr=%eval(&tmpnrUp+&tmpnrDown);
covXi = J((&nr), (&nr), 0);
%do i=1 %to &yrTotal;
/* read cov(Vec(Theta1)) */
use covVecTheta1Dat&i;
read all into covVecTheta1Mati;
close covVecTheta1Dat&i;
/* read cov(eta) */
use covBetaDat&i;
read all into covBetaMati;
close covBetaDat&i;
%let startUp=%eval((&i-1)*(&kk1)+1);
%let endUp=%eval((&i)*(&kk1));
covXi[&startUp:&endUp, &startup:&endUp]=covVecTheta1Mati;
%let startDown=%eval(&tmpnrUp+(&i-1)*(&k)+1);
%let endDown=%eval(&tmpnrUp+(&i)*(&k));
covXi[&startDown:&endDown, &startDown:&endDown]=covBetaMati;
%do j = %eval(&i)+1 %to &yrTotal;
use covVecTheta1t1t2Dat;
read all into covVecTheta1t1t2;
close covVecTheta1t1t2Dat;
%let startUp2=%eval((&j-1)*(&kk1)+1);
%let endUp2=%eval((&j)*(&kk1));
covXi[&startUp:&endUp, &startup2:&endUp2]=covVecTheta1t1t2;
covXi[&startUp2:&endUp2, &startup:&endUp]=t(covVecTheta1t1t2);
%end;
%end;
create &covXiDat from covXi;
append from covXi;
quit;
%mend getCovXi;