sUpdate.sas

/*************************************************************************************

*

* Macro sUpdate - Measurement Error Correction Methods Based on the Simple Update method of Intake

* for Nutritional Data

* This macro calculates corrected OR using simple update methods.

*

* This macro calls

* %blinplus92.

* %blinplus92 is a modified version of %blinplus by Weiliang

* Location:

*

* Program: sUpdate.sas

* Created by: Weiliang Qiu (stwxq) on 5/21/07

* Modified by: Rong Chen (nhron) on 07/06/07

***************************************************************************************/

%global c1 ndat c2 ctot errblin trueblin c1 yr cmblist;

%macro sUpdate(

overall=, /* one data set that includes all the diet info. */

/* that user needs for a particular analysis */

/* for example if user wants to analyze diet info. */

/* from 1980 - 1998, this data set must has the diet info. */

/* from 1980 -1998 */

eventim =,

diagdate=diagdate, /* variable name for dxdt, by default, it is diagdate */

maindat=, /* a list of data sets' names. To use this macro, user needs */

/* create a data set for each year that the user is interested in */

/* for example, main80 is for 1980 diet info. and etc. */

valid80=, /*validation data set */

valid86=, /* if the year of FFQ is 86 and later, use 86 validat data */

period=, /* total # of years of incidence. For example if 1980-2002 is the time */

/* span of interest, then period=22 */

yr =, /*list of questionnare years that correspond to maindat */

wts =, /*data set for weights */

event =, /* event */

cmblist=, /* prefix of nutrien variables and other covariates */

err_term=, /*surrogate values of variables measured with error (Z) */

true_term=, /*goldstandard values of variables measured with error (X) */

u_term =, /* variables measured without error (U) */

outUn=, /* output data with uncorrected results */

outC=, /* output data with corrected results */

long_prt=F, /* T: print RR for each year; F: no; default is F */

short_prt=T /* T: print combined results; F: no; default is T */

);

options linesize=100 pagesize=60 formdlim='_';

/* count # of main data set */

%let ndat=%numargs(&maindat);

%let c1=%numargs(&err_term);

%let c2=%numargs(&u_term);

%let ctot=%eval(&c1+&c2);

%do nn=1 %to &ndat;

%let dsn=%scan(&maindat, %eval(&nn), %str( ));

%let year=%scan(&yr, %eval(&nn), %str( ));

%if &nn=1 %then %do;

%let yr1=&year;

%end;

%if &year =80 or &year=82 or &year=84 %then %do;

%let validdat=&valid80;

%end;

%else %if &year >84 %then %do;

%let validdat=&valid86;

%end;

%if &year=80 %then %do;

%sUpdate80(maindat=&dsn, validdat=&validdat,

yr=80, wts=&wts, event=&event, diagdate=&diagdate,

err_term=&err_term,

true_term=&true_term,

u_term=&u_term,

t=&nn,

pd=&period,

outUn=outUn&nn, outC=outc&nn);

data resUn;

set outUn&nn;

yr=&year;

run;

data resC;

set outc&nn;

yr=&year;

run;

/* prepare combine data set for future use */

/* if first data set is from 1980 */

data combndat;

set &dsn;

run;

%end;

%else %do;

data &dsn;

set &dsn;

eventtime=&diagdate-(&yr1*12+6);

if &diagdate=. then eventtime=(%eval(&yr1+&period)*12+6)-(&yr1*12+6);

run;

title "Cox proportional hazards for surrogate variables (&year)";

proc phreg data=&dsn covout outest=bvarbm&year;

model eventtime*&event(0) = &err_term &u_term;

ods output ParameterEstimates=lgtEst&year;

run;

title "get covariance matrix for 19&year main study data";

proc corr data=&dsn COV outp=covm&year noprob nosimple noprint;

var &err_term &u_term;

run;

title "get covariance matrix for &year validation study data";

proc corr data=&validdat COV outp=covv noprob nosimple noprint;

var &true_term &err_term &u_term;

run;

proc iml;

use lgtEst&year;

read all into lgtEst;

close lgtEst&year;

use covm&year;

read all into covmMat;

close covm&year;

use covv;

read all into covvMat;

close covv;

/* get row name */

use &wts;

read all var _char_ into NAM;

close &wts;

/* get beta_c */

row =nrow(lgtest);

betac=lgtest[1:row,2];

betac=t(betac);

betacMat=lgtest[,{2 3 5}];

*print betacMat [rowname=NAM colname={'coef', 'se', 'pval'} format=best8.5];

/* get matrix B */

/* [Cov(Zt) Cov(Zt, Ut)]

[Cov(Ut, Zt) Cov(Ut) ]

*/

brow=row;

B=covmMat[1:brow, 1:brow];

/* get matrix A = [Cov(Xt, Zt), Cov(Xt, Ut)] */

lev=loc({&true_term}^={&err_term}); /*obtain a row vector of the true variables */

/* obtain # of true variables: k1 */

/* obtain # of error variables: k2 */

k1=ncol(lev);

tot=ncol(covvMat);

A=covvMat[1:k1, k1+1:tot];

/* get matrix Lambda_c */

iB=inv(B);

Lambdac1=A*iB;

k2=&c2;

k=k1+k2;

tmp=J(k2, k1, 0) || I(k2);

Lambdac=Lambdac1 // tmp;

/* create a data set */

create LambDat from Lambdac;

append from Lambdac;

title "********** blinplus &year ************";

%blinplus92(

main = &dsn,

Lambda = LambDat,

valid = &validdat,

main_est = bvarbm&year,

weights = &wts,

err_var = &err_term &u_term,

true_var = &true_term &u_term,

t=&nn,

outUn=outUn&nn,

outC=outC&nn

);

quit;

data outUn&nn;

set outUn&nn;

yr=&year;

run;

data outC&nn;

set outC&nn;

yr=&year;

run;

%if &nn=1 %then %do;

data resUn;

set outUn&nn;

run;

data resC;

set outC&nn;

run;

data combndat;

set &dsn;

run;

%end;

%else %if &nn>=2 %then %do;

proc append base=resUn data=outUn&nn;

run;

proc append base=resC data=outC&nn;

run;

data combndat;

set combndat &dsn;

run;

%end;

%end;

%end;

/* output resutls data */

data &outUn;

set resUn;

run;

data &outC;

set resC;

run;

/* prepare data set for %combine */

data temp;

do i=1 to &ctot;

x=i;

output;

end;

run;

/* produce combined beta */

%if %upcase(&short_prt)=T %then %do;

title;

%combine(outset=&overall, inset=,wt=&wts);

title;

title "Combined simple updated Corrected";

%getCovVecE8086(valid80=&valid80, valid86=&valid86,err_var=&err_term &u_term,

true_var=&true_term &u_term);

%getBetaStarAll(yrTotal=&ndat, k1=&c1, k2=&c2, wts=&wts);

%end;

%mend;

%macro sUpdate80(

maindat=, /*main study data set */

validdat=, /*validation data set */

diagdate=diagdate, /* dxdt, default is diagdate */

yr =, /*questionnare year */

wts =, /*data set for weights */

event =, /* event */

err_term=, /* surrogate values of variables measured with error (Z) */

true_term=, /* golds-tandard values of variables measured with error (X)*/

u_term =, /* variables measured without error (U) */

t =, /* time point */

pd =, /* total # of years of incidence */

outUn=, /* output data with uncorrected results */

outC= /* output data with corrected results */

);

options linesize=100 pagesize=60 formdlim='_';

data &maindat;

set &maindat;

eventtime=&diagdate-(&yr*12+6);

if &diagdate=. then eventtime=(%eval(&yr+&pd)*12+6)-(&yr*12+6);

run;

title "**** multivariate regression for 1980 data";

proc reg data=&validdat covout outest=est80 noprint;

model &true_term=&err_term &u_term;

run;

title "Cox proportional Hazards for surrogate variables (&yr)";

proc phreg data=&maindat covout outest=bvarbm&yr;

model eventtime*&event(0)= &err_term &u_term;

*ods output ParameterEstimates=lgtEst&yr;

run;

proc corr data=&maindat COV outp=covm&yr noprob nosimple noprint;

var &err_term &u_term;

run;

title "********** blinplus &yr ************";

%let weights=&wts;

%let main_est=bvarbm&yr;

%let valid=&validdat;

%let err_var=&err_term &u_term;

%let true_var=&true_term &u_term;

proc iml workspace=999;

reset center noname;

file print;

*** Read in variable names and their "weights";

use &weights;

read all var _char_ into NAM;

read all var _num_ into WT;

close &weights;

*** Read in validation data;

use &valid;

read all var { &err_var } into X;

read all var { &true_var } into Y;

close &valid;

/* number of observations */

n=nrow(X);

/* number of predictors */

k=ncol(X);

*** Find the location of the variables that are measured with error;

lev = loc({&true_var}^={&err_var}); /*vector of locations of the error variables*/

/* variables with measurement errors */

Y2=Y[,lev];

/* number of variables with measurement errors */

k1=ncol(Y2);

/* number of variables without measurement errors */

k2=k-k1;

%let m_est=&main_est;

use &m_est(where=(_name_^="Intercept"));

read all var {&err_var} into BG;

/* estimate of kx1 vector beta */

B = t(BG[1,]); /* kx1 vector of main study regression param est.s */

create betaDat&t from B;

append from B;

VB = BG[2:k+1,]; /* VB is the Cov matrix of B; dim kxk */

/* create data of beta for future use */

create covBetaDat&t from VB;

append from VB;

seb= sqrt(vecdiag(VB)); /* seb[i] is the std error of B[i]; seb has dim kx1 */

free BG;

/* ____________________________________________ */

***add column of ones to the matrix of covariates (for model intercept term);

X=J(n,1,1) || X;

create designWDat&t from X;

append from X;

*** Weighted Multivariate Regression (validation study regression);

F=inv(t(X)*X); /* used to shorten following expressions */

/* it was ginv() */

Theta=F*t(X)*Y2; /* Est. of parameter matrix, incl. intercept; dim: (k+1)xk1 */

GEV=t(Theta);

/* hat matrix X(X^TX)^{-1}X^T */

H=X*F*t(X);

/* residuals */

ERR = Y2 - H*Y2;

/* estimate of variance-covariance matrix */

p=ncol(X);

Sigma = t(ERR)*ERR/(n-p);

D=J(k,1,0) || I(k);

Theta1=D*Theta;

/* cov(Vec(Theta1)) */

covVecTheta&t=Sigma@(D*F*t(D));

create covVecTheta1Dat&t from covVecTheta&t;

append from covVecTheta&t;

C=J(k2,k1,0) || I(k2);

Lambda=t(Theta1||t(C));

A=ginv(Lambda);

create ADat&t from A;

append from A;

/* correctd beta */

/* B=t(beta), Bstar=t(beta)^* */

Bstar=t(A)*B;

create betaStarDat&t from Bstar;

append from Bstar;

A1=A[,lev];

create A1Dat&t from A1;

append from A1;

tmpMat1=t(B)*A1*Sigma*t(A1)*B;

tmpMat2=t(A)*D*F*t(D)*A;

tmpMat3=t(A)*VB*A;

tmpMat4=tmpMat1@tmpMat2;

/* Cov(B^*) */

covBstar=tmpMat3+tmpMat4;

se_est=sqrt(vecdiag(covBstar));

free X Y Y2 tmpMat tmpMat1 tmpMat2 F ERR A H;

/* ____________________________________________ */

***-----------------------------------------------------;

*** BEGIN FORMATTING AND OUTPUT ***;

***-----------------------------------------------------;

%let col_nam=" WT B SE HR 95% CI p";

%let bfunc=exp;

%let head1 = Logistic Regression:;

titlu="Main study regression coeffiecients: Uncorrected";

%pr_table(est=b, se_est=seb, tab_tit=titlu, row_lab=NAM, outset=&outUn);

titlc="Main study regression coefficients: Corrected";

%pr_table(est=Bstar, se_est=se_est, tab_tit=titlc, row_lab=NAM, outset=&outC);

%mend;

/* ===================================================================================

Macro BLINPLUS - measurement error correction for SAS version 8.

*** ALL DATASETS MUST BE SAS DATASETS - Put more info on macro here ***

Modified by Weiliang Qiu on Nov. 20, 2006

===================================================================================*/

%macro blinplus92(

main =, /* dataset with main study data */

Lambda =,

valid =, /* Dataset with Validation Study data */

main_est =, /* Dataset containing est.s from main study regress. */

/* on the var.s given in err_var (below). */

weights =, /* Dataset with weights */

err_var =, /* List of the variables measured with error */

true_var =, /* List of the variables measured without error */

t=,

outUn=, /* dat set records uncorrected results */

outC=, /* dat set records corrected results */

);

/* __________________________________________________________________________________ */

/* Check some arguments */;

%if %numargs(&err_var) ^= %numargs(&true_var) %then %goto out1;

proc sort data=&main; by id; run;

proc sort data=&valid; by id; run;

data combine;

merge &main (in=a) &valid (in=b);

by id;

if a and b;

run;

proc iml workspace=999;

reset center noname;

file print;

*** Read in variable names and their "weights";

use &weights;

read all var _char_ into NAM;

read all var _num_ into WT;

close &weights;

*** Read in validation data;

use &valid;

read all var { &err_var } into X;

read all var { &true_var } into Y;

close &valid;

use combine;

read all var {&err_var} into X2;

close combine;

/* number of observations */

n=nrow(X);

/* number of predictors */

k=ncol(X);

*** Find the location of the variables that are measured with error;

lev = loc({&true_var}^={&err_var}); /*vector of locations of the error variables*/

/* variables with measurement errors */

Y2=Y[,lev];

/* number of variables with measurement errors */

k1=ncol(Y2);

/* number of variables without measurement errors */

k2=k-k1;

%let m_est=&main_est;

use &m_est(where=(_name_^="Intercept"));

read all var {&err_var} into BG;

/* estimate of kx1 vector beta */

B = t(BG[1,]); /* kx1 vector of main study regression param est.s */

create betaDat&t from B;

append from B;

VB = BG[2:k+1,]; /* VB is the Cov matrix of B; dim kxk */

create covBetaDat&t from VB;

append from VB;

seb= sqrt(vecdiag(VB)); /* seb[i] is the std error of B[i]; seb has dim kx1 */

close;

free BG;

/* ____________________________________________ */

***add column of ones to the matrix of covariates (for model intercept term);

X=J(n,1,1) || X;

create designWDat&t from X;

append from X;

n2=nrow(X2);

X2=J(n2,1,1) || X2;

*** Weighted Multivariate Regression (validation study regression);

F=inv(t(X)*X); /* used to shorten following expressions */

F2=inv(t(X2)*X2); /* it was inv() here because of error message */

/* change to ginv() */

Theta=F*t(X)*Y2; /* Est. of parameter matrix, incl. intercept; dim: (k+1)xk1 */

GEV=t(Theta);

/* hat matrix X(X^TX)^{-1}X^T */

H=X*F*t(X);

/* residuals */

ERR = Y2 - H*Y2;

/* estimate of variance-covariance matrix */

p=ncol(X);

Sigma = t(ERR)*ERR/(n-p);

D=J(k,1,0) || I(k);

/* cov(Vec(Theta1)) */

covVecTheta&t=Sigma@(D*F*t(D));

create covVecTheta1Dat&t from covVecTheta&t;

append from covVecTheta&t;

use Λ

read all into Lam;

close Λ

A=inv(Lam);

create ADat&t from A;

append from A;

/* correctd beta */

/* B=t(beta), Bstar=t(beta)^* */

Bstar=t(A)*B;

create betaStarDat&t from Bstar;

append from Bstar;

A1=A[,lev];

create A1Dat&t from A1;

append from A1;

tmpMat1=t(B)*A1*Sigma*t(A1)*B;

tmpMat2=t(A)*D*F2*t(D)*A;

tmpMat3=t(A)*VB*A;

tmpMat4=tmpMat1@tmpMat2;

/* Cov(B^*) */

covBstar=tmpMat3+tmpMat4;

se_est=sqrt(vecdiag(covBstar));

free X X2 Y Y2 tmpMat tmpMat1 tmpMat2 F F2 ERR A H;

/* ____________________________________________ */

***-----------------------------------------------------;

*** BEGIN FORMATTING AND OUTPUT ***;

***-----------------------------------------------------;

%let col_nam=" WT B SE HR 95% CI p";

%let bfunc=exp;

%let head1 = Logistic Regression:;

options nocenter;

titlu="Main study regression coeffiecients: Uncorrected";

%pr_table(est=b, se_est=seb, tab_tit=titlu, row_lab=NAM, outset=&outUn);

titlc="Main study regression coefficients: Corrected";

%pr_table(est=Bstar, se_est=se_est, tab_tit=titlc, row_lab=NAM, outset=&outC);

%goto out;

%out:;

quit;

run;

%mend blinplus92;

%macro err_msg(err_num);

%put; %put;

%put ********************** ERROR IN SAS MACRO BLINPLUS: **********************;

%put ****** ******;

%if &err_num = 1 %then %do;

%put ****** The number of variables given in the ERR_VAR argument must ******;

%put ****** be equal to the number given in the TRUE_VAR argument. ******;

%end;

%else %if &err_num = 2 %then %do;

%put ****** Invalid option chosen for argument TYPE ******;

%put ****** Possible values are: LS (or REG), COX (or PHREG), LOGISTIC ******;

%end;

%else %if &err_num = 3 %then %do;

%put ****** Invalid argument combination: INTERNAL and VAL_EST ******;

%put ****** When Internal=TRUE, a value for VAL_EST must be provided ******;

%end;

%else %if &err_num = 4 %then %do;

%put ****** Invalid argument specification: When HETER_ME=TRUE the ******;

%put ****** arg.s HET_REG, HET_VAR and HET_FUNC must also be provided ******;

%end;

%else %if &err_num = 5 %then %do;

%put ****** Invalid argument specification. When ROBUST=TRUE ******;

%put ****** the arguments PREDS and PRED must also be provided. ******;

%end;

%else %if &err_num = 6 %then %do;

%put ****** Invalid argument combination: HETER_ME=T, HETER_CV=T ******;

%put ****** Cannot run with both these options specified ******;

%end;

%put ****** ******;

%put ******************************************************************************;

%put; %put;

%mend err_msg;

/* for printing detail */

%macro pr_table(

est=,

se_est=,

tab_tit=,

row_lab=,

outset=,

wt=wt,

func = &bfunc,

col_lab=&col_nam

);

oddr= &func(&wt#&est);

ub = &func(&wt#(&est + 1.96*&se_est));

lb = &func(&wt#(&est - 1.96*&se_est));

pval= 1 - probchi((&est/&se_est)#(&est/&se_est),1);

mat=&wt || &est || &se_est || oddr || lb || ub ||pval;

tmpcolname={"WT","B","SE","HR","Low", "Upp", "Pvalue"};

create &outset from mat [colname=tmpcolname];

append from mat;

obj1 = char(&wt,10,2)||char(&est||&se_est||oddr||lb,9,5);

obj2 = left(concat("- ",right(char(ub,8,5))))||char(pval,9,5);

out_obj= obj1||obj2;

%if %upcase(&long_prt)=T %then %do;

divider=" ________ _______________________________________________________________";

print , &tab_tit;

print mat[rowname=&row_lab colname=tmpcolname format=8.5];

print divider,,;

%end;

%mend pr_table;

/* print summary */

%macro pr_table_s(

est=,

se_est=,

tab_tit=,

row_lab=,

outset=,

wt=wt,

func = &bfunc,

col_lab=&col_nam

);

oddr= &func(&wt#&est);

ub = &func(&wt#(&est + 1.96*&se_est));

lb = &func(&wt#(&est - 1.96*&se_est));

pval= 1 - probchi((&est/&se_est)#(&est/&se_est),1);

mat=&wt || &est || &se_est || oddr || lb || ub ||pval;

tmpcolname={"WT","B","SE","HR","Low", "Upp", "Pvalue"};

create &outset from mat [colname=tmpcolname];

append from mat;

obj1 = char(&wt,10,2)||char(&est||&se_est||oddr||lb,9,5);

obj2 = left(concat("- ",right(char(ub,8,5))))||char(pval,9,5);

out_obj= obj1||obj2;

divider=" ________ _______________________________________________________________";

print , &tab_tit;

print mat[rowname=&row_lab colname=tmpcolname format=8.5];

print divider,,;

%mend pr_table_s;

%macro numargs(arg);

%let n=1;

%do %until (%scan(&arg,%eval(&n),%str( ))=%str());

%let n=%eval(&n+1);

%end;

%eval(&n-1)

%mend numargs;

%macro robust( cov = , /* Variance Covariance dataset */

preds = , /* Predicted Variable dataset */

xvar = , /* x variable names */

pred = , /* Predicted Variable name */

dep = , /* Dependent Variable name */

out = , /* Output dataset */

);

data _temp_;

set &cov(where=(_name_="ESTIMATE"));

data cov;

set &cov(where=(_name_^="ESTIMATE"));

run;

proc iml;

use cov;

read all var{intercep &xvar } into cov;

use &preds;

read all var{ &xvar } into x;

x = j(nrow(x),1,1) || x;

read all var { &pred } into p;

read all var { &dep } into d;

rsq = (d - p)#(d - p);

gg = j(ncol(x),ncol(x),0);

do i = 1 to nrow(x);

tmp = rsq[i] # t(x[i,])*(x[i,]);

gg = gg + tmp;

end;

rob = cov*gg*cov;

create &out from rob[colname={INTERCEP &xvar }];

append from rob;

quit;

run;

data &out;

merge cov &out;

data &out;

set _temp_ &out;

run;

%mend;

/* The final desicion is to use proc phreg on the overall */

/* data instead of combine all the results */

%macro combine(outset=, inset=, wt=);

data _null_;

%do i=1 %to &ctot;

%let arrn&i=;

%let arrel = %scan(&cmblist,%eval(&i), %str( ));

%do j=1 %to &ndat;

%let year=%scan(&yr,%eval(&j), %str( ));

%let arrn&i=&&arrn&i &arrel&year;

%end;

%end;

run;

ods output ParameterEstimates=paraEstSimple CensoredSummary=mycensorsumm;

proc phreg data=&outset;

model &eventim*&event(0)= &err_term &u_term / RL;

%do ii=1 %to &c1;

%let err&ii=%scan(&err_term, %eval(&ii), %str( ));

array arr&ii{*} &&arrnⅈ

%end;

%do jj=1 %to &c2;

%let u&jj=%scan(&u_term, %eval(&jj), %str( ));

%let k=%eval(&jj+&c1);

array arr&k{*} &&arrn&k;

%end;

%do kk=1 %to &ndat;

%if &kk=1 %then %do;

%let year=%scan(&yr, %eval(&kk+1), %str( ));

if &eventim <= %eval(&year-80)*12 then do;

%do ii=1 %to &c1;

&&err&ii=arr&ii{&kk};

%end;

%do h=1 %to &c2;

%let l=%eval(&h+&c1);

&&u&h =arr&l{&kk};

%end;

end;

%end;

%else %if 1<&kk and &kk<&ndat %then %do;

%let yr2=%scan(&yr, %eval(&kk+1), %str( ));

else if &eventim <= %eval(&yr2-80)*12 then do;

%do ii=1 %to &c1;

&&err&ii=arr&ii{&kk};

%end;

%do h=1 %to &c2;

%let l=%eval(&h+&c1);

&&u&h =arr&l{&kk};

%end;

end;

%end;

%else %if &kk=&ndat %then %do;

else do;

%do ii=1 %to &c1;

&&err&ii=arr&ii{&kk};

%end;

%do h=1 %to &c2;

%let l=%eval(&h+&c1);

&&u&h =arr&l{&kk};

%end;

end;

%end;

%end;

run;

ods output close;

data paraEstSimple;

set paraEstSimple;

beta=Estimate;

se =StdErr;

OR =HazardRatio;

low =HRLowerCL;

upp =HRUpperCL;

pval=ProbChisq;

run;

proc iml;

reset center noname;

use paraEstSimple;

read all var {beta} into b;

read all var {se} into seb;

close paraEstSimple;

use &wt;

read all var _char_ into NAM;

read all var _num_ into Wts;

close &wt;

%let col_nam=" WT B SE HR 95% CI p";

%let bfunc=exp;

options nocenter;

titlu="Combined simple updated Uncorrected";

%pr_table_s(est=b, se_est=seb, tab_tit=titlu, row_lab=NAM, outset=outCombine, wt=wts);

quit;

%mend combine;

/* new method for combining the results from all time point */

/* ===================================================================================

Macro getCovVecE8086 - .

===================================================================================*/

%macro getCovVecE8086(

valid80 =, /* Dataset with 1980 Validation Study data */

valid86 =, /* Dataset with 1980 Validation Study data */

err_var =, /* List of the variables measured with error */

true_var = /* List of the variables measured without error*/

);

data v80;

set &valid80;

*if dtsatf = . or dtcalor = . or dtalco = . or ffqsatf = . or ffqcalor = . then delete;

run;

data v86;

set &valid86;

*if dtsatf = . or dtcalor = . or dtalco = . or ffqsatf = . or ffqcalor = . then delete;

run;

proc sort data=v80; by id; run;

proc sort data=v86; by id; run;

data combine1;

merge v86 (in=a) v80 (in=b);

by id;

if a and b;

run;

data combine2;

merge v80 (in=a) v86 (in=b);

by id;

if a and b;

run;

proc sort data=combine1; by id; run;

proc sort data=combine2; by id; run;

proc iml;

reset center noname;

file print;

use combine1;

read all var{ &err_var } into Mat80;

close combine1;

use combine2;

read all var{ &err_var } into Mat86;

close combine2;

*** Read in validation data;

use v80;

read all var { &err_var } into X80;

read all var { &true_var } into Y80;

close v80;

use v86;

read all var { &err_var } into X86;

read all var { &true_var } into Y86;

close v86;

use v80;

read all var {id} into id80;

close v80;

use v86;

read all var {id} into id86;

close v86;

/* number of observations */

n80=nrow(X80);

n86=nrow(X86);

/* number of predictors */

k80=ncol(X80);

k86=ncol(X86);

*** Find the location of the variables that are measured with error;

lev = loc({&true_var}^={&err_var}); /*vector of locations of the error variables*/

/* variables with measurement errors */

Y280=Y80[,lev];

Y286=Y86[,lev];

/* number of variables with measurement errors */

k180=ncol(Y280);

k186=ncol(Y286);

/* number of variables without measurement errors */

k280=k80-k180;

k286=k86-k186;

***add column of ones to the matrix of covariates (for model intercept term);

X80=J(n80,1,1) || X80;

X86=J(n86,1,1) || X86;

create designWDat80 from X80 ;

append from X80;

create designWDat86 from X86 ;

append from X86;

*** Weighted Multivariate Regression (validation study regression);

tMat80=t(X80)*X80;

tMat86=t(X86)*X86;

F80=inv(tMat80); /* used to shorten following expressions */

F86=inv(tMat86); /* used to shorten following expressions */

Theta80=F80*t(X80)*Y280; /* Est. of parameter matrix, incl. intercept; dim: (k+1)xk1 */

Theta86=F86*t(X86)*Y286; /* Est. of parameter matrix, incl. intercept; dim: (k+1)xk1 */

GEV80=t(Theta80);

GEV86=t(Theta86);

/* hat matrix X(X^TX)^{-1}X^T */

H80=X80*F80*t(X80);

H86=X86*F86*t(X86);

/* residuals */

ERR80 = Y280 - H80*Y280;

ERR86 = Y286 - H86*Y286;

/* estimate of variance-covariance matrix */

p80=ncol(X80);

p86=ncol(X86);

Sigma80 = t(ERR80)*ERR80/(n80-p80);

Sigma86 = t(ERR86)*ERR86/(n86-p86);

create sigma80 from Sigma80;

append from Sigma80;

create sigma86 from Sigma86;

append from Sigma86;

ERR802=id80 || ERR80;

create resi80 from ERR802;

append from ERR802;

ERR862=id86 || ERR86;

create resi86 from ERR862;

append from ERR862;

quit;

data resi80;

set resi80;

rename col1=id;

%do i=2 %to &c1+1;

rename col&i=col&i.80;

%end;

run;

data resi86;

set resi86;

rename col1=id;

%do i=2 %to &c1+1;

rename col&i=col&i.86;

%end;

run;

proc sort data=resi80; by id; run;

proc sort data=resi86; by id; run;

data resi8086;

merge resi80 (in=a) resi86 (in=b);

by id;

if a and b;

run;

data resi8086;

set resi8086;

drop id;

run;

proc iml;

use resi8086;

read all into e8086;

close resi8086;

use sigma80;

read all into sigma80;

close sigma80;

use sigma86;

read all into sigma86;

close sigma86;

e80=e8086[,1:&c1];

e86=e8086[,%eval(&c1+1):%eval(2*&c1)];

n=nrow(e80);

sigma8086 = t(e80)*e86/(n-8);

sigma8086=(sigma8086+t(sigma8086))/2;

use v80;

read all var { &err_var } into X80;

read all var { &true_var } into Y80;

read all var {id} into id80;

close v80;

n80 = nrow(X80);

k = ncol(X80);

k1=nrow(Sigma80);

k2=k-k1;

*print 'k=', k, ' k1=', k1, ' k2=', k2;

use v86;

read all var { &err_var } into X86;

read all var { &true_var } into Y86;

read all var {id} into id86;

close v86;

n86 = nrow(X86);

Hmat=J(n80, n86, 0);

do i = 1 to n80;

do j =1 to n86;

if id80[i]=id86[j] then

do;

Hmat[i,j]=1;

end;

else

do;

Hmat[i,j]=0;

end;

end;

end;

create Sigma8086Dat from Sigma8086;

append from Sigma8086;

covVecE8086=Sigma8086 @ Hmat;

use designWDat80;

read all into designWMat80;

close designWDat80;

use designWDat86;

read all into designWMat86;

close designWDat86;

Dmat=J(k, 1, 0) || I(k);

/* it was ginv() */

tmpt1 = inv(t(designWMat80)*(designWMat80)) * t(designWMat80);

tmpt2 = inv(t(designWMat86)*(designWMat86)) * t(designWMat86);

tmpt1D1=Dmat*tmpt1;

tmpt2D1=Dmat*tmpt2;

tmp=tmpt1D1*Hmat*t(tmpt2D1);

*print 'D*(Wt1^T*Wt1)^{-1}*Wt1^T*H*Wt2*(Wt2^T*Wt2)^{-1}D^T>>>';

*print tmp;

covVecTheta1t1t2=Sigma8086 @ tmp;

*print 'Sigma8086=', Sigma8086;

*print 'covVecTheta1t1t2=', covVecTheta1t1t2;

create covVecTheta1t1t2Dat from covVecTheta1t1t2;

append from covVecTheta1t1t2;

quit;

%mend getCovVecE8086;

/*******************************************************************/

/* get overall estimate betastar over 'yrTotal' time perionds */

%macro getBetaStarAll(yrTotal=, k1=, k2=, wts=);

%let k = %eval(&k1 + &k2);

%getCovXi(yrTotal=&yrTotal, k1=&k1, k2=&k2, covXiDat=covXiDat);

%do ell=1 %to &k;

%getOmegaEll(ell=&ell, yrTotal=&yrTotal, k1=&k1, k2=&k2, covXiDat=covXiDat,

OmegaDat=OmegaDat);

%getBetaStarAllEll(ell=&ell, yrTotal=&yrTotal, OmegaDat=OmegaDat,

betaStarAllDat=tmpDat&ell, varBetaStarAllDat=vartmpDat&ell);

%end;

proc iml;

betaStarAll=J(&k, 1, 0);

varBetaStarAll=J(&k, 1, 0);

%do ell = 1 %to &k;

use tmpDat&ell;

read all into tmpBetaStarAll;

close tmpDat&ell;

use vartmpDat&ell;

read all into tmpVarBetaStarAll;

close vartmpDat&ell;

betaStarAll[&ell]=tmpBetaStarAll;

varBetaStarAll[&ell]=tmpVarBetaStarAll;

%end;

use &wts;

read all var _char_ into NAM;

read all var _num_ into WT;

close &wts;

seb=sqrt(varBetaStarAll);

myOR=exp(WT#betaStarAll);

low=exp(WT#(betaStarAll-1.96*seb));

upp=exp(WT#(betaStarAll+1.96*seb));

pval=1-probchi((betaStarAll/seb)##2,1);

resMat=WT||betaStarAll||seb||myOR||low||upp||pval;

options nocenter;

print resMat[rowname=NAM colname={"WT","beta", "se", "HR", "low", "upp", "pval"} format=8.5];

quit;

%mend getBetaStarAll;

/* get overall estimate of the ell-th effect over 'yrTotal' time periods */

%macro getBetaStarAllEll(ell=, yrTotal=, OmegaDat=, betaStarAllDat=, varBetaStarAllDat=);

proc iml;

use &OmegaDat;

read all into Omega;

close &OmegaDat;

oneVec=J(&yrTotal, 1, 1);

iOmega=inv(Omega); /* it was ginv() */

denom=t(oneVec)*iOmega*oneVec;

wVec=iOmega*oneVec/denom;

tmpBetaStarAll = 0;

%do i=1 %to &yrTotal;

use betaStarDat&i;

read all into betaStarVeci;

close betaStarDat&i;

tmpBetaStarAll=tmpBetaStarAll+wVec[&i]*betaStarVeci[&ell];

%end;

tmpVarBetaStarAll=1/denom;

create &betaStarAllDat from tmpBetaStarAll;

append from tmpBetaStarAll;

create &varBetaStarAllDat from tmpVarBetaStarAll;

append from tmpVarBetaStarAll;

quit;

%mend getBetaStarAllEll;

%macro getOmegaEll(ell=, yrTotal=, k1=, k2=, covXiDat=, OmegaDat=);

proc iml;

use &covXiDat;

read all into covXi;

close &covXiDat;

/* get partial h(xi) / partial xi */

%let k = %eval(&k1 + &k2);

%let kk1 = %eval((&k1)*(&k));

%let tmpnrUp = %eval((&kk1)*(&yrTotal));

%let tmpnrDown = %eval((&k)*(&yrTotal));

upMat= J(&tmpnrUp, &yrTotal,0);

downMat= J(&tmpnrDown, &yrTotal,0);

%do i=1 %to &yrTotal;

/* A=Lambda^{-1} */

/* A1=first k1 columns of A */

use ADat&i;

read all into Ai;

close ADat&i;

use A1Dat&i;

read all into A1i;

close A1Dat&i;

/* read eta=beta^T */

use betaDat&i;

read all into etaVeci;

close betaDat&i;

%let startUp=%eval((&i-1)*(&kk1)+1);

%let endUp=%eval((&i)*(&kk1));

upMat[&startUp:&endUp, &i]= - (t(A1i) * etaVeci) @ (Ai[,&ell]);

%let startDown=%eval((&i-1)*(&k)+1);

%let endDown=%eval((&i)*(&k));

downMat[&startDown:&endDown, &i]=(Ai[,&ell]);

%end;

/* stack upMat and downMat */

dhdxi=upMat//downMat;

Omega=t(dhdxi) * (covXi) * dhdxi;

create &OmegaDat from Omega;

append from Omega;

quit;

%mend getOmegaEll;

/* get cov(Xi) */

%macro getCovXi(yrTotal=, k1=, k2=, covXiDat=);

proc iml;

%let k = %eval(&k1 + &k2);

%let kk1 = %eval((&k) * (&k1));

%let tmpnrUp = %eval((&kk1)*(&yrTotal));

%let tmpnrDown = %eval((&k)*(&yrTotal));

%let nr=%eval(&tmpnrUp+&tmpnrDown);

covXi = J((&nr), (&nr), 0);

%do i=1 %to &yrTotal;

/* read cov(Vec(Theta1)) */

use covVecTheta1Dat&i;

read all into covVecTheta1Mati;

close covVecTheta1Dat&i;

/* read cov(eta) */

use covBetaDat&i;

read all into covBetaMati;

close covBetaDat&i;

%let startUp=%eval((&i-1)*(&kk1)+1);

%let endUp=%eval((&i)*(&kk1));

covXi[&startUp:&endUp, &startup:&endUp]=covVecTheta1Mati;

%let startDown=%eval(&tmpnrUp+(&i-1)*(&k)+1);

%let endDown=%eval(&tmpnrUp+(&i)*(&k));

covXi[&startDown:&endDown, &startDown:&endDown]=covBetaMati;

%do j = %eval(&i)+1 %to &yrTotal;

use covVecTheta1t1t2Dat;

read all into covVecTheta1t1t2;

close covVecTheta1t1t2Dat;

%let startUp2=%eval((&j-1)*(&kk1)+1);

%let endUp2=%eval((&j)*(&kk1));

covXi[&startUp:&endUp, &startup2:&endUp2]=covVecTheta1t1t2;

covXi[&startUp2:&endUp2, &startup:&endUp]=t(covVecTheta1t1t2);

%end;

%end;

create &covXiDat from covXi;

append from covXi;

quit;

%mend getCovXi;