Here returns to the code page.
Bissantz, Chown and Dette (2016+) investigate a smooth bootstrap approach to bandwidth selection for an indirect regression and the empirical process of regression residuals from this model.
R code:
###############################################################################
## Simulation to demonstrate the root-n consistency of Fhat via the MSE and ##
## IMSE for finite samples. The smooth bootstrap approach for parameter ##
## selection is implemented. ##
###############################################################################
## Required libraries
require(tcltk)
## Set the correct working directory.
setwd("~/Documents/Published/irerrdf/simulation/")
## Custom functions.
## First case regression function: hill shaped.
reg1 = function(xs) {
return( 6 / 5 + sqrt(2) * cos( 2 * pi * xs )
+ sqrt(2) * cos( 4 * pi * xs ) / 4 )
}
## Second case regression function: wave shaped.
reg2 = function(xs) {
return( 3 / 2 + sqrt(2) * cos( 2 * pi * xs ) / 2
- sqrt(2) * cos( 4 * pi * xs ) / 8
- 4 * sqrt(2) * cos( 6 * pi * xs ) / 3 )
}
## Fourier coefficients of psi.
psi = function(ks) {
return( ( 1 - ( -1 ) ^ ( abs(ks) ) * exp( -5 ) ) /
( 1 + 4 * pi ^ 2 * ks ^ 2 / 10 ^ 2 ) /
( 1 - exp( -5 ) ) )
}
## Fourier basis transformation.
B_trans = function(xs, ks) {
## n-by-p matrix of transformed covariates.
Kx = matrix(xs, ncol=1) %*% matrix(ks, nrow=1)
## Return the Fourier basis covariates.
return( cos( 2 * pi * Kx ) + 1i * sin( 2 * pi * Kx ) )
}
## Distorted regression function, first case.
K_reg1 = function(xs) {
## Fourier frequencies to threshold 2.
freqs = -2 : 2
## Fourier coefficients of psi to threshold 2.
Psi = psi(freqs)
## Fourier basis.
B = B_trans(xs, freqs)
## DFT of reg1.
theta = t( Conj(B) ) %*% matrix(reg1(xs), nrow=length(xs)) / length(xs)
## Distorted Fourier coefficients.
Dk = theta * Psi
## Return distorted regression values.
return( Re( B %*% Dk ) )
}
## Distorted regression function, second case.
K_reg2 = function(xs) {
## Fourier frequencies to threshold 3.
freqs = -3 : 3
## Fourier coefficients of psi to threshold 3.
Psi = psi(freqs)
## Fourier basis.
B = B_trans(xs, freqs)
## DFT of reg2.
theta = t( Conj(B) ) %*% matrix(reg2(xs), nrow=length(xs)) / length(xs)
## Distorted Fourier coefficients.
Dk = theta * Psi
## Return distorted regression values.
return( Re( B %*% Dk ) )
}
## Indirect regression estimator.
htheta = function(xs, xdata, ydata, reg_parm) {
## When reg_parm = 0 return the mean of ydata for each xs.
if( reg_parm == 0 ) { return( rep(mean(ydata), length.out=length(xs)) ) }
else {
## Sample size.
nsamp = length(ydata)
## Fourier frequencies to threshold reg_parm.
freqs = -reg_parm : reg_parm
## Transformed covariates.
B = B_trans(xdata, freqs)
## Transformed predictors.
B_xs = B_trans(xs, freqs)
## Fourier coefficients of psi to threshold reg_parm.
Psi = psi(freqs)
## Diagonal matrix of inverse Fourier coefficients.
Psi_inv = diag( Conj(Psi) / Re( Psi * Conj(Psi) ) )
## Deconvolution operator.
P = B_xs %*% Psi_inv %*% t( Conj(B) ) / nsamp
## Deconvolution regression estimator at xs.
return( Re( P %*% matrix(ydata, nrow=nsamp) ) )
}
}
## Distorted regression estimator.
hKtheta = function(xs, xdata, ydata, reg_parm) {
## When reg_parm = 0 return the mean of ydata for each xs.
if( reg_parm == 0 ) { return( rep(mean(ydata), length.out=length(xs)) ) }
else {
## Sample size.
nsamp = length(ydata)
## Fourier frequencies to threshold reg_parm.
freqs = -reg_parm : reg_parm
## Transformed covariates.
B = B_trans(xdata, freqs)
## Transformed predictors.
B_xs = B_trans(xs, freqs)
## Prediction operator.
P = B_xs %*% t( Conj(B) ) / nsamp
## Regression estimator at xs.
return( Re( P %*% matrix(ydata, nrow=nsamp) ) )
}
}
## Estimated ISE between htheta_0 and a bootstrap version htheta_boot.
ISE_htheta_b = function(b, reg_parm, es, fits, htheta_0, xdata) {
## Sample size.
nsamp = length(es)
## Resample residual indices.
boot_ind = sample(1 : nsamp, replace=TRUE)
## Resampled zero mean residuals.
es_boot = ( es - mean(es) )[boot_ind]
## Smooth bootstrap residuals.
es_smboot = es_boot +
1.06 * sd(es) * nsamp ^ ( -1 / 5 ) * rnorm(nsamp)
## Smooth bootstrap responses.
ydata_boot = fits + es_smboot
## Bootstrap version of htheta.
htheta_boot = htheta(xdata, xdata, ydata_boot, reg_parm)
## Return the estimated ISE based on design points.
return( crossprod( htheta_0 - htheta_boot ) / nsamp )
}
## Estimated IMSE between htheta_0 and a bootstrap version.
IMSE_htheta = function(reg_parm, es, fits, htheta_0, xdata) {
## Simulate ISE based on smooth bootstrapping.
ISE_htheta = sapply(1 : 200, ISE_htheta_b,
reg_parm, es, fits, htheta_0, xdata)
## Return the average of the bootstrap ISE estimates.
return( mean(ISE_htheta) )
}
## Bootstrap regularization parameter selector.
boot_reg_parm = function(xdata, ydata, pilot_parm) {
## Initial estimate based on pilot bandwidth.
htheta_0 = htheta(xdata, xdata, ydata, pilot_parm)
## Fitted values.
fits = hKtheta(xdata, xdata, ydata, pilot_parm)
## Residuals.
res = ydata - fits
## Maximum Fourier frequency threshold.
reg_max = 5 * ceiling( length(ydata) ^ ( 1 / 10 ) )
## Grid of regularization thresholds.
grid = 0 : reg_max
## Compute the IMSE for each threshold in the grid.
IMSE = sapply(grid, IMSE_htheta, res, fits, htheta_0, xdata)
## Return the threshold that minimizes IMSE.
return( grid[ which.min(IMSE) ] )
}
## Estimated ISE with respect to true theta_0.
ISE_htheta = function(reg_parm, xdata, ydata, theta_0) {
## Deconvolution fits.
htheta_fits = htheta(xdata, xdata, ydata, reg_parm)
## Return the estimated ISE.
return( crossprod( theta_0 - htheta_fits ) / length(ydata) )
}
## L2 bandwidth selector.
L2_reg_parm = function(xdata, ydata, theta_0) {
## Maximum Fourier frequency threshold.
reg_max = 5 * ceiling( length(ydata) ^ ( 1 / 10 ) )
## Grid of regularization thresholds.
grid = 0 : reg_max
## Calculate ISE for each threshold in the grid.
ISE = sapply(grid, ISE_htheta, xdata, ydata, theta_0)
## Return the threshold that minimizes ISE.
return( grid[ which.min(ISE) ] )
}
## Computes Fhat.
Fhat = function(us, res) {
return( sapply(us, function(u, es) { return( mean( es <= u ) ) }, res) )
}
## Computes the influence function for Fhat in the normal model.
b_norm = function(es, us) {
return( sapply(us, function(u, es) {
return( 1 * ( es <= u ) - pnorm(u, sd=sig0) +
dnorm(u, sd=sig0) * es ) }, es) )
}
## Computes the influence function for Fhat in the t model.
b_t = function(es, us) {
return( sapply(us, function(u, es) {
return( 1 * ( es <= u ) - pt(u / trscl, df=tdf) +
es * dt(u / trscl, df=tdf) / trscl ) }, es) )
}
## Computes ( Fhat - F ) ^ 2 for the normal model.
diff_sq_norm = function(us, res) {
return( ( Fhat(us, res) - pnorm(us, sd=sig0) ) ^ 2 )
}
## Computes ( Fhat - F ) ^ 2 for the t model.
diff_sq_t = function(us, res) {
return( ( Fhat(us, res) - pt(us / trscl, df=tdf) ) ^ 2 )
}
## Simulation.
## Number of simulation runs.
runs = 1000
## Sample sizes 51, 101, 201 and 301.
samples = c(25, 50, 100, 150)
## Error standard deviation for normal model.
sig0 = 2 / 3
## Degrees of freedom for t model.
tdf = 4
## Rescaling of t errors to have scale of sig0.
trscl = sig0 * sqrt( ( tdf - 2 ) / tdf )
## u values to check pointwise inflated MSE.
useq = c(-2, -1, 0, 1, 2)
## Bootstrap selected parameters.
boot_parm_norm_reg1 = array(NA, c(length(samples), runs))
boot_parm_norm_reg2 = array(NA, c(length(samples), runs))
boot_parm_t_reg1 = array(NA, c(length(samples), runs))
boot_parm_t_reg2 = array(NA, c(length(samples), runs))
## ISE minimizing parameters.
L2_parm_norm_reg1 = array(NA, c(length(samples), runs))
L2_parm_norm_reg2 = array(NA, c(length(samples), runs))
L2_parm_t_reg1 = array(NA, c(length(samples), runs))
L2_parm_t_reg2 = array(NA, c(length(samples), runs))
## ISE values.
htheta_boot_parm_ISE_norm_reg1 = rep(NA, runs)
htheta_L2_parm_ISE_norm_reg1 = rep(NA, runs)
htheta_boot_parm_ISE_norm_reg2 = rep(NA, runs)
htheta_L2_parm_ISE_norm_reg2 = rep(NA, runs)
htheta_boot_parm_ISE_t_reg1 = rep(NA, runs)
htheta_L2_parm_ISE_t_reg1 = rep(NA, runs)
htheta_boot_parm_ISE_t_reg2 = rep(NA, runs)
htheta_L2_parm_ISE_t_reg2 = rep(NA, runs)
## IMSE values.
htheta_boot_parm_IMSE_norm_reg1 = rep(NA, length(samples))
htheta_L2_parm_IMSE_norm_reg1 = rep(NA, length(samples))
htheta_boot_parm_IMSE_norm_reg2 = rep(NA, length(samples))
htheta_L2_parm_IMSE_norm_reg2 = rep(NA, length(samples))
htheta_boot_parm_IMSE_t_reg1 = rep(NA, length(samples))
htheta_L2_parm_IMSE_t_reg1 = rep(NA, length(samples))
htheta_boot_parm_IMSE_t_reg2 = rep(NA, length(samples))
htheta_L2_parm_IMSE_t_reg2 = rep(NA, length(samples))
## Fhat - F values.
Fhat_diff_norm_reg1 = array(NA, c(runs, length(useq)))
Fhat_diff_norm_reg2 = array(NA, c(runs, length(useq)))
Fhat_diff_t_reg1 = array(NA, c(runs, length(useq)))
Fhat_diff_t_reg2 = array(NA, c(runs, length(useq)))
## Fhat inflated bias values.
Fhat_ibias_norm_reg1 = array(NA, c(length(samples), length(useq)))
Fhat_ibias_norm_reg2 = array(NA, c(length(samples), length(useq)))
Fhat_ibias_t_reg1 = array(NA, c(length(samples), length(useq)))
Fhat_ibias_t_reg2 = array(NA, c(length(samples), length(useq)))
## Fhat inflated variance values.
Fhat_ivar_norm_reg1 = array(NA, c(length(samples), length(useq)))
Fhat_ivar_norm_reg2 = array(NA, c(length(samples), length(useq)))
Fhat_ivar_t_reg1 = array(NA, c(length(samples), length(useq)))
Fhat_ivar_t_reg2 = array(NA, c(length(samples), length(useq)))
## Fhat inflated MSE values.
Fhat_iMSE_norm_reg1 = array(NA, c(length(samples), length(useq)))
Fhat_iMSE_norm_reg2 = array(NA, c(length(samples), length(useq)))
Fhat_iMSE_t_reg1 = array(NA, c(length(samples), length(useq)))
Fhat_iMSE_t_reg2 = array(NA, c(length(samples), length(useq)))
## Fhat ISE values.
Fhat_ISE_norm_reg1 = rep(NA, runs)
Fhat_ISE_norm_reg2 = rep(NA, runs)
Fhat_ISE_t_reg1 = rep(NA, runs)
Fhat_ISE_t_reg2 = rep(NA, runs)
## Fhat inflated IMSE values
Fhat_iIMSE_norm_reg1 = rep(NA, length(samples))
Fhat_iIMSE_norm_reg2 = rep(NA, length(samples))
Fhat_iIMSE_t_reg1 = rep(NA, length(samples))
Fhat_iIMSE_t_reg2 = rep(NA, length(samples))
## Open a progress bar.
pb = tkProgressBar(title="Indirect regression simulation", min=0, max=runs)
## Simulation
for(n in samples) {
## Sample size.
nsamp = 2 * n + 1
## Index of sample size.
n_ind = which( samples == n )
## Fill progress bar with current sample size information.
setTkProgressBar(pb, 0, label=paste0("Sample size: ", nsamp))
## Initial time difference is zero.
avg_time_diff = 0
for(s in 1 : runs) {
## Start a computing timer.
init_time = proc.time()[3]
## Fixed covariates in [-1 / 2, 1 / 2].
xn = ( -n : n ) / ( 2 * n )
## Pilot parameter for reg1.
pilot_parm_reg1 = ceiling( 1.5 * ( nsamp / log(nsamp) ) ^ ( 1 / 11 ) )
## Pilot parameter for reg2.
pilot_parm_reg2 = ceiling( 2 * ( nsamp / log(nsamp) ) ^ ( 1 / 11 ) )
## Indirect regression values.
reg1_vals = reg1(xn)
reg2_vals = reg2(xn)
## Distorted regression values.
K_reg1_vals = K_reg1(xn)
K_reg2_vals = K_reg2(xn)
## Model responses.
Y_norm_reg1 = K_reg1_vals + rnorm(nsamp, sd=sig0)
Y_norm_reg2 = K_reg2_vals + rnorm(nsamp, sd=sig0)
Y_t_reg1 = K_reg1_vals + trscl * rt(nsamp, df=tdf)
Y_t_reg2 = K_reg2_vals + trscl * rt(nsamp, df=tdf)
## Bootstrap selected parameters.
boot_parm_norm_reg1[n_ind, s] =
boot_reg_parm(xn, Y_norm_reg1, pilot_parm_reg1)
boot_parm_norm_reg2[n_ind, s] =
boot_reg_parm(xn, Y_norm_reg2, pilot_parm_reg2)
boot_parm_t_reg1[n_ind, s] =
boot_reg_parm(xn, Y_t_reg1, pilot_parm_reg1)
boot_parm_t_reg2[n_ind, s] =
boot_reg_parm(xn, Y_t_reg2, pilot_parm_reg2)
## L2 selected parameters.
L2_parm_norm_reg1[n_ind, s] =
L2_reg_parm(xn, Y_norm_reg1, reg1_vals)
L2_parm_norm_reg2[n_ind, s] =
L2_reg_parm(xn, Y_norm_reg2, reg2_vals)
L2_parm_t_reg1[n_ind, s] =
L2_reg_parm(xn, Y_t_reg1, reg1_vals)
L2_parm_t_reg2[n_ind, s] =
L2_reg_parm(xn, Y_t_reg2, reg2_vals)
## Deconvolution regression fits.
deconv_fits_boot_parm_norm_reg1 =
htheta(xn, xn, Y_norm_reg1, boot_parm_norm_reg1[n_ind, s])
deconv_fits_L2_parm_norm_reg1 =
htheta(xn, xn, Y_norm_reg1, L2_parm_norm_reg1[n_ind, s])
deconv_fits_boot_parm_norm_reg2 =
htheta(xn, xn, Y_norm_reg2, boot_parm_norm_reg2[n_ind, s])
deconv_fits_L2_parm_norm_reg2 =
htheta(xn, xn, Y_norm_reg2, L2_parm_norm_reg2[n_ind, s])
deconv_fits_boot_parm_t_reg1 =
htheta(xn, xn, Y_t_reg1, boot_parm_t_reg1[n_ind, s])
deconv_fits_L2_parm_t_reg1 =
htheta(xn, xn, Y_t_reg1, L2_parm_t_reg1[n_ind, s])
deconv_fits_boot_parm_t_reg2 =
htheta(xn, xn, Y_t_reg2, boot_parm_t_reg2[n_ind, s])
deconv_fits_L2_parm_t_reg2 =
htheta(xn, xn, Y_t_reg2, L2_parm_t_reg2[n_ind, s])
## ISE estimates.
htheta_boot_parm_ISE_norm_reg1[s] =
crossprod( deconv_fits_boot_parm_norm_reg1 - reg1_vals ) / nsamp
htheta_L2_parm_ISE_norm_reg1[s] =
crossprod( deconv_fits_L2_parm_norm_reg1 - reg1_vals ) / nsamp
htheta_boot_parm_ISE_norm_reg2[s] =
crossprod( deconv_fits_boot_parm_norm_reg2 - reg2_vals ) / nsamp
htheta_L2_parm_ISE_norm_reg2[s] =
crossprod( deconv_fits_L2_parm_norm_reg2 - reg2_vals ) / nsamp
htheta_boot_parm_ISE_t_reg1[s] =
crossprod( deconv_fits_boot_parm_t_reg1 - reg1_vals ) / nsamp
htheta_L2_parm_ISE_t_reg1[s] =
crossprod( deconv_fits_L2_parm_t_reg1 - reg1_vals ) / nsamp
htheta_boot_parm_ISE_t_reg2[s] =
crossprod( deconv_fits_boot_parm_t_reg2 - reg2_vals ) / nsamp
htheta_L2_parm_ISE_t_reg2[s] =
crossprod( deconv_fits_L2_parm_t_reg2 - reg2_vals ) / nsamp
## Fitted values.
fits_norm_reg1 =
hKtheta(xn, xn, Y_norm_reg1, boot_parm_norm_reg1[n_ind, s])
fits_norm_reg2 =
hKtheta(xn, xn, Y_norm_reg2, boot_parm_norm_reg2[n_ind, s])
fits_t_reg1 = hKtheta(xn, xn, Y_t_reg1, boot_parm_t_reg1[n_ind, s])
fits_t_reg2 = hKtheta(xn, xn, Y_t_reg2, boot_parm_t_reg2[n_ind, s])
## Model residuals.
res_norm_reg1 = Y_norm_reg1 - fits_norm_reg1
res_norm_reg2 = Y_norm_reg2 - fits_norm_reg2
res_t_reg1 = Y_t_reg1 - fits_t_reg1
res_t_reg2 = Y_t_reg2 - fits_t_reg2
## Fhat pointwise differences.
Fhat_diff_norm_reg1[s, ] =
Fhat(useq, res_norm_reg1) - pnorm(useq, sd=sig0)
Fhat_diff_norm_reg2[s, ] =
Fhat(useq, res_norm_reg2) - pnorm(useq, sd=sig0)
Fhat_diff_t_reg1[s, ] =
Fhat(useq, res_t_reg1) - pt(useq / trscl, df=tdf)
Fhat_diff_t_reg2[s, ] =
Fhat(useq, res_t_reg2) - pt(useq / trscl, df=tdf)
## Fhat ISE values.
Fhat_ISE_norm_reg1[s] =
integrate(diff_sq_norm, -Inf, Inf, res_norm_reg1,
stop.on.error=FALSE)$value
Fhat_ISE_norm_reg2[s] =
integrate(diff_sq_norm, -Inf, Inf, res_norm_reg2,
stop.on.error=FALSE)$value
Fhat_ISE_t_reg1[s] =
integrate(diff_sq_t, -Inf, Inf, res_t_reg1,
stop.on.error=FALSE)$value
Fhat_ISE_t_reg2[s] =
integrate(diff_sq_t, -Inf, Inf, res_t_reg2,
stop.on.error=FALSE)$value
## One step corrected mean estimate of computing time.
avg_time_diff = ( ( s - 1 ) * avg_time_diff +
proc.time()[3] - init_time ) / s
## Estimated time to finish (in hours).
ETF = round(avg_time_diff * ( runs - s ) / 3600, digits=1)
## Updated progress bar information.
pb_msg = paste0("Sample size: ", nsamp, " (", ETF, " hour(s) to go)")
## Update progress bar.
setTkProgressBar(pb, s, label=pb_msg)
}
## htheta IMSE values.
htheta_boot_parm_IMSE_norm_reg1[n_ind] =
mean(htheta_boot_parm_ISE_norm_reg1)
htheta_L2_parm_IMSE_norm_reg1[n_ind] =
mean(htheta_L2_parm_ISE_norm_reg1)
htheta_boot_parm_IMSE_norm_reg2[n_ind] =
mean(htheta_boot_parm_ISE_norm_reg2)
htheta_L2_parm_IMSE_norm_reg2[n_ind] =
mean(htheta_L2_parm_ISE_norm_reg2)
htheta_boot_parm_IMSE_t_reg1[n_ind] =
mean(htheta_boot_parm_ISE_t_reg1)
htheta_L2_parm_IMSE_t_reg1[n_ind] =
mean(htheta_L2_parm_ISE_t_reg1)
htheta_boot_parm_IMSE_t_reg2[n_ind] =
mean(htheta_boot_parm_ISE_t_reg2)
htheta_L2_parm_IMSE_t_reg2[n_ind] =
mean(htheta_L2_parm_ISE_t_reg2)
## Fhat inflated bias.
Fhat_ibias_norm_reg1[n_ind, ] =
sqrt(nsamp) * apply(Fhat_diff_norm_reg1, 2, mean)
Fhat_ibias_norm_reg2[n_ind, ] =
sqrt(nsamp) * apply(Fhat_diff_norm_reg2, 2, mean)
Fhat_ibias_t_reg1[n_ind, ] =
sqrt(nsamp) * apply(Fhat_diff_t_reg1, 2, mean)
Fhat_ibias_t_reg2[n_ind, ] =
sqrt(nsamp) * apply(Fhat_diff_t_reg2, 2, mean)
## Fhat inflated variance.
Fhat_ivar_norm_reg1[n_ind, ] = nsamp * apply(Fhat_diff_norm_reg1, 2, var)
Fhat_ivar_norm_reg2[n_ind, ] = nsamp * apply(Fhat_diff_norm_reg2, 2, var)
Fhat_ivar_t_reg1[n_ind, ] = nsamp * apply(Fhat_diff_t_reg1, 2, var)
Fhat_ivar_t_reg2[n_ind, ] = nsamp * apply(Fhat_diff_t_reg2, 2, var)
## Fhat inflated MSE.
Fhat_iMSE_norm_reg1[n_ind, ] =
Fhat_ibias_norm_reg1[n_ind, ] ^ 2 + Fhat_ivar_norm_reg1[n_ind, ]
Fhat_iMSE_norm_reg2[n_ind, ] =
Fhat_ibias_norm_reg2[n_ind, ] ^ 2 + Fhat_ivar_norm_reg2[n_ind, ]
Fhat_iMSE_t_reg1[n_ind, ] =
Fhat_ibias_t_reg1[n_ind, ] ^ 2 + Fhat_ivar_t_reg1[n_ind, ]
Fhat_iMSE_t_reg2[n_ind, ] =
Fhat_ibias_t_reg2[n_ind, ] ^ 2 + Fhat_ivar_t_reg2[n_ind, ]
## Fhat inflated IMSE.
Fhat_iIMSE_norm_reg1[n_ind] = nsamp * mean(Fhat_ISE_norm_reg1)
Fhat_iIMSE_norm_reg2[n_ind] = nsamp * mean(Fhat_ISE_norm_reg2)
Fhat_iIMSE_t_reg1[n_ind] = nsamp * mean(Fhat_ISE_t_reg1)
Fhat_iIMSE_t_reg2[n_ind] = nsamp * mean(Fhat_ISE_t_reg2)
}
## Close the progress bar.
close(pb)
## Simulation results.
## htheta IMSE results.
htheta_boot_parm_IMSE_norm_reg1_results =
t( cbind(2 * samples + 1, htheta_boot_parm_IMSE_norm_reg1) )
htheta_L2_parm_IMSE_norm_reg1_results =
t( cbind(2 * samples + 1, htheta_L2_parm_IMSE_norm_reg1) )
htheta_boot_parm_IMSE_norm_reg2_results =
t( cbind(2 * samples + 1, htheta_boot_parm_IMSE_norm_reg2) )
htheta_L2_parm_IMSE_norm_reg2_results =
t( cbind(2 * samples + 1, htheta_L2_parm_IMSE_norm_reg2) )
htheta_boot_parm_IMSE_t_reg1_results =
t( cbind(2 * samples + 1, htheta_boot_parm_IMSE_t_reg1) )
htheta_L2_parm_IMSE_t_reg1_results =
t( cbind(2 * samples + 1, htheta_L2_parm_IMSE_t_reg1) )
htheta_boot_parm_IMSE_t_reg2_results =
t( cbind(2 * samples + 1, htheta_boot_parm_IMSE_t_reg2) )
htheta_L2_parm_IMSE_t_reg2_results =
t( cbind(2 * samples + 1, htheta_L2_parm_IMSE_t_reg2) )
## True inflated MSE values for Fhat.
true_iMSE_norm = sapply(useq, function(u) {
return( integrate(function(v, u) {
return( b_norm(v, u) ^ 2 * dnorm(v, sd=sig0) ) }, -Inf, Inf, u,
stop.on.error=FALSE)$value ) } )
true_iMSE_t = sapply(useq, function(u) {
return( integrate(function(v, u) {
return( b_t(v, u) ^ 2 * ( dt(v / trscl, df=tdf) / trscl ) ) },
-Inf, Inf, u, stop.on.error=FALSE)$value ) } )
## True inflated MISE values for Fhat.
true_iIMSE_norm = sum( sapply(seq(-4, 4, by=0.0008), function(u) {
return( integrate(function(v, u) {
return( b_norm(v, u) ^ 2 * dnorm(v, sd=sig0) ) }, -Inf, Inf, u,
stop.on.error=FALSE)$value ) } ) ) * 0.0008
true_iIMSE_t = sum( sapply(seq(-16, 16, by=0.0008), function(u) {
return( integrate(function(v, u) {
return( b_t(v, u) ^ 2 * ( dt(v / trscl, df=tdf) / trscl ) ) },
-Inf, Inf, u, stop.on.error=FALSE)$value ) } ) ) * 0.0008
## Fhat inflated bias results.
Fhat_ibias_norm_reg1_results =
t( cbind(2 * samples + 1, Fhat_ibias_norm_reg1) )
Fhat_ibias_norm_reg2_results =
t( cbind(2 * samples + 1, Fhat_ibias_norm_reg2) )
Fhat_ibias_t_reg1_results =
t( cbind(2 * samples + 1, Fhat_ibias_t_reg1) )
Fhat_ibias_t_reg2_results =
t( cbind(2 * samples + 1, Fhat_ibias_t_reg2) )
## Fhat inflated variance results.
Fhat_ivar_norm_reg1_results = t( cbind(2 * samples + 1, Fhat_ivar_norm_reg1) )
Fhat_ivar_norm_reg2_results = t( cbind(2 * samples + 1, Fhat_ivar_norm_reg2) )
Fhat_ivar_t_reg1_results = t( cbind(2 * samples + 1, Fhat_ivar_t_reg1) )
Fhat_ivar_t_reg2_results = t( cbind(2 * samples + 1, Fhat_ivar_t_reg2) )
## Fhat inflated MSE results.
Fhat_iMSE_norm_reg1_results = t( cbind(c(2 * samples + 1, Inf),
rbind(Fhat_iMSE_norm_reg1, true_iMSE_norm)) )
Fhat_iMSE_norm_reg2_results = t( cbind(c(2 * samples + 1, Inf),
rbind(Fhat_iMSE_norm_reg2, true_iMSE_norm)) )
Fhat_iMSE_t_reg1_results = t( cbind(c(2 * samples + 1, Inf),
rbind(Fhat_iMSE_t_reg1, true_iMSE_t)) )
Fhat_iMSE_t_reg2_results = t( cbind(c(2 * samples + 1, Inf),
rbind(Fhat_iMSE_t_reg2, true_iMSE_t)) )
## Fhat inflated IMSE results.
Fhat_iIMSE_norm_reg1_results = t( cbind(c(2 * samples + 1, Inf),
c(Fhat_iIMSE_norm_reg1, true_iIMSE_norm)) )
Fhat_iIMSE_norm_reg2_results = t( cbind(c(2 * samples + 1, Inf),
c(Fhat_iIMSE_norm_reg2, true_iIMSE_norm)) )
Fhat_iIMSE_t_reg1_results = t( cbind(c(2 * samples + 1, Inf),
c(Fhat_iIMSE_t_reg1, true_iIMSE_t)) )
Fhat_iIMSE_t_reg2_results = t( cbind(c(2 * samples + 1, Inf),
c(Fhat_iIMSE_t_reg2, true_iIMSE_t)) )
## Display simulation summary tables.
round(htheta_boot_parm_IMSE_norm_reg1_results, digits=3)
round(htheta_L2_parm_IMSE_norm_reg1_results, digits=3)
round(htheta_boot_parm_IMSE_norm_reg2_results, digits=3)
round(htheta_L2_parm_IMSE_norm_reg2_results, digits=3)
round(htheta_boot_parm_IMSE_t_reg1_results, digits=3)
round(htheta_L2_parm_IMSE_t_reg1_results, digits=3)
round(htheta_boot_parm_IMSE_t_reg2_results, digits=3)
round(htheta_L2_parm_IMSE_t_reg2_results, digits=3)
round(Fhat_ibias_norm_reg1_results, digits=3)
round(Fhat_ibias_norm_reg2_results, digits=3)
round(Fhat_ibias_t_reg1_results, digits=3)
round(Fhat_ibias_t_reg2_results, digits=3)
round(Fhat_ivar_norm_reg1_results, digits=3)
round(Fhat_ivar_norm_reg2_results, digits=3)
round(Fhat_ivar_t_reg1_results, digits=3)
round(Fhat_ivar_t_reg2_results, digits=3)
round(Fhat_iMSE_norm_reg1_results, digits=3)
round(Fhat_iMSE_norm_reg2_results, digits=3)
round(Fhat_iMSE_t_reg1_results, digits=3)
round(Fhat_iMSE_t_reg2_results, digits=3)
round(Fhat_iIMSE_norm_reg1_results, digits=3)
round(Fhat_iIMSE_norm_reg2_results, digits=3)
round(Fhat_iIMSE_t_reg1_results, digits=3)
round(Fhat_iIMSE_t_reg2_results, digits=3)
## Save results to a file.
save(boot_parm_norm_reg1, L2_parm_norm_reg1,
boot_parm_norm_reg2, L2_parm_norm_reg2,
boot_parm_t_reg1, L2_parm_t_reg1,
boot_parm_t_reg2, L2_parm_t_reg2,
htheta_boot_parm_IMSE_norm_reg1_results,
htheta_L2_parm_IMSE_norm_reg1_results,
htheta_boot_parm_IMSE_norm_reg2_results,
htheta_L2_parm_IMSE_norm_reg2_results,
htheta_boot_parm_IMSE_t_reg1_results,
htheta_L2_parm_IMSE_t_reg1_results,
htheta_boot_parm_IMSE_t_reg2_results,
htheta_L2_parm_IMSE_t_reg2_results,
Fhat_ibias_norm_reg1_results, Fhat_ibias_norm_reg2_results,
Fhat_ibias_t_reg1_results, Fhat_ibias_t_reg2_results,
Fhat_ivar_norm_reg1_results, Fhat_ivar_norm_reg2_results,
Fhat_ivar_t_reg1_results, Fhat_ivar_t_reg2_results,
Fhat_iMSE_norm_reg1_results, Fhat_iMSE_norm_reg2_results,
Fhat_iMSE_t_reg1_results, Fhat_iMSE_t_reg2_results,
Fhat_iIMSE_norm_reg1_results, Fhat_iIMSE_norm_reg2_results,
Fhat_iIMSE_t_reg1_results, Fhat_iIMSE_t_reg2_results,
file = "irerrdf_results.RData")
Python code:
###############################################################################
## Simulation to demonstrate the root-n consistency of Fhat via the MSE and ##
## IMSE for finite samples. The smooth bootstrap approach for parameter ##
## selection is implemented. ##
###############################################################################
## Required libraries.
import numpy as np
import scipy as sp
from scipy.stats import uniform
from scipy.stats import norm
from scipy.stats import t
from progress.bar import Bar
from tabulate import tabulate
## Progress bar class.
class FancyBar(Bar):
suffix='%(percent)d%% - %(ETF).1fh to go.'
@property
def ETF(self):
return self.eta / 3600.0
## Custom functions.
## First case regression function: hill shaped.
def reg1(xs):
return( 6.0 / 5.0 + np.sqrt(2.0) * np.cos( 2.0 * np.pi * xs )
+ np.sqrt(2.0) * np.cos( 4.0 * np.pi * xs ) / 4.0 )
## Second case regression function: wave shaped.
def reg2(xs):
return( 3.0 / 2.0 + np.sqrt(2.0) * np.cos( 2.0 * np.pi * xs ) / 2.0
- np.sqrt(2) * np.cos( 4.0 * np.pi * xs ) / 8.0
- 4.0 * np.sqrt(2) * np.cos( 6.0 * np.pi * xs ) / 3.0 )
## Fourier coefficients of psi.
def psi(ks):
return( ( 1.0 - ( -1.0 ) ** ( np.abs(ks) ) * np.exp( -5.0 ) )
/ ( 1.0 + 4.0 * np.pi ** 2.0 * ks ** 2.0 / 10.0 ** 2.0 )
/ ( 1.0 - np.exp( -5.0 ) ) )
## Fourier basis transformation.
def B_trans(xs, ks):
## n-by-p matrix of transformed covariates.
Kx = np.matmul(xs.reshape( (xs.size, 1) ), ks.reshape( (1, ks.size) ))
## Return the Fourier basis covariates.
return( np.cos( 2.0 * np.pi * Kx ) + 1.0j * np.sin( 2.0 * np.pi * Kx ) )
## Distorted regression function, first case.
def K_reg1(xs):
## Fourier frequences to threshold 2.
freqs = np.arange(-2.0, 3.0)
## Fourier coefficients of psi to threshold 2.
Psi = psi(freqs)
## Fourier basis.
B = B_trans(xs, freqs)
## DFT of reg1.
theta = np.matmul(B.conj().T, reg1(xs).reshape( (xs.size, 1) )) / xs.size
## Distorted Fourier coefficients.
Dk = theta * Psi.reshape( (Psi.size, 1) )
## Return the distorted regression values.
return( np.matmul(B, Dk).real.flatten() )
## Distorted regression function, second case.
def K_reg2(xs):
## Fourier frequencies to threshold 3.
freqs = np.arange(-3.0, 4.0)
## Fourier coefficients of psi to threshold 3.
Psi = psi(freqs)
## Fourier basis.
B = B_trans(xs, freqs)
## DFT of reg2.
theta = np.matmul(B.conj().T, reg2(xs).reshape( (xs.size, 1) )) / xs.size
## Distorted Fourier coefficients.
Dk = theta * Psi.reshape( (Psi.size, 1) )
## Return the distorted regression values.
return( np.matmul(B, Dk).real.flatten() )
## Indirect regression estimator.
def htheta(xs, xdata, ydata, reg_parm):
## Sample size.
nsamp = ydata.size
## Fourier frequencies to threshold reg_parm.
freqs = np.arange(-reg_parm, reg_parm + 1.0)
## Transformed covariates.
B = B_trans(xdata, freqs)
## Transformed predictors.
B_xs = B_trans(xs, freqs)
## Fourier coefficients of psi to threshold reg_parm.
Psi = psi(freqs)
## Diagonal matrix of inverse Fourier coefficients.
Psi_inv = np.diag( Psi.conj() / ( Psi * Psi.conj() ).real )
## Deconvolution operator.
P = np.matmul(B_xs, np.matmul(Psi_inv, B.conj().T)) / nsamp
## Deconvolution regression estimator at xs.
return( np.matmul(P, ydata.reshape( (nsamp, 1) )).real.flatten() )
## Distorted regression estimator.
def hKtheta(xs, xdata, ydata, reg_parm):
## Sample size.
nsamp = ydata.size
## Fourier frequencies to threshold reg_parm.
freqs = np.arange(-reg_parm, reg_parm + 1.0)
## Transformed covariates.
B = B_trans(xdata, freqs)
## Transformed predictors.
B_xs = B_trans(xs, freqs)
## Prediction operator.
P = np.matmul(B_xs, B.conj().T) / nsamp
## Regression estimator at xs.
return( np.matmul(P, ydata.reshape( (nsamp, 1) )).real.flatten() )
## Estimated ISE between htheta_0 and a bootstrap version htheta_boot.
def ISE_htheta_b(reg_parm, es, fits, htheta_0, xdata):
## Sample size
nsamp = es.size
## Bootstrap indices.
ind_boot = np.random.choice(nsamp, nsamp)
## Resampled zero mean residuals.
es_boot = ( es - np.mean(es) )[ind_boot]
## Smooth bootstrap residuals.
es_smboot = ( es_boot + 1.06 * np.std(es) * nsamp ** ( -1.0 / 5.0 )
* norm.rvs(size=nsamp) )
## Smooth bootstrap responses.
ydata_boot = fits + es_smboot
## Bootstrap version of htheta.
htheta_boot = htheta(xdata, xdata, ydata_boot, reg_parm)
## Differences in fitted values.
diffs = htheta_0 - htheta_boot
## Return the estimated ISE.
return( np.dot(diffs, diffs) / nsamp )
## Estimated IMSE between htheta_0 and a bootstrap version.
def IMSE_htheta(reg_parm, es, fits, htheta_0, xdata):
## Estimates of ISE.
ISE_htheta = np.empty(200)
## Compute 200 bootstrap estimates of ISE.
for i in range(200):
ISE_htheta[i] = ISE_htheta_b(reg_parm, es, fits, htheta_0, xdata)
## Return the average of the bootstrap ISE estimates.
return( np.mean(ISE_htheta) )
## Bootstrap regularization parameter selector.
def boot_reg_parm(xdata, ydata, pilot_parm):
## Initial estimate of theta based on pilot parameter.
htheta_0 = htheta(xdata, xdata, ydata, pilot_parm)
## Fitted values.
fits = hKtheta(xdata, xdata, ydata, pilot_parm)
## Residuals.
res = ydata - fits
## Maximum Fourier frequency threshold.
reg_max = 5.0 * np.ceil( ydata.size ** ( 1.0 / 10.0 ) )
## Grid of regularization thresholds.
grid = np.arange(0.0, reg_max + 1.0)
## IMSE estimates.
IMSE = np.empty(grid.size)
## Calculate IMSE for each threshold in the grid.
for i in range(grid.size):
IMSE[i] = IMSE_htheta(grid[i], res, fits, htheta_0, xdata)
## Return the threshold that minimizes IMSE.
return( grid[ np.argmin(IMSE) ] )
## Estimated ISE with respect to true theta_0.
def ISE_htheta(reg_parm, xdata, ydata, theta_0):
## Deconvolution fits.
htheta_fits = htheta(xdata, xdata, ydata, reg_parm)
## Differences in fitted values.
diffs = theta_0 - htheta_fits
## Return the estimated ISE.
return( np.dot(diffs, diffs) / ydata.size )
## L2 regularization parameter selector.
def L2_reg_parm(xdata, ydata, theta_0):
## Maximum Fourier frequency threshold.
reg_max = 5.0 * np.ceil( ydata.size ** ( 1.0 / 10.0 ) )
## Grid of regularization thresholds.
grid = np.arange(0.0, reg_max + 1.0)
## ISE estimates.
ISE = np.empty(grid.size)
## Calculate ISE for each threshold in the grid.
for i in range(grid.size):
ISE[i] = ISE_htheta(grid[i], xdata, ydata, theta_0)
## Return the threshold that minimizes ISE.
return( grid[ np.argmin(ISE) ] )
## Computes Fhat.
def Fhat(us, res):
## Estimates.
est = np.empty(us.size)
## Compute Fhat for each u value in us.
for i in range(us.size):
est[i] = np.mean( res <= us[i] )
## Return the estimates.
return(est)
## Computes the influence function for Fhat in the normal model.
def b_norm(es, u, sig0):
return( 1.0 * ( es <= u ) - norm.cdf(u, scale=sig0)
+ es * norm.pdf(u, scale=sig0) )
## Computes the influence function for Fhat in the t model.
def b_t(es, u, tdf, trscl):
return( 1.0 * ( es <= u ) - t.cdf(u / trscl, df=tdf)
+ es * t.pdf(u / trscl, df=tdf) / trscl )
## Integrand of true inflated MSE in the normal model.
def kern_iMSE_norm(es, u, sig0):
return( b_norm(es, u, sig0) ** 2.0 * norm.pdf(es, scale=sig0) )
## Integrand of true inflated MSE in the t model.
def kern_iMSE_t(es, u, tdf, trscl):
return( b_t(es, u, tdf, trscl) ** 2.0 * t.pdf(es / trscl, df=tdf) / trscl )
## Integrand of true inflated IMSE in the normal model.
def kern_iIMSE_norm(u, sig0):
return( sp.integrate.quad(kern_iMSE_norm, -np.inf, np.inf,
args=(u, sig0))[0] )
## Integrand of true inflated IMSE in the t model.
def kern_iIMSE_t(u, tdf, trscl):
return( sp.integrate.quad(kern_iMSE_t, -np.inf, np.inf,
args=(u, tdf, trscl))[0] )
## Computes ( Fhat - F ) ^ 2 for the normal model.
def diff_sq_norm(u, res, sig0):
return( ( Fhat(np.array([u]), res) - norm.cdf(u, scale=sig0) ) ** 2.0 )
## Computes ( Fhat - F ) ^ 2 for the t model.
def diff_sq_t(u, res, tdf, trscl):
return( ( Fhat(np.array([u]), res) - t.cdf(u / trscl, df=tdf) ) ** 2.0 )
## Simulation.
## Number of simulation runs.
runs = 1000
## Sample sizes 51, 101, 201, 301.
samples = np.array( [25, 50, 100, 150] )
## Normal error standard deviation.
sig0 = 2.0 / 3.0
## Degrees of freedom for the t model.
tdf = 4.0
## Rescaling factor for the t model.
trscl = sig0 * np.sqrt( ( tdf - 2.0 ) / tdf )
## u values to check pointwise inflated MSE.
useq = np.array( [-2.0, -1.0, 0.0, 1.0, 2.0] )
## Bootstrap selected parameters.
boot_parm_norm_reg1 = np.empty( [samples.size, runs] )
boot_parm_norm_reg2 = np.empty( [samples.size, runs] )
boot_parm_t_reg1 = np.empty( [samples.size, runs] )
boot_parm_t_reg2 = np.empty( [samples.size, runs] )
## ISE minimizing parameters.
L2_parm_norm_reg1 = np.empty( [samples.size, runs] )
L2_parm_norm_reg2 = np.empty( [samples.size, runs] )
L2_parm_t_reg1 = np.empty( [samples.size, runs] )
L2_parm_t_reg2 = np.empty( [samples.size, runs] )
## ISE values.
htheta_boot_parm_ISE_norm_reg1 = np.empty(runs)
htheta_L2_parm_ISE_norm_reg1 = np.empty(runs)
htheta_boot_parm_ISE_norm_reg2 = np.empty(runs)
htheta_L2_parm_ISE_norm_reg2 = np.empty(runs)
htheta_boot_parm_ISE_t_reg1 = np.empty(runs)
htheta_L2_parm_ISE_t_reg1 = np.empty(runs)
htheta_boot_parm_ISE_t_reg2 = np.empty(runs)
htheta_L2_parm_ISE_t_reg2 = np.empty(runs)
## IMSE values.
htheta_boot_parm_IMSE_norm_reg1 = np.empty(samples.size)
htheta_L2_parm_IMSE_norm_reg1 = np.empty(samples.size)
htheta_boot_parm_IMSE_norm_reg2 = np.empty(samples.size)
htheta_L2_parm_IMSE_norm_reg2 = np.empty(samples.size)
htheta_boot_parm_IMSE_t_reg1 = np.empty(samples.size)
htheta_L2_parm_IMSE_t_reg1 = np.empty(samples.size)
htheta_boot_parm_IMSE_t_reg2 = np.empty(samples.size)
htheta_L2_parm_IMSE_t_reg2 = np.empty(samples.size)
## Fhat - F values.
Fhat_diff_norm_reg1 = np.empty( [runs, useq.size] )
Fhat_diff_norm_reg2 = np.empty( [runs, useq.size] )
Fhat_diff_t_reg1 = np.empty( [runs, useq.size] )
Fhat_diff_t_reg2 = np.empty( [runs, useq.size] )
## Fhat inflated bias values.
Fhat_ibias_norm_reg1 = np.empty( [samples.size, useq.size] )
Fhat_ibias_norm_reg2 = np.empty( [samples.size, useq.size] )
Fhat_ibias_t_reg1 = np.empty( [samples.size, useq.size] )
Fhat_ibias_t_reg2 = np.empty( [samples.size, useq.size] )
## Fhat inflated variance values.
Fhat_ivar_norm_reg1 = np.empty( [samples.size, useq.size] )
Fhat_ivar_norm_reg2 = np.empty( [samples.size, useq.size] )
Fhat_ivar_t_reg1 = np.empty( [samples.size, useq.size] )
Fhat_ivar_t_reg2 = np.empty( [samples.size, useq.size] )
## Fhat inflated MSE values.
Fhat_iMSE_norm_reg1 = np.empty( [samples.size, useq.size] )
Fhat_iMSE_norm_reg2 = np.empty( [samples.size, useq.size] )
Fhat_iMSE_t_reg1 = np.empty( [samples.size, useq.size] )
Fhat_iMSE_t_reg2 = np.empty( [samples.size, useq.size] )
## Fhat ISE values.
Fhat_ISE_norm_reg1 = np.empty(runs)
Fhat_ISE_norm_reg2 = np.empty(runs)
Fhat_ISE_t_reg1 = np.empty(runs)
Fhat_ISE_t_reg2 = np.empty(runs)
## Fhat inflated IMSE values.
Fhat_iIMSE_norm_reg1 = np.empty(samples.size)
Fhat_iIMSE_norm_reg2 = np.empty(samples.size)
Fhat_iIMSE_t_reg1 = np.empty(samples.size)
Fhat_iIMSE_t_reg2 = np.empty(samples.size)
## Simulation.
for n in samples:
## Sample size.
nsamp = 2 * n + 1
## Get the index for n in samples.
n_ind = np.where( samples == n )[0][0]
## Open a progress bar.
bar = FancyBar(message='Sample size: ' + str(nsamp), max=runs)
for s in range(runs):
## Fixed covariates in [-1 / 2, 1 / 2].
xn = np.arange(-n, n + 1) / ( 2 * n )
## Pilot parameter for reg1.
pilot_parm_reg1 = np.ceil( 1.5 * ( nsamp / np.log(nsamp) )
** ( 1.0 / 11.0 ) )
pilot_parm_reg2 = np.ceil( 2.0 * ( nsamp / np.log(nsamp) )
** ( 1.0 / 11.0 ) )
## Indirect regression values.
reg1_vals = reg1(xn)
reg2_vals = reg2(xn)
## Distorted regression values.
K_reg1_vals = K_reg1(xn)
K_reg2_vals = K_reg2(xn)
## Model responses.
Y_norm_reg1 = K_reg1_vals + norm.rvs(size=nsamp, scale=sig0)
Y_norm_reg2 = K_reg2_vals + norm.rvs(size=nsamp, scale=sig0)
Y_t_reg1 = K_reg1_vals + trscl * t.rvs(size=nsamp, df=tdf)
Y_t_reg2 = K_reg2_vals + trscl * t.rvs(size=nsamp, df=tdf)
## Bootstrap selected parameters.
boot_parm_norm_reg1[n_ind, s] = (
boot_reg_parm(xn, Y_norm_reg1, pilot_parm_reg1) )
boot_parm_norm_reg2[n_ind, s] = (
boot_reg_parm(xn, Y_norm_reg2, pilot_parm_reg2) )
boot_parm_t_reg1[n_ind, s] = (
boot_reg_parm(xn, Y_t_reg1, pilot_parm_reg1) )
boot_parm_t_reg2[n_ind, s] = (
boot_reg_parm(xn, Y_t_reg2, pilot_parm_reg2) )
## L2 selected parameters.
L2_parm_norm_reg1[n_ind, s] = (
L2_reg_parm(xn, Y_norm_reg1, reg1_vals) )
L2_parm_norm_reg2[n_ind, s] = (
L2_reg_parm(xn, Y_norm_reg2, reg2_vals) )
L2_parm_t_reg1[n_ind, s] = (
L2_reg_parm(xn, Y_t_reg1, reg1_vals) )
L2_parm_t_reg2[n_ind, s] = (
L2_reg_parm(xn, Y_t_reg2, reg2_vals) )
## Deconvolution regression fits.
deconv_fits_boot_parm_norm_reg1 = (
htheta(xn, xn, Y_norm_reg1, boot_parm_norm_reg1[n_ind, s]) )
deconv_fits_L2_parm_norm_reg1 = (
htheta(xn, xn, Y_norm_reg1, L2_parm_norm_reg1[n_ind, s]) )
deconv_fits_boot_parm_norm_reg2 = (
htheta(xn, xn, Y_norm_reg2, boot_parm_norm_reg2[n_ind, s]) )
deconv_fits_L2_parm_norm_reg2 = (
htheta(xn, xn, Y_norm_reg2, L2_parm_norm_reg2[n_ind, s]) )
deconv_fits_boot_parm_t_reg1 = (
htheta(xn, xn, Y_t_reg1, boot_parm_t_reg1[n_ind, s]) )
deconv_fits_L2_parm_t_reg1 = (
htheta(xn, xn, Y_t_reg1, L2_parm_t_reg1[n_ind, s]) )
deconv_fits_boot_parm_t_reg2 = (
htheta(xn, xn, Y_t_reg2, boot_parm_t_reg2[n_ind, s]) )
deconv_fits_L2_parm_t_reg2 = (
htheta(xn, xn, Y_t_reg2, L2_parm_t_reg2[n_ind, s]) )
## Differences between indirect regression estimates and values.
diffs_boot_parm_norm_reg1 = (
deconv_fits_boot_parm_norm_reg1 - reg1_vals )
diffs_L2_parm_norm_reg1 = (
deconv_fits_L2_parm_norm_reg1 - reg1_vals )
diffs_boot_parm_norm_reg2 = (
deconv_fits_boot_parm_norm_reg2 - reg2_vals )
diffs_L2_parm_norm_reg2 = (
deconv_fits_L2_parm_norm_reg2 - reg2_vals )
diffs_boot_parm_t_reg1 = (
deconv_fits_boot_parm_t_reg1 - reg1_vals )
diffs_L2_parm_t_reg1 = (
deconv_fits_L2_parm_t_reg1 - reg1_vals )
diffs_boot_parm_t_reg2 = (
deconv_fits_boot_parm_t_reg2 - reg2_vals )
diffs_L2_parm_t_reg2 = (
deconv_fits_L2_parm_t_reg2 - reg2_vals )
## ISE estimates.
htheta_boot_parm_ISE_norm_reg1[s] = (
np.dot(diffs_boot_parm_norm_reg1, diffs_boot_parm_norm_reg1)
/ nsamp )
htheta_L2_parm_ISE_norm_reg1[s] = (
np.dot(diffs_L2_parm_norm_reg1, diffs_L2_parm_norm_reg1) / nsamp )
htheta_boot_parm_ISE_norm_reg2[s] = (
np.dot(diffs_boot_parm_norm_reg2, diffs_boot_parm_norm_reg2)
/ nsamp )
htheta_L2_parm_ISE_norm_reg2[s] = (
np.dot(diffs_L2_parm_norm_reg2, diffs_L2_parm_norm_reg2) / nsamp )
htheta_boot_parm_ISE_t_reg1[s] = (
np.dot(diffs_boot_parm_t_reg1, diffs_boot_parm_t_reg1) / nsamp )
htheta_L2_parm_ISE_t_reg1[s] = (
np.dot(diffs_L2_parm_t_reg1, diffs_L2_parm_t_reg1) / nsamp )
htheta_boot_parm_ISE_t_reg2[s] = (
np.dot(diffs_boot_parm_t_reg2, diffs_boot_parm_t_reg2) / nsamp )
htheta_L2_parm_ISE_t_reg2[s] = (
np.dot(diffs_L2_parm_t_reg2, diffs_L2_parm_t_reg2) / nsamp )
## Fitted values.
fits_norm_reg1 = (
hKtheta(xn, xn, Y_norm_reg1, boot_parm_norm_reg1[n_ind, s]) )
fits_norm_reg2 = (
hKtheta(xn, xn, Y_norm_reg2, boot_parm_norm_reg2[n_ind, s]) )
fits_t_reg1 = hKtheta(xn, xn, Y_t_reg1, boot_parm_t_reg1[n_ind, s])
fits_t_reg2 = hKtheta(xn, xn, Y_t_reg2, boot_parm_t_reg2[n_ind, s])
## Model residuals.
res_norm_reg1 = Y_norm_reg1 - fits_norm_reg1
res_norm_reg2 = Y_norm_reg2 - fits_norm_reg2
res_t_reg1 = Y_t_reg1 - fits_t_reg1
res_t_reg2 = Y_t_reg2 - fits_t_reg2
## Fhat pointwise differences.
Fhat_diff_norm_reg1[s, :] = (
Fhat(useq, res_norm_reg1) - norm.cdf(useq, scale=sig0) )
Fhat_diff_norm_reg2[s, :] = (
Fhat(useq, res_norm_reg2) - norm.cdf(useq, scale=sig0) )
Fhat_diff_t_reg1[s, :] = (
Fhat(useq, res_t_reg1) - t.cdf(useq / trscl, df=tdf) )
Fhat_diff_t_reg2[s, :] = (
Fhat(useq, res_t_reg2) - t.cdf(useq / trscl, df=tdf) )
## Fhat ISE values.
Fhat_ISE_norm_reg1[s] = (
sp.integrate.quad(diff_sq_norm, -np.inf, np.inf,
args=(res_norm_reg1, sig0))[0] )
Fhat_ISE_norm_reg2[s] = (
sp.integrate.quad(diff_sq_norm, -np.inf, np.inf,
args=(res_norm_reg2, sig0))[0] )
Fhat_ISE_t_reg1[s] = (
sp.integrate.quad(diff_sq_t, -np.inf, np.inf,
args=(res_t_reg1, tdf, trscl))[0] )
Fhat_ISE_t_reg2[s] = (
sp.integrate.quad(diff_sq_t, -np.inf, np.inf,
args=(res_t_reg2, tdf, trscl))[0] )
bar.next()
## htheta IMSE values.
htheta_boot_parm_IMSE_norm_reg1[n_ind] = (
np.mean(htheta_boot_parm_ISE_norm_reg1) )
htheta_L2_parm_IMSE_norm_reg1[n_ind] = (
np.mean(htheta_L2_parm_ISE_norm_reg1) )
htheta_boot_parm_IMSE_norm_reg2[n_ind] = (
np.mean(htheta_boot_parm_ISE_norm_reg2) )
htheta_L2_parm_IMSE_norm_reg2[n_ind] = (
np.mean(htheta_L2_parm_ISE_norm_reg2) )
htheta_boot_parm_IMSE_t_reg1[n_ind] = (
np.mean(htheta_boot_parm_ISE_t_reg1) )
htheta_L2_parm_IMSE_t_reg1[n_ind] = (
np.mean(htheta_L2_parm_ISE_t_reg1) )
htheta_boot_parm_IMSE_t_reg2[n_ind] = (
np.mean(htheta_boot_parm_ISE_t_reg2) )
htheta_L2_parm_IMSE_t_reg2[n_ind] = (
np.mean(htheta_L2_parm_ISE_t_reg2) )
## Fhat inflated bias.
Fhat_ibias_norm_reg1[n_ind, :] = (
np.sqrt(nsamp) * np.mean(Fhat_diff_norm_reg1, axis=0) )
Fhat_ibias_norm_reg2[n_ind, :] = (
np.sqrt(nsamp) * np.mean(Fhat_diff_norm_reg2, axis=0) )
Fhat_ibias_t_reg1[n_ind, :] = (
np.sqrt(nsamp) * np.mean(Fhat_diff_t_reg1, axis=0) )
Fhat_ibias_t_reg2[n_ind, :] = (
np.sqrt(nsamp) * np.mean(Fhat_diff_t_reg2, axis=0) )
## Fhat inflated variance.
Fhat_ivar_norm_reg1[n_ind, :] = nsamp * np.var(Fhat_diff_norm_reg1, axis=0)
Fhat_ivar_norm_reg2[n_ind, :] = nsamp * np.var(Fhat_diff_norm_reg2, axis=0)
Fhat_ivar_t_reg1[n_ind, :] = nsamp * np.var(Fhat_diff_t_reg1, axis=0)
Fhat_ivar_t_reg2[n_ind, :] = nsamp * np.var(Fhat_diff_t_reg2, axis=0)
## Fhat inflated MSE.
Fhat_iMSE_norm_reg1[n_ind, :] = (
Fhat_ibias_norm_reg1[n_ind, :] ** 2.0 + Fhat_ivar_norm_reg1[n_ind, :] )
Fhat_iMSE_norm_reg2[n_ind, :] = (
Fhat_ibias_norm_reg2[n_ind, :] ** 2.0 + Fhat_ivar_norm_reg2[n_ind, :] )
Fhat_iMSE_t_reg1[n_ind, :] = (
Fhat_ibias_t_reg1[n_ind, :] ** 2.0 + Fhat_ivar_t_reg1[n_ind, :] )
Fhat_iMSE_t_reg2[n_ind, :] = (
Fhat_ibias_t_reg2[n_ind, :] ** 2.0 + Fhat_ivar_t_reg2[n_ind, :] )
## Fhat inflated IMSE.
Fhat_iIMSE_norm_reg1[n_ind] = nsamp * np.mean(Fhat_ISE_norm_reg1)
Fhat_iIMSE_norm_reg2[n_ind] = nsamp * np.mean(Fhat_ISE_norm_reg2)
Fhat_iIMSE_t_reg1[n_ind] = nsamp * np.mean(Fhat_ISE_t_reg1)
Fhat_iIMSE_t_reg2[n_ind] = nsamp * np.mean(Fhat_ISE_t_reg2)
bar.finish()
## Simulation results.
## htheta IMSE results.
htheta_boot_parm_IMSE_norm_reg1_results = (
np.column_stack( (2 * samples + 1, htheta_boot_parm_IMSE_norm_reg1) ) )
htheta_L2_parm_IMSE_norm_reg1_results = (
np.column_stack( (2 * samples + 1, htheta_L2_parm_IMSE_norm_reg1) ) )
htheta_boot_parm_IMSE_norm_reg2_results = (
np.column_stack( (2 * samples + 1, htheta_boot_parm_IMSE_norm_reg2) ) )
htheta_L2_parm_IMSE_norm_reg2_results = (
np.column_stack( (2 * samples + 1, htheta_L2_parm_IMSE_norm_reg2) ) )
htheta_boot_parm_IMSE_t_reg1_results = (
np.column_stack( (2 * samples + 1, htheta_boot_parm_IMSE_t_reg1) ) )
htheta_L2_parm_IMSE_t_reg1_results = (
np.column_stack( (2 * samples + 1, htheta_L2_parm_IMSE_t_reg1) ) )
htheta_boot_parm_IMSE_t_reg2_results = (
np.column_stack( (2 * samples + 1, htheta_boot_parm_IMSE_t_reg2) ) )
htheta_L2_parm_IMSE_t_reg2_results = (
np.column_stack( (2 * samples + 1, htheta_L2_parm_IMSE_t_reg2) ) )
## True inflated MSE values for Fhat.
iMSE_norm = np.empty(useq.size)
iMSE_t = np.empty(useq.size)
for i in range(useq.size):
iMSE_norm[i] = sp.integrate.quad(kern_iMSE_norm, -np.inf, np.inf,
args=(useq[i], sig0))[0]
iMSE_t[i] = sp.integrate.quad(kern_iMSE_t, -np.inf, np.inf,
args=(useq[i], tdf, trscl))[0]
## True inflated IMSE values for Fhat.
iIMSE_norm = sp.integrate.quad(kern_iIMSE_norm, -np.inf, np.inf,
args=(sig0))[0]
iIMSE_t = sp.integrate.quad(kern_iIMSE_t, -np.inf, np.inf,
args=(tdf, trscl))[0]
## Fhat inflated bias results.
Fhat_ibias_norm_reg1_results = (
np.column_stack( (2 * samples + 1, Fhat_ibias_norm_reg1) ) )
Fhat_ibias_norm_reg2_results = (
np.column_stack( (2 * samples + 1, Fhat_ibias_norm_reg2) ) )
Fhat_ibias_t_reg1_results = (
np.column_stack( (2 * samples + 1, Fhat_ibias_t_reg1) ) )
Fhat_ibias_t_reg2_results = (
np.column_stack( (2 * samples + 1, Fhat_ibias_t_reg2) ) )
## Fhat inflated variance results.
Fhat_ivar_norm_reg1_results = (
np.column_stack( (2 * samples + 1, Fhat_ivar_norm_reg1) ) )
Fhat_ivar_norm_reg2_results = (
np.column_stack( (2 * samples + 1, Fhat_ivar_norm_reg2) ) )
Fhat_ivar_t_reg1_results = (
np.column_stack( (2 * samples + 1, Fhat_ivar_t_reg1) ) )
Fhat_ivar_t_reg2_results = (
np.column_stack( (2 * samples + 1, Fhat_ivar_t_reg2) ) )
## Vector of sample sizes plus infinity.
samp_plus_inf = np.append(samples, np.inf).reshape( (samples.size + 1, 1) )
## Fhat iMSE values with true iMSE values.
iMSE_norm_reg1_vals = np.vstack( (Fhat_iMSE_norm_reg1, iMSE_norm) )
iMSE_norm_reg2_vals = np.vstack( (Fhat_iMSE_norm_reg2, iMSE_norm) )
iMSE_t_reg1_vals = np.vstack( (Fhat_iMSE_t_reg1, iMSE_t) )
iMSE_t_reg2_vals = np.vstack( (Fhat_iMSE_t_reg2, iMSE_t) )
## Fhat inflated MSE results.
Fhat_iMSE_norm_reg1_results = (
np.column_stack( (samp_plus_inf, iMSE_norm_reg1_vals) ) )
Fhat_iMSE_norm_reg2_results = (
np.column_stack( (samp_plus_inf, iMSE_norm_reg2_vals) ) )
Fhat_iMSE_t_reg1_results = (
np.column_stack( (samp_plus_inf, iMSE_t_reg1_vals) ) )
Fhat_iMSE_t_reg2_results = (
np.column_stack( (samp_plus_inf, iMSE_t_reg2_vals) ) )
## Fhat iIMSE values with true iIMSE values.
iIMSE_norm_reg1_vals = np.append(Fhat_iIMSE_norm_reg1, iIMSE_norm)
iIMSE_norm_reg2_vals = np.append(Fhat_iIMSE_norm_reg2, iIMSE_norm)
iIMSE_t_reg1_vals = np.append(Fhat_iIMSE_t_reg1, iIMSE_t)
iIMSE_t_reg2_vals = np.append(Fhat_iIMSE_t_reg2, iIMSE_t)
## Fhat inflated IMSE results.
Fhat_iIMSE_norm_reg1_results = (
np.column_stack( (samp_plus_inf, iIMSE_norm_reg1_vals) ) )
Fhat_iIMSE_norm_reg2_results = (
np.column_stack( (samp_plus_inf, iIMSE_norm_reg2_vals) ) )
Fhat_iIMSE_t_reg1_results = (
np.column_stack( (samp_plus_inf, iIMSE_t_reg1_vals) ) )
Fhat_iIMSE_t_reg2_results = (
np.column_stack( (samp_plus_inf, iIMSE_t_reg2_vals) ) )
## Display simulation summary tables.
print( tabulate(htheta_boot_parm_IMSE_norm_reg1_results) )
print( tabulate(htheta_L2_parm_IMSE_norm_reg1_results) )
print( tabulate(htheta_boot_parm_IMSE_norm_reg2_results) )
print( tabulate(htheta_L2_parm_IMSE_norm_reg2_results) )
print( tabulate(htheta_boot_parm_IMSE_t_reg1_results) )
print( tabulate(htheta_L2_parm_IMSE_t_reg1_results) )
print( tabulate(htheta_boot_parm_IMSE_t_reg2_results) )
print( tabulate(htheta_L2_parm_IMSE_t_reg2_results) )
print( tabulate(Fhat_ibias_norm_reg1_results) )
print( tabulate(Fhat_ibias_norm_reg2_results) )
print( tabulate(Fhat_ibias_t_reg1_results) )
print( tabulate(Fhat_ibias_t_reg2_results) )
print( tabulate(Fhat_ivar_norm_reg1_results) )
print( tabulate(Fhat_ivar_norm_reg2_results) )
print( tabulate(Fhat_ivar_t_reg1_results) )
print( tabulate(Fhat_ivar_t_reg2_results) )
print( tabulate(Fhat_iMSE_norm_reg1_results) )
print( tabulate(Fhat_iMSE_norm_reg2_results) )
print( tabulate(Fhat_iMSE_t_reg1_results) )
print( tabulate(Fhat_iMSE_t_reg2_results) )
print( tabulate(Fhat_iIMSE_norm_reg1_results) )
print( tabulate(Fhat_iIMSE_norm_reg2_results) )
print( tabulate(Fhat_iIMSE_t_reg1_results) )
print( tabulate(Fhat_iIMSE_t_reg2_results) )