Chown, Bissantz and Dette (2019) propose goodness-of-fit tests for the error distribution in a multivariate indirect regression model. The scripts below simulate the empirical level and power of the martingale transform test of Student t(15) errors against Laplace, skew-normal and normal alternatives, at sample sizes 100 and 200 with 1000 runs each, first in R and then in Python.
R code:
###############################################################################
## Simulation to demonstrate martingale transform goodness-of-fit tests for ##
## the error distribution in an indirect regression model. ##
###############################################################################
## Required libraries.
require(parallel)
require(tcltk)
require(VGAM)
require(sn)
## Set the correct working directory.
setwd("~/Documents/Published/ireacr/simulations/")
### Custom functions ###
## Univariate density function based on Fourier coefficients.
g1 = function(x) {
return( 1 - sqrt(2) * cos( 2 * pi * x ) / 4 -
sqrt(2) * cos( 4 * pi * x ) / 8 )
}
## Univariate cumulative distribution function based on Fourier coefficients.
G1 = function(t) {
return( t - sqrt(2) * sin( 2 * pi * t ) / 8 / pi
- sqrt(2) * sin( 4 * pi * t ) / 32 / pi )
}
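## Quick sanity check (not run): G1 is the antiderivative of g1 on [0, 1],
## with G1(0) = 0 and G1(1) = 1 because sin(2 * pi * k) = 0 for integer k.
## all.equal(G1(1), 1)
## all.equal(integrate(g1, 0, 0.3)$value, G1(0.3))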
## A function to generate a random sample of bivariate covariates.
generate_covariates = function(n) {
## Grid of [0, 1] of 1000 points.
tseq = seq(0, 1, length.out = 1000)
## G1 distribution function values at each grid point.
G1_tseq = G1(tseq)
## Generate n random covariates from G1.
X1 = sapply(runif(n), function(U, q_G1, probs_q_G1) {
return( q_G1[ min( which( U <= probs_q_G1 ) ) ] ) },
tseq, G1_tseq)
## Generate another n random covariates from G1.
X2 = sapply(runif(n), function(U, q_G1, probs_q_G1) {
return( q_G1[ min( which( U <= probs_q_G1 ) ) ] ) },
tseq, G1_tseq)
## Return an n-by-2 matrix whose columns are X1 and X2.
return( cbind(X1, X2) )
}
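## Example (not run): inverse-CDF sampling on a grid, so each column of the
## returned matrix is an approximate draw from G1 (up to grid resolution).
## X_demo = generate_covariates(500)
## hist(X_demo[, 1], freq=FALSE); curve(g1, add=TRUE)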
## Distorted regression function for bivariate covariates.
distorted_reg = function(x) {
## Separate covariates x into their parts X1 and X2.
X1 = x[, 1]
X2 = x[, 2]
## Return the distorted regression function values.
return( 5 + cos( 2 * pi * X1 ) + 3 * cos( 2 * pi * X2 ) / 2
+ 3 * cos( 4 * pi * X1 ) / 2 - 2 * cos( 4 * pi * X2 )
- 2 * cos( 2 * pi * ( X1 + X2 ) )
- cos( 2 * pi * ( X1 - X2 ) ) / 2 )
}
## A function to transform covariates xs into Fourier basis covariates.
B_trans = function(xs, K_mat) {
## Inner products between xs and K_mat.
xs_K = xs %*% t(K_mat)
## Return the Fourier basis covariates.
return( cos( 2 * pi * xs_K ) + 1i * sin( 2 * pi * xs_K ) )
}
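## Note: by Euler's formula each basis value equals exp(2i * pi * <x, k>).
## Illustration (not run, arbitrary values):
## all.equal(B_trans(matrix(0.3, 1, 2), matrix(c(1, 2), 1, 2)),
##           matrix(exp(2i * pi * 0.9), 1, 1))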
## A function to compute the distorted regression estimator.
hKtheta = function(xs, xdata, ydata, reg_parm) {
## When reg_parm = 0 simply return the mean of ydata for each xs.
if( reg_parm == 0 ) { return( rep(mean(ydata), length.out=nrow(xs)) ) }
else {
## Sample size.
nsamp = length(ydata)
## Fourier frequencies.
K_temp = expand.grid(-reg_parm : reg_parm, -reg_parm : reg_parm)
K = cbind(K_temp[[1]], K_temp[[2]])
## Transformed covariates.
B = B_trans(xdata, K)
## Gram matrix of transformed covariates.
G = t( Conj(B) ) %*% B
## Vector of inner products between transformed covariates and ydata.
v = t( Conj(B) ) %*% matrix(ydata, nrow=nsamp)
## Estimated Fourier coefficients.
b = qr.solve(G, v)
## Transformed predictors.
B_xs = B_trans(xs, K)
## Return the predicted values.
return( Re( B_xs %*% b ) )
}
}
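## Example (not run): fit the series estimator on a small simulated sample;
## reg_parm = 2 keeps Fourier frequencies -2, ..., 2 in each coordinate.
## X_demo = matrix(runif(100), ncol=2)
## Y_demo = distorted_reg(X_demo) + rnorm(50)
## fits_demo = hKtheta(X_demo, X_demo, Y_demo, 2)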
## A function to compute the squared prediction error of a leave-one-out
## distorted regression estimate leaving out index di.
LOO_SPE_hKtheta = function(di, xdata, ydata, reg_parm) {
## Subset of xdata excluding row index di.
xdata_di = xdata[-di, ]
## Subset of ydata excluding entry di.
ydata_di = ydata[ -di ]
## Leave-one-out prediction of the excluded ydata[di].
Y_pred = hKtheta(matrix(xdata[di, ], nrow=1), xdata_di, ydata_di, reg_parm)
## Return the squared difference between ydata[di] and Y_pred.
return( ( ydata[di] - Y_pred ) ^ 2 )
}
## A function to compute the mean squared prediction error.
LOOCV_MSPE_hKtheta = function(reg_parm, xdata, ydata) {
## Sample size.
nsamp = length(ydata)
## Compute the leave-one-out squared prediction error for each sample index.
SPE = sapply(1 : nsamp, LOO_SPE_hKtheta, xdata, ydata, reg_parm)
## Return the MSPE value.
return( mean(SPE) )
}
## LOOCV smoothing parameter selector.
LOOCV_MSPE_reg_parm = function(xdata, ydata) {
## Sample size.
nsamp = length(ydata)
## Calculate the upper edge of regularization parameter window.
upper = ceiling( 2.5 * ( nsamp / log(nsamp) ) ^ ( 1 / 8 ) )
## A window of Fourier frequency thresholds.
window = 0 : upper
## Compute the LOOCV estimate of MSPE for each cutoff in the window.
MSPE = sapply(window, LOOCV_MSPE_hKtheta, xdata, ydata)
## Return the threshold that minimizes MSPE.
return( window[ which.min(MSPE) ] )
}
## Incomplete Beta function.
inc_beta = function(x, a, b) {
return( pbeta(x, a, b) * beta(a, b) )
}
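## (pbeta is the regularized incomplete beta function, so multiplying by
## beta(a, b) recovers the unregularized integral of t^(a-1) * (1-t)^(b-1)
## over [0, x].)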
## Degrees of freedom in the t-distribution.
tdf = 15
## Rescaling of t errors to have scale of 1.
trscl = sqrt( ( tdf - 2 ) / tdf )
## Constant of proportionality for the t-distribution.
C_nu = 1 / ( sqrt(tdf) * inc_beta(1, 1 / 2, tdf / 2) )
## Components of the vector-valued score function h_0
## (first component is omitted because it is the constant 1).
## Second component.
h0_2 = function(x) {
## Numerator.
numer = x * ( tdf + 1 ) / trscl ^ 2
## Denominator.
denom = tdf + x ^ 2 / trscl ^ 2
## Return the ratio.
return( numer / denom )
}
## Third component.
h0_3 = function(x) {
return( x * h0_2(x) - 1 )
}
## The augmented score vector.
h0 = function(x) {
return( matrix(c(1, h0_2(x), h0_3(x)), ncol=1) )
}
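## (Interpretation, for reference: h0_2 is the location score -f'/f of the
## t density with tdf degrees of freedom and scale trscl, and
## h0_3 = x * h0_2(x) - 1 is the corresponding scale score.)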
## Required components of the 3-by-3 incomplete information matrix.
## (1,1) element.
G0_11 = function(x) {
return( 1 - pt(x / trscl, df=tdf) )
}
## (2, 2) element.
G0_22 = function(x) {
## Parameters of the incomplete beta function.
par1 = ( tdf + 1 ) / 2 + 1 / 2
par2 = 3 / 2
return( 2 * C_nu / ( trscl ^ 2 * sqrt(tdf) ) * ( ( tdf + 1 ) / 2 ) ^ 2 *
( 2 * ( x < 0 ) * inc_beta(1, par1, par2) +
( 1 * ( x >= 0 ) - 1 * ( x < 0 ) ) *
inc_beta(1 / ( 1 + x ^ 2 / ( trscl ^ 2 * tdf ) ), par1, par2) ) )
}
## (3, 3) element.
G0_33 = function(x) {
## Parameters of the incomplete beta function.
par1 = ( tdf + 1 ) / 2 - 1 / 2
par2 = 5 / 2
return( 2 * C_nu * sqrt(tdf) * ( ( tdf + 1 ) / 2 ) ^ 2 *
( 2 * ( x < 0 ) * inc_beta(1, par1, par2) +
( 1 * ( x >= 0 ) - 1 * ( x < 0 ) ) *
inc_beta(1 / ( 1 + x ^ 2 / ( trscl ^ 2 * tdf ) ), par1, par2) ) +
pt(x / trscl, df=tdf) - 1 - 2 * x * dt(x / trscl, df=tdf) / trscl )
}
## (1, 2) element.
G0_12 = function(x) {
return( dt(x / trscl, df=tdf) / trscl )
}
## (1, 3) element.
G0_13 = function(x) {
return( x * dt(x / trscl, df=tdf) / trscl )
}
## (2, 3) element.
G0_23 = function(x) {
## Parameters of the incomplete beta function.
par1 = ( tdf + 1 ) / 2
par2 = 2
return( 2 * C_nu / trscl * ( ( tdf + 1 ) / 2 ) ^ 2 *
inc_beta(1 / ( 1 + x ^ 2 / ( trscl ^ 2 * tdf ) ), par1, par2) -
dt(x / trscl, df=tdf) / trscl )
}
## The incomplete information matrix (to verify correct calculation of its
## inverse).
GMat = function(x) {
## Diagonal terms.
g0_11 = G0_11(x)
g0_22 = G0_22(x)
g0_33 = G0_33(x)
## Off-diagonal terms.
g0_12_21 = G0_12(x)
g0_13_31 = G0_13(x)
g0_23_32 = G0_23(x)
## Vector of matrix terms.
mat_vec = c(g0_11, g0_12_21, g0_13_31,
g0_12_21, g0_22, g0_23_32,
g0_13_31, g0_23_32, g0_33)
return( matrix(mat_vec, nrow=3, byrow=TRUE) )
}
## The inverse of the incomplete information matrix.
GMat_inv = function(x) {
## GMat diagonal terms.
g_11 = G0_11(x)
g_22 = G0_22(x)
g_33 = G0_33(x)
## GMat off-diagonal terms.
g_12_21 = G0_12(x)
g_13_31 = G0_13(x)
g_23_32 = G0_23(x)
## Determinant.
g_det = g_11 * ( g_22 * g_33 - g_23_32 ^ 2 ) -
g_12_21 * ( g_12_21 * g_33 - g_23_32 * g_13_31 ) +
g_13_31 * ( g_12_21 * g_23_32 - g_22 * g_13_31 )
## GMat inverse diagonal terms.
g_inv_11 = g_22 * g_33 - g_23_32 ^ 2
g_inv_22 = g_11 * g_33 - g_13_31 ^ 2
g_inv_33 = g_11 * g_22 - g_12_21 ^ 2
## GMat inverse off-diagonal terms.
g_inv_12_21 = g_23_32 * g_13_31 - g_12_21 * g_33
g_inv_13_31 = g_12_21 * g_23_32 - g_13_31 * g_22
g_inv_23_32 = g_13_31 * g_12_21 - g_11 * g_23_32
## Vector of matrix terms.
mat_vec = c(g_inv_11, g_inv_12_21, g_inv_13_31,
g_inv_12_21, g_inv_22, g_inv_23_32,
g_inv_13_31, g_inv_23_32, g_inv_33) / g_det
return( matrix(mat_vec, nrow=3, byrow=TRUE) )
}
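## Quick sanity check (not run): the two functions should be inverses of one
## another at any argument, e.g.
## max( abs( GMat(0.5) %*% GMat_inv(0.5) - diag(3) ) )  ## ~ 0 up to rounding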
## The integrand of the K-transform.
I0_x = function(x, es_j) {
## Vector of terms depending on x.
v1 = GMat_inv(x) %*% h0(x) * dt(x / trscl, df=tdf) / trscl
## Return the integrand.
return( crossprod(v1, h0(es_j)) )
}
## A wrapper function to compute I0_x at many x.
I0 = function(xs, es_j) {
return( sapply(xs, I0_x, es_j) )
}
## The K-transform term.
K0 = function(es_j, u) {
## Minimum between u and es_j.
v = min(es_j, u)
## Return the value of the K-transform term.
return( integrate(I0, -Inf, v, es_j, stop.on.error=FALSE)$value )
}
## K-transformed empirical process of standardized residuals.
xi_0 = function(u, es) {
## Compute Fhat at this u value.
hF = mean( es <= u )
## Compute the terms of K0 at this u value.
K0_terms = sapply(es, K0, u)
## Return the absolute value of the K-transformed empirical process.
return( sqrt( length(es) ) * abs( hF - mean(K0_terms) ) )
}
## Martingale transform GOF test for t errors.
MT_t_test = function(stdres) {
## Value of u0.
u0 = max(stdres)
## Grid to compute test statistics.
u_grid = sort(stdres)
## Test statistic values.
TS = parSapply(compcl, u_grid, xi_0, stdres)
## Return the largest test statistic value (standardized).
return( max(TS) / sqrt( mean( stdres <= u0 ) ) )
}
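## Example (not run; requires the cluster 'compcl' created below): under the
## null of scaled t(15) errors the statistic should exceed the asymptotic
## quantile as_quan in roughly 5% of samples.
## e_demo = trscl * rt(200, df=tdf)
## MT_t_test( e_demo / sd(e_demo) )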
## Simulation runs to perform.
runs = 1000
## Sample sizes.
samples = c(100, 200)
## Asymptotic quantile for an approximate 5% level test.
as_quan = 2.2414
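## (This is approximately the 95% quantile of the supremum of the absolute
## value of a standard Brownian motion on [0, 1], the limiting process for
## tests of this type; see the referenced paper for the precise statement.)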
## Storage for test decisions at each sample size.
decision_null = rep(NA, runs)
decision_alt1 = rep(NA, runs)
decision_alt2 = rep(NA, runs)
decision_alt3 = rep(NA, runs)
## Storage for the powers of each test.
power_null = rep(NA, length(samples))
power_alt1 = rep(NA, length(samples))
power_alt2 = rep(NA, length(samples))
power_alt3 = rep(NA, length(samples))
## Auto-detect number of available cores.
ncores = detectCores()
## Initiate cluster on this machine.
compcl = makeCluster(ncores, type="FORK")
## Open a progress bar.
pb = tkProgressBar(title="t errors test simulation", min=0, max=runs)
## Main simulation loop.
for(n in samples) {
## Fill the progress bar with current information.
setTkProgressBar(pb, 0, label=paste0("Sample size: ", n))
## Set initial time difference to zero.
avg_time_diff = 0
## Get the index of n in samples.
n_ind = which( samples == n )
## Sub-simulation loop.
for(s in 1 : runs) {
## Start a computing timer.
init_time = proc.time()[3]
## Generate a random sample of n covariates.
X = matrix(runif( 2 * n ), ncol=2)
## X = generate_covariates(n)
## Responses via model equation.
Y_null = distorted_reg(X) + rt(n, df=tdf)
    Y_alt1 = distorted_reg(X) + rlaplace(n, scale=0.5)
Y_alt2 = distorted_reg(X) + as.vector( rsn(n, alpha=3) ) -
3 / sqrt(10) * sqrt( 2 / pi )
Y_alt3 = distorted_reg(X) + rnorm(n, sd=0.5)
## Smoothing parameters.
reg_parm_null = LOOCV_MSPE_reg_parm(X, Y_null)
reg_parm_alt1 = LOOCV_MSPE_reg_parm(X, Y_alt1)
reg_parm_alt2 = LOOCV_MSPE_reg_parm(X, Y_alt2)
reg_parm_alt3 = LOOCV_MSPE_reg_parm(X, Y_alt3)
## Fitted values.
fits_null = hKtheta(X, X, Y_null, reg_parm_null)
fits_alt1 = hKtheta(X, X, Y_alt1, reg_parm_alt1)
fits_alt2 = hKtheta(X, X, Y_alt2, reg_parm_alt2)
fits_alt3 = hKtheta(X, X, Y_alt3, reg_parm_alt3)
## Residuals.
res_null = Y_null - fits_null
res_alt1 = Y_alt1 - fits_alt1
res_alt2 = Y_alt2 - fits_alt2
res_alt3 = Y_alt3 - fits_alt3
## Standardized residuals.
stdres_null = res_null / sd(res_null)
stdres_alt1 = res_alt1 / sd(res_alt1)
stdres_alt2 = res_alt2 / sd(res_alt2)
stdres_alt3 = res_alt3 / sd(res_alt3)
## Test statistics T0.
T0_null = MT_t_test(stdres_null)
T0_alt1 = MT_t_test(stdres_alt1)
T0_alt2 = MT_t_test(stdres_alt2)
T0_alt3 = MT_t_test(stdres_alt3)
## Store the test decisions from each model for this run.
decision_null[s] = 1 * ( T0_null > as_quan )
decision_alt1[s] = 1 * ( T0_alt1 > as_quan )
decision_alt2[s] = 1 * ( T0_alt2 > as_quan )
decision_alt3[s] = 1 * ( T0_alt3 > as_quan )
## One-step corrected mean estimate of computing time.
avg_time_diff = ( ( s - 1 ) * avg_time_diff +
proc.time()[3] - init_time ) / s
## Estimated time to finish (in hours).
ETF = round( avg_time_diff * ( runs - s ) / 3600, digits=1)
## Progress bar update message.
pb_msg = paste0("Sample size: ", n, " (", ETF, " hour(s) to go.)")
## Update the progress bar with the time remaining.
setTkProgressBar(pb, s, label=pb_msg)
}
## Simulated powers of each test.
power_null[n_ind] = mean(decision_null, na.rm=TRUE)
power_alt1[n_ind] = mean(decision_alt1, na.rm=TRUE)
power_alt2[n_ind] = mean(decision_alt2, na.rm=TRUE)
power_alt3[n_ind] = mean(decision_alt3, na.rm=TRUE)
## Print the power results for this sample size to the screen.
msg_power = paste0("Power results at sample size: ", n,
"\nNull: ", power_null[n_ind],
"\nAlt1: ", power_alt1[n_ind],
"\nAlt2: ", power_alt2[n_ind],
"\nAlt3: ", power_alt3[n_ind], "\n\n")
cat(msg_power)
}
## Close the progress bar.
close(pb)
## Results.
results = rbind(samples, power_null, power_alt1, power_alt2, power_alt3)
## Save results.
save(results, file="ireacr_t_15df.RData")
## Stop the computing cluster and free up system resources.
stopCluster(compcl)
Python code:
###############################################################################
## A python simulation to demonstrate martingale transform goodness-of-fit ##
## tests for the error distribution in an indirect regression model. ##
###############################################################################
## Required libraries.
import numpy as np
import scipy as sp
import scipy.special
import scipy.linalg     ## used below via sp.linalg.solve
import scipy.integrate  ## used below via sp.integrate.quad
from scipy.stats import uniform
from scipy.stats import t
from scipy.stats import norm
from scipy.stats import laplace
from scipy.stats import skewnorm
from progress.bar import Bar
from tabulate import tabulate
## Progress bar class.
class FancyBar(Bar):
suffix='%(percent)d%% - %(ETF).1fh to go.'
@property
def ETF(self):
return self.eta / 3600.0
## Custom functions.
## Distorted regression function for bivariate covariates.
def distorted_reg(x):
## Separate covariates x into their parts X1 and X2.
X1 = x[:, 0]
X2 = x[:, 1]
## Return the distorted regression function values.
return( 5.0 + np.cos( 2.0 * np.pi * X1 )
+ 3.0 * np.cos( 2.0 * np.pi * X2 ) / 2.0
+ 3.0 * np.cos( 4.0 * np.pi * X1 ) / 2.0
- 2.0 * np.cos( 4.0 * np.pi * X2 )
- 2.0 * np.cos( 2.0 * np.pi * ( X1 + X2 ) )
- np.cos( 2.0 * np.pi * ( X1 - X2 ) ) / 2.0 )
## A function to transform covariates xs into Fourier basis covariates.
def B_trans(xs, K_mat):
## Inner products between xs and K_mat.
xs_K = np.matmul(xs, K_mat.T)
## Return the Fourier basis covariates.
return( np.cos( 2.0 * np.pi * xs_K )
+ 1.0j * np.sin( 2.0 * np.pi * xs_K ) )
## A function to compute the distorted regression estimator.
def hKtheta(xs, xdata, ydata, reg_parm):
## Frequencies.
freqs = np.arange(-reg_parm, reg_parm + 1)
## Fourier frequency matrix.
K = np.array( [(k1, k2) for k1 in freqs for k2 in freqs] )
## Transformed covariates.
B = B_trans(xdata, K)
## Gram matrix of transformed covariates.
G = np.matmul(B.conj().T, B)
## Vector of inner products between transformed covariates and ydata.
v = np.matmul(B.conj().T, ydata.reshape( (ydata.size, 1) ))
## Estimated Fourier coefficients.
b = sp.linalg.solve(G, v, assume_a='pos')
## Transformed predictors.
B_xs = B_trans(xs, K)
## Return the predicted values.
return( np.matmul(B_xs, b).real.flatten() )
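## Note: with reg_parm = 0 the frequency matrix K reduces to the single row
## (0, 0), so hKtheta returns the sample mean of ydata at every prediction
## point, matching the reg_parm == 0 branch of the R version.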
## A function to compute the squared prediction error of a leave-one-out
## distorted regression estimate leaving out index di.
def LOO_SPE_hKtheta(di, xdata, ydata, reg_parm):
## Subset of xdata excluding row index di.
xdata_di = np.delete(xdata, di, axis=0)
## Subset of ydata excluding index di.
ydata_di = np.delete(ydata, di)
    ## Leave-one-out prediction of the excluded ydata[di] (the left-out
    ## covariate row is reshaped to a 1-by-2 matrix, as in the R version).
    Y_pred = hKtheta(xdata[di, :].reshape(1, 2), xdata_di, ydata_di, reg_parm)
    ## Return the squared difference between ydata[di] and Y_pred.
    return( ( ydata[di] - Y_pred[0] ) ** 2.0 )
## A function to compute the mean squared prediction error.
def LOOCV_MSPE_hKtheta(reg_parm, xdata, ydata):
## Storage for the squared prediction errors.
SPE = np.empty(ydata.size)
## Calculate squared prediction error for each response.
for di in range(ydata.size):
SPE[di] = LOO_SPE_hKtheta(di, xdata, ydata, reg_parm)
## Return the mean squared prediction error value.
return( np.mean(SPE) )
## LOOCV smoothing parameter selector.
def LOOCV_MSPE_reg_parm(xdata, ydata):
## Sample size.
nsamp = ydata.size
    ## Upper edge of smoothing parameter window (np.ceil rather than the
    ## removed scipy top-level alias sp.ceil).
    upper = np.ceil( 2.5 * ( nsamp / np.log(nsamp) ) ** ( 1.0 / 8.0 ) )
## A window of Fourier frequency thresholds.
window = np.arange(int(upper) + 1)
## Storage for the MSPE values for each frequency threshold.
MSPE = np.empty(window.size)
## For each threshold in the window, calculate the MSPE value.
for i in window:
MSPE[i] = LOOCV_MSPE_hKtheta(i, xdata, ydata)
## Return the threshold that minimizes MSPE.
return( window[ np.argmin(MSPE) ] )
## Degrees of freedom in the t-distribution.
tdf = 15.0
## Rescaling factor.
trscl = np.sqrt( ( tdf - 2.0 ) / tdf )
## Unregularized incomplete beta function.
def inc_beta(x, a, b):
return( sp.special.beta(a, b) * sp.special.betainc(a, b, x) )
## Constant of proportionality for the t-distribution.
C_nu = 1.0 / ( np.sqrt(tdf) * inc_beta(1.0, 1.0 / 2.0, tdf / 2.0) )
## Components of the vector-valued score function h_0
## (first component is omitted because it is the constant 1).
## Second component.
def h0_2(x):
## Numerator.
numer = x * ( tdf + 1.0 )
## Denominator.
denom = tdf * trscl ** 2.0 + x ** 2.0
## Return the ratio.
return( numer / denom )
## Third component.
def h0_3(x):
return( x * h0_2(x) - 1 )
## The augmented score vector.
def h0(x):
return( np.array( [[1], [h0_2(x)], [h0_3(x)]] ) )
## Required components of the 3-by-3 incomplete information matrix.
## (1, 1) element.
def G0_11(x):
return( 1 - t.cdf(x / trscl, tdf) )
## (2, 2) element.
def G0_22(x):
## Parameters of the incomplete beta function.
par1 = ( tdf + 1.0 ) / 2.0 + 1.0 / 2.0
par2 = 3.0 / 2.0
return( 2.0 * C_nu / ( trscl ** 2.0 * np.sqrt(tdf) )
* ( ( tdf + 1.0 ) / 2.0 ) ** 2.0
* ( 2.0 * ( x < 0 ) * inc_beta(1.0, par1, par2)
+ ( 1.0 * ( x >= 0 ) - 1.0 * ( x < 0 ) )
* inc_beta(1.0 / ( 1.0 + x ** 2 / ( trscl ** 2 * tdf ) ),
par1, par2) ) )
## (3, 3) element.
def G0_33(x):
    ## Parameters of the incomplete beta function.
par1 = ( tdf + 1.0 ) / 2.0 - 1.0 / 2.0
par2 = 5.0 / 2.0
return( 2.0 * C_nu * np.sqrt(tdf) * ( ( tdf + 1.0 ) / 2.0 ) ** 2.0
* ( 2.0 * ( x < 0.0 ) * inc_beta(1.0, par1, par2)
+ ( 1.0 * ( x >= 0.0 ) - 1.0 * ( x < 0 ) )
* inc_beta(1.0 / ( 1.0 + x ** 2.0 / ( trscl ** 2.0 * tdf ) ),
par1, par2) )
+ t.cdf(x / trscl, tdf) - 1.0
- 2.0 * x * t.pdf(x / trscl, tdf) / trscl )
## (1, 2) element.
def G0_12(x):
return( t.pdf(x / trscl, tdf) / trscl )
## (1, 3) element.
def G0_13(x):
return( x * t.pdf(x / trscl, tdf) / trscl )
## (2, 3) element.
def G0_23(x):
## Parameters of the incomplete beta function.
par1 = ( tdf + 1.0 ) / 2.0
par2 = 2.0
return( 2.0 * C_nu / trscl * ( ( tdf + 1.0 ) / 2.0 ) ** 2.0
* inc_beta(1.0 / ( 1.0 + x ** 2 / ( trscl ** 2 * tdf ) ),
par1, par2)
- t.pdf(x / trscl, tdf) / trscl )
## The incomplete information matrix (to verify correct calculation of its
## inverse).
def GMat(x):
## Diagonal terms.
g0_11 = G0_11(x)
g0_22 = G0_22(x)
g0_33 = G0_33(x)
## Off-diagonal terms.
g0_12_21 = G0_12(x)
g0_13_31 = G0_13(x)
g0_23_32 = G0_23(x)
## Return the 3-by-3 matrix.
return( np.array( [[g0_11, g0_12_21, g0_13_31],
[g0_12_21, g0_22, g0_23_32],
[g0_13_31, g0_23_32, g0_33]] ) )
## The inverse of the incomplete information matrix.
def GMat_inv(x):
## Diagonal terms.
g0_11 = G0_11(x)
g0_22 = G0_22(x)
g0_33 = G0_33(x)
## Off-diagonal terms.
g0_12_21 = G0_12(x)
g0_13_31 = G0_13(x)
g0_23_32 = G0_23(x)
    ## Determinant (cofactor expansion along the first row, as in the R
    ## version; note the g0_22 * g0_33 term).
    g0_det = ( g0_11 * ( g0_22 * g0_33 - g0_23_32 ** 2.0 )
               - g0_12_21 * ( g0_12_21 * g0_33 - g0_23_32 * g0_13_31 )
               + g0_13_31 * ( g0_12_21 * g0_23_32 - g0_22 * g0_13_31 ) )
    ## GMat inverse diagonal terms.
    g0_inv_11 = ( g0_22 * g0_33 - g0_23_32 ** 2.0 ) / g0_det
    g0_inv_22 = ( g0_11 * g0_33 - g0_13_31 ** 2.0 ) / g0_det
    g0_inv_33 = ( g0_11 * g0_22 - g0_12_21 ** 2.0 ) / g0_det
    ## GMat inverse off-diagonal terms.
    g0_inv_12_21 = ( g0_23_32 * g0_13_31 - g0_12_21 * g0_33 ) / g0_det
    g0_inv_13_31 = ( g0_12_21 * g0_23_32 - g0_13_31 * g0_22 ) / g0_det
    g0_inv_23_32 = ( g0_13_31 * g0_12_21 - g0_11 * g0_23_32 ) / g0_det
## Return the 3-by-3 matrix.
return( np.array( [[g0_inv_11, g0_inv_12_21, g0_inv_13_31],
[g0_inv_12_21, g0_inv_22, g0_inv_23_32],
[g0_inv_13_31, g0_inv_23_32, g0_inv_33]] ) )
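## Quick sanity check (not run): GMat_inv(x) should invert GMat(x), e.g.
## np.allclose(np.matmul(GMat(0.5), GMat_inv(0.5)), np.eye(3))  ## True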
## The integrand of the K-transform.
def I0_x(x, es_j):
## Vector of terms depending on x.
v1 = np.matmul(GMat_inv(x), h0(x)) * t.pdf(x / trscl, tdf) / trscl
## Return the integrand.
return( np.dot(v1.flatten(), h0(es_j).flatten()) )
## The K-transform term.
def K0(u, es_j):
## Minimum between u and es_j.
v = np.min((u, es_j))
## Return the value of the K-transform term.
    return( sp.integrate.quad(I0_x, -np.inf, v, args=(es_j,))[0] )
## K-transformed empirical process of standardized residuals.
def xi_0(u, es):
## Compute Fhat at this u value.
hF = np.mean( es <= u )
## Storage for the K0 terms.
K0_terms = np.empty(es.size)
## Compute the K0 terms.
for j in range(es.size):
K0_terms[j] = K0(u, es[j])
## Return the absolute value of the K-transformed empirical process.
return( np.sqrt(es.size) * np.abs( hF - np.mean(K0_terms) ) )
## Martingale transform GOF test for t errors.
def MT_t_test(stdres):
## Value of u0.
u0 = np.max(stdres)
## Grid to compute test statistics.
u_grid = np.sort(stdres)
## Storage of test statistic values.
TS = np.empty(stdres.size)
## Calculate the values of the test statistics.
for j in range(stdres.size):
TS[j] = xi_0(u_grid[j], stdres)
## Return the largest test statistic value (standardized).
return( np.max(TS) / np.sqrt( np.mean( stdres <= u0 ) ) )
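## Example (not run): under the null of scaled t(15) errors the statistic
## should exceed the asymptotic quantile as_quan in roughly 5% of samples.
## e_demo = trscl * t.rvs(size=200, df=tdf)
## T_demo = MT_t_test(e_demo / np.std(e_demo))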
## Simulation.
## Number of simulation runs.
runs = 1000
## Sample sizes.
samples = np.array( [100, 200] )
## Asymptotic quantile for an approximate 5% level test.
as_quan = 2.2414
## Storage for test decisions at each sample size.
decision_null = np.empty(runs)
decision_alt1 = np.empty(runs)
decision_alt2 = np.empty(runs)
decision_alt3 = np.empty(runs)
## Storage for the powers of each test.
power_null = np.empty(samples.size)
power_alt1 = np.empty(samples.size)
power_alt2 = np.empty(samples.size)
power_alt3 = np.empty(samples.size)
## Main simulation loop.
for n in samples:
## Get the index for n in samples.
n_ind = np.where( samples == n )[0][0]
## Open a progress bar.
bar = FancyBar(message='Sample size: ' + str(n), max=runs)
## Sub-simulation loop.
for s in range(runs):
## Generate a random sample of n covariates.
x1 = uniform.rvs(size=n)
x2 = uniform.rvs(size=n)
X = np.column_stack( (x1, x2) )
## Responses via model equation.
Y_null = distorted_reg(X) + t.rvs(size=n, df=tdf)
Y_alt1 = distorted_reg(X) + laplace.rvs(size=n, scale=0.5)
Y_alt2 = ( distorted_reg(X) + skewnorm.rvs(size=n, a=3.0)
- 3.0 / np.sqrt(10.0) * np.sqrt( 2.0 / np.pi ) )
Y_alt3 = distorted_reg(X) + norm.rvs(size=n, scale=0.5)
        ## Smoothing parameters (selected from each model's own responses,
        ## as in the R version).
        reg_parm_null = LOOCV_MSPE_reg_parm(X, Y_null)
        reg_parm_alt1 = LOOCV_MSPE_reg_parm(X, Y_alt1)
        reg_parm_alt2 = LOOCV_MSPE_reg_parm(X, Y_alt2)
        reg_parm_alt3 = LOOCV_MSPE_reg_parm(X, Y_alt3)
## Fitted values.
fits_null = hKtheta(X, X, Y_null, reg_parm_null)
fits_alt1 = hKtheta(X, X, Y_alt1, reg_parm_alt1)
fits_alt2 = hKtheta(X, X, Y_alt2, reg_parm_alt2)
fits_alt3 = hKtheta(X, X, Y_alt3, reg_parm_alt3)
## Residuals.
res_null = Y_null - fits_null
res_alt1 = Y_alt1 - fits_alt1
res_alt2 = Y_alt2 - fits_alt2
res_alt3 = Y_alt3 - fits_alt3
## Standardized residuals.
stdres_null = res_null / np.std(res_null)
stdres_alt1 = res_alt1 / np.std(res_alt1)
stdres_alt2 = res_alt2 / np.std(res_alt2)
stdres_alt3 = res_alt3 / np.std(res_alt3)
## Test statistics T0.
T0_null = MT_t_test(stdres_null)
T0_alt1 = MT_t_test(stdres_alt1)
T0_alt2 = MT_t_test(stdres_alt2)
T0_alt3 = MT_t_test(stdres_alt3)
## Store the test decisions from each model for this run.
decision_null[s] = 1 * ( T0_null > as_quan )
decision_alt1[s] = 1 * ( T0_alt1 > as_quan )
decision_alt2[s] = 1 * ( T0_alt2 > as_quan )
decision_alt3[s] = 1 * ( T0_alt3 > as_quan )
## Increment progress bar.
bar.next()
## Simulated powers of each test.
power_null[n_ind] = np.mean(decision_null)
power_alt1[n_ind] = np.mean(decision_alt1)
power_alt2[n_ind] = np.mean(decision_alt2)
    power_alt3[n_ind] = np.mean(decision_alt3)
## Close progress bar.
bar.finish()
## Simulation results.
## Power results.
results = np.column_stack( (samples, power_null, power_alt1, power_alt2,
power_alt3) ).T
## Display the simulation summary tables.
print( tabulate(results) )