Chown, Bissantz and Dette (2019) propose goodness-of-fit tests for the error distribution in a multivariate indirect regression model. The scripts below simulate the empirical level and power of the martingale transform test of Student t(15) errors against Laplace, skew-normal and normal alternatives, at sample sizes 100 and 200 with 1000 runs each, first in R and then in Python.
R code:
###############################################################################
## Simulation to demonstrate martingale transform goodness-of-fit tests for ##
## the error distribution in an indirect regression model. ##
###############################################################################
## Required libraries.
require(parallel)
require(tcltk)
require(VGAM)
require(sn)
## Set the correct working directory.
setwd("~/Documents/Published/ireacr/simulations/")
### Custom functions ###
## Univariate density function based on Fourier coefficients.
g1 = function(x) {
return( 1 - sqrt(2) * cos( 2 * pi * x ) / 4 -
sqrt(2) * cos( 4 * pi * x ) / 8 )
}
## Univariate cumulative distribution function based on Fourier coefficients.
G1 = function(t) {
return( t - sqrt(2) * sin( 2 * pi * t ) / 8 / pi
- sqrt(2) * sin( 4 * pi * t ) / 32 / pi )
}
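## Quick sanity check (not run): G1 is the antiderivative of g1 on [0, 1],
## with G1(0) = 0 and G1(1) = 1 because sin(2 * pi * k) = 0 for integer k.
## all.equal(G1(1), 1)
## all.equal(integrate(g1, 0, 0.3)$value, G1(0.3))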
## A function to generate a random sample of bivariate covariates.
generate_covariates = function(n) {
## Grid of [0, 1] of 1000 points.
tseq = seq(0, 1, length.out = 1000)
## G1 distribution function values at each grid point.
G1_tseq = G1(tseq)
## Generate n random covariates from G1.
X1 = sapply(runif(n), function(U, q_G1, probs_q_G1) {
return( q_G1[ min( which( U <= probs_q_G1 ) ) ] ) },
tseq, G1_tseq)
## Generate another n random covariates from G1.
X2 = sapply(runif(n), function(U, q_G1, probs_q_G1) {
return( q_G1[ min( which( U <= probs_q_G1 ) ) ] ) },
tseq, G1_tseq)
## Return an n-by-2 matrix whose columns are X1 and X2.
return( cbind(X1, X2) )
}
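## Example (not run): inverse-CDF sampling on a grid, so each column of the
## returned matrix is an approximate draw from G1 (up to grid resolution).
## X_demo = generate_covariates(500)
## hist(X_demo[, 1], freq=FALSE); curve(g1, add=TRUE)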
## Distorted regression function for bivariate covariates.
distorted_reg = function(x) {
## Separate covariates x into their parts X1 and X2.
X1 = x[, 1]
X2 = x[, 2]
## Return the distorted regression function values.
return( 5 + cos( 2 * pi * X1 ) + 3 * cos( 2 * pi * X2 ) / 2
+ 3 * cos( 4 * pi * X1 ) / 2 - 2 * cos( 4 * pi * X2 )
- 2 * cos( 2 * pi * ( X1 + X2 ) )
- cos( 2 * pi * ( X1 - X2 ) ) / 2 )
}
## A function to transform covariates xs into Fourier basis covariates.
B_trans = function(xs, K_mat) {
## Inner products between xs and K_mat.
xs_K = xs %*% t(K_mat)
## Return the Fourier basis covariates.
return( cos( 2 * pi * xs_K ) + 1i * sin( 2 * pi * xs_K ) )
}
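## Note: by Euler's formula each basis value equals exp(2i * pi * <x, k>).
## Illustration (not run, arbitrary values):
## all.equal(B_trans(matrix(0.3, 1, 2), matrix(c(1, 2), 1, 2)),
##           matrix(exp(2i * pi * 0.9), 1, 1))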
## A function to compute the distorted regression estimator.
hKtheta = function(xs, xdata, ydata, reg_parm) {
## When reg_parm = 0 simply return the mean of ydata for each xs.
if( reg_parm == 0 ) { return( rep(mean(ydata), length.out=nrow(xs)) ) }
else {
## Sample size.
nsamp = length(ydata)
## Fourier frequencies.
K_temp = expand.grid(-reg_parm : reg_parm, -reg_parm : reg_parm)
K = cbind(K_temp[[1]], K_temp[[2]])
## Transformed covariates.
B = B_trans(xdata, K)
## Gram matrix of transformed covariates.
G = t( Conj(B) ) %*% B
## Vector of inner products between transformed covariates and ydata.
v = t( Conj(B) ) %*% matrix(ydata, nrow=nsamp)
## Estimated Fourier coefficients.
b = qr.solve(G, v)
## Transformed predictors.
B_xs = B_trans(xs, K)
## Return the predicted values.
return( Re( B_xs %*% b ) )
}
}
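## Example (not run): fit the series estimator on a small simulated sample;
## reg_parm = 2 keeps Fourier frequencies -2, ..., 2 in each coordinate.
## X_demo = matrix(runif(100), ncol=2)
## Y_demo = distorted_reg(X_demo) + rnorm(50)
## fits_demo = hKtheta(X_demo, X_demo, Y_demo, 2)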
## A function to compute the squared prediction error of a leave-one-out
## distorted regression estimate leaving out index di.
LOO_SPE_hKtheta = function(di, xdata, ydata, reg_parm) {
## Subset of xdata excluding row index di.
xdata_di = xdata[-di, ]
## Subset of ydata excluding entry di.
ydata_di = ydata[ -di ]
## Leave-one-out prediction of the excluded ydata[di].
Y_pred = hKtheta(matrix(xdata[di, ], nrow=1), xdata_di, ydata_di, reg_parm)
## Return the squared difference between ydata[di] and Y_pred.
return( ( ydata[di] - Y_pred ) ^ 2 )
}
## A function to compute the mean squared prediction error.
LOOCV_MSPE_hKtheta = function(reg_parm, xdata, ydata) {
## Sample size.
nsamp = length(ydata)
## Compute the leave-one-out squared prediction error for each sample index.
SPE = sapply(1 : nsamp, LOO_SPE_hKtheta, xdata, ydata, reg_parm)
## Return the MSPE value.
return( mean(SPE) )
}
## LOOCV smoothing parameter selector.
LOOCV_MSPE_reg_parm = function(xdata, ydata) {
## Sample size.
nsamp = length(ydata)
## Calculate the upper edge of regularization parameter window.
upper = ceiling( 2.5 * ( nsamp / log(nsamp) ) ^ ( 1 / 8 ) )
## A window of Fourier frequency thresholds.
window = 0 : upper
## Compute the LOOCV estimate of MSPE for each cutoff in the window.
MSPE = sapply(window, LOOCV_MSPE_hKtheta, xdata, ydata)
## Return the threshold that minimizes MSPE.
return( window[ which.min(MSPE) ] )
}
## Incomplete Beta function.
inc_beta = function(x, a, b) {
return( pbeta(x, a, b) * beta(a, b) )
}
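## (pbeta is the regularized incomplete beta function, so multiplying by
## beta(a, b) recovers the unregularized integral of t^(a-1) * (1-t)^(b-1)
## over [0, x].)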
## Degrees of freedom in the t-distribution.
tdf = 15
## Rescaling of t errors to have scale of 1.
trscl = sqrt( ( tdf - 2 ) / tdf )
## Constant of proportionality for the t-distribution.
C_nu = 1 / ( sqrt(tdf) * inc_beta(1, 1 / 2, tdf / 2) )
## Components of the vector-valued score function h_0
## (first component is omitted because it is the constant 1).
## Second component.
h0_2 = function(x) {
## Numerator.
numer = x * ( tdf + 1 ) / trscl ^ 2
## Denominator.
denom = tdf + x ^ 2 / trscl ^ 2
## Return the ratio.
return( numer / denom )
}
## Third component.
h0_3 = function(x) {
return( x * h0_2(x) - 1 )
}
## The augmented score vector.
h0 = function(x) {
return( matrix(c(1, h0_2(x), h0_3(x)), ncol=1) )
}
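## (Interpretation, for reference: h0_2 is the location score -f'/f of the
## t density with tdf degrees of freedom and scale trscl, and
## h0_3 = x * h0_2(x) - 1 is the corresponding scale score.)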
## Required components of the 3-by-3 incomplete information matrix.
## (1,1) element.
G0_11 = function(x) {
return( 1 - pt(x / trscl, df=tdf) )
}
## (2, 2) element.
G0_22 = function(x) {
## Parameters of the incomplete beta function.
par1 = ( tdf + 1 ) / 2 + 1 / 2
par2 = 3 / 2
return( 2 * C_nu / ( trscl ^ 2 * sqrt(tdf) ) * ( ( tdf + 1 ) / 2 ) ^ 2 *
( 2 * ( x < 0 ) * inc_beta(1, par1, par2) +
( 1 * ( x >= 0 ) - 1 * ( x < 0 ) ) *
inc_beta(1 / ( 1 + x ^ 2 / ( trscl ^ 2 * tdf ) ), par1, par2) ) )
}
## (3, 3) element.
G0_33 = function(x) {
## Parameters of the incomplete beta function.
par1 = ( tdf + 1 ) / 2 - 1 / 2
par2 = 5 / 2
return( 2 * C_nu * sqrt(tdf) * ( ( tdf + 1 ) / 2 ) ^ 2 *
( 2 * ( x < 0 ) * inc_beta(1, par1, par2) +
( 1 * ( x >= 0 ) - 1 * ( x < 0 ) ) *
inc_beta(1 / ( 1 + x ^ 2 / ( trscl ^ 2 * tdf ) ), par1, par2) ) +
pt(x / trscl, df=tdf) - 1 - 2 * x * dt(x / trscl, df=tdf) / trscl )
}
## (1, 2) element.
G0_12 = function(x) {
return( dt(x / trscl, df=tdf) / trscl )
}
## (1, 3) element.
G0_13 = function(x) {
return( x * dt(x / trscl, df=tdf) / trscl )
}
## (2, 3) element.
G0_23 = function(x) {
## Parameters of the incomplete beta function.
par1 = ( tdf + 1 ) / 2
par2 = 2
return( 2 * C_nu / trscl * ( ( tdf + 1 ) / 2 ) ^ 2 *
inc_beta(1 / ( 1 + x ^ 2 / ( trscl ^ 2 * tdf ) ), par1, par2) -
dt(x / trscl, df=tdf) / trscl )
}
## The incomplete information matrix (to verify correct calculation of its
## inverse).
GMat = function(x) {
## Diagonal terms.
g0_11 = G0_11(x)
g0_22 = G0_22(x)
g0_33 = G0_33(x)
## Off-diagonal terms.
g0_12_21 = G0_12(x)
g0_13_31 = G0_13(x)
g0_23_32 = G0_23(x)
## Vector of matrix terms.
mat_vec = c(g0_11, g0_12_21, g0_13_31,
g0_12_21, g0_22, g0_23_32,
g0_13_31, g0_23_32, g0_33)
return( matrix(mat_vec, nrow=3, byrow=TRUE) )
}
## The inverse of the incomplete information matrix.
GMat_inv = function(x) {
## GMat diagonal terms.
g_11 = G0_11(x)
g_22 = G0_22(x)
g_33 = G0_33(x)
## GMat off-diagonal terms.
g_12_21 = G0_12(x)
g_13_31 = G0_13(x)
g_23_32 = G0_23(x)
## Determinant.
g_det = g_11 * ( g_22 * g_33 - g_23_32 ^ 2 ) -
g_12_21 * ( g_12_21 * g_33 - g_23_32 * g_13_31 ) +
g_13_31 * ( g_12_21 * g_23_32 - g_22 * g_13_31 )
## GMat inverse diagonal terms.
g_inv_11 = g_22 * g_33 - g_23_32 ^ 2
g_inv_22 = g_11 * g_33 - g_13_31 ^ 2
g_inv_33 = g_11 * g_22 - g_12_21 ^ 2
## GMat inverse off-diagonal terms.
g_inv_12_21 = g_23_32 * g_13_31 - g_12_21 * g_33
g_inv_13_31 = g_12_21 * g_23_32 - g_13_31 * g_22
g_inv_23_32 = g_13_31 * g_12_21 - g_11 * g_23_32
## Vector of matrix terms.
mat_vec = c(g_inv_11, g_inv_12_21, g_inv_13_31,
g_inv_12_21, g_inv_22, g_inv_23_32,
g_inv_13_31, g_inv_23_32, g_inv_33) / g_det
return( matrix(mat_vec, nrow=3, byrow=TRUE) )
}
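## Quick sanity check (not run): the two functions should be inverses of one
## another at any argument, e.g.
## max( abs( GMat(0.5) %*% GMat_inv(0.5) - diag(3) ) )  ## ~ 0 up to rounding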
## The integrand of the K-transform.
I0_x = function(x, es_j) {
## Vector of terms depending on x.
v1 = GMat_inv(x) %*% h0(x) * dt(x / trscl, df=tdf) / trscl
## Return the integrand.
return( crossprod(v1, h0(es_j)) )
}
## A wrapper function to compute I0_x at many x.
I0 = function(xs, es_j) {
return( sapply(xs, I0_x, es_j) )
}
## The K-transform term.
K0 = function(es_j, u) {
## Minimum between u and es_j.
v = min(es_j, u)
## Return the value of the K-transform term.
return( integrate(I0, -Inf, v, es_j, stop.on.error=FALSE)$value )
}
## K-transformed empirical process of standardized residuals.
xi_0 = function(u, es) {
## Compute Fhat at this u value.
hF = mean( es <= u )
## Compute the terms of K0 at this u value.
K0_terms = sapply(es, K0, u)
## Return the absolute value of the K-transformed empirical process.
return( sqrt( length(es) ) * abs( hF - mean(K0_terms) ) )
}
## Martingale transform GOF test for t errors.
MT_t_test = function(stdres) {
## Value of u0.
u0 = max(stdres)
## Grid to compute test statistics.
u_grid = sort(stdres)
## Test statistic values.
TS = parSapply(compcl, u_grid, xi_0, stdres)
## Return the largest test statistic value (standardized).
return( max(TS) / sqrt( mean( stdres <= u0 ) ) )
}
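## Example (not run; requires the cluster 'compcl' created below): under the
## null of scaled t(15) errors the statistic should exceed the asymptotic
## quantile as_quan in roughly 5% of samples.
## e_demo = trscl * rt(200, df=tdf)
## MT_t_test( e_demo / sd(e_demo) )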
## Simulation runs to perform.
runs = 1000
## Sample sizes.
samples = c(100, 200)
## Asymptotic quantile for an approximate 5% level test.
as_quan = 2.2414
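## (This is approximately the 95% quantile of the supremum of the absolute
## value of a standard Brownian motion on [0, 1], the limiting process for
## tests of this type; see the referenced paper for the precise statement.)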
## Storage for test decisions at each sample size.
decision_null = rep(NA, runs)
decision_alt1 = rep(NA, runs)
decision_alt2 = rep(NA, runs)
decision_alt3 = rep(NA, runs)
## Storage for the powers of each test.
power_null = rep(NA, length(samples))
power_alt1 = rep(NA, length(samples))
power_alt2 = rep(NA, length(samples))
power_alt3 = rep(NA, length(samples))
## Auto-detect number of available cores.
ncores = detectCores()
## Initiate cluster on this machine.
compcl = makeCluster(ncores, type="FORK")
## Open a progress bar.
pb = tkProgressBar(title="t errors test simulation", min=0, max=runs)
## Main simulation loop.
for(n in samples) {
## Fill the progress bar with current information.
setTkProgressBar(pb, 0, label=paste0("Sample size: ", n))
## Set initial time difference to zero.
avg_time_diff = 0
## Get the index of n in samples.
n_ind = which( samples == n )
## Sub-simulation loop.
for(s in 1 : runs) {
## Start a computing timer.
init_time = proc.time()[3]
## Generate a random sample of n covariates.
X = matrix(runif( 2 * n ), ncol=2)
## X = generate_covariates(n)
## Responses via model equation.
Y_null = distorted_reg(X) + rt(n, df=tdf)
    Y_alt1 = distorted_reg(X) + rlaplace(n, scale=0.5)
Y_alt2 = distorted_reg(X) + as.vector( rsn(n, alpha=3) ) -
3 / sqrt(10) * sqrt( 2 / pi )
Y_alt3 = distorted_reg(X) + rnorm(n, sd=0.5)
## Smoothing parameters.
reg_parm_null = LOOCV_MSPE_reg_parm(X, Y_null)
reg_parm_alt1 = LOOCV_MSPE_reg_parm(X, Y_alt1)
reg_parm_alt2 = LOOCV_MSPE_reg_parm(X, Y_alt2)
reg_parm_alt3 = LOOCV_MSPE_reg_parm(X, Y_alt3)
## Fitted values.
fits_null = hKtheta(X, X, Y_null, reg_parm_null)
fits_alt1 = hKtheta(X, X, Y_alt1, reg_parm_alt1)
fits_alt2 = hKtheta(X, X, Y_alt2, reg_parm_alt2)
fits_alt3 = hKtheta(X, X, Y_alt3, reg_parm_alt3)
## Residuals.
res_null = Y_null - fits_null
res_alt1 = Y_alt1 - fits_alt1
res_alt2 = Y_alt2 - fits_alt2
res_alt3 = Y_alt3 - fits_alt3
## Standardized residuals.
stdres_null = res_null / sd(res_null)
stdres_alt1 = res_alt1 / sd(res_alt1)
stdres_alt2 = res_alt2 / sd(res_alt2)
stdres_alt3 = res_alt3 / sd(res_alt3)
## Test statistics T0.
T0_null = MT_t_test(stdres_null)
T0_alt1 = MT_t_test(stdres_alt1)
T0_alt2 = MT_t_test(stdres_alt2)
T0_alt3 = MT_t_test(stdres_alt3)
## Store the test decisions from each model for this run.
decision_null[s] = 1 * ( T0_null > as_quan )
decision_alt1[s] = 1 * ( T0_alt1 > as_quan )
decision_alt2[s] = 1 * ( T0_alt2 > as_quan )
decision_alt3[s] = 1 * ( T0_alt3 > as_quan )
## One-step corrected mean estimate of computing time.
avg_time_diff = ( ( s - 1 ) * avg_time_diff +
proc.time()[3] - init_time ) / s
## Estimated time to finish (in hours).
ETF = round( avg_time_diff * ( runs - s ) / 3600, digits=1)
## Progress bar update message.
pb_msg = paste0("Sample size: ", n, " (", ETF, " hour(s) to go.)")
## Update the progress bar with the time remaining.
setTkProgressBar(pb, s, label=pb_msg)
}
## Simulated powers of each test.
power_null[n_ind] = mean(decision_null, na.rm=TRUE)
power_alt1[n_ind] = mean(decision_alt1, na.rm=TRUE)
power_alt2[n_ind] = mean(decision_alt2, na.rm=TRUE)
power_alt3[n_ind] = mean(decision_alt3, na.rm=TRUE)
## Print the power results for this sample size to the screen.
msg_power = paste0("Power results at sample size: ", n,
"\nNull: ", power_null[n_ind],
"\nAlt1: ", power_alt1[n_ind],
"\nAlt2: ", power_alt2[n_ind],
"\nAlt3: ", power_alt3[n_ind], "\n\n")
cat(msg_power)
}
## Close the progress bar.
close(pb)
## Results.
results = rbind(samples, power_null, power_alt1, power_alt2, power_alt3)
## Save results.
save(results, file="ireacr_t_15df.RData")
## Stop the computing cluster and free up system resources.
stopCluster(compcl)
Python code:
###############################################################################
## A python simulation to demonstrate martingale transform goodness-of-fit ##
## tests for the error distribution in an indirect regression model. ##
###############################################################################
## Required libraries.
import numpy as np
import scipy as sp
import scipy.special
import scipy.linalg     ## used below via sp.linalg.solve
import scipy.integrate  ## used below via sp.integrate.quad
from scipy.stats import uniform
from scipy.stats import t
from scipy.stats import norm
from scipy.stats import laplace
from scipy.stats import skewnorm
from progress.bar import Bar
from tabulate import tabulate
## Progress bar class.
class FancyBar(Bar):
suffix='%(percent)d%% - %(ETF).1fh to go.'
@property
def ETF(self):
return self.eta / 3600.0
## Custom functions.
## Distorted regression function for bivariate covariates.
def distorted_reg(x):
## Separate covariates x into their parts X1 and X2.
X1 = x[:, 0]
X2 = x[:, 1]
## Return the distorted regression function values.
return( 5.0 + np.cos( 2.0 * np.pi * X1 )
+ 3.0 * np.cos( 2.0 * np.pi * X2 ) / 2.0
+ 3.0 * np.cos( 4.0 * np.pi * X1 ) / 2.0
- 2.0 * np.cos( 4.0 * np.pi * X2 )
- 2.0 * np.cos( 2.0 * np.pi * ( X1 + X2 ) )
- np.cos( 2.0 * np.pi * ( X1 - X2 ) ) / 2.0 )
## A function to transform covariates xs into Fourier basis covariates.
def B_trans(xs, K_mat):
## Inner products between xs and K_mat.
xs_K = np.matmul(xs, K_mat.T)
## Return the Fourier basis covariates.
return( np.cos( 2.0 * np.pi * xs_K )
+ 1.0j * np.sin( 2.0 * np.pi * xs_K ) )
## A function to compute the distorted regression estimator.
def hKtheta(xs, xdata, ydata, reg_parm):
## Frequencies.
freqs = np.arange(-reg_parm, reg_parm + 1)
## Fourier frequency matrix.
K = np.array( [(k1, k2) for k1 in freqs for k2 in freqs] )
## Transformed covariates.
B = B_trans(xdata, K)
## Gram matrix of transformed covariates.
G = np.matmul(B.conj().T, B)
## Vector of inner products between transformed covariates and ydata.
v = np.matmul(B.conj().T, ydata.reshape( (ydata.size, 1) ))
## Estimated Fourier coefficients.
b = sp.linalg.solve(G, v, assume_a='pos')
## Transformed predictors.
B_xs = B_trans(xs, K)
## Return the predicted values.
return( np.matmul(B_xs, b).real.flatten() )
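## Note: with reg_parm = 0 the frequency matrix K reduces to the single row
## (0, 0), so hKtheta returns the sample mean of ydata at every prediction
## point, matching the reg_parm == 0 branch of the R version.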
## A function to compute the squared prediction error of a leave-one-out
## distorted regression estimate leaving out index di.
def LOO_SPE_hKtheta(di, xdata, ydata, reg_parm):
## Subset of xdata excluding row index di.
xdata_di = np.delete(xdata, di, axis=0)
## Subset of ydata excluding index di.
ydata_di = np.delete(ydata, di)
    ## Leave-one-out prediction of the excluded ydata[di] (the left-out
    ## covariate row is reshaped to a 1-by-2 matrix, as in the R version).
    Y_pred = hKtheta(xdata[di, :].reshape(1, 2), xdata_di, ydata_di, reg_parm)
    ## Return the squared difference between ydata[di] and Y_pred.
    return( ( ydata[di] - Y_pred[0] ) ** 2.0 )
## A function to compute the mean squared prediction error.
def LOOCV_MSPE_hKtheta(reg_parm, xdata, ydata):
## Storage for the squared prediction errors.
SPE = np.empty(ydata.size)
## Calculate squared prediction error for each response.
for di in range(ydata.size):
SPE[di] = LOO_SPE_hKtheta(di, xdata, ydata, reg_parm)
## Return the mean squared prediction error value.
return( np.mean(SPE) )
## LOOCV smoothing parameter selector.
def LOOCV_MSPE_reg_parm(xdata, ydata):
## Sample size.
nsamp = ydata.size
    ## Upper edge of smoothing parameter window (np.ceil rather than the
    ## removed scipy top-level alias sp.ceil).
    upper = np.ceil( 2.5 * ( nsamp / np.log(nsamp) ) ** ( 1.0 / 8.0 ) )
## A window of Fourier frequency thresholds.
window = np.arange(int(upper) + 1)
## Storage for the MSPE values for each frequency threshold.
MSPE = np.empty(window.size)
## For each threshold in the window, calculate the MSPE value.
for i in window:
MSPE[i] = LOOCV_MSPE_hKtheta(i, xdata, ydata)
## Return the threshold that minimizes MSPE.
return( window[ np.argmin(MSPE) ] )
## Degrees of freedom in the t-distribution.
tdf = 15.0
## Rescaling factor.
trscl = np.sqrt( ( tdf - 2.0 ) / tdf )
## Unregularized incomplete beta function.
def inc_beta(x, a, b):
return( sp.special.beta(a, b) * sp.special.betainc(a, b, x) )
## Constant of proportionality for the t-distribution.
C_nu = 1.0 / ( np.sqrt(tdf) * inc_beta(1.0, 1.0 / 2.0, tdf / 2.0) )
## Components of the vector-valued score function h_0
## (first component is omitted because it is the constant 1).
## Second component.
def h0_2(x):
## Numerator.
numer = x * ( tdf + 1.0 )
## Denominator.
denom = tdf * trscl ** 2.0 + x ** 2.0
## Return the ratio.
return( numer / denom )
## Third component.
def h0_3(x):
return( x * h0_2(x) - 1 )
## The augmented score vector.
def h0(x):
return( np.array( [[1], [h0_2(x)], [h0_3(x)]] ) )
## Required components of the 3-by-3 incomplete information matrix.
## (1, 1) element.
def G0_11(x):
return( 1 - t.cdf(x / trscl, tdf) )
## (2, 2) element.
def G0_22(x):
## Parameters of the incomplete beta function.
par1 = ( tdf + 1.0 ) / 2.0 + 1.0 / 2.0
par2 = 3.0 / 2.0
return( 2.0 * C_nu / ( trscl ** 2.0 * np.sqrt(tdf) )
* ( ( tdf + 1.0 ) / 2.0 ) ** 2.0
* ( 2.0 * ( x < 0 ) * inc_beta(1.0, par1, par2)
+ ( 1.0 * ( x >= 0 ) - 1.0 * ( x < 0 ) )
* inc_beta(1.0 / ( 1.0 + x ** 2 / ( trscl ** 2 * tdf ) ),
par1, par2) ) )
## (3, 3) element.
def G0_33(x):
    ## Parameters of the incomplete beta function.
par1 = ( tdf + 1.0 ) / 2.0 - 1.0 / 2.0
par2 = 5.0 / 2.0
return( 2.0 * C_nu * np.sqrt(tdf) * ( ( tdf + 1.0 ) / 2.0 ) ** 2.0
* ( 2.0 * ( x < 0.0 ) * inc_beta(1.0, par1, par2)
+ ( 1.0 * ( x >= 0.0 ) - 1.0 * ( x < 0 ) )
* inc_beta(1.0 / ( 1.0 + x ** 2.0 / ( trscl ** 2.0 * tdf ) ),
par1, par2) )
+ t.cdf(x / trscl, tdf) - 1.0
- 2.0 * x * t.pdf(x / trscl, tdf) / trscl )
## (1, 2) element.
def G0_12(x):
return( t.pdf(x / trscl, tdf) / trscl )
## (1, 3) element.
def G0_13(x):
return( x * t.pdf(x / trscl, tdf) / trscl )
## (2, 3) element.
def G0_23(x):
## Parameters of the incomplete beta function.
par1 = ( tdf + 1.0 ) / 2.0
par2 = 2.0
return( 2.0 * C_nu / trscl * ( ( tdf + 1.0 ) / 2.0 ) ** 2.0
* inc_beta(1.0 / ( 1.0 + x ** 2 / ( trscl ** 2 * tdf ) ),
par1, par2)
- t.pdf(x / trscl, tdf) / trscl )
## The incomplete information matrix (to verify correct calculation of its
## inverse).
def GMat(x):
## Diagonal terms.
g0_11 = G0_11(x)
g0_22 = G0_22(x)
g0_33 = G0_33(x)
## Off-diagonal terms.
g0_12_21 = G0_12(x)
g0_13_31 = G0_13(x)
g0_23_32 = G0_23(x)
## Return the 3-by-3 matrix.
return( np.array( [[g0_11, g0_12_21, g0_13_31],
[g0_12_21, g0_22, g0_23_32],
[g0_13_31, g0_23_32, g0_33]] ) )
## The inverse of the incomplete information matrix.
def GMat_inv(x):
## Diagonal terms.
g0_11 = G0_11(x)
g0_22 = G0_22(x)
g0_33 = G0_33(x)
## Off-diagonal terms.
g0_12_21 = G0_12(x)
g0_13_31 = G0_13(x)
g0_23_32 = G0_23(x)
    ## Determinant (cofactor expansion along the first row, as in the R
    ## version; note the g0_22 * g0_33 term).
    g0_det = ( g0_11 * ( g0_22 * g0_33 - g0_23_32 ** 2.0 )
               - g0_12_21 * ( g0_12_21 * g0_33 - g0_23_32 * g0_13_31 )
               + g0_13_31 * ( g0_12_21 * g0_23_32 - g0_22 * g0_13_31 ) )
    ## GMat inverse diagonal terms.
    g0_inv_11 = ( g0_22 * g0_33 - g0_23_32 ** 2.0 ) / g0_det
    g0_inv_22 = ( g0_11 * g0_33 - g0_13_31 ** 2.0 ) / g0_det
    g0_inv_33 = ( g0_11 * g0_22 - g0_12_21 ** 2.0 ) / g0_det
    ## GMat inverse off-diagonal terms.
    g0_inv_12_21 = ( g0_23_32 * g0_13_31 - g0_12_21 * g0_33 ) / g0_det
    g0_inv_13_31 = ( g0_12_21 * g0_23_32 - g0_13_31 * g0_22 ) / g0_det
    g0_inv_23_32 = ( g0_13_31 * g0_12_21 - g0_11 * g0_23_32 ) / g0_det
## Return the 3-by-3 matrix.
return( np.array( [[g0_inv_11, g0_inv_12_21, g0_inv_13_31],
[g0_inv_12_21, g0_inv_22, g0_inv_23_32],
[g0_inv_13_31, g0_inv_23_32, g0_inv_33]] ) )
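## Quick sanity check (not run): GMat_inv(x) should invert GMat(x), e.g.
## np.allclose(np.matmul(GMat(0.5), GMat_inv(0.5)), np.eye(3))  ## True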
## The integrand of the K-transform.
def I0_x(x, es_j):
## Vector of terms depending on x.
v1 = np.matmul(GMat_inv(x), h0(x)) * t.pdf(x / trscl, tdf) / trscl
## Return the integrand.
return( np.dot(v1.flatten(), h0(es_j).flatten()) )
## The K-transform term.
def K0(u, es_j):
## Minimum between u and es_j.
v = np.min((u, es_j))
## Return the value of the K-transform term.
    return( sp.integrate.quad(I0_x, -np.inf, v, args=(es_j,))[0] )
## K-transformed empirical process of standardized residuals.
def xi_0(u, es):
## Compute Fhat at this u value.
hF = np.mean( es <= u )
## Storage for the K0 terms.
K0_terms = np.empty(es.size)
## Compute the K0 terms.
for j in range(es.size):
K0_terms[j] = K0(u, es[j])
## Return the absolute value of the K-transformed empirical process.
return( np.sqrt(es.size) * np.abs( hF - np.mean(K0_terms) ) )
## Martingale transform GOF test for t errors.
def MT_t_test(stdres):
## Value of u0.
u0 = np.max(stdres)
## Grid to compute test statistics.
u_grid = np.sort(stdres)
## Storage of test statistic values.
TS = np.empty(stdres.size)
## Calculate the values of the test statistics.
for j in range(stdres.size):
TS[j] = xi_0(u_grid[j], stdres)
## Return the largest test statistic value (standardized).
return( np.max(TS) / np.sqrt( np.mean( stdres <= u0 ) ) )
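## Example (not run): under the null of scaled t(15) errors the statistic
## should exceed the asymptotic quantile as_quan in roughly 5% of samples.
## e_demo = trscl * t.rvs(size=200, df=tdf)
## T_demo = MT_t_test(e_demo / np.std(e_demo))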
## Simulation.
## Number of simulation runs.
runs = 1000
## Sample sizes.
samples = np.array( [100, 200] )
## Asymptotic quantile for an approximate 5% level test.
as_quan = 2.2414
## Storage for test decisions at each sample size.
decision_null = np.empty(runs)
decision_alt1 = np.empty(runs)
decision_alt2 = np.empty(runs)
decision_alt3 = np.empty(runs)
## Storage for the powers of each test.
power_null = np.empty(samples.size)
power_alt1 = np.empty(samples.size)
power_alt2 = np.empty(samples.size)
power_alt3 = np.empty(samples.size)
## Main simulation loop.
for n in samples:
## Get the index for n in samples.
n_ind = np.where( samples == n )[0][0]
## Open a progress bar.
bar = FancyBar(message='Sample size: ' + str(n), max=runs)
## Sub-simulation loop.
for s in range(runs):
## Generate a random sample of n covariates.
x1 = uniform.rvs(size=n)
x2 = uniform.rvs(size=n)
X = np.column_stack( (x1, x2) )
## Responses via model equation.
Y_null = distorted_reg(X) + t.rvs(size=n, df=tdf)
Y_alt1 = distorted_reg(X) + laplace.rvs(size=n, scale=0.5)
Y_alt2 = ( distorted_reg(X) + skewnorm.rvs(size=n, a=3.0)
- 3.0 / np.sqrt(10.0) * np.sqrt( 2.0 / np.pi ) )
Y_alt3 = distorted_reg(X) + norm.rvs(size=n, scale=0.5)
        ## Smoothing parameters (selected from each model's own responses,
        ## as in the R version).
        reg_parm_null = LOOCV_MSPE_reg_parm(X, Y_null)
        reg_parm_alt1 = LOOCV_MSPE_reg_parm(X, Y_alt1)
        reg_parm_alt2 = LOOCV_MSPE_reg_parm(X, Y_alt2)
        reg_parm_alt3 = LOOCV_MSPE_reg_parm(X, Y_alt3)
## Fitted values.
fits_null = hKtheta(X, X, Y_null, reg_parm_null)
fits_alt1 = hKtheta(X, X, Y_alt1, reg_parm_alt1)
fits_alt2 = hKtheta(X, X, Y_alt2, reg_parm_alt2)
fits_alt3 = hKtheta(X, X, Y_alt3, reg_parm_alt3)
## Residuals.
res_null = Y_null - fits_null
res_alt1 = Y_alt1 - fits_alt1
res_alt2 = Y_alt2 - fits_alt2
res_alt3 = Y_alt3 - fits_alt3
## Standardized residuals.
stdres_null = res_null / np.std(res_null)
stdres_alt1 = res_alt1 / np.std(res_alt1)
stdres_alt2 = res_alt2 / np.std(res_alt2)
stdres_alt3 = res_alt3 / np.std(res_alt3)
## Test statistics T0.
T0_null = MT_t_test(stdres_null)
T0_alt1 = MT_t_test(stdres_alt1)
T0_alt2 = MT_t_test(stdres_alt2)
T0_alt3 = MT_t_test(stdres_alt3)
## Store the test decisions from each model for this run.
decision_null[s] = 1 * ( T0_null > as_quan )
decision_alt1[s] = 1 * ( T0_alt1 > as_quan )
decision_alt2[s] = 1 * ( T0_alt2 > as_quan )
decision_alt3[s] = 1 * ( T0_alt3 > as_quan )
## Increment progress bar.
bar.next()
## Simulated powers of each test.
power_null[n_ind] = np.mean(decision_null)
power_alt1[n_ind] = np.mean(decision_alt1)
power_alt2[n_ind] = np.mean(decision_alt2)
    power_alt3[n_ind] = np.mean(decision_alt3)
## Close progress bar.
bar.finish()
## Simulation results.
## Power results.
results = np.column_stack( (samples, power_null, power_alt1, power_alt2,
power_alt3) ).T
## Display the simulation summary tables.
print( tabulate(results) )