KaufmanEtAl_Fig3_2005.R
## load gss
library(gss)
## For reproducibility set the seed and type of the RNG
set.seed(20061001,"Mersenne-Twister")
## Define the theoretical Poisson intensity
lambdaKaufman <- function(theta)
50 + 25 * sin(theta-1.2*pi)+75*exp(-10*(15*(theta-pi)/2/pi)^2)
## create a vector with 100 elements evenly spaced between 0 and 2pi
THETA <- (0:99)*2*pi/100
## compute the theoretical intensity at each of the 100 positions
lambdaTRUE <- lambdaKaufman(THETA)
## get the square root of the theoretical intensity
lambdaTRUE.sqrt <- sqrt(lambdaTRUE)
## Define function mkfit which given data (counts at the 100 locations of
## vector THETA) returns the predicted intensity using function gssanova
## of gss. The parameter alpha can also be specified
mkfit <- function(data,alpha=1) {
myfit <- gssanova(data ~ THETA,
family="poisson",
list(type="linear.per",range=c(0,2*pi)),
nbasis=100,alpha=alpha)
exp(predict(myfit,newdata=data.frame(THETA=THETA)))
}
## To make computation quicker use the fact that as soon as the
## expected count of a Poisson distribution is 20 to 25 (or more)
## the propbability of observing n events is very closely approximated
## by a normal distribution with mean and variance equal to the Poisson
## expected value. Use moroever the fact that a sqaure root transform
## will satbilize the variance and make it eaqual to 0.5^2. Then use
## a "normal" smoothing spline (ssanova0) with a fixed variance.
## Fct mkfit0 returns the expected value as well as its SD on the
## square root scale
mkfit0 <- function(data) {
myfit <- ssanova0(sqrt(data) ~ THETA,
list(type="cubic.per",range=c(0,2*pi)),
method="u",
varht=0.5^2
)
predict(myfit,newdata=data.frame(THETA=THETA),se=TRUE)
}
## function MISE retur, the mean integrated squared error
MISE <- function(mypred) sum((mypred-lambdaTRUE)^2)/length(THETA)/2/pi
## the number of replicate to simulate
nbRep <- 5000
## The following run takes 370 s on my laptop PC (dual core 2 GHz)
## everything being compiled with gcc 3.4
result <- sapply(1:nbRep,
function(idx,alpha=1) {
## Generate the sample
mydata <- rpois(length(THETA),lambdaTRUE)
## get the perdiction using one of the 2 fit functions
## mypred <- mkfit(mydata,alpha)
mypred <- mkfit0(mydata)
## get the mise
mise <- MISE(mypred$fit^2)
## save the best and worst case (from the mise view point)
## and the corresponding data
if (idx==1) {
best <<- mypred
best.mise <<- mise
best.data <<- mydata
worst <<- mypred
worst.mise <<- mise
worst.data <<- mydata
} else {
if (mise < best.mise) {
best <<- mypred
best.mise <<- mise
best.data <<- mydata
}
if (mise > worst.mise) {
worst <<- mypred
worst.mise <<- mise
worst.data <<- mydata
}
}
## Get the number of locations (among the 100 used) at which the
## true intensity is out of the nominal 95% CI
outBound <- sum(lambdaTRUE.sqrt < mypred$fit - 1.96*mypred$se.fit |
lambdaTRUE.sqrt > mypred$fit + 1.96*mypred$se.fit)
## print out the result
cat(paste("rep: ",idx,", MISE: ",mise,", out: ",outBound,"\n",sep=""))
## return mise and out count values
c(mise,outBound)
}
)
## Get the lower bound (95% nominal CI) in the worst case
worst.L <- (worst$fit-1.96*worst$se.fit)^2
## Get the upper bound (95% nominal CI) in the worst case
worst.U <- (worst$fit+1.96*worst$se.fit)^2
## Get the lower bound (95% nominal CI) in the best case
best.L <- (best$fit-1.96*best$se.fit)^2
## Get the upper bound (95% nominal CI) in the best case
best.U <- (best$fit+1.96*best$se.fit)^2
## set the limits of the ordinate of the graph
## and graph best and worst cases with corresponding
## data and target (ie, true) intensity.
gmin <- 0
gmax <- max(c(worst.U,best.U,best.data,worst.data))
X11()
layout(matrix(1:4,nrow=2,byrow=TRUE))
## For the worst case, draw the data as circles,
## the 95% accross the function coverage band and
## the actual intensity
plot(THETA,worst.data,
type="n",
ylim=c(gmin,gmax),
xlab=expression(theta),
ylab="Counts",
main=paste("Worst case out of",nbRep)
)
polygon(c(THETA,rev(THETA)),c(worst.L,rev(worst.U)),col="grey30",lty=0)
lines(THETA,lambdaTRUE,col=2,lwd=2)
points(THETA,worst.data)
## For the best case, draw the data as circles,
## the 95% accross the function coverage band and
## the actual intensity
plot(THETA,best.data,
type="n",
ylim=c(gmin,gmax),
xlab=expression(theta),
ylab="Counts",
main=paste("Best case out of",nbRep)
)
polygon(c(THETA,rev(THETA)),c(best.L,rev(best.U)),col="grey30",lty=0)
lines(THETA,lambdaTRUE,col=2,lwd=2)
points(THETA,best.data)
## plot a smoothing spline estimate of the density of
## the MISE.
myMISE <- result[1,]
mise.fit <- ssden(~myMISE)
plot(xx <- seq(min(myMISE),max(myMISE),len=100),
dssden(mise.fit,xx),
type="l",
xlab="Mean integrated squared error",
ylab="Estimated density",
main="Empirical MISE density")
## Add a vertical dashed line with the mean value
abline(v=mean(result[1,]),lty=2)
## plot the empirical probability mass function of
## the number of points of the actual intensity (out of 100)
## which are out of the nominal accross the function 95% coverage
## band
mOUT <- mean(result[2,])
seOUT <- sd(result[2,])/sqrt(1000)
plot(table(result[2,]),
xlab="# of points out (among 100)",
ylab="frequency",
main="Empirical Coverage Probability")
## Add a red band with estimated coverage probability*100
polygon(mOUT+1.96*seOUT*c(-1,1,1,-1),
max(table(result[2,]))*c(0,0,1,1),
col=2,lty=0)
lines(c(5,5),c(0,table(result[2,])[5]),lwd=2)