KaufmanEtAl_Fig3_2005.R

## load gss

library(gss)

## For reproducibility set the seed and type of the RNG

set.seed(20061001,"Mersenne-Twister")

## Define the theoretical Poisson intensity

lambdaKaufman <- function(theta)

50 + 25 * sin(theta-1.2*pi)+75*exp(-10*(15*(theta-pi)/2/pi)^2)

## create a vector with 100 elements evenly spaced between 0 and 2pi

THETA <- (0:99)*2*pi/100

## compute the theoretical intensity at each of the 100 positions

lambdaTRUE <- lambdaKaufman(THETA)

## get the square root of the theoretical intensity

lambdaTRUE.sqrt <- sqrt(lambdaTRUE)

## Define function mkfit which given data (counts at the 100 locations of

## vector THETA) returns the predicted intensity using function gssanova

## of gss. The parameter alpha can also be specified

mkfit <- function(data,alpha=1) {

myfit <- gssanova(data ~ THETA,

family="poisson",

list(type="linear.per",range=c(0,2*pi)),

nbasis=100,alpha=alpha)

exp(predict(myfit,newdata=data.frame(THETA=THETA)))

}

## To make computation quicker use the fact that as soon as the

## expected count of a Poisson distribution is 20 to 25 (or more)

## the propbability of observing n events is very closely approximated

## by a normal distribution with mean and variance equal to the Poisson

## expected value. Use moroever the fact that a sqaure root transform

## will satbilize the variance and make it eaqual to 0.5^2. Then use

## a "normal" smoothing spline (ssanova0) with a fixed variance.

## Fct mkfit0 returns the expected value as well as its SD on the

## square root scale

mkfit0 <- function(data) {

myfit <- ssanova0(sqrt(data) ~ THETA,

list(type="cubic.per",range=c(0,2*pi)),

method="u",

varht=0.5^2

)

predict(myfit,newdata=data.frame(THETA=THETA),se=TRUE)

}

## function MISE retur, the mean integrated squared error

MISE <- function(mypred) sum((mypred-lambdaTRUE)^2)/length(THETA)/2/pi

## the number of replicate to simulate

nbRep <- 5000

## The following run takes 370 s on my laptop PC (dual core 2 GHz)

## everything being compiled with gcc 3.4

result <- sapply(1:nbRep,

function(idx,alpha=1) {

## Generate the sample

mydata <- rpois(length(THETA),lambdaTRUE)

## get the perdiction using one of the 2 fit functions

## mypred <- mkfit(mydata,alpha)

mypred <- mkfit0(mydata)

## get the mise

mise <- MISE(mypred$fit^2)

## save the best and worst case (from the mise view point)

## and the corresponding data

if (idx==1) {

best <<- mypred

best.mise <<- mise

best.data <<- mydata

worst <<- mypred

worst.mise <<- mise

worst.data <<- mydata

} else {

if (mise < best.mise) {

best <<- mypred

best.mise <<- mise

best.data <<- mydata

}

if (mise > worst.mise) {

worst <<- mypred

worst.mise <<- mise

worst.data <<- mydata

}

}

## Get the number of locations (among the 100 used) at which the

## true intensity is out of the nominal 95% CI

outBound <- sum(lambdaTRUE.sqrt < mypred$fit - 1.96*mypred$se.fit |

lambdaTRUE.sqrt > mypred$fit + 1.96*mypred$se.fit)

## print out the result

cat(paste("rep: ",idx,", MISE: ",mise,", out: ",outBound,"\n",sep=""))

## return mise and out count values

c(mise,outBound)

}

)

## Get the lower bound (95% nominal CI) in the worst case

worst.L <- (worst$fit-1.96*worst$se.fit)^2

## Get the upper bound (95% nominal CI) in the worst case

worst.U <- (worst$fit+1.96*worst$se.fit)^2

## Get the lower bound (95% nominal CI) in the best case

best.L <- (best$fit-1.96*best$se.fit)^2

## Get the upper bound (95% nominal CI) in the best case

best.U <- (best$fit+1.96*best$se.fit)^2

## set the limits of the ordinate of the graph

## and graph best and worst cases with corresponding

## data and target (ie, true) intensity.

gmin <- 0

gmax <- max(c(worst.U,best.U,best.data,worst.data))

X11()

layout(matrix(1:4,nrow=2,byrow=TRUE))

## For the worst case, draw the data as circles,

## the 95% accross the function coverage band and

## the actual intensity

plot(THETA,worst.data,

type="n",

ylim=c(gmin,gmax),

xlab=expression(theta),

ylab="Counts",

main=paste("Worst case out of",nbRep)

)

polygon(c(THETA,rev(THETA)),c(worst.L,rev(worst.U)),col="grey30",lty=0)

lines(THETA,lambdaTRUE,col=2,lwd=2)

points(THETA,worst.data)

## For the best case, draw the data as circles,

## the 95% accross the function coverage band and

## the actual intensity

plot(THETA,best.data,

type="n",

ylim=c(gmin,gmax),

xlab=expression(theta),

ylab="Counts",

main=paste("Best case out of",nbRep)

)

polygon(c(THETA,rev(THETA)),c(best.L,rev(best.U)),col="grey30",lty=0)

lines(THETA,lambdaTRUE,col=2,lwd=2)

points(THETA,best.data)

## plot a smoothing spline estimate of the density of

## the MISE.

myMISE <- result[1,]

mise.fit <- ssden(~myMISE)

plot(xx <- seq(min(myMISE),max(myMISE),len=100),

dssden(mise.fit,xx),

type="l",

xlab="Mean integrated squared error",

ylab="Estimated density",

main="Empirical MISE density")

## Add a vertical dashed line with the mean value

abline(v=mean(result[1,]),lty=2)

## plot the empirical probability mass function of

## the number of points of the actual intensity (out of 100)

## which are out of the nominal accross the function 95% coverage

## band

mOUT <- mean(result[2,])

seOUT <- sd(result[2,])/sqrt(1000)

plot(table(result[2,]),

xlab="# of points out (among 100)",

ylab="frequency",

main="Empirical Coverage Probability")

## Add a red band with estimated coverage probability*100

polygon(mOUT+1.96*seOUT*c(-1,1,1,-1),

max(table(result[2,]))*c(0,0,1,1),

col=2,lty=0)

lines(c(5,5),c(0,table(result[2,])[5]),lwd=2)