Binomial distribution, known N

The following is an R script that exercises and checks the Quick Bayes c-box for the discrete binomial distribution with known number of trials. It demonstrates that the c-box for the binomial rate generalizes the notion of a confidence distribution such that it straddles the uniform distribution in a Singh plot, and that one can compute with confidence boxes in conjunctions, disjunctions and Boolean expressions and obtain results that also have the confidence interpretation. This script was created as a part of our quality assurance efforts for the Quick Bayes software. You can run the script and see its output by sequentially pasting its sections (which are delimited by rows of pound signs #) into the R console. Alternatively, you can open the Word file "binomialN where cboxes work.docx" attached to the bottom of this page to see the output with some explanatory annotations.

# As always, you should be able to copy the content between the rows of pound signs

# and paste them into R to replicate simulations and see the graphical results, or

# you can turn on History/Recording for the plot window, then paste the entire

# contents of this file into R and use PageUp and PageDown to see the output. The

# results depend on stochastic simulations, so it is useful to run them a few times

# to observe the range of variation in the outputs.

############################################################################################ red

# BINOMIAL RATE

# demonstrate how Balch's confidence box for the binomial rate generalizes a confidence distribution (CD)

many = 5000 # increase for more accuracy

some = 10000 # increase for more accuracy

n = 8

p = 0.2

N = 20

cd <- function(x,N) {n <- length(x); k <- sum(x); return(env(beta(k, n*N-k+1), beta(k+1, n*N-k)))}

beta <- function(v,w) if (v==0) rep(0,many) else if (w==0) rep(1,many) else rbeta(many, v, w)

env <- function(x,y) if (all(x==y)) return(x) else return(c(x,y))

u = w = array(0, some)

for (i in 1:some) {

x = rbinom(n,N,p) # sample data

d = cd(x,N)

u[i] = sum((d < p)[1:many]) / many

w[i] = sum((d < p)[(many+1):(2*many)]) / many

}

edf = function(x,...) lines(sort(x), 1:length(x)/length(x),type = 's',...)

plot(NULL,xlim=c(0,1),ylim=c(0,1))

lines(c(0,1),c(0,1),col='gray',lwd=4)

edf(u)

edf(w,col='red')

############################################################################################ green

# BINOMIAL RATE C-BOX FOR VARIOUS TRUE RATE VALUES

many = 2000 # increase for more accuracy

some = 10000 # increase for more accuracy

edf = function(x,...) lines(sort(x), 1:length(x)/length(x),type = 's',...)

cd <- function(x,N) {n <- length(x); k <- sum(x); return(env(beta(k, n*N-k+1), beta(k+1, n*N-k)))}

beta <- function(v,w) if (v==0) rep(0,many) else if (w==0) rep(1,many) else rbeta(many, v, w)

env <- function(x,y) if (all(x==y)) return(x) else return(c(x,y))

par(mfrow=c(3,4))

N = 20

n = 8

mus = c(1e-12, 1e-5, 1e-3, 0.05, 0.1, 0.3, 0.7, 0.9, 0.95, 0.999, 0.99999, 1-1e-12)

m = length(mus)

for (k in 1:m) {

mu = mus[[k]]

u = w = array(0, some)

for (i in 1:some) {

x = rbinom(n,N,mu) # sample data

d = cd(x,N)

u[i] = sum((d < mu)[1:many]) / many

w[i] = sum((d < mu)[(many+1):(2*many)]) / many

}

plot(c(0,1),c(0,1),col='red',type='l', lwd=2)

edf(u)

edf(w,col='blue')

title(paste('True p =',mu))

}

############################################################################################ blue

# CONJUNCTION UNDER INDEPENDENCE, and MULTIPLICATION

f = AND = TIMES = function(x,y) {

nx = length(x)

ny = length(y)

if ((nx==1) && (ny==1)) return(x*y)

c(x[1:many]*y[1:many], x[(many+1):nx]*y[(many+1):ny])

}

many = 2000 # increase for more accuracy

some = 1000 # increase for more accuracy

# x[i] ~ binomial(N, p), for known N, x[i] is a nonnegative integer less than or equal to N

cd <- function(x,N) {n <- length(x); k <- sum(x); return(env(beta(k, n*N-k+1), beta(k+1, n*N-k)))}

beta <- function(v,w) if (v==0) rep(0,many) else if (w==0) rep(1,many) else rbeta(many, v, w)

env <- function(x,y) if (all(x==y)) return(x) else return(c(x,y))

edf = function(x,...) lines(sort(x), 1:length(x)/length(x),type = 's',...)

areametric = function(u) sum(abs(sort(u)-((1:length(u))/length(u))))/length(u)

#par(mfrow=c(1,1))

par(mfrow=c(5,5))

N = 20

n1 = 8

n2 = 8

mus = c(1e-5, 1e-3, 1e-1, 0.3, 0.5, 0.7, 0.9, 0.999, 0.99999)

mus = c(1e-3, 1e-1, 0.5, 0.9, 0.999)

m = length(mus)

A = matrix(Inf, nrow=length(mus),ncol=length(mus))

B = matrix(Inf, nrow=length(mus),ncol=length(mus))

for (j in 1:m) for (k in 1:m) {

mu1 = mus[[j]]

mu2 = mus[[k]]

u = w = array(0, some)

for (i in 1:some) {

x = rbinom(n1,N,mu1) # sample data

y = rbinom(n2,N,mu2) # sample data

d = f(cd(x,N), cd(y,N))

u[i] = sum((d < f(mu1,mu2))[1:many]) / many

w[i] = sum((d < f(mu1,mu2))[(many+1):(2*many)]) / many

}

a = areametric(u)

b = areametric(w)

A[j,k] = a

B[j,k] = b

plot(c(0,1),c(0,1),col='red',type='l', lwd=2)

edf(u)

edf(w,col='blue')

title(paste(mu1,mu2,signif(a,3),signif(b,3)))

cat(j, k, a, b, n1,mu1,'times', n2,mu2,'\n')

}

#persp(mus,mus,A,theta=85)

############################################################################################ black

# DISJUNCTION UNDER INDEPENDENCE

f = OR = function(x,y) {

nx = length(x)

ny = length(y)

if ((nx==1) && (ny==1)) return(1-(1-x)*(1-y))

1-c((1-x[1:many])*(1-y[1:many]), (1-x[(many+1):nx])*(1-y[(many+1):ny]))

}

many = 2000 # increase for more accuracy

some = 1000 # increase for more accuracy

# x[i] ~ binomial(N, p), for known N, x[i] is a nonnegative integer less than or equal to N

cd <- function(x,N) {n <- length(x); k <- sum(x); return(env(beta(k, n*N-k+1), beta(k+1, n*N-k)))}

beta <- function(v,w) if (v==0) rep(0,many) else if (w==0) rep(1,many) else rbeta(many, v, w)

env <- function(x,y) if (all(x==y)) return(x) else return(c(x,y))

edf = function(x,...) lines(sort(x), 1:length(x)/length(x),type = 's',...)

areametric = function(u) sum(abs(sort(u)-((1:length(u))/length(u))))/length(u)

#par(mfrow=c(1,1))

par(mfrow=c(5,5))

N = 20

n1 = 8

n2 = 8

mus = c(1e-5, 1e-3, 1e-1, 0.3, 0.5, 0.7, 0.9, 0.999, 0.99999)

mus = c(1e-3, 1e-1, 0.5, 0.9, 0.999)

m = length(mus)

A = matrix(Inf, nrow=length(mus),ncol=length(mus))

B = matrix(Inf, nrow=length(mus),ncol=length(mus))

for (j in 1:m) for (k in 1:m) {

mu1 = mus[[j]]

mu2 = mus[[k]]

u = w = array(0, some)

for (i in 1:some) {

x = rbinom(n1,N,mu1) # sample data

y = rbinom(n2,N,mu2) # sample data

d = f(cd(x,N), cd(y,N))

u[i] = sum((d < f(mu1,mu2))[1:many]) / many

w[i] = sum((d < f(mu1,mu2))[(many+1):(2*many)]) / many

}

a = areametric(u)

b = areametric(w)

A[j,k] = a

B[j,k] = b

plot(c(0,1),c(0,1),col='red',type='l', lwd=2)

edf(u)

edf(w,col='blue')

title(paste(mu1,mu2,signif(a,3),signif(b,3)))

cat(j, k, a, b, n1,mu1,'times', n2,mu2,'\n')

}

#persp(mus,mus,A,theta=85)

############################################################################################ brown

# A BOOLEAN EXPRESSION UNDER INDEPENDENCE

some = 1000 # increase for more accuracy

many = 2000 # increase for more accuracy

K = 9

f = function(x1,x2,x3,x4,x5,x6,x7,x8,x9) {

OR(AND(x1,OR(x2, AND(OR(x3,x4), OR(x5,x6)))),OR(x9, AND(x7,x8)))

}

cb = function(i) cd(rbinom(n[[i]],N[[i]],p[[i]]), N[[i]])

cd <- function(x,N) {n <- length(x); k <- sum(x); return(env(beta(k, n*N-k+1), beta(k+1, n*N-k)))}

beta <- function(v,w) if (v==0) rep(0,many) else if (w==0) rep(1,many) else rbeta(many, v, w)

env <- function(x,y) if (all(x==y)) return(x) else return(c(x,y))

edf = function(x,...) lines(sort(x), 1:length(x)/length(x),type = 's',...)

areametric = function(u) sum(abs(sort(u)-((1:length(u))/length(u))))/length(u)

p = runif(K)

N = round(25*runif(K))+1

true = f(p[[1]],p[[2]],p[[3]],p[[4]],p[[5]],p[[6]],p[[7]],p[[8]],p[[9]])

LEFT = 1:many

RIGHT = (many+1):(2*many)

n = rep(0,K)

steps = 0

par(mfrow=c(3,3))

for (k in 1:9) {

u = w = array(0, some)

steps = steps + 5

n = n + round(steps*runif(K))

for (i in 1:some) {

d = f(cb(1),cb(2),cb(3),cb(4),cb(5),cb(6),cb(7),cb(8),cb(9))

u[i] = sum((d < true)[LEFT]) / many

w[i] = sum((d < true)[RIGHT]) / many

}

plot(c(0,1),c(0,1),col='red',type='l', lwd=2)

edf(u)

edf(w,col='blue')

tit = ''

for (kk in 1:9) tit = paste(tit,n[[kk]])

title(tit)

}

############################################################################################ brown

# A BOOLEAN EXPRESSION WITHOUT DEPENDENCE ASSUMPTIONS

<<more code to be added>>

Page updated

Google Sites

Report abuse