More trials

Post date: Mar 11, 2014 5:15:56 PM

#################################################################################

#

# The MORE TRIALS approach to estimating possible information gain

# via empirical study of the five components of the bridge system

#

# We measure the improvement of uncertainty in the bridge's reliability

# as its fractional reduction when we "pinch" one component's uncertainty

# by hypothetically increasing the number of trials used to compute its

# c-box. This measure is

#

# (unc(B) - unc(P)) / unc(B) = 1 - unc(P) / unc(B)

#

# where B denotes the baseline calculation that includes the uncertainties

# of all the components, and P denotes the pinched calculation with one or

# some of those uncertainties reduced. The unc function characterizes

# the imprecision uncertainty of a c-box or p-box. We use breadth, i.e.,

# the area between the box's left and right bounds, as the unc function.

# The functions below use a Monte-Carlo approach rather than a rigorous

# one in order to simply certain constructions (such as when k=0 or k=n)

# and also to effect the left-right trick in evaluating uncertainties of

# monotone functions. This use is only a matter of convenience; Monte

# Carlo methods are not required for this analysis, for either reason.

# infrastructural functions

#

#many = 10000 # increase for more accuracy

#

#precise <- function(b) if (length(b)==many) TRUE else FALSE

#

#leftside <- function(b) if (precise(b)) return(b) else return(b[1:many])

#

#rightside <- function(b) if (precise(b)) return(b) else return(b[(many+1):(2*many)])

#

#env <- function(x,y) if ((precise(x) && precise(y))) c(x,y) else stop('env error') # serves as env, but only works for precise distributional inputs

#

#beta <- function(v,w) if ((v==0) && (w==0)) env(rep(0,many),rep(1,many)) else if (v==0) rep(0,many) else if (w==0) rep(1,many) else rbeta(many, v, w)

#

#kn <- function(k,n) return(env(beta(k, n-k+1), beta(k+1, n-k))) # c-box for Bernoulli parameter: x[i] ~ Bernoulli(parameter), x[i] is either 0 or 1, n = length(x), k = sum(x)

#

#data = function(p, trials) sum(runif(trials) <= p)

#

#L <- function(x,random=FALSE) if (!random) leftside(x) else leftside(x)[round(runif(many)*(many-1))+1]

#

#R <- function(x,random=FALSE) if (!random) rightside(x) else rightside(x)[round(runif(many)*(many-1))+1]

#

#breadth <- function(b) if (precise(b)) 0 else mean(sort(rightside(b))-sort(leftside(b)))

#

#plotbox <- function(b,new=TRUE,col='blue',lwd=2,xlim=range(b[is.finite(b)]),ylim=c(0,1),xlab='',ylab='Prob',...) {

# edf <- function (x, col, lwd, ...) {

# n <- length(x)

# s <- sort(x)

# lines(c(s[[1]],s[[1]]),c(0,1/n),lwd=lwd,col=col,...)

# for (i in 2:n) lines(c(s[[i-1]],s[[i]],s[[i]]),c(i-1,i-1,i)/n,col=col,lwd=lwd,...)

# }

# b = ifelse(b==-Inf, xlim[1] - 10, b)

# b = ifelse(b==Inf, xlim[2] + 10, b)

# if (new) plot(NULL, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)

# if (length(b) < many) edf(b,col,lwd) else

# edf(c(min(b),max(b),b[1:min(length(b),many)]),col,lwd)

# if (many < length(b)) edf(c(min(b),max(b),b[(many+1):length(b)]),col,lwd)

# }

#

# functions specifically for the bridge calculations

#

#BridgeI = function(a,b,c,d,e) (a*c+b*d+a*d*e+b*c*e-a*b*c*e-a*c*d*e-a*b*d*e-b*c*d*e-a*b*c*d+2*a*b*c*d*e)

#

#bridgeI = function(a,b,c,d,e) {

# return(env(BridgeI(L(a),L(b),L(c),L(d),L(e)), BridgeI(R(a),R(b),R(c),R(d),R(e))))

# }

control <- function(p) { # these functions are not suitable for use with intervals or boxes because they have repeated variables

a = p[[1]]

b = p[[2]]

c = p[[3]]

d = p[[4]]

e = p[[5]]

da = c + d*e - b*c*e - c*d*e - b*d*e - b*c*d + 2*b*c*d*e

db = d + c*e - a*c*e - a*d*e - c*d*e - a*c*d + 2*a*c*d*e

dc = a + b*e - a*b*e - a*d*e - b*d*e - a*b*d + 2*a*b*d*e

dd = b + a*e - a*c*e - a*b*e - b*c*e - a*b*c + 2*a*b*c*e

de = a*d + b*c - a*b*c - a*c*d - a*b*d - b*c*d + 2*a*b*c*d

c(da,db,dc,dd,de)

}

improve <- function(p,n,m) {

pa = p[[1]]; na = n[[1]]; ma = m[[1]]

pb = p[[2]]; nb = n[[2]]; mb = m[[2]]

pc = p[[3]]; nc = n[[3]]; mc = m[[3]]

pd = p[[4]]; nd = n[[4]]; md = m[[4]]

pe = p[[5]]; ne = n[[5]]; me = m[[5]]

ka = data(pa,na)

kb = data(pb,nb)

kc = data(pc,nc)

kd = data(pd,nd)

ke = data(pe,ne)

a = kn(ka,na) # to avoid making the c-boxes perfectly dependent, do not use a construction like "a = b = c = d = e = kn(2,60)"

b = kn(kb,nb)

c = kn(kc,nc)

d = kn(kd,nd)

e = kn(kd,ne)

BB = breadth(bridgeI(a,b,c,d,e))

better <- function(k,n,p,m) kn(k+data(p,m), n+m)

BBi = breadth(bridgeI(better(ka,na,pa,ma),better(kb,nb,pb,mb),better(kc,nc,pc,mc),better(kd,nd,pd,md),better(ke,ne,pe,me)))

i = 100*(1-BBi/BB)

cat('investing', sum(m),'samples yields a',i,'% improvement\n')

i

}

study <- function(p,n,effort,new=TRUE) {

N = length(p)

# compute the true Birnbaum importance measures, based on true probabilities

c = control(p)

s = rep(0,N+1)

# allocate the samples to the each component, in turn, to be studied, and to a sixth strategy to share the samples among all components

d = diag(rep(effort,N))

for (i in 1:N) s[[i]] = improve(p,n,d[i,])

sharedeffort = rep(round(effort/N),N)

# in case there are too many samples because effort is not divisible by N, reduce the samples randomly until there's the right number

while (effort < sum(sharedeffort)) { i=round(1+runif(1)*N); sharedeffort[[i]] = sharedeffort[[i]]-1; }

# compute the improvement for each sampling strategy

s[[N+1]] = improve(p,n,sharedeffort)

# output the results as plots and printouts

names = c(letters[1:5], 'share')

myplot <- function(x, y, txt, ...) { if (new) plot(NULL,xlim=range(x),ylim=range(y),...); text(x, y, txt, adj = c(0.5, 0.5), cex=2, ...) }

#myplot(c(p,mean(p)),s,names)

myplot(c(c,mean(c)),s,names,col=c(1:5,40))

cat(rev(names[order(s)]),'\n')

}

studies <- function(p,n,m,xlim=c(0,1),ylim=c(-100,100),count=20) {

plot(NULL,xlim=xlim,ylim=ylim,xlab='True Birnbaum importance',ylab='Uncertainty reduction (%)')

t = 'p ='; for (i in p) t=paste(t,i);title(t)

for (i in 1:count) study(p, n, m, FALSE)

}

samples = 60

moresamples = 60

par(mfrow=c(2,3))

studies(c(0.564, 0.234, 0.685, 0.199, 0.292),rep(samples,5), moresamples, xlim=c(0.05,0.7),ylim=c(-20,30))

studies(c(0.9, 0.9, 0.8, 0.8, 0.99),rep(samples,5), moresamples,xlim=c(0,.25),ylim=c(0,50))

studies(c(0.9, 0.2, 0.2, 0.9, 0.5),rep(samples,5), moresamples,xlim=c(.4,.55),ylim=c(-10,30))

studies(rep(0.5,5), rep(samples,5), moresamples,xlim=c(0.1,0.4),ylim=c(-10,30))

studies(rep(0.99,5),rep(samples,5), moresamples,xlim=c(0.0,0.015),ylim=c(-20,50))

studies(signif(runif(5),3),rep(samples,5), moresamples,xlim=c(0.0,1),ylim=c(-20,50))