Post date: Mar 11, 2014 5:15:56 PM
#################################################################################
#################################################################################
#################################################################################
#################################################################################
#
# The MORE TRIALS approach to estimating possible information gain
# via empirical study of the five components of the bridge system
#
# We measure the improvement of uncertainty in the bridge's reliability
# as its fractional reduction when we "pinch" one component's uncertainty
# by hypothetically increasing the number of trials used to compute its
# c-box. This measure is
#
# (unc(B) - unc(P)) / unc(B) = 1 - unc(P) / unc(B)
#
# where B denotes the baseline calculation that includes the uncertainties
# of all the components, and P denotes the pinched calculation with one or
# some of those uncertainties reduced. The unc function characterizes
# the imprecision uncertainty of a c-box or p-box. We use breadth, i.e.,
# the area between the box's left and right bounds, as the unc function.
# The functions below use a Monte-Carlo approach rather than a rigorous
# one in order to simply certain constructions (such as when k=0 or k=n)
# and also to effect the left-right trick in evaluating uncertainties of
# monotone functions. This use is only a matter of convenience; Monte
# Carlo methods are not required for this analysis, for either reason.
# infrastructural functions
#
#many = 10000 # increase for more accuracy
#
#precise <- function(b) if (length(b)==many) TRUE else FALSE
#
#leftside <- function(b) if (precise(b)) return(b) else return(b[1:many])
#
#rightside <- function(b) if (precise(b)) return(b) else return(b[(many+1):(2*many)])
#
#env <- function(x,y) if ((precise(x) && precise(y))) c(x,y) else stop('env error') # serves as env, but only works for precise distributional inputs
#
#beta <- function(v,w) if ((v==0) && (w==0)) env(rep(0,many),rep(1,many)) else if (v==0) rep(0,many) else if (w==0) rep(1,many) else rbeta(many, v, w)
#
#kn <- function(k,n) return(env(beta(k, n-k+1), beta(k+1, n-k))) # c-box for Bernoulli parameter: x[i] ~ Bernoulli(parameter), x[i] is either 0 or 1, n = length(x), k = sum(x)
#
#data = function(p, trials) sum(runif(trials) <= p)
#
#L <- function(x,random=FALSE) if (!random) leftside(x) else leftside(x)[round(runif(many)*(many-1))+1]
#
#R <- function(x,random=FALSE) if (!random) rightside(x) else rightside(x)[round(runif(many)*(many-1))+1]
#
#breadth <- function(b) if (precise(b)) 0 else mean(sort(rightside(b))-sort(leftside(b)))
#
#plotbox <- function(b,new=TRUE,col='blue',lwd=2,xlim=range(b[is.finite(b)]),ylim=c(0,1),xlab='',ylab='Prob',...) {
# edf <- function (x, col, lwd, ...) {
# n <- length(x)
# s <- sort(x)
# lines(c(s[[1]],s[[1]]),c(0,1/n),lwd=lwd,col=col,...)
# for (i in 2:n) lines(c(s[[i-1]],s[[i]],s[[i]]),c(i-1,i-1,i)/n,col=col,lwd=lwd,...)
# }
# b = ifelse(b==-Inf, xlim[1] - 10, b)
# b = ifelse(b==Inf, xlim[2] + 10, b)
# if (new) plot(NULL, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
# if (length(b) < many) edf(b,col,lwd) else
# edf(c(min(b),max(b),b[1:min(length(b),many)]),col,lwd)
# if (many < length(b)) edf(c(min(b),max(b),b[(many+1):length(b)]),col,lwd)
# }
#
#
# functions specifically for the bridge calculations
#
#BridgeI = function(a,b,c,d,e) (a*c+b*d+a*d*e+b*c*e-a*b*c*e-a*c*d*e-a*b*d*e-b*c*d*e-a*b*c*d+2*a*b*c*d*e)
#
#bridgeI = function(a,b,c,d,e) {
# return(env(BridgeI(L(a),L(b),L(c),L(d),L(e)), BridgeI(R(a),R(b),R(c),R(d),R(e))))
# }
control <- function(p) { # these functions are not suitable for use with intervals or boxes because they have repeated variables
a = p[[1]]
b = p[[2]]
c = p[[3]]
d = p[[4]]
e = p[[5]]
da = c + d*e - b*c*e - c*d*e - b*d*e - b*c*d + 2*b*c*d*e
db = d + c*e - a*c*e - a*d*e - c*d*e - a*c*d + 2*a*c*d*e
dc = a + b*e - a*b*e - a*d*e - b*d*e - a*b*d + 2*a*b*d*e
dd = b + a*e - a*c*e - a*b*e - b*c*e - a*b*c + 2*a*b*c*e
de = a*d + b*c - a*b*c - a*c*d - a*b*d - b*c*d + 2*a*b*c*d
c(da,db,dc,dd,de)
}
improve <- function(p,n,m) {
pa = p[[1]]; na = n[[1]]; ma = m[[1]]
pb = p[[2]]; nb = n[[2]]; mb = m[[2]]
pc = p[[3]]; nc = n[[3]]; mc = m[[3]]
pd = p[[4]]; nd = n[[4]]; md = m[[4]]
pe = p[[5]]; ne = n[[5]]; me = m[[5]]
ka = data(pa,na)
kb = data(pb,nb)
kc = data(pc,nc)
kd = data(pd,nd)
ke = data(pe,ne)
a = kn(ka,na) # to avoid making the c-boxes perfectly dependent, do not use a construction like "a = b = c = d = e = kn(2,60)"
b = kn(kb,nb)
c = kn(kc,nc)
d = kn(kd,nd)
e = kn(kd,ne)
BB = breadth(bridgeI(a,b,c,d,e))
better <- function(k,n,p,m) kn(k+data(p,m), n+m)
BBi = breadth(bridgeI(better(ka,na,pa,ma),better(kb,nb,pb,mb),better(kc,nc,pc,mc),better(kd,nd,pd,md),better(ke,ne,pe,me)))
i = 100*(1-BBi/BB)
cat('investing', sum(m),'samples yields a',i,'% improvement\n')
i
}
study <- function(p,n,effort,new=TRUE) {
N = length(p)
# compute the true Birnbaum importance measures, based on true probabilities
c = control(p)
s = rep(0,N+1)
# allocate the samples to the each component, in turn, to be studied, and to a sixth strategy to share the samples among all components
d = diag(rep(effort,N))
for (i in 1:N) s[[i]] = improve(p,n,d[i,])
sharedeffort = rep(round(effort/N),N)
# in case there are too many samples because effort is not divisible by N, reduce the samples randomly until there's the right number
while (effort < sum(sharedeffort)) { i=round(1+runif(1)*N); sharedeffort[[i]] = sharedeffort[[i]]-1; }
# compute the improvement for each sampling strategy
s[[N+1]] = improve(p,n,sharedeffort)
# output the results as plots and printouts
names = c(letters[1:5], 'share')
myplot <- function(x, y, txt, ...) { if (new) plot(NULL,xlim=range(x),ylim=range(y),...); text(x, y, txt, adj = c(0.5, 0.5), cex=2, ...) }
#myplot(c(p,mean(p)),s,names)
myplot(c(c,mean(c)),s,names,col=c(1:5,40))
cat(rev(names[order(s)]),'\n')
}
studies <- function(p,n,m,xlim=c(0,1),ylim=c(-100,100),count=20) {
plot(NULL,xlim=xlim,ylim=ylim,xlab='True Birnbaum importance',ylab='Uncertainty reduction (%)')
t = 'p ='; for (i in p) t=paste(t,i);title(t)
for (i in 1:count) study(p, n, m, FALSE)
}
samples = 60
moresamples = 60
par(mfrow=c(2,3))
studies(c(0.564, 0.234, 0.685, 0.199, 0.292),rep(samples,5), moresamples, xlim=c(0.05,0.7),ylim=c(-20,30))
studies(c(0.9, 0.9, 0.8, 0.8, 0.99),rep(samples,5), moresamples,xlim=c(0,.25),ylim=c(0,50))
studies(c(0.9, 0.2, 0.2, 0.9, 0.5),rep(samples,5), moresamples,xlim=c(.4,.55),ylim=c(-10,30))
studies(rep(0.5,5), rep(samples,5), moresamples,xlim=c(0.1,0.4),ylim=c(-10,30))
studies(rep(0.99,5),rep(samples,5), moresamples,xlim=c(0.0,0.015),ylim=c(-20,50))
studies(signif(runif(5),3),rep(samples,5), moresamples,xlim=c(0.0,1),ylim=c(-20,50))