Post date: Feb 08, 2014 1:40:35 PM
Jason wrote:
Scott,
Very cool.
Maybe you could send us some R code demonstrating the Bernoulli parameter case. What are the robust Bayes conventions? Having the ability to incorporate prior information and to iteratively update while still retaining the confidence interpretation (and prescribed performance) is probably really important for machine learning and other applied data science(y) stuff.
Scott wrote:
Jason:
Everything seems to work fine for Bernoulli, binomial, Poisson, exponential, but then I'm not smart enough to understand what the Bayesians actually recommend for the normal case. They always wanna talk about the cases where we "know" the variance or the mean. It seems to me like these aren't realistic cases at all.
I'm really very fond of the idea that updating is equivalent to pooling the old and new data sets. That's definitely not something that Bayesians would be fond of. Let's see how long it can be sustained.
The extension of c-boxes to updating problems might not work so nicely for the normal case. It looks like Michael gives a scaled t-distribution for the mu parameter, but some Bayesian authorities are talking about an inverse gamma for that. As there could be alternative c-boxes, maybe this inconsistency is not insurmountable.
The R code you asked for is below. In each case, the blue c-box is the updated one, that should overwrite (and therefore obscure) the pooled-data c-box.
Maybe you could do the honors of installing your requesting email and this response in a (single?) announcement on the new Say page of the c-box website. You need to use Sitemap from the links on the right side of the screen to find the Say page.
Cheers,
Scott
# assumes you've already loaded pbox.r
# we need to regularize the gamma calling syntax of pbox.r to be consistent with R
gamma.RC = function(scale, shape, ...) env2.4(Sgamma,scale,shape, ...)
Sgamma.RC = function(scale, shape, name=''){ # N.B. the parameters are scale and shape, which are reordered and different from rgamma's parameters which are shape and rate (1/scale)
m <- scale*shape
v <- scale^2*shape
pbox(u=qgamma(ii(), shape=shape, scale=scale), d=qgamma(jjj(), shape=shape, scale=scale), shape='gamma', name=name, ml=m, mh=m, vl=v, vh=v)
}
gamma <- function(shape,rate=1,scale=1/rate, ...) {rate = 1/scale; env2.4(Sgamma, shape,rate, ...)}
Sgamma = function(shape, rate, scale=1/rate, name=''){ # these parameters are conformant with those of R's rgamma function
m <- scale*shape
v <- scale^2*shape
pbox(u=qgamma(ii(), shape=shape, scale=scale), d=qgamma(jjj(), shape=shape, scale=scale), shape='gamma', name=name, ml=m, mh=m, vl=v, vh=v)
}
##################################################################
##################################################################
# x[i] ~ Bernoulli(parameter), x[i] is either 0 or 1
parameter.bernoulli <- function(x) {n <- length(x); k <- sum(x); return(env(beta(k, n-k+1), beta(k+1, n-k)))}
d1 = runif(5) < 0.3
d2 = runif(10) < 0.3
# pooled data
post12 = parameter.bernoulli(c(d1,d2))
# data sets in sequence
post1 = parameter.bernoulli(d1)
n <- length(d1)
k <- sum(d1)
# L = c(k, n-k+1)
# R = c(k+1, n-k)
a = interval(k, k+1)
b = interval(n-k,n-k+1)
post2 = beta(a + sum(d2), b + length(d2) - sum(d2))
# compare results
post12
post2
plot(post12)
lines(post2,col='blue')
##################################################################
##################################################################
# x[i] ~ binomial(N, p), for known N, x[i] is a nonnegative integer less than or equal to N
parameter.binomial <- function(x,N) {n <- length(x); k <- sum(x); return(env(beta(k, n*N-k+1), beta(k+1, n*N-k)))}
N = 20
d1 = rbinom(5,N,0.3)
d2 = rbinom(10,N,0.3)
# pooled data
post12 = parameter.binomial(c(d1,d2),N)
# data sets in sequence
post1 = parameter.binomial(d1,N)
n <- length(d1)
k <- sum(d1)
# L = c(k, n*N-k+1)
# R = c(k+1, n*N-k)
a = interval(k, k+1)
b = interval(n*N-k,n*N-k+1)
post2 = beta(a + sum(d2), b + length(d2)*N - sum(d2))
# compare results
post12
post2
plot(post12)
lines(post2,col='blue')
##################################################################
##################################################################
# x[i] ~ exponential(parameter), x[i] is a nonnegative integer
parameter.exponential <- function(x) {n <- length(x); k = sum(x); return(gamma(shape=n, rate=k))}
d1 = rexp(5, 0.3)
d2 = rexp(10, 0.3)
# pooled data
post12 = parameter.exponential(c(d1,d2))
# data sets in sequence
post1 = parameter.exponential(d1)
n <- length(d1)
k <- sum(d1)
# shape = n
# rate = k
a = n
b = k
post2 = gamma(shape=a + length(d2), rate=b + sum(d2) )
# compare results
post12
post2
plot(post12)
lines(post2,col='blue')
##################################################################
##################################################################
# x[i] ~ Poisson(parameter), x[i] is a nonnegative integer
parameter.poisson <- function(x) {n <- length(x); k = sum(x); return(env(gamma(shape=k, rate=n),gamma(shape=k+1, rate=n)))}
d1 = rpois(8, 12)
d2 = rpois(16, 12)
# pooled data
post12 = parameter.poisson(c(d1,d2))
# data sets in sequence
post1 = parameter.poisson(d1)
n <- length(d1)
k = sum(d1);
# L = c(shape=k, rate=n)
# R = c(shape=k+1, rate=n)
a = interval(k, k+1) # shape
b = interval(n,n) # rate ############### could this really be true?
# gamma(alpha+sum(x),beta+n)
post2 = gamma(a + sum(d2),b + length(d2))
# compare results
post12
post2
plot(post12)
lines(post2,col='blue')
##################################################################
# NOT WORKING
##################################################################
# x[i] ~ normal(mu, sigma)
parameter.normal.mu <- function(x) {n <- length(x); return(mean(x) + sd(x) * student(n - 1) / sqrt(n))}
parameter.normal.sigma <- function(x) {n <- length(x); return(sqrt(var(x)*(n-1)/chisquared(n-1)))}
d1 = rnorm(4, 12, 3)
d2 = rnorm(10, 12, 3)
# pooled data
post12.mu = parameter.normal.mu(c(d1,d2))
post12.sigma = parameter.normal.sigma(c(d1,d2))
# data sets in sequence
post1.mu = parameter.normal.mu(d1)
m = mean(d1)
s = sd(d1)
n = length(d1)
# m + s * student(n - 1) / sqrt(n)
#
a =
b =
A = a + sum(d2)
B = b + length(d2)
post2 = gamma(A,B)
# compare results
post12
post2
plot(post12)
lines(post2,col='blue')