The following is an R script that exercises and checks the confidence boxes (c-boxes) for the continuous uniform distribution, which is parameterized either by specifying its minimum and maximum or by specifying its midpoint and width. This script was created as part of our quality assurance efforts for c-box software. You can run the script and see its output by sequentially pasting its sections (delimited by rows of pound signs #) into the R console.
#
# As always, you should be able to copy the content between the rows of pound signs
# and paste it into R to replicate the simulations and see the graphical results, or
# you can turn on History/Recording for the plot window, then paste the entire
# contents of this file into R and use PageUp and PageDown to see the output. The
# results depend on stochastic simulations, so it is useful to run them a few times
# to observe the range of variation in the outputs.
#
####################################################################################
# Below are the R functions implementing c-boxes for uniform distributions, assuming
# x[i] ~ uniform(minimum, maximum), which can equivalently be parameterized by the
# distribution's midpoint and width.
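# In all of these functions, the width c-box is represented by Monte Carlo deviates
#   W = (max(x)-min(x)) / B,   B ~ Beta(n-1, 2),   n = length(x),
# and, given W, the midpoint c-box by
#   M = (max(x)-W/2) + (W-(max(x)-min(x))) * U,   U ~ Uniform(0,1),
# so that minimum = M - W/2 and maximum = M + W/2; the functions below simply write
# out this construction.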
nextvalue.uniform <- function(x) {
w=(max(x)-min(x))/beta(length(x)-1,2);
m=(max(x)-w/2)+(w-(max(x)-min(x)))*uniform(0,1);
return(uniform(m-w/2, m+w/2))
}
parameter.uniform.minimum <- function(x) {
w=(max(x)-min(x))/beta(length(x)-1,2);
m=(max(x)-w/2)+(w-(max(x)-min(x)))*uniform(0,1);
return(m-w/2)
}
parameter.uniform.maximum <- function(x) {
w=(max(x)-min(x))/beta(length(x)-1,2);
m=(max(x)-w/2)+(w-(max(x)-min(x)))*uniform(0,1);
return(m+w/2)
}
parameter.uniform.midpoint <- function(x) {
w=(max(x)-min(x))/beta(length(x)-1,2);
return((max(x)-w/2)+(w-(max(x)-min(x)))*uniform(0,1))
}
parameter.uniform.width <- function(x) return((max(x)-min(x))/beta(length(x)-1,2))
uniform <- function(a,b) runif(many, a,b)
# ancillary functions
many = 2000 # number of Monte Carlo deviates per c-box; increase for more accuracy
beta <- function(v,w) if (v==0) rep(0,many) else if (w==0) rep(1,many) else rbeta(many, v, w) # Beta(v,w) deviates; masks base R's beta(); degenerate shapes v=0 or w=0 handled explicitly
env <- function(x,y) c(x,y) # form a p-box by concatenating two sets of deviates (not used in this script)
edf <- function(x, ...) lines(c(min(x),sort(x)),seq(0,1,length.out=1+length(x)),type='s', ...) # add an empirical distribution function to the current plot
plotbox <- function(b,new=TRUE,col='blue',lwd=2,xlim=range(b[is.finite(b)]),ylim=c(0,1),xlab='',ylab='Prob',...) {
edf <- function (x, col, lwd, ...) {
n <- length(x)
s <- sort(x)
lines(c(s[[1]],s[[1]]),c(0,1/n),lwd=lwd,col=col,...)
for (i in 2:n) lines(c(s[[i-1]],s[[i]],s[[i]]),c(i-1,i-1,i)/n,col=col,lwd=lwd,...)
}
b = ifelse(b==-Inf, xlim[1] - 10, b)
b = ifelse(b==Inf, xlim[2] + 10, b)
if (new) plot(NULL, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
if (length(b) < many) edf(b,col,lwd) else
edf(c(min(b),max(b),b[1:min(length(b),many)]),col,lwd)
if (many < length(b)) edf(c(min(b),max(b),b[(many+1):length(b)]),col,lwd)
}
scalar = function(s,...) abline(v=s,...)
# example
par(mfrow=c(2,1))
n = 8
a = 0
b = 10
width = (b - a)
midpoint = (a + b) / 2
x = runif(n,a,b)
L = -10
w = parameter.uniform.width(x)
m = parameter.uniform.midpoint(x)
plot(NULL,xlim=c(L,20),ylim=c(0,1))
plotbox(w,FALSE); scalar(midpoint,col='red',lwd=2); text(L, 0.9, 'midpoint',col='red')
plotbox(m,FALSE,col='red'); scalar(width,col='blue',lwd=2); text(L, 0.75, 'width',col='blue')
m = parameter.uniform.minimum(x)
M = parameter.uniform.maximum(x)
plot(NULL,xlim=c(L,20),ylim=c(0,1))
edf(x,lwd=3)
edf(m,lwd=3,col=3); scalar(a,col=3,lwd=2); text(L,0.9,'minimum',col=3)
edf(M,lwd=3,col=6); scalar(b,col=6,lwd=2); text(L,0.75,'maximum',col=6)
scalar(max(x),col='gray',lty=3)
scalar(min(x),col='gray',lty=3)
# The colored c-boxes are estimating the true parameter values which are
# depicted as the vertical lines of the same color.
# Notice that the estimates of width and maximum are different, even when
# the minimum a=0. For instance, the blue c-box for width does not start
# at max(x) like the magenta c-box for the maximum.
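# A quick numeric check of that claim (a sketch using the x, w, and M computed above):
# the width c-box starts at the sample range max(x)-min(x), whereas the maximum
# c-box starts at the sample maximum max(x).
cat('left edge of width c-box  ', signif(min(w),4), ' vs sample range  ', signif(max(x)-min(x),4), '\n')
cat('left edge of maximum c-box', signif(min(M),4), ' vs sample maximum', signif(max(x),4), '\n')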
#########################################################################
# comparison with frequentist point estimators of the maximum: MLE (maximum likelihood),
# MVUE (minimum-variance unbiased), and MMSEE (minimum mean-squared-error) estimators
if (a!=0) cat('Formulas for MLE, MVUE, MMSEE estimates are wrong\n') else {
t = 10:1000/100 # used only by the commented-out diagnostic plot below
t[t<max(x)] = 0
#plot(t,log(1/(t^some)), type='l')
MLE = max(x)
MVUE = ((n+1)/n)*max(x)
MMSEE = ((n+2)/(n+1))*max(x)
scalar(MLE,col='gray',lwd=3)
scalar(MVUE,col='gray',lwd=3)
scalar(MMSEE,col='gray',lwd=3)
cat('MLE',MLE,'\nMVUE',MVUE,'\nMMSEE', MMSEE,'\n')
}
# If shown, the gray vertical lines depict several frequentist estimators
# for the maximum.
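# A small Monte Carlo sketch (using the same n and b as above, with a = 0) comparing
# the average of each point estimator to the true maximum b: the MVUE should come out
# unbiased, the MLE biased low, and the MMSEE in between.
reps = 5000
mx = replicate(reps, max(runif(n, 0, b)))
cat('true maximum', b,
    '\nmean MLE    ', signif(mean(mx),4),
    '\nmean MVUE   ', signif(mean(((n+1)/n)*mx),4),
    '\nmean MMSEE  ', signif(mean(((n+2)/(n+1))*mx),4), '\n')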
#########################################################################
# repeated sampling plots for MIDPOINT and WIDTH
shomid = TRUE # show the midpoint c-boxes and the true midpoint
showid = TRUE # show the width c-boxes and the true width
par(mfrow=c(5,5))
nn = round(runif(5*5,2,25))
mm = c(-20,-1, 0, 1, 20)
ww = c(0.01, 0.1, 1, 5, 10)
for (i in 1:5) for (j in 1:5) {
a = mm[[i]] - ww[[j]]/2 # true minimum, a distribution parameter
b = mm[[i]] + ww[[j]]/2 # true maximum, a distribution parameter
n = nn[[(i-1)*5 + j]] # sample size
if (shomid & showid)
xlim=c(0,1.5*ww[[j]], mm[[i]]-ww[[j]], mm[[i]]+ww[[j]]) else
if (shomid)
xlim=c(mm[[i]]-ww[[j]], mm[[i]]+ww[[j]]) else
if (showid)
xlim=c(0,1.5*ww[[j]]) else
cat('Nothing to show\n')
plot(NULL,xlim=range(xlim),ylim=c(0,1))
if (shomid) lines(rep(mm[[i]],2),c(0,1),col='red',lwd=3)
if (showid) lines(rep(ww[[j]],2),c(0,1),col='blue',lwd=3)
title(paste(n,'~U(',a,',',b,')'))
for (k in 1:10) {
x = runif(n,a,b) # some sample data
m = parameter.uniform.midpoint(x)
w = parameter.uniform.width(x)
if (shomid) edf(m,lwd=2)
if (showid) edf(w,col='gray',lwd=2)
}
if (shomid) lines(rep(mm[[i]],2),c(0,1),col='red',lwd=1)
if (showid) lines(rep(ww[[j]],2),c(0,1),col='blue',lwd=1)
}
# the true midpoint is red, estimated by the black c-boxes
# the true width is blue, estimated by the gray c-boxes
# the estimates are often very close to the true values
#########################################################################
# repeated sampling plots for NEXTVALUE
par(mfrow=c(3,3))
nn = c(2,3,4,5,7,10,15,20,200);
a = 1
b = 3
for (n in nn) {
plot(c(a,b),c(0,1), xlim=c(a-(a+b)/2,b+(a+b)/2),ylim=c(0,1),type='l',col='red')
title(paste('n =',n))
for (i in 1:500) {
x = runif(n,a,b)
nv = nextvalue.uniform(x)
edf(nv)
}
lines(c(a,b),c(0,1), col='red',lwd=3)
}
#########################################################################
# repeated sampling plots for NEXTVALUE for just TWO SAMPLE POINTS
par(mfrow=c(1,1))
n = 2
a = 1
b = 3
many=10000
plot(c(a,b),c(0,1), xlim=c(a-(a+b)/2,b+(a+b)/2),ylim=c(0,1),type='l',col='red')
title(paste('n =',n))
for (i in 1:100) {
x = runif(n,a,b)
nv = nextvalue.uniform(x)
edf(nv)
}
lines(c(a,b),c(0,1), col='red',lwd=3)
#########################################################################
# the data sets yielding the extremes are obvious
many=10000
plot(c(a,b),c(0,1), xlim=c(a-(a+b)/2,b+(a+b)/2),ylim=c(0,1),type='l',col='red')
title(paste('n =',n))
for (i in 1:1000) {
x = runif(n,a,b)
nv = nextvalue.uniform(x)
edf(nv)
}
lines(c(a,b),c(0,1), col='red',lwd=3)
x = range(a,b)
nv = nextvalue.uniform(x)
edf(nv, col='magenta', lwd=3)
x = range(a,a)
nv = nextvalue.uniform(x)
edf(nv, col='green', lwd=3)
x = range(b,b)
nv = nextvalue.uniform(x)
edf(nv, col='blue', lwd=3)
#W = 1
#for (i in seq(a,b,length.out=30)) {
# x = range(i, b)
# nv = nextvalue.uniform(x)
# edf(nv, col='yellow', lwd=3)
# msg = paste('x = c(',signif(i,3),',',b,')')
# text(0,0.5,msg)
# Sys.sleep(W)
# text(0,0.5,msg,col='white')
# }
# green x = c(1,1)
# magenta x = c(1,3)
# blue x = c(3,3)
# black x = runif(2,1,3)
# red the true distribution
# if any c-box is above the magenta c-box on the left or below it on the right,
# increase the constant 'many' so the approximation is better
#########################################################################
# Michael's email seems to suggest this formulation for the Singh plot for
# c-boxes for WIDTH of a uniform distribution (one of its two parameters)
some = 1e3 # reducing this to 1e2 makes the roughness of the approximation visible
par(mfrow=c(5,5))
nn = round(runif(5*5,2,25))
mm = c(-20,-1, 0, 1, 20)
ww = c(0.01, 0.1, 1, 4, 8)
for (i in 1:5) for (j in 1:5) {
a = mm[[i]] - ww[[j]]/2
b = mm[[i]] + ww[[j]]/2
n = nn[[(i-1)*5 + j]]
ccfTrk = array(0, some)
for (kk in 1:some) {
x = rbeta( n , 1 , 1 )*(b-a)
qLoc = ( max(x) - min(x) ) / (b-a)
ccfTrk[[kk]] = 1 - qLoc^(n-1) * ( n - (n-1)*qLoc )
}
plot(c(0,1),c(0,1),type='l',col=2)
lines( sort( ccfTrk) , (1:some)/some)
title(paste(n,'~U(',a,',',b,')'))
}
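# A quick sanity check of the closed form used in ccfTrk above: if B ~ Beta(n-1, 2),
# its CDF is P(B <= q) = q^(n-1) * (n - (n-1)*q), so ccfTrk is just 1 - pbeta(qLoc, n-1, 2),
# i.e., the fraction of the width c-box lying below the true width, computed exactly
# rather than by simulation.  A numeric spot check (nchk and qq are arbitrary values):
nchk = 7
qq = c(0.1, 0.3, 0.5, 0.7, 0.9)
cbind(closed.form = 1 - qq^(nchk-1)*(nchk-(nchk-1)*qq), via.pbeta = 1 - pbeta(qq, nchk-1, 2))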
#########################################################################
# Scott converted Michael's rather cryptic code above into the (slower but)
# more transparent computation below
# Singh plots for the c-box for the WIDTH of a uniform distribution
some = 1e3 # increase to show perfection
many = 2000
par(mfrow=c(5,5))
nn = round(runif(5*5,2,25))
mm = c(-20,-1, 0, 1, 20)
ww = c(0.01, 0.1, 1, 4, 8)
for (i in 1:5) for (j in 1:5) {
a = mm[[i]] - ww[[j]]/2
b = mm[[i]] + ww[[j]]/2
n = nn[[(i-1)*5 + j]]
ccf = array(0, some)
for (k in 1:some) {
x = runif(n,a,b) # sample data
w = parameter.uniform.width(x)
ccf[[k]] = sum(w < (b-a)) / many # (b-a) is ww[[j]]
}
plot(c(0,1),c(0,1),type='l',col=2)
lines( sort( ccf) , (1:some)/some)
title(paste(n,'~U(',a,',',b,')'))
}
#########################################################################
# Singh plots for the c-box for the MIDPOINT of a uniform distribution
# which is the other of its two parameters
some = 1e3 # increase to show perfection
par(mfrow=c(5,5))
nn = round(runif(5*5,2,25))
mm = c(-20,-1, 0, 1, 20)
ww = c(0.01, 0.1, 1, 4, 8)
for (i in 1:5) for (j in 1:5) {
a = mm[[i]] - ww[[j]]/2
b = mm[[i]] + ww[[j]]/2
n = nn[[(i-1)*5 + j]]
ccf = array(0, some)
for (k in 1:some) {
x = runif(n,a,b) # sample data
m = parameter.uniform.midpoint(x)
ccf[[k]] = sum(m < mm[[i]]) / many
}
plot(c(0,1),c(0,1),type='l',col=2)
lines( sort( ccf) , (1:some)/some)
title(paste(n,'~U(',a,',',b,')'))
}
#########################################################################
# Can we estimate the minimum and maximum of a uniform distribution too?
# Yes, but we must properly represent the dependence in the calculation.
# Singh plots for a NAIVE c-box for the maximum estimated by convolution
some = 1e3 # increase to show perfection
par(mfrow=c(5,5))
nn = round(runif(5*5,2,25))
mm = c(-20,-1, 0, 1, 20)
ww = c(0.01, 0.1, 1, 4, 8)
for (i in 1:5) for (j in 1:5) {
a = mm[[i]] - ww[[j]]/2
b = mm[[i]] + ww[[j]]/2
n = nn[[(i-1)*5 + j]]
ccf = array(0, some)
for (k in 1:some) {
x = runif(n,a,b) # sample data
m = parameter.uniform.midpoint(x)
w = parameter.uniform.width(x)
mw = m + w/2 # convolution
ccf[[k]] = sum(mw < b) / many
}
plot(c(0,1),c(0,1),type='l',col=2)
lines( sort( ccf) , (1:some)/some)
title(paste(n,'~U(',a,',',b,')'))
}
# these plots show that this approach is not quite correct: the midpoint and width
# c-boxes come from separate calls with independent random draws, so the dependence
# between them is lost when they are convolved
#########################################################################
# a better way to estimate the min and max of uniforms: compute the width and midpoint
# deviates from the same random draws within a single call, so their dependence is
# preserved when forming midpoint - width/2 and midpoint + width/2
# x[i] ~ uniform(minimum, maximum)
parameter.uniform.minimum <- function(x) {n=length(x); w=(max(x)-min(x))/beta(length(x)-1,2); m=(max(x)-w/2)+(w-(max(x)-min(x)))*uniform(0,1); return(m-w/2)}
parameter.uniform.maximum <- function(x) {n=length(x); w=(max(x)-min(x))/beta(length(x)-1,2); m=(max(x)-w/2)+(w-(max(x)-min(x)))*uniform(0,1); return(m+w/2)}
# Singh plot for the c-box for the MAXIMUM of a uniform distribution
some = 1e3 # increase to show perfection
par(mfrow=c(5,5))
nn = round(runif(5*5,2,25))
mm = c(-20,-1, 0, 1, 20)
ww = c(0.01, 0.1, 1, 4, 8)
for (i in 1:5) for (j in 1:5) {
a = mm[[i]] - ww[[j]]/2
b = mm[[i]] + ww[[j]]/2
n = nn[[(i-1)*5 + j]]
ccf = array(0, some)
for (k in 1:some) {
x = runif(n,a,b) # sample data
mw = parameter.uniform.maximum(x)
ccf[[k]] = sum(mw < b) / many
}
plot(c(0,1),c(0,1),type='l',col=2)
lines( sort( ccf) , (1:some)/some)
title(paste(n,'~U(',a,',',b,')'))
}
#########################################################################
# Singh plot for the c-box for the MINIMUM of a uniform distribution
some = 5e3 # increase to show perfection
par(mfrow=c(5,5))
nn = round(runif(5*5,2,25))
mm = c(-20,-1, 0, 1, 20)
ww = c(0.01, 0.1, 1, 4, 8)
for (i in 1:5) for (j in 1:5) {
a = mm[[i]] - ww[[j]]/2
b = mm[[i]] + ww[[j]]/2
n = nn[[(i-1)*5 + j]]
ccf = array(0, some)
for (k in 1:some) {
x = runif(n,a,b) # sample data
mw = parameter.uniform.minimum(x)
ccf[[k]] = sum(mw < a) / many
}
plot(c(0,1),c(0,1),type='l',col=2)
lines( sort( ccf) , (1:some)/some)
title(paste(n,'~U(',a,',',b,')'))
}
######################################################################################
# What is the dependence between the midpoint and width estimator?
x = runif(8,0,1) # sample data (set before generating the deviates below)
many = 15000
# ------------------------------------------------------ parameter.uniform.width -----
width = (max(x)-min(x)) / rbeta(many, length(x)-1,2)
# ------------------------------------------------------ parameter.uniform.midpoint --
w = (max(x)-min(x)) / rbeta(many, length(x)-1,2)
midpoint = (max(x)-w/2)+(w-(max(x)-min(x)))*runif(many)
# ------------------------------------------------------ nextvalue.uniform -----------
w = (max(x)-min(x)) / rbeta(many, length(x)-1,2)
m = (max(x)-w/2)+(w-(max(x)-min(x)))*runif(many)
par(mfrow=c(3,1))
plot(width,midpoint); title('parameter.uniform')
plot(w,m,col='red'); title('nextvalue.uniform')
plot(width,midpoint)
points(w,m,col='red'); title('(superimposed)')
cor(width,midpoint)
cor(w,m)
####################################################################################
# What is the dependence between the minimum and maximum parameters
x = runif(20,0,1)
many = 15000
w=(max(x)-min(x))/beta(length(x)-1,2)
m=(max(x)-w/2)+(w-(max(x)-min(x)))*uniform(0,1)
minimum = m-w/2
w=(max(x)-min(x))/beta(length(x)-1,2) # indep. of earlier w
m=(max(x)-w/2)+(w-(max(x)-min(x)))*uniform(0,1) # indep. of earlier m
maximum = m+w/2
w=(max(x)-min(x))/beta(length(x)-1,2) # indep. of earlier w
m=(max(x)-w/2)+(w-(max(x)-min(x)))*uniform(0,1) # indep. of earlier m
m1 = m-w/2
m2 = m+w/2
par(mfrow=c(3,1))
plot(minimum,maximum); title('parameter.uniform')
plot(m1,m2,col='red'); title('nextvalue.uniform')
plot(m1,m2,col='red'); title('(superimposed)')
points(minimum,maximum)
cor(minimum,maximum)
cor(m1,m2)
# the correlations are quite different, but the graphs only look really different for small n
######################################################################################
# the following function alters the dependence of m and w inside the nextvalue function
bad.nextvalue.uniform <- function(x) {
n=length(x);
w =(max(x)-min(x))/beta(length(x)-1,2);
ww=(max(x)-min(x))/beta(length(x)-1,2); # independent of w
m =(max(x)-w/2)+(w-(max(x)-min(x)))*uniform(0,1); # midpoint depends on w
return(uniform(m-ww/2, m+ww/2)) # width is given as ww
}
many = 50000
par(mfrow=c(3,1))
for (n in c(2,4,6)) {
x = runif(n,0,1)
nv = nextvalue.uniform(x)
bnv = bad.nextvalue.uniform(x)
plotbox(bnv,xlim=c(-2,3),col='red')
plotbox(nv,col='blue',new=FALSE)
title(paste('n =',length(x)))
}
# seems only to matter when n is very small, and even then, it doesn't matter much
# Scott is only guessing that bad.nextvalue.uniform is bad...is there an analog of
# the Singh plot for the nextvalue distributions with which we can check?
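# One possible analog (only a sketch, not a vetted procedure): a probability-integral-
# transform style check.  For each replicate, simulate data, form the nextvalue
# distribution, and record how much of its mass lies below a fresh draw from the true
# distribution; if the predictive distribution is well calibrated, these recorded
# values should be roughly uniform on [0,1], i.e., close to the diagonal below.
many = 2000 # reset from the larger value used above
some = 1000
a = 1; b = 3; n = 5
pit = array(0, some)
for (k in 1:some) {
  x = runif(n, a, b) # sample data
  nv = nextvalue.uniform(x)
  z = runif(1, a, b) # a fresh 'next value' from the true distribution
  pit[[k]] = sum(nv < z) / many # mass of the nextvalue distribution below z
}
par(mfrow=c(1,1))
plot(c(0,1), c(0,1), type='l', col=2)
lines(sort(pit), (1:some)/some)
title('PIT-style check of nextvalue.uniform (sketch)')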
####################################################################################
# compare to the dependence between a normal's mu and sigma
many=10000
x = rnorm(2,10,1)
n=length(x)
xlim = c(0,20) # depends on n, mu, sigma
ylim = c(0,15) # depends on n, mu, sigma
mu = mean(x)+sd(x)*rt(many,n-1)/sqrt(n)
sigma = sqrt(var(x)*(n-1)/rchisq(many,n-1))
s = sqrt(var(x)*(n-1)/rchisq(many,n-1))
m = rnorm(many,mean(x), s/sqrt(n)) # uses s
par(mfrow=c(3,1))
plot(mu,sigma,xlim=xlim,ylim=ylim)
plot(m,s,col='red',xlim=xlim,ylim=ylim)
plot(mu,sigma,xlim=xlim,ylim=ylim)
points(m,s,col='red')
cor(mu,sigma) # uncorrelated
cor(m,s) # strongly dependent
####################################################################################
# alter the dependence between a normal's mu and sigma
nextvalue.normal <- function(x) {
n <- length(x);
return(mean(x) + sd(x) * student(n - 1) * sqrt(1 + 1 / n))
}
student <- function(v) rt(many, v)
nv = nextvalue.normal(x)
n1 = rnorm(many, m, s) # preserves dependence
n2 = rnorm(many, mu, sigma) # alters dependence
par(mfrow=c(1,1))
xlim = range(c(nv,n1,n2)) # data-driven range (overridden by the fixed range on the next line)
xlim = c(5,15)
plot(NULL,xlim=xlim,ylim=c(0,1))
edf(nv,col='gray')
edf(n1,col='blue')
edf(n2,col='red')
signif(var(nv),2)
signif(var(n1),2)
signif(var(n2),2)
# gray and blue calculations preserve dependence
# red calculation alters dependence, and may be erroneous