# appendixA.jl

# Appendix A


#import Pkg

#Pkg.add("Plots")

#Pkg.add("Optim")

#Pkg.add("StatsKit")

#Pkg.add("GLM")

#Pkg.add("DataFrames")

#Pkg.add("StatsPlots")

#Pkg.add("StatFiles")

#Pkg.add("NLSolversBase")

#Pkg.add("Distributions")
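
#Pkg.add("CSV")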

using Random

using Statistics

using Plots

using Optim

using NLSolversBase

using StatsKit

using CSV

using GLM

using DataFrames

using LinearAlgebra

using StatsPlots

using StatFiles

using Distributions




# A.1

# A.2

# A.2.1

# A.2.2
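# Draw a sample of N = 10 from a normal distribution with mean μ = -3 and
# standard deviation σ = 5, then compare its kernel density estimate to one
# based on 10,000 draws from the same distribution.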


rng = MersenneTwister(123456789)

N = 10

μ = -3

σ = 5

x = randn!(rng, zeros(N)).*σ .+ μ

dens_x = kde(x)


mean(x)

std(x)


plot(dens_x)

plot!(kde(randn!(rng, zeros(10000)).*σ .+ μ))


# A.2.3
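# Simulate M = 500 samples of size N = 10, compute each sample's mean, and
# plot a histogram of the sample means with a vertical line at the true mean.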

rng = MersenneTwister(123456789)

M = 500

sample_est = zeros(M, N)

for m = 1:M

sample_est[m,1:end] = randn!(rng, zeros(N)).*σ .+ μ

end


means = zeros(M)

for m = 1:M

means[m] = mean(sample_est[m,1:end])

end


histogram(means)

vline!(fill(-3,M))


# A.2.4
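# Law of large numbers: with N = 100,000 draws the sample mean is very close
# to the population mean μ = -3.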

rng = MersenneTwister(123456789)

N = 100000

x = randn!(rng, zeros(N)).*σ .+ μ

mean(x)


# A.2.5
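# Central limit theorem: for sample sizes 10, 100, 1,000, and 10,000, compute
# M = 1,000 sample means, standardize them, and compare the kernel densities.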

rng = MersenneTwister(123456789)

M = 1000

sample_means = zeros(M,4)

for m = 1:M

sample_means[m,1] = mean(randn!(rng, zeros(10)).*σ .+ μ)

sample_means[m,2] = mean(randn!(rng, zeros(100)).*σ .+ μ)

sample_means[m,3] = mean(randn!(rng, zeros(1000)).*σ .+ μ)

sample_means[m,4] = mean(randn!(rng, zeros(10000)).*σ .+ μ)

end


plot(kde((sample_means[1:end,1] .- μ).*(√9/σ)))

plot!(kde((sample_means[1:end,2] .- μ).*(√99/σ)))

plot!(kde((sample_means[1:end,3] .- μ).*(√999/σ)))

plot!(kde((sample_means[1:end,4] .- μ).*(√9999/σ)))

# Note that here I have adjusted for degrees of freedom, scaling by √(N-1) rather than √N.


# A.2.6
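# For each of M = 5 samples of size N = 10, store the sample mean and its
# estimated standard error, then plot the normal approximation to the
# sampling distribution implied by each sample.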

rng = MersenneTwister(123456789)

M = 5

N = 10

sample_means = zeros(M,2)

for m = 1:M

x₁ = randn!(rng, zeros(N)).*σ .+ μ

sample_means[m,1:end] = [mean(x₁); std(x₁)/(√(N-1))]

end


plot(kde(randn!(rng, zeros(1000)).*sample_means[1,2] .+ sample_means[1,1]))

plot!(kde(randn!(rng, zeros(1000)).*sample_means[2,2] .+ sample_means[2,1]))

plot!(kde(randn!(rng, zeros(1000)).*sample_means[3,2] .+ sample_means[3,1]))

plot!(kde(randn!(rng, zeros(1000)).*sample_means[4,2] .+ sample_means[4,1]))

plot!(kde(randn!(rng, zeros(1000)).*sample_means[5,2] .+ sample_means[5,1]))


# A.2.8
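# Bootstrap: draw one sample of N = 10, then resample from it with
# replacement M = 1,000 times; compare the distribution of the bootstrap
# means with the sampling distribution implied by the true μ and σ.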

rng = MersenneTwister(123456789)

Random.seed!(123456789)

N = 10

x₁ = randn!(rng, zeros(N)).*σ .+ μ

M = 1000

bs_mean = zeros(M)

for m = 1:M

bs_mean[m] = mean(rand(x₁, N))

end


plot(kde(bs_mean))

plot!(kde(randn!(rng, zeros(100000)).*(σ/√(N-1)) .+ μ))


# A.2.9
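# Simulated critical value for a one-sided test at α = 0.05 that the mean is
# zero, plotted against the simulated null distribution of the sample mean;
# the last line gives the simulated one-sided p-value.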

rng = MersenneTwister(123456789)

α = 0.05

cr = std(x).*quantile(randn!(rng, zeros(10000)), α)./√9

# Again adjusting for degrees of freedom.


plot(kde(randn!(rng, zeros(10000)).*(std(x)/√9)))

vline!(fill(cr,10))

vline!(fill(mean(x),10))


mean(randn!(rng, zeros(10000)) .< √9*mean(x)/std(x))


# A.3

# A.3.1

# A.3.2

# A.3.3
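# Bayesian estimation of the mean: logϕ is the log of the standard normal
# density and log_norm is the log-likelihood of a normal sample. The
# log-likelihood of x₁ is evaluated over a grid of candidate means Θ and
# combined with the prior g via Bayes' rule to form the posterior γ.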


# log of the standard normal density: -z²/2 - log(2π)/2

logϕ = function(z)

-z.^2 ./2 .- log(2*π)/2

end


log_norm = function(x, μ, σ)

z = (x .- μ)./σ

sum(logϕ.(z) .- log(σ))

end


Θ = ((1:100).*15)./100 .- 10

g = (1:100)./100

h = []

for i = 1:100

h = [h; log_norm(x₁, Θ[i], 5)]

end

γ = exp.(h .+ log.(g))./sum(exp.(h .+ log.(g)))


plot(Θ, γ)

vline!(fill(-3,100))

vline!(fill(mean(x₁), 100))


# A.4

# A.4.1

# A.4.2

# A.4.3
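# Estimate the prior nonparametrically from the 500 simulated samples:
# evaluate each sample's log-likelihood over the grid Θ, start from a uniform
# prior, and iterate between computing each sample's posterior and averaging
# the posteriors into an updated prior until the change falls below ϵ.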


Θ = (maximum(sample_est) - minimum(sample_est)).*((1:1000)./1000) .+ minimum(sample_est)

H = zeros(500,1000)

for i = 1:500

x = sample_est[i,1:end]

for j = 1:1000

H[i,j] = log_norm(x, Θ[j], 5)

end

end


g₀ = fill(1/1000, 1000)

log_G0 = log.(zeros(500, 1000) .+ 1/1000)

γ₀ = exp.(H + log_G0)

for i = 1:500

γ₀[i,1:end] = γ₀[i,1:end]./sum(γ₀[i,1:end])

end

g₁ = []

for i = 1:1000

g₁ = [g₁; sum(γ₀[1:end,i])]

end

g₁ = g₁./sum(g₁)


log_G1 = log.(g₁)

for i = 2:500

log_G1 = hcat(log_G1, log.(g₁))

end

γ₁ = exp.(H + log_G1')

for i = 1:500

γ₁[i,1:end] = γ₁[i,1:end]./sum(γ₁[i,1:end])

end

g₂ = []

for i = 1:1000

g₂ = [g₂; sum(γ₁[1:end,i])]

end

g₂ = g₂./sum(g₂)


plot(Θ, g₀)

plot!(Θ, g₁)

plot!(Θ, g₂)


ϵ = 1e-3 # small number

g_old = g₁

g_new = g₂

diff = sum(abs.(g_old - g_new))

while diff > ϵ

g_old = g_new

log_G = log.(g_old)

for i = 2:500

log_G = hcat(log_G, log.(g_old))

end

γ = exp.(H + log_G')

for i = 1:500

γ[i,1:end] = γ[i,1:end]./sum(γ[i,1:end])

end

g_new = []

for i = 1:1000

g_new = [g_new; sum(γ[1:end,i])]

end

g_new = g_new./sum(g_new)

diff = sum(abs.(g_old - g_new))

println(diff)

end


μ_est = []

for i = 1:500

μ_est = [μ_est; mean(sample_est[i,1:end])]

end

J = 100000

g_dist = []

for i = 1:1000

if round(J*g_new[i]) > 0

g_dist = [g_dist; fill(Θ[i], Int(round(J*g_new[i])))]

end

end


plot(density(g_dist))

vline!(fill(-3, 100))

plot!(density!(μ_est))



# A.4.4
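# Posterior for the first simulated sample x₁ using the estimated prior g_new
# in place of the uniform prior.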


h_x1 = []

for i = 1:1000

h_x1 = [h_x1; log_norm(x₁, Θ[i], 5)]

end


γ_x1 = exp.(h_x1 .+ log.(g_new))./sum(exp.(h_x1 .+ log.(g_new)))


J = 100000

g_dist_x1 = []

for i = 1:1000

g_dist_x1 = [g_dist_x1; fill(Θ[i], Int(round(J*γ_x1[i])))]

end


plot(density(g_dist_x1))

vline!(fill(-3, 100))

vline!(fill(mean(x₁), 100))



# A.5

# A.5.1

# A.5.2
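# log_bin is the binomial log-likelihood of observing a batting average of p̂
# in N at-bats when the true average is p. It is used to form the posterior
# for a player ("JP") who went 3 for 3, under a uniform prior over averages
# 0.001 to 0.999.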


log_bin = function(N, p̂, p)

K = Int(round(p̂*N))

return log(binomial(big(N), big(K))) + K*log(p) + (N-K)*log(1 - p)

end


Θ = (1:999)./1000

log_g = fill(log(1/1000), 999)

h_jp = []

for i = 1:999

h_jp = [h_jp; log_bin(3, 1, Θ[i])]

end

γ_jp = exp.(h_jp + log_g)./sum(exp.(h_jp + log_g))


J = 100000

γ_dist_jp = []

for i = 1:999

γ_dist_jp = [γ_dist_jp; fill(i, Int(round(J*γ_jp[i])))]

end



# posterior mean

γ_jp'*Θ


plot(density(γ_dist_jp))

vline!(fill(342,100))

xlabel!("batting average")



sum(γ_jp[342:999])


# A.5.3
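# Estimate the prior distribution of batting averages from M = 1,000
# player-seasons (at-bats AB and averages AVE in bat_ave.csv) using the same
# iterative procedure as in A.4.3.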

M = 1000

path = "ENTER PATH HERE"

x = CSV.read(string(path,"/bat_ave.csv"), DataFrame)

# data set created from Lahman's data


H = zeros(M, 999)

for m = 1:M

for i = 1:999

H[m,i] = log_bin(x.AB[m], x.AVE[m], Θ[i])

end

#println(m)

end


log_G = fill(log(1/1000), M, 999)

γ₀ = exp.(H + log_G)

for m = 1:M

γ₀[m,1:end] = γ₀[m,1:end]./sum(γ₀[m,1:end])

end

g₁ = []

for i = 1:999

g₁ = [g₁; sum(γ₀[1:end,i])]

end

g₁ = g₁/sum(g₁)


ϵ = 0.001

g_old = exp.(log_g)

g_new = g₁

diff = sum(abs.(g_new - g_old))

while diff > ϵ

g_old = g_new

log_G = log.(g_old)

for i = 2:M

log_G = hcat(log_G, log.(g_old))

end

Γ = exp.(H + log_G')

for m = 1:M

Γ[m,1:end] = Γ[m,1:end]./sum(Γ[m, 1:end])

end

g_new = []

for i = 1:999

g_new = [g_new; sum(Γ[1:end,i])]

end

g_new = g_new./sum(g_new)

diff = sum(abs.(g_old - g_new))

println(diff)

end


# A.5.4
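# Recompute JP's posterior using the estimated MLB prior in place of the
# uniform prior; report the posterior mean, the probability that his true
# average is at least .342, and plot the prior against the posterior.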

γ_jp2 = exp.(h_jp + log.(g_new))

γ_jp2 = γ_jp2./sum(γ_jp2)

J = 100000

g_dist_mlb = []

for i = 1:999

g_dist_mlb = [g_dist_mlb; fill(Θ[i], Int(round(J*g_new[i])))]

end

γ_dist_jp2 = []

for i = 1:999

γ_dist_jp2 = [γ_dist_jp2; fill(Θ[i], Int(round(J*γ_jp2[i])))]

end


# posterior mean

γ_jp2'*Θ


sum(γ_jp2[342:999])


density(g_dist_mlb, label="Prior")

density!(γ_dist_jp2, label="JP's Posterior")

vline!(fill(0.342, 100))

xlabel!("batting average")



# A.6

# A.6.1

# A.6.2

# A.6.3

# A.6.4


# A.7