# chapter5.jl

# Chapter 5


#import Pkg

#Pkg.add("Plots")

#Pkg.add("Optim")

#Pkg.add("StatsKit")

#Pkg.add("GLM")

#Pkg.add("DataFrames")

#Pkg.add("StatsPlots")

#Pkg.add("StatFiles")

#Pkg.add("NLSolversBase")

#Pkg.add("Distributions")

using Random

using Statistics

using Plots

using Optim

using NLSolversBase

using StatsKit

using CSV

using GLM

using DataFrames

using LinearAlgebra

using StatsPlots

using StatFiles

using Distributions


# 5.1


# 5.2

# 5.2.1

# 5.2.2


rng = MersenneTwister(123457689)

N = 1000

υ = randn!(rng, zeros(N)).*3 .+ 1


p = 2

mean(υ .- p .> 0)

1 - cdf(Normal(1, 3), p)


# 5.2.3

p = rand!(rng, zeros(9)).*20 .- 10

s = Vector{Union{Missing, Float64}}(missing, 9)

for i = 1:9

s[i] = mean(υ .- p[i] .> 0)

end


a = ecdf(υ)

scatter(p, 1 .- s)

scatter!(a) # overlay the empirical CDF via the StatsPlots ECDF recipe

# note the R code has survival function rather than the cdf.
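
# To mirror the R figure's survival function S(p) = 1 - F(p), the
# complementary ecdf can be plotted directly; p_grid is an illustrative
# grid choice, not part of the original code.

p_grid = range(-10, 10, length=100)

scatter(p, s)

plot!(p_grid, 1 .- a.(p_grid))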


# 5.3.1

# 5.3.2

N = 1000

a = fill(2, N)

b = -3

υ = randn!(rng, zeros(N))

x = rand!(rng, zeros(N))

y_star = a + b.*x + υ

y = y_star .> 0

data1 = DataFrame(y=y, x=x)

lm1 = lm(@formula(y ~ x), data1)


scatter(x, y)

scatter!(x, coef(lm1)[1] .+ coef(lm1)[2].*x)

scatter!(x, a + b.*x)
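
# For reference, the true choice probability implied by the latent model is
# P(y = 1 | x) = Φ(2 + b·x) with υ standard normal; x_grid is an
# illustrative grid for drawing the curve.

x_grid = range(0, 1, length=100)

plot!(x_grid, cdf.(Normal(), 2 .+ b.*x_grid))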


# 5.3.3


# 5.4

# 5.4.1

N = 100

p = 0.367

Head = rand!(rng, zeros(N)) .< p

mean(Head)


factorial(5)/(factorial(3)*factorial(2))


factorial(big(100))/(factorial(big(34))*factorial(big(66)))*(0.5^100)
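
# The same probability can be cross-checked with Base's binomial and the
# Binomial pmf from Distributions; both should agree with the line above.

binomial(big(100), big(34))*(big(0.5)^100)

pdf(Binomial(100, 0.5), 34)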


# 5.4.2

binom = function(p, N, p̂)

return (p̂*N*log(p) + (1 - p̂)*N*log(1 - p))

end


binom_wrap = function(p)

return -binom(p, N, mean(Head))

end



a = optimize(binom_wrap, 0, 1)

a.minimum

a.minimizer
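
# The Bernoulli MLE has the closed form p̂ = mean(Head), so the numerical
# optimizer should land on (essentially) the sample mean; the tolerance
# below is an illustrative choice.

isapprox(a.minimizer, mean(Head); atol=1e-4)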


p = 1:999 # stop short of p = 1 to avoid log(0) in the likelihood

log_lik = binom.(p./1000, N, mean(Head))


scatter(p./1000, log_lik)

vline!([mean(Head), 0.367])


# 5.4.3

# 5.4.4

# note that the code here is quite a bit different from the R code.


# standard normal density

ϕ = function(z)

exp.(-z.^2 ./ 2)./√(2π)

end


# log standard normal density

logϕ = function(z)

-z.^2 ./ 2 .- log(2π)/2

end
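
# Quick sanity check against Distributions.jl: with the corrected formulas,
# ϕ and logϕ should match pdf and logpdf of the standard normal.

ϕ(1.0) ≈ pdf(Normal(), 1.0)

logϕ(1.0) ≈ logpdf(Normal(), 1.0)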


ols_ml = function(par, y, X)

σ = exp(par[1])

β = par[2:3]

z = (y - X*β)./σ

return -sum(logϕ.(z) .- log(σ))

end


ols_ml_wrap = function(par)

return ols_ml(par, y, X)

end


global y = y_star

global X = hcat(ones(length(y)), data1.x)

init = [0.0; 2.0; -3.0]

a = optimize(ols_ml_wrap, init)

exp(a.minimizer[1])

a.minimizer[2:3]
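
# Cross-check: maximizing this likelihood is equivalent to OLS on the
# latent outcome, so lm on y_star should give nearly the same slopes;
# ols_check is just an illustrative name.

ols_check = lm(@formula(y_star ~ x), DataFrame(y_star=y_star, x=data1.x))

coef(ols_check)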


# 5.4.5


# 5.4.6


Φ = function(v)

cdf(Normal(), v)

end


logΦ = function(v)

logcdf(Normal(), v)

end


probit = function(β, y, X)

return (1 .- y).*logΦ.(-X*β) + y.*logΦ.(X*β)

end


probit_wrap = function(β)

return -sum(probit(β, y, X))

end


global y = data1.y

global X = hcat(ones(length(y)), data1.x)


a = optimize(probit_wrap, [2.0, -3.0])

a.minimizer


# 5.4.7


glm1 = glm(@formula(y ~ x), data1, Binomial(), ProbitLink())
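
# Side-by-side check: the glm fit should agree with the hand-rolled probit
# ML estimates above up to optimizer tolerance.

hcat(coef(glm1), a.minimizer)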



# 5.5.

# 5.5.1

# 5.5.2

# 5.5.3


Random.seed!(123456789)

N = 5000

X_A = hcat(ones(N), rand(rng, N), rand(rng, N))

X_B = hcat(ones(N), rand(rng, N), rand(rng, N))

β = [1; -2; 3]


# Probit

υ_A = randn(rng, N)

y = (X_A .- X_B)*β + υ_A .> 0

data1 = DataFrame(y = y, x₁=X_A[1:end,2] .- X_B[1:end,2], x₂=X_A[1:end,3] .- X_B[1:end,3])

glm1 = glm(@formula(y ~ x₁ + x₂), data1, Binomial(), ProbitLink())


# Logit

υ_A = rand(Gumbel(), N)

υ_B = rand(Gumbel(), N)

y = (X_A .- X_B)*β .+ υ_A .- υ_B .> 0

data2 = DataFrame(y = y, x₁=X_A[1:end,2] .- X_B[1:end,2], x₂=X_A[1:end,3] .- X_B[1:end,3])

glm2 = glm(@formula(y ~ x₁ + x₂), data2, Binomial(), LogitLink())
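
# Since the difference of two independent standard Gumbel draws is standard
# logistic, the logit fit should recover the slopes -2 and 3 up to sampling
# noise (the intercept is 0 because the constant columns cancel in X_A .- X_B).

coef(glm2)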


# 5.6

# 5.6.1

# 5.6.2

# 5.6.3


biprobit = function(β, ρ, y, W₁, W₂, K)

rng = MersenneTwister(123457689) # re-seed so the simulated draws stay fixed across optimizer calls

ϵ = 1e-10

N = size(y)[1]

υ = randn!(rng, zeros(K))

p_00 = zeros(N)

p_10 = zeros(N)

υ_k0 = zeros(N)

for k = 1:K

υ_k0 = υ[k] .< -W₁*β

p_00 = p_00 .+ υ_k0.*Φ.((-W₂*β .- ρ*υ[k])./√(1 - ρ^2))

p_10 = p_10 .+ (1 .- υ_k0).*Φ.(((W₁ - W₂)*β .+ (1 - ρ)*υ[k])./√(1 - ρ^2))

end

# log-likelihood over the three feasible cells; (1,1) never occurs because
# the two outcomes are mutually exclusive by construction
loglik = (y[:,1] .== 0).*(y[:,2] .== 0).*log.(p_00./K .+ ϵ) +
    (y[:,1] .== 1).*(y[:,2] .== 0).*log.(p_10./K .+ ϵ) +
    (y[:,1] .== 0).*(y[:,2] .== 1).*log.(1 .- p_00./K .- p_10./K .+ ϵ)

end


biprobit_wrap = function(par)

ρ = exp(par[1])/(1 + exp(par[1]))

β = par[2:end]

-sum(biprobit(β, ρ, y, W₁, W₂, K))

end


# 5.6.4

# 5.6.5


f_logit = function(β, y, W₁, W₂)

ϵ = 1e-20

p_10 = exp.(W₁*β)./(1 .+ exp.(W₁*β) .+ exp.(W₂*β))

p_01 = exp.(W₂*β)./(1 .+ exp.(W₁*β) .+ exp.(W₂*β))

p_00 = 1 .- p_10 .- p_01

loglik = (y[:,1] .== 0).*(y[:,2] .== 0).*log.(p_00 .+ ϵ) +
    (y[:,1] .== 1).*(y[:,2] .== 0).*log.(p_10 .+ ϵ) +
    (y[:,1] .== 0).*(y[:,2] .== 1).*log.(p_01 .+ ϵ)

end


f_logit_wrap = function(β)

-sum(f_logit(β, y, W₁, W₂))

end


# 5.6.6

Random.seed!(123456789)

N = 1000

μ = [0; 0]

ρ = 0.1

Σ = hcat([1; ρ], [ρ; 1])

Υ = rand(MvNormal(μ, Σ), N)'

X₁ = hcat(rand!(rng, zeros(N)), rand!(rng, zeros(N)))

X₂ = hcat(rand!(rng, zeros(N)), rand!(rng, zeros(N)))

a = fill(-1, N, 2)

b = -3

c = 4

U = a + b.*X₁ + c.*X₂ + Υ

global y = hcat((U[:,1] .> 0).*(U[:,1] .> U[:,2]),
    (U[:,2] .> 0).*(U[:,2] .> U[:,1]))

init = [log(0.1); -1; -3; 4] # first entry parameterizes ρ via exp(x)/(1 + exp(x))

global W₁ = hcat(ones(N), X₁[1:end,1], X₂[1:end,1])

global W₂ = hcat(ones(N), X₁[1:end,2], X₂[1:end,2])

global K = 100


a₁ = optimize(biprobit_wrap, init)

a₁.minimizer[2:end]

exp(a₁.minimizer[1])/(1 + exp(a₁.minimizer[1]))
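
# The recovered coefficients should be near the data-generating values
# (a, b, c) = (-1, -3, 4), with the transformed first parameter near ρ = 0.1.

hcat([-1; -3; 4], a₁.minimizer[2:end])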


b₁ = optimize(f_logit_wrap, init[2:end])

b₁.minimizer


# 5.7

# 5.7.1

path = "ENTER PATH HERE"

x = CSV.read(joinpath(path, "hhpub.csv"), DataFrame)

choice = Vector{Union{Missing, String}}(missing, 129696)

home = Vector{Union{Missing, Float64}}(missing, 129696)

income = Vector{Union{Missing, Float64}}(missing, 129696)

density = Vector{Union{Missing, Float64}}(missing, 129696)

index = Int[]

for i = 1:129696

if x.CAR[i] == 1

choice[i] = "car"

end

if x.BUS[i] == 1

choice[i] = "bus"

end

if x.TRAIN[i] == 1

choice[i] = "train"

end

if x.HOMEOWN[i] == 1

home[i] = 1

elseif x.HOMEOWN[i] > 1

home[i] = 0

end

if x.HHFAMINC[i] > 0

income[i] = x.HHFAMINC[i]

end

if x.HTPPOPDN[i] > 0

density[i] = x.HTPPOPDN[i]/1000

end

if x.WRKCOUNT[i] > 0 && (x.MSACAT[i] == 1 || x.MSACAT[i] == 2)

push!(index, i) # push! mutates in place; reassigning a global inside a top-level loop fails in soft scope

end

#println(i)

end

car1 = choice .== "car"

train1 = choice .== "train"

urban1 = x.URBAN .== 1

rail = x.RAIL .== 1


x1 = DataFrame(car1=car1[index], train1=train1[index], home=home[index],
    HHSIZE=x.HHSIZE[index], income=income[index], urban1=urban1[index],
    density=density[index], MSACAT=x.MSACAT[index], rail=rail[index])

x1 = dropmissing(x1)

# Note: in the R code this data frame is named y, which causes confusion below.


vars = ["car1", "train1", "home", "HHSIZE", "income", "urban1", "density"]

summ_tab = Array{Union{Missing,Float64,String}}(missing,length(vars)+1,3)

summ_tab[1,2:end] = ["Rail" "No Rail"]

summ_tab[2:end,1] = vars

for i in 1:length(vars)

summ_tab[i+1,2] = mean(x1[!, vars[i]][x1.rail .== 1])

summ_tab[i+1,3] = mean(x1[!, vars[i]][x1.rail .== 0])

end

summ_tab
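
# Optional, cosmetic: the same summary as a DataFrame for nicer printing
# (not in the R original).

DataFrame(var=vars, rail=summ_tab[2:end,2], no_rail=summ_tab[2:end,3])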


# 5.7.2


# without rail

x1_nr = x1[x1.rail .== 0, :]

glm_nr = glm(@formula(car1 ~ home + HHSIZE + income + urban1 + density), x1_nr, Binomial())


# with rail

x1_r = x1[x1.rail .== 1, :]

glm_r = glm(@formula(car1 ~ home + HHSIZE + income + urban1 + density), x1_r, Binomial())


# 5.7.3

Nᵣ = size(x1_r)[1]

global W₁ = hcat(ones(Nᵣ), x1_r.home, x1_r.HHSIZE, x1_r.income, x1_r.urban1, zeros(Nᵣ))

global W₂ = hcat(ones(Nᵣ), zeros(Nᵣ), zeros(Nᵣ), zeros(Nᵣ), zeros(Nᵣ), x1_r.density)

global y = hcat(x1_r.car1,x1_r.train1)

global K = 200


init = coef(glm_r)

a₂ = optimize(f_logit_wrap, init)

a₂.minimizer


init = [0; a₂.minimizer]

a₁ = optimize(biprobit_wrap, init, Optim.Options(show_trace=true, iterations=5000))

a₁.minimizer[2:end]

exp(a₁.minimizer[1])/(1 + exp(a₁.minimizer[1]))





# 5.7.4

N_nr = size(x1_nr)[1]

W₁ = hcat(ones(N_nr), x1_nr.home, x1_nr.HHSIZE, x1_nr.income, x1_nr.urban1, zeros(N_nr))

W₂ = hcat(ones(N_nr), zeros(N_nr), zeros(N_nr), zeros(N_nr), zeros(N_nr), x1_nr.density)


# Probit Estimate

rng = MersenneTwister(123457689)

ρ̂ = exp(a₁.minimizer[1])/(1 + exp(a₁.minimizer[1]))

β̂ = a₁.minimizer[2:end]

K = 200

υ = randn!(rng, zeros(K))

p_00 = zeros(N_nr)

p_10 = zeros(N_nr)

for k = 1:K

υ_k0 = υ[k] .< -W₁*β̂

# in-place updates (.+=) mutate the global accumulators rather than
# reassigning them, which would fail in a top-level loop's soft scope

p_00 .+= υ_k0.*Φ.((-W₂*β̂ .- ρ̂*υ[k])./√(1 - ρ̂^2))

p_10 .+= (1 .- υ_k0).*Φ.(((W₁ - W₂)*β̂ .+ (1 - ρ̂)*υ[k])./√(1 - ρ̂^2))

end

p_train_pr = (K .- p_00 .- p_10)./K

mean(p_train_pr)


# Logit Estimate

β̂ = a₂.minimizer

p_train_lg = exp.(W₂*β̂)./(1 .+ exp.(W₁*β̂) + exp.(W₂*β̂))

mean(p_train_lg)
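
# Average predicted probability of taking the train under each model for
# the no-rail sample, side by side.

[mean(p_train_pr) mean(p_train_lg)]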


# 5.8