# Chapter 2
#import Pkg
#Pkg.add("Plots")
#Pkg.add("Optim")
#Pkg.add("StatsKit")
#Pkg.add("GLM")
#Pkg.add("DataFrames")
using Random
using Statistics
using Plots
using Optim
using StatsKit
using CSV
using GLM
using DataFrames
using LinearAlgebra
# 2.1
# 2.2
# 2.2.1
# 2.2.2
# 2.2.3
# 2.2.4
rng = MersenneTwister(123456789)
N = 1000
a = fill(2, N)
b = 3
c = 4
υₓ = randn!(rng, zeros(N))
α = 0
x = x1 = (1 - α).*rand!(rng, zeros(N)) + α.*υₓ
w = w1 = (1 - α).*rand!(rng, zeros(N)) + α.*υₓ
υ = randn!(rng, zeros(N))
y = a + b.*x + c.*w + υ
data1 = DataFrame(y=y, x=x, w=w)
lm1 = lm(@formula(y ~ x), data1)
lm2 = lm(@formula(y ~ x + w), data1)
α = 0.5
x = x2 = (1 - α).*rand!(rng, zeros(N)) + α.*υₓ
w = w2 = (1 - α).*rand!(rng, zeros(N)) + α.*υₓ
υ = randn!(rng, zeros(N))
y = a + b.*x + c.*w + υ
data2 = DataFrame(y=y, x=x, w=w)
lm3 = lm(@formula(y ~ x), data2)
lm4 = lm(@formula(y ~ x + w), data2)
α = 0.95
x = x3 = (1 - α).*rand!(rng, zeros(N)) + α.*υₓ
w = w3 = (1 - α).*rand!(rng, zeros(N)) + α.*υₓ
υ = randn!(rng, zeros(N))
y = a + b.*x + c.*w + υ
data3 = DataFrame(y=y, x=x, w=w)
lm5 = lm(@formula(y ~ x), data3)
lm6 = lm(@formula(y ~ x + w), data3)
# 2.2.5
cov([x1 w1])
cov([x2 w2])
x2'w2
# 2.3
# 2.3.1
# 2.3.2
X2 = [ones(N) x3 w3]
inv(X2'X2)X2'υ
mean(υ)
cov([x3 υ])
cov([w3 υ])
X2'υ/N
1/det(X2'X2)
# 2.4
# 2.4.1
# 2.4.2
path = "ENTER PATH HERE"
x = CSV.read(string(path,"/nls.csv"))
lwage76_temp = Vector{Union{Missing, Float64}}(missing,length(x.lwage76))
wage76_temp = Vector{Union{Missing, Float64}}(missing,length(x.wage76))
for i in 1:length(lwage76_temp)
if length(x.lwage76[i]) > 1
lwage76_temp[i] = parse(Float64, x.lwage76[i])
wage76_temp[i] = parse(Float64, x.wage76[i])
end
end
x.lwage76 = lwage76_temp
x.wage76 = wage76_temp
x1 = DataFrame(x)
x1 = dropmissing(x1)
x1.exp1 = x1.age76 - x1.ed76 - fill(6,3010)
x1.exp2 = (x1.exp1.^2)./100
# 2.4.3
lm1 = lm(@formula(lwage76 ~ ed76), x1)
lm2 = lm(@formula(lwage76 ~ ed76 + exp1 + exp2), x1)
lm3 = lm(@formula(lwage76 ~ ed76 + exp1 + exp2 + black + reg76r), x1)
lm4 = lm(@formula(lwage76 ~ ed76 + exp1 + exp2 + black + reg76r +
smsa76r + smsa66r + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 +
reg668 + reg669), x1)
r2(lm1)
r2(lm2)
r2(lm3)
r2(lm4)
# 2.5
# 2.5.1
# 2.5.2
Random.seed!(123456789)
N = 50
a = fill(1, N)
b = 0
c = 3
d = 4
x = rand([0,1], N)
υ_w = rand!(rng, zeros(N))
w = d.*x + υ_w
υ = randn!(rng, zeros(N))
y = a + b.*x + c.*w + υ
data1 = DataFrame(y = y, x = x, w = w)
lm1 = lm(@formula(y ~ x), data1)
lm2 = lm(@formula(y ~ x + w), data1)
ê = coef(lm1)[2]
ĉ = coef(lm(@formula(y ~ w), data1))[2]
d̂ = coef(lm(@formula(w ~ x), data1))[2]
b̂ = ê - ĉ*d̂
# 2.5.3
Random.seed!(123456789)
b_mat = Array{Union{Missing, Float64}}(missing, 100, 3)
for i in 1:100
x = rand([0,1], N)
υ_w = rand(N)
w = d.*x + υ_w
υ = randn(N)
y = a + b.*x + c.*w + υ
data_temp = DataFrame(y = y, x = x, w = w)
lm2_temp = lm(@formula(y ~ x + w), data_temp)
b_mat[i,1] = coef(lm2_temp)[2]
b_mat[i,2] = coef(lm2_temp)[2]/stderror(lm2_temp)[2]
ê = coef(lm(@formula(y ~ x), data_temp))[2]
ĉ = coef(lm(@formula(y ~ w), data_temp))[2]
d̂ = coef(lm(@formula(w ~ x), data_temp))[2]
b_mat[i,3] = ê - ĉ*d̂
print(i)
end
b_mat
mean(b_mat[1:end,1])
std(b_mat[1:end,1])
minimum(b_mat[1:end,2])
maximum(b_mat[1:end,2])
mean(b_mat[1:end,3])
std(b_mat[1:end,3])
histogram(b_mat[1:end,1])
histogram!(b_mat[1:end,3])
# 2.5.5
X = [ones(N) x]
W = [ones(N) w]
β̃̂ = inv(X'X)X'y
Δ̂ = inv(X'X)X'W
γ̂ = inv(W'W)W'y
β̂ = β̃̂ - Δ̂*γ̂
# 2.6
# 2.6.1
path = "ENTER PATH HERE"
x = CSV.read(string(path,"/hmda_aer.csv"))
deny = Vector{Union{Missing,Float64}}(missing,2925)
black = Vector{Union{Missing,Float64}}(missing,2925)
for i in 1:2925
if x.s7[i] == 3
deny[i] = 1
elseif x.s7[i] == 1
deny[i] = 0
elseif x.s7[i] == 2
deny[i] = 0
end
if x.s13[i] == 3
black[i] = 1
else
black[i] = 0
end
end
deny
black
data1 = DataFrame(deny=deny, black=black)
lm1 = lm(@formula(deny ~ black), data1)
# 2.6.2
# 2.6.3
lwage = Vector{Union{Missing,Float64}}(missing,2925)
mhist = Vector{Union{Missing,Float64}}(missing,2925)
chist = Vector{Union{Missing,Float64}}(missing,2925)
phist = Vector{Union{Missing,Float64}}(missing,2925)
emp = Vector{Union{Missing,Float64}}(missing,2925)
index_na = []
for i in 1:2925
if x.s31a[i] > 0 && x.s31a[i] < 999999 && x.s25a[i] < 1000
lwage[i] = log(x.s31a[i])
emp[i] = x.s25a[i]
index_na = [index_na; i]
end
mhist[i] = x.s42[i]
chist[i] = x.s43[i]
phist[i] = x.s44[i]
end
lwage
mhist
chist
phist
emp
y = deny[index_na]
X = [ones(2925) black][index_na, 1:end]
W = [ones(2925) lwage chist mhist phist emp][index_na,1:end]
β̃̂ = inv(X'X)X'y
Δ̂ = inv(X'X)X'W
γ̂ = inv(W'W)W'y
β̂ = β̃̂ - Δ̂*γ̂
# 2.6.4
lwage = Vector{Union{Missing,Float64}}(missing,2925)
mhist = Vector{Union{Missing,Float64}}(missing,2925)
chist = Vector{Union{Missing,Float64}}(missing,2925)
phist = Vector{Union{Missing,Float64}}(missing,2925)
emp = Vector{Union{Missing,Float64}}(missing,2925)
married = Vector{Union{Missing,Float64}}(missing,2925)
dr = Vector{Union{Missing,Float64}}(missing,2925)
clines = Vector{Union{Missing,Float64}}(missing,2925)
male = Vector{Union{Missing,Float64}}(missing,2925)
suff = Vector{Union{Missing,Float64}}(missing,2925)
assets = Vector{Union{Missing,Float64}}(missing,2925)
s6 = Vector{Union{Missing,Float64}}(missing,2925)
s50 = Vector{Union{Missing,Float64}}(missing,2925)
s33 = Vector{Union{Missing,Float64}}(missing,2925)
lr = Vector{Union{Missing,Float64}}(missing,2925)
pr = Vector{Union{Missing,Float64}}(missing,2925)
coap = Vector{Union{Missing,Float64}}(missing,2925)
school = Vector{Union{Missing,Float64}}(missing,2925)
s57 = Vector{Union{Missing,Float64}}(missing,2925)
s48 = Vector{Union{Missing,Float64}}(missing,2925)
s39 = Vector{Union{Missing,Float64}}(missing,2925)
chval = Vector{Union{Missing,Float64}}(missing,2925)
s20 = Vector{Union{Missing,Float64}}(missing,2925)
lwage_coap = Vector{Union{Missing,Float64}}(missing,2925)
lwage_coap2 = Vector{Union{Missing,Float64}}(missing,2925)
male_coap = Vector{Union{Missing,Float64}}(missing,2925)
index_na = []
for i in 1:2925
if (x.s31a[i] > 0 && x.s31a[i] < 999999 && x.s25a[i] < 1000 && x.s45[i] < 999999 && x.s41[i] < 999999 &&
x.s11[i] < 999999 && x.s35[i] < 999999 && x.s6[i] < 999999 && x.s50[i] < 999999 &&
x.s33[i] < 999999 && x.school[i] < 999999 && x.s57[i] < 999999 && x.s48[i] < 999999 && x.s39[i] < 999999 &&
x.chval[i] < 999999 && x.s20[i] < 999999 && x.s31c[i] > 0 && x.s31c[i] < 999999 && !ismissing(x.s16[i]))
lwage[i] = log(x.s31a[i])
emp[i] = x.s25a[i]
mhist[i] = x.s42[i]
chist[i] = x.s43[i]
phist[i] = x.s44[i]
if x.s23a[i] == "M"
married[i] = 1
else
married[i] = 0
end
dr[i] = x.s45[i]
clines[i] = x.s41[i]
if x.s15[i] == 1
male[i] = 1
else
male[i] = 0
end
suff[i] = x.s11[i]
assets[i] = x.s35[i]
s6[i] = x.s6[i]
s50[i] = x.s50[i]
s33[i] = x.s33[i]
lr[i] = s6[i]/s50[i]
pr[i] = s33[i]/s50[i]
if x.s16[i] == 4
coap[i] = 1
else
coap[i] = 0
end
school[i] = x.school[i]
s57[i] = x.s57[i]
s48[i] = x.s48[i]
s39[i] = x.s39[i]
chval[i] = x.chval[i]
s20[i] = x.s20[i]
lwage_coap[i] = x.s31c[i]
if coap[i] == 1
lwage_coap2[i] = lwage_coap[i]
else
lwage_coap2[i] = 0
end
if x.s16[i] == 1
male_coap[i] = 1
else
male_coap[i] = 0
end
index_na = [index_na; i]
end
println(i)
end
y = deny[index_na]
X = [ones(2925) black][index_na, 1:end]
W1 = [ones(2925) lwage chist mhist phist emp emp.^2 married dr clines male suff assets lr pr coap]
W2 = [s20 x.s24a x.s27a s39 s48 x.s53 x.s55 x.s56 s57 chval school x.bd x.mi x.old x.vr x.uria x.netw]
W3 = [x.dnotown x.dprop lwage_coap2 lr.^2 pr.^2 clines.^2 x.rtdum]
W = [W1 W2 W3][index_na,1:end]
β̃̂ = inv(X'X)X'y
Δ̂ = inv(X'X)X'W
γ̂ = inv(W'W)W'y
β̂ = β̃̂ - Δ̂*γ̂
# 2.6.5
Random.seed!(123456789)
K = 1000
bs_mat = Array{Union{Missing, Float64}}(missing, K, 2)
for k in 1:K
index_k = rand(1:length(y), length(y))
y1 = y[index_k]
X1 = X[index_k,1:end]
W1 = W[index_k,1:end]
β̃̂ = inv(X1'X1)X1'y1
Δ̂ = inv(X1'X1)X1'W1
γ̂ = inv(W1'W1)W1'y1
β̂ = β̃̂ - Δ̂*γ̂
bs_mat[k,1:end] = β̂
#println(k)
end
bs_mat
tab_res = Array{Union{Missing,Float64,String}}(missing,3,5)
tab_res[1,2:end] = ["Mean" "SD" "2.5%" "97.5%"]
tab_res[2:3,1] = ["intercept" "direct effect"]
for i in 1:2
tab_res[i+1,2] = mean(bs_mat[1:end,i])
tab_res[i+1,3] = std(bs_mat[1:end,i])
tab_res[i+1,4:5] = quantile(bs_mat[1:end,i],[0.025,0.975])
end
tab_res