chapter3.jl
# Chapter 3
#import Pkg
#Pkg.add("Plots")
#Pkg.add("Optim")
#Pkg.add("StatsKit")
#Pkg.add("GLM")
#Pkg.add("DataFrames")
using Random
using Statistics
using Plots
using Optim
using StatsKit
using CSV
using GLM
using DataFrames
using LinearAlgebra
# 3.1
# 3.2
# 3.2.1
# 3.2.2
# 3.2.3
rng = MersenneTwister(123456789)
N = 1000
a = fill(2, N)
b = 3
c = 2
e = 3
f = fill(-1, N)
d = 4
z = rand!(rng, zeros(N))
u_1 = randn!(rng, zeros(N))*3
u_2 = randn!(rng, zeros(N))
x = f + d.*z + u_2 + c.*u_1
y = a + b.*x + e.*u_1
data1 = DataFrame(y = y, x = x, z = z)
lm1 = lm(@formula(y ~ x), data1)
# 3.3
# 3.3.1
# 3.3.2
# 3.3.3
# 3.3.4
bd_hat = coef(lm(@formula(y ~ z), data1))[2]
d̂ = coef(lm(@formula(x ~ z), data1))[2]
b̂ = bd_hat/d̂
# 3.3.5
# 3.3.6
# 3.3.7
X = [ones(N) x]
Z = [ones(N) z]
β̂_ols = inv(X'X)X'y
β̂_iv = inv(Z'X)Z'y
lm_iv = function (y, X; Z=X, Reps=100, min1=0.05, max1=0.95)
# Set up
Random.seed!(123456789)
X = [ones(length(y)) X]
Z = [ones(length(y)) Z]
# Bootstrap
bs_mat = Array{Union{Missing, Float64}}(missing, Reps, size(X)[2])
for r in 1:Reps
index_r = rand(1:length(y), length(y))
y_bs = y[index_r]
X_bs = X[index_r,1:end]
Z_bs = Z[index_r,1:end]
β̂ = inv(Z_bs'X_bs)Z_bs'y_bs
bs_mat[r,1:end] = β̂
# println(r)
end
# Present Results
tab_res = Array{Union{Missing,Float64,String}}(missing,1+size(X)[2],5)
tab_res[1,2:end] = ["Mean" "SD" string(min1) string(max1)]
tab_res[2,1] = "intercept"
for i in 1:size(X)[2]
tab_res[i+1,2] = mean(bs_mat[1:end,i])
tab_res[i+1,3] = std(bs_mat[1:end,i])
tab_res[i+1,4:5] = quantile(bs_mat[1:end,i],[min1,max1])
end
return tab_res
end
lm_iv(y, x)
lm_iv(y, x, Z=z)
# 3.4
# 3.4.1
# 3.4.2
path = "ENTER PATH HERE"
x = CSV.read(string(path,"/nls.csv"))
lwage76_temp = Vector{Union{Missing, Float64}}(missing,length(x.lwage76))
wage76_temp = Vector{Union{Missing, Float64}}(missing,length(x.wage76))
for i in 1:length(lwage76_temp)
if length(x.lwage76[i]) > 1
lwage76_temp[i] = parse(Float64, x.lwage76[i])
wage76_temp[i] = parse(Float64, x.wage76[i])
end
end
x.lwage76 = lwage76_temp
x.wage76 = wage76_temp
x1 = DataFrame(x)
x1 = dropmissing(x1)
x1.exp1 = x1.age76 - x1.ed76 - fill(6,3010)
x1.exp2 = (x1.exp1.^2)./100
# 3.4.3
# OLS
lm4 = lm(@formula(lwage76 ~ ed76 + exp1 + exp2 + black + reg76r + smsa76r + smsa66r +
reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668 + reg669), x1)
coef(lm4)[2]
# Intent-to-treat
lm5 = lm(@formula(lwage76 ~ nearc4 + exp1 + exp2 + black + reg76r + smsa76r + smsa66r +
reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668 + reg669), x1)
coef(lm5)[2]
# Effect of instrument
lm6 = lm(@formula(ed76 ~ nearc4 + exp1 + exp2 + black + reg76r + smsa76r + smsa66r +
reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668 + reg669), x1)
coef(lm6)[2]
coef(lm5)[2]/coef(lm6)[2]
# 3.4.4
y = x1.lwage76
X = [x1.ed76 x1.exp1 x1.exp2 x1.black x1.reg76r x1.smsa76r x1.smsa66r x1.reg662 x1.reg663]
X = [X x1.reg664 x1.reg665 x1.reg666 x1.reg667 x1.reg668 x1.reg669]
Z1 = [x1.nearc4 x1.age76 x1.age76.^2 x1.black x1.reg76r x1.smsa76r x1.smsa66r x1.reg662 x1.reg663]
Z1 = [Z1 x1.reg664 x1.reg665 x1.reg666 x1.reg667 x1.reg668 x1.reg669]
res = lm_iv(y, X, Z=Z1)
res[2:9,1] = ["intercept" "ed76" "exp" "exp2" "black" "reg76r" "smsa76r" "smsa66r"]
res[10:17,1] = ["reg662" "reg663" "reg664" "reg665" "reg666" "reg667" "reg668" "reg669"]
res
# 3.4.5
tab_cols = ["Near College" "Not Near College"]
tab_rows = ["ed76" "exp1" "black" "south66" "smsa66r" "reg76r" "smsa76r"]
table_dist = Array{Union{Missing,Float64,String}}(missing,8,3)
tab_index = indexin(tab_rows, names(x1))
index_near = []
index_notnear = []
table_dist[1,2:3] = tab_cols
table_dist[2:8,1] = tab_rows
for i = 1:length(x1.lwage76)
if x1.nearc4[i] == 1
index_near = [index_near; i]
elseif x1.nearc4[i] == 0
index_notnear = [index_notnear; i]
end
end
for i = 1:7
table_dist[i+1,2] = mean(x1[index_near,tab_index[i]])
table_dist[i+1,3] = mean(x1[index_notnear,tab_index[i]])
end
table_dist
# 3.5
# 3.5.1
# 3.5.2
Z2 = [x1.momdad14 x1.age76 x1.age76.^2 x1.black x1.reg76r x1.smsa76r x1.smsa66r x1.reg662 x1.reg663]
Z2 = [Z2 x1.reg664 x1.reg665 x1.reg666 x1.reg667 x1.reg668 x1.reg669]
# Bootstrap
Random.seed!(123456789)
bs_diff = Vector{Union{Missing, Float64}}(missing, 1000)
for i in 1:1000
index_bs = rand(1:length(y), length(y))
y_bs = y[index_bs]
X_bs = X[index_bs,1:end]
Z1_bs = Z1[index_bs,1:end]
Z2_bs = Z2[index_bs,1:end]
β̂₁ = inv(Z1_bs'X_bs)Z1_bs'y_bs
β̂₂ = inv(Z2_bs'X_bs)Z2_bs'y_bs
bs_diff[i,1] = β̂₁[2] - β̂₂[2]
# println(i)
end
mean(bs_diff)
std(bs_diff)
quantile(bs_diff,[0.05,0.95])
# 3.6
# 3.6.1
# 3.6.2
# 3.6.3
X₂ = Vector{Union{Missing, Float64}}(missing, length(y))
for i = 1:length(y)
if x1.ed76[i] > 12
X₂[i] = 1
else
X₂[i] = 0
end
end
μ₁ = mean(y[index_near])
μ₀ = mean(y[index_notnear])
p₁ = mean(X₂[index_near])
p₀ = mean(X₂[index_notnear])
β_LATE = ((μ₁ - μ₀)/(p₁ - p₀))/4
index_md = []
index_notmd = []
for i = 1:length(x1.lwage76)
if x1.momdad14[i] == 1
index_md = [index_md; i]
elseif x1.momdad14[i] == 0
index_notmd = [index_notmd; i]
end
end
μ₁ = mean(y[index_md])
μ₀ = mean(y[index_notmd])
p₁ = mean(X₂[index_md])
p₀ = mean(X₂[index_notmd])
β_LATE = ((μ₁ - μ₀)/(p₁ - p₀))/4
# 3.7