chapter3.jl

# Chapter 3


#import Pkg

#Pkg.add("Plots")

#Pkg.add("Optim")

#Pkg.add("StatsKit")

#Pkg.add("GLM")

#Pkg.add("DataFrames")

using Random

using Statistics

using Plots

using Optim

using StatsKit

using CSV

using GLM

using DataFrames

using LinearAlgebra


# 3.1


# 3.2

# 3.2.1

# 3.2.2

# 3.2.3


rng = MersenneTwister(123456789)

N = 1000

a = fill(2, N)

b = 3

c = 2

e = 3

f = fill(-1, N)

d = 4

z = rand!(rng, zeros(N))

u_1 = randn!(rng, zeros(N))*3

u_2 = randn!(rng, zeros(N))

x = f + d.*z + u_2 + c.*u_1

y = a + b.*x + e.*u_1


data1 = DataFrame(y = y, x = x, z = z)

lm1 = lm(@formula(y ~ x), data1)


# 3.3


# 3.3.1

# 3.3.2

# 3.3.3

# 3.3.4


bd_hat = coef(lm(@formula(y ~ z), data1))[2]

d̂ = coef(lm(@formula(x ~ z), data1))[2]

b̂ = bd_hat/d̂


# 3.3.5

# 3.3.6

# 3.3.7


X = [ones(N) x]

Z = [ones(N) z]

β̂_ols = inv(X'X)X'y

β̂_iv = inv(Z'X)Z'y



lm_iv = function (y, X; Z=X, Reps=100, min1=0.05, max1=0.95)

# Set up

Random.seed!(123456789)

X = [ones(length(y)) X]

Z = [ones(length(y)) Z]

# Bootstrap

bs_mat = Array{Union{Missing, Float64}}(missing, Reps, size(X)[2])

for r in 1:Reps

index_r = rand(1:length(y), length(y))

y_bs = y[index_r]

X_bs = X[index_r,1:end]

Z_bs = Z[index_r,1:end]

β̂ = inv(Z_bs'X_bs)Z_bs'y_bs

bs_mat[r,1:end] = β̂

# println(r)

end

# Present Results

tab_res = Array{Union{Missing,Float64,String}}(missing,1+size(X)[2],5)

tab_res[1,2:end] = ["Mean" "SD" string(min1) string(max1)]

tab_res[2,1] = "intercept"

for i in 1:size(X)[2]

tab_res[i+1,2] = mean(bs_mat[1:end,i])

tab_res[i+1,3] = std(bs_mat[1:end,i])

tab_res[i+1,4:5] = quantile(bs_mat[1:end,i],[min1,max1])

end


return tab_res


end


lm_iv(y, x)


lm_iv(y, x, Z=z)



# 3.4


# 3.4.1

# 3.4.2


path = "ENTER PATH HERE"

x = CSV.read(string(path,"/nls.csv"))

lwage76_temp = Vector{Union{Missing, Float64}}(missing,length(x.lwage76))

wage76_temp = Vector{Union{Missing, Float64}}(missing,length(x.wage76))

for i in 1:length(lwage76_temp)

if length(x.lwage76[i]) > 1

lwage76_temp[i] = parse(Float64, x.lwage76[i])

wage76_temp[i] = parse(Float64, x.wage76[i])

end

end

x.lwage76 = lwage76_temp

x.wage76 = wage76_temp


x1 = DataFrame(x)

x1 = dropmissing(x1)


x1.exp1 = x1.age76 - x1.ed76 - fill(6,3010)

x1.exp2 = (x1.exp1.^2)./100


# 3.4.3

# OLS

lm4 = lm(@formula(lwage76 ~ ed76 + exp1 + exp2 + black + reg76r + smsa76r + smsa66r +

reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668 + reg669), x1)

coef(lm4)[2]


# Intent-to-treat

lm5 = lm(@formula(lwage76 ~ nearc4 + exp1 + exp2 + black + reg76r + smsa76r + smsa66r +

reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668 + reg669), x1)

coef(lm5)[2]


# Effect of instrument

lm6 = lm(@formula(ed76 ~ nearc4 + exp1 + exp2 + black + reg76r + smsa76r + smsa66r +

reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668 + reg669), x1)

coef(lm6)[2]


coef(lm5)[2]/coef(lm6)[2]


# 3.4.4

y = x1.lwage76

X = [x1.ed76 x1.exp1 x1.exp2 x1.black x1.reg76r x1.smsa76r x1.smsa66r x1.reg662 x1.reg663]

X = [X x1.reg664 x1.reg665 x1.reg666 x1.reg667 x1.reg668 x1.reg669]

Z1 = [x1.nearc4 x1.age76 x1.age76.^2 x1.black x1.reg76r x1.smsa76r x1.smsa66r x1.reg662 x1.reg663]

Z1 = [Z1 x1.reg664 x1.reg665 x1.reg666 x1.reg667 x1.reg668 x1.reg669]


res = lm_iv(y, X, Z=Z1)

res[2:9,1] = ["intercept" "ed76" "exp" "exp2" "black" "reg76r" "smsa76r" "smsa66r"]

res[10:17,1] = ["reg662" "reg663" "reg664" "reg665" "reg666" "reg667" "reg668" "reg669"]


res


# 3.4.5

tab_cols = ["Near College" "Not Near College"]

tab_rows = ["ed76" "exp1" "black" "south66" "smsa66r" "reg76r" "smsa76r"]


table_dist = Array{Union{Missing,Float64,String}}(missing,8,3)

tab_index = indexin(tab_rows, names(x1))

index_near = []

index_notnear = []

table_dist[1,2:3] = tab_cols

table_dist[2:8,1] = tab_rows

for i = 1:length(x1.lwage76)

if x1.nearc4[i] == 1

index_near = [index_near; i]

elseif x1.nearc4[i] == 0

index_notnear = [index_notnear; i]

end

end

for i = 1:7

table_dist[i+1,2] = mean(x1[index_near,tab_index[i]])

table_dist[i+1,3] = mean(x1[index_notnear,tab_index[i]])

end

table_dist


# 3.5


# 3.5.1

# 3.5.2


Z2 = [x1.momdad14 x1.age76 x1.age76.^2 x1.black x1.reg76r x1.smsa76r x1.smsa66r x1.reg662 x1.reg663]

Z2 = [Z2 x1.reg664 x1.reg665 x1.reg666 x1.reg667 x1.reg668 x1.reg669]


# Bootstrap

Random.seed!(123456789)

bs_diff = Vector{Union{Missing, Float64}}(missing, 1000)

for i in 1:1000

index_bs = rand(1:length(y), length(y))

y_bs = y[index_bs]

X_bs = X[index_bs,1:end]

Z1_bs = Z1[index_bs,1:end]

Z2_bs = Z2[index_bs,1:end]

β̂₁ = inv(Z1_bs'X_bs)Z1_bs'y_bs

β̂₂ = inv(Z2_bs'X_bs)Z2_bs'y_bs

bs_diff[i,1] = β̂₁[2] - β̂₂[2]

# println(i)

end

mean(bs_diff)

std(bs_diff)

quantile(bs_diff,[0.05,0.95])



# 3.6


# 3.6.1

# 3.6.2

# 3.6.3


X₂ = Vector{Union{Missing, Float64}}(missing, length(y))

for i = 1:length(y)

if x1.ed76[i] > 12

X₂[i] = 1

else

X₂[i] = 0

end

end

μ₁ = mean(y[index_near])

μ₀ = mean(y[index_notnear])

p₁ = mean(X₂[index_near])

p₀ = mean(X₂[index_notnear])


β_LATE = ((μ₁ - μ₀)/(p₁ - p₀))/4


index_md = []

index_notmd = []

for i = 1:length(x1.lwage76)

if x1.momdad14[i] == 1

index_md = [index_md; i]

elseif x1.momdad14[i] == 0

index_notmd = [index_notmd; i]

end

end


μ₁ = mean(y[index_md])

μ₀ = mean(y[index_notmd])

p₁ = mean(X₂[index_md])

p₀ = mean(X₂[index_notmd])


β_LATE = ((μ₁ - μ₀)/(p₁ - p₀))/4



# 3.7