# chapter7.jl

# Chapter 7


#import Pkg

#Pkg.add("Plots")

#Pkg.add("Optim")

#Pkg.add("StatsKit")

#Pkg.add("GLM")

#Pkg.add("DataFrames")

#Pkg.add("StatsPlots")

#Pkg.add("StatFiles")

#Pkg.add("NLSolversBase")

#Pkg.add("Distributions")

using Random

using Statistics

using Plots

using Optim

using NLSolversBase

using StatsKit

using CSV

using GLM

using DataFrames

using LinearAlgebra

using StatsPlots

using StatFiles

using Distributions



# 7.1


# 7.2

# 7.2.1

# 7.2.2

# 7.2.3


# 7.3

# 7.3.1


# Section 7.3.1 — simulate a differentiated-duopoly pricing model in N markets.
# Firm L and firm R set prices given quality v (shifts demand toward L) and
# their own marginal costs c_L, c_R.
rng = MersenneTwister(123456789)

N = 1000 # number of markets

β = 0.25 # demand parameter

v = 3 .+ randn!(rng, zeros(N)) # quality (vertical) measure

c_L = 3 .+ randn!(rng, zeros(N))

c_R = 9 .+ randn!(rng, zeros(N))

# costs for both firms

# Equilibrium price functions for each firm
# NOTE(review): formulas come from the model's first-order conditions in the
# text — confirm the algebra against the book.
p_L = (v .+ 1 .+ β.*c_R + 2*β.*c_L)./(3*β)

p_R = (-v .- 1 .+ β.*c_L + 2*β.*c_R)./(3*β)

# price function for each firm

# Firm L's equilibrium market share (location of the indifferent consumer).
x_L = (v .+ 1 .- β.*p_L .+ β.*p_R)./2


# Keep only markets with an interior equilibrium: positive prices and an
# L-share strictly inside (0,1). Elementwise products of Bool arrays act
# as a logical AND, giving a mask for logical indexing.
index = (p_L .> 0).*(p_R .> 0).*(x_L .> 0).*(x_L .< 1)

c_L1 = c_L[index]

c_R1 = c_R[index]

p_L1 = p_L[index]

p_R1 = p_R[index]

x_L1 = x_L[index]

v1 = v[index]

# adjusting values to make things nice


# Market share of L against the price difference.
scatter(x_L1, p_L1 - p_R1)


# 7.3.3


# Intent-to-treat

# Intent-to-treat

# Reduced form: L's share on the cost difference (the "instrument").
data1 = DataFrame(x = x_L1, p = p_L1 - p_R1, c = c_L1 - c_R1)

lm1 = lm(@formula(x ~ c), data1)

# First stage

# Price difference on cost difference.
lm2 = lm(@formula(p ~ c), data1)

# IV

# Wald/IV estimate: reduced-form slope divided by first-stage slope.
coef(lm1)[2]/coef(lm2)[2]


"""
    lm_iv(y, X; Z=X, Reps=100, min1=0.05, max1=0.95)

Linear-model estimator with a nonparametric bootstrap.  With the default
`Z = X` this is OLS; passing an instrument matrix `Z` (same number of
columns as `X`) gives the just-identified IV estimator solving
`(Z'X) b = Z'y`.  An intercept column is prepended to both `X` and `Z`.

# Arguments
- `y`: outcome vector.
- `X`: regressor matrix (or vector) without an intercept.
- `Z`: instrument matrix, defaults to `X` (i.e. OLS).
- `Reps`: number of bootstrap replications.
- `min1`, `max1`: lower/upper quantiles reported per coefficient.

# Returns
A `(1 + K) × 5` table (K = number of coefficients incl. intercept):
header row, then per-coefficient rows of name, bootstrap mean, SD,
and the `min1`/`max1` quantiles.
"""
lm_iv = function (y, X; Z=X, Reps=100, min1=0.05, max1=0.95)
    # Fixed seed so the bootstrap draws are reproducible across calls.
    Random.seed!(123456789)
    n = length(y)
    X = [ones(n) X]
    Z = [ones(n) Z]
    K = size(X, 2)
    # Bootstrap: resample observations with replacement and re-estimate.
    # Every cell is filled below, so no Missing eltype is needed.
    bs_mat = Array{Float64}(undef, Reps, K)
    for r in 1:Reps
        idx = rand(1:n, n)
        # Solve (Z'X) b = Z'y with `\` — more accurate and faster than
        # forming inv(Z'X) explicitly.
        bs_mat[r, :] = (Z[idx, :]' * X[idx, :]) \ (Z[idx, :]' * y[idx])
    end
    # Present results: header row, then one labeled row per coefficient.
    tab_res = Array{Union{Missing,Float64,String}}(missing, 1 + K, 5)
    tab_res[1, 2:end] = ["Mean" "SD" string(min1) string(max1)]
    tab_res[2, 1] = "intercept"
    for i in 1:K
        # Label slope rows too ("x1", "x2", ...) so every row has a name,
        # consistent with the labeled header row.
        if i > 1
            tab_res[i + 1, 1] = string("x", i - 1)
        end
        tab_res[i + 1, 2] = mean(bs_mat[:, i])
        tab_res[i + 1, 3] = std(bs_mat[:, i])
        tab_res[i + 1, 4:5] = quantile(bs_mat[:, i], [min1, max1])
    end
    return tab_res
end


# Estimate firm L's demand: OLS treats the two prices as exogenous,
# while the IV run instruments them with the two cost shifters.
y = x_L1
X = [p_L1 p_R1]   # regressors: both firms' prices
Z = [c_L1 c_R1]   # instruments: both firms' costs

tab_ols = lm_iv(y, X)        # Z defaults to X => OLS
tab_iv = lm_iv(y, X, Z=Z)    # just-identified IV


# 7.3.4

# Regress firm L's price on quality v to recover the structural slope.
lm1 = lm(@formula(p ~ v), DataFrame(p=p_L1, v=v1))

# Estimated slope of price in v.
b̂ = coef(lm1)[2]

# From the pricing equation p_L = (v + 1 + ...)/(3β), the slope on v is
# 1/(3β), so β can be backed out from b̂.
β̂ = 1/(3*b̂)

# Implied demand response of interest.
# NOTE(review): the -β̂/2 transformation follows the text's derivation — confirm.
-β̂/2



# 7.4

# 7.4.1

# 7.4.2

# 7.4.3

# 7.4.4

# 7.4.5

# 7.4.6


# 7.5

# 7.5.1

# 7.5.1 — load the Dominick's scanner data. Path is machine-specific;
# adjust to where dominicks.csv lives.
path = "/Volumes/My WD/Book"

# NOTE(review): newer CSV.jl versions use CSV.read(file, DataFrame) instead
# of wrapping in DataFrame() — confirm the installed CSV.jl version.
x = DataFrame(CSV.read(string(path,"/dominicks.csv")))

# Preallocate typed columns; entries stay `missing` until parsed below.
# NOTE(review): 88160 is the hard-coded row count of dominicks.csv — confirm
# it matches the file, otherwise the parse loop will error or truncate.
p = Vector{Union{Missing, Float64}}(missing,88160)

sig = Vector{Union{Missing, Float64}}(missing, 88160)

oz = Vector{Union{Missing, Float64}}(missing,88160)

age9 = Vector{Union{Missing, Float64}}(missing, 88160)

hhlarge = Vector{Union{Missing, Float64}}(missing, 88160)

income = Vector{Union{Missing, Float64}}(missing, 88160)

share = Vector{Union{Missing, Float64}}(missing, 88160)

# Parse the string-typed columns row by row. Rows where any needed field is
# the literal "NA" are left `missing` and dropped later by dropmissing!.
# BUG FIX: the original guard omitted `income` and `share`, so an "NA" in
# either of those columns would make parse(Float64, "NA") throw.
for i = 1:88160
    if x.ozprice[i] == "NA" || x.oz[i] == "NA" || x.sig[i] == "NA" ||
       x.age9[i] == "NA" || x.hhlarge[i] == "NA" ||
       x.income[i] == "NA" || x.share[i] == "NA"
        # Leave every parsed column missing for this row (p set explicitly;
        # the other vectors were initialized to missing).
        p[i] = missing
    else
        p[i] = parse(Float64, x.ozprice[i])
        oz[i] = parse(Float64, x.oz[i])/100          # rescale ounces
        sig[i] = parse(Float64, x.sig[i])
        age9[i] = parse(Float64, x.age9[i])
        hhlarge[i] = parse(Float64, x.hhlarge[i])
        income[i] = parse(Float64, x.income[i])
        share[i] = parse(Float64, x.share[i])
    end
    #println(i)
end

# Rescale the nutrition measures so coefficients are on comparable scales.
fat = x.fat./100

sodium = x.sodium./1000

carbo = x.carbo./100


# Assemble the estimation frame from parsed and raw columns, then drop any
# row with a missing value (rows flagged "NA" above).
x1 = DataFrame(sig=sig, income=income, p=p, fat=fat, carbo=carbo, sodium=sodium, fiber=x.fiber, oz=oz, quaker=x.quaker, post=x.post, kellogg=x.kellogg, age9=age9, hhlarge=hhlarge, store=x.store, WEEK=x.WEEK, UPC=x.UPC, share=share)

x1 = dropmissing!(x1)

# note that this is different from the R code.


# Product-characteristic matrix used as regressors below; `sig` first.
# NOTE(review): confirm against the text what `sig` measures (it is the
# column later treated as endogenous).
W = hcat(x1.sig, x1.fat, x1.sodium, x1.fiber, x1.carbo, x1.oz, x1.quaker, x1.post, x1.kellogg)


# 7.5.2

# Price regression: price per oz on product characteristics.
p = x1.p

# Instrument matrix: identical to W except store income replaces `sig`,
# the endogenous regressor.
Z = hcat(x1.income, x1.fat, x1.sodium, x1.fiber, x1.carbo, x1.oz, x1.quaker, x1.post, x1.kellogg)

tab_ols = lm_iv(p, W, Reps=300)

tab_iv = lm_iv(p, W, Z=Z, Reps=300)

# Note that these results are different from the R code

# In particular there seems to be a multicollinearity problem

# with the store level measures - income, age9 and hhlarge.

# age9 and hhlarge are not used here.


# 7.5.3

# Back out the demand parameters from the IV pricing coefficients.
# Table rows: 1 = header, 2 = intercept, 3 = first slope (the `sig` column).
# NOTE(review): confirm row 3 is the intended coefficient per the text.
β̂ = -1/(2*tab_iv[3,2])

γ̂ = -tab_iv[2:end,2]./β̂

# Overwrite the second entry so γ̂ carries the price coefficient directly.
# NOTE(review): follows the text's parameterization — confirm.
γ̂[2] = β̂


# Enumerate store×week markets and compute exp(mean utility) exp(δ) for every
# product offered in each market: exp_δ[market, product] = exp([1 W]γ̂).
# Cells stay `missing` for UPCs not offered in that market.
stores = unique(x1.store)

weeks = unique(x1.WEEK)

UPCs = unique(x1.UPC)

T_s = length(stores)

T_w = length(weeks)

J = length(UPCs)

# One row per store×week combination, one column per product.
exp_δ = Array{Union{Missing, Float64}}(missing, T_s*T_w, J)

# Running market (store×week) row index, advanced inside the nested loop.
global t = 1

for t_s = 1:T_s

store = stores[t_s]

for t_w = 1:T_w

week = weeks[t_w]

# UPCs actually offered in this store-week.
upc_temp = x1.UPC[(x1.WEEK .== week).*(x1.store .== store)]

# Map offered UPCs to their column positions in the exp_δ matrix.
index_upc = []

for j = 1:J

if UPCs[j] ∈ upc_temp

index_upc = [index_upc; j]

end

end

# Characteristics of the offered products, in the same row order as
# upc_temp; the intercept column is prepended before applying γ̂.
W_temp = W[(x1.WEEK .== week).*(x1.store .== store),1:end]

exp_δ[t,index_upc] = exp.(hcat(ones(size(W_temp)[1]), W_temp)*γ̂)

global t += 1

# Progress indicator (T_s*T_w iterations in total).
println(t)

end

end


# Within-market shares: normalize each market's exp(δ) values by the market
# total (inside goods only — no outside option here). Cells for products not
# offered in a market remain `missing`.
share_est = Array{Union{Missing,Float64}}(missing, 1387, 57)
for row = 1:1387
    denom = sum(skipmissing(exp_δ[row, :]))
    for col = 1:57
        val = exp_δ[row, col]
        if !ismissing(val)
            share_est[row, col] = val/denom
        end
    end
end


# Average each product's estimated share across the markets where it is
# offered (missing cells are skipped).
share_est_ave = Union{Missing, Float64}[mean(skipmissing(share_est[:, j])) for j = 1:57]


share_est_ave

# Distribution of average estimated shares across the 57 products.
histogram(share_est_ave)


# Target product for the demand-curve exercise.
upc_acc = 1600062760

# Family size Apple cinnamon cheerios

# Observed average market share for this UPC.
share_acc = mean(x1.share[x1.UPC .== upc_acc])

# Boolean mask selecting this UPC's column in the UPCs/exp_δ ordering.
j_acc = UPCs .== upc_acc


# Trace out the demand curve for the target UPC by recomputing model shares
# at K candidate prices on (min_k, max_k].
K = 20

min_k = -7

max_k = -5

global diff_k = (max_k - min_k)/K

acc_demand = Array{Union{Missing, Float64}}(missing, K, 2)

# Loop-invariant pieces, hoisted out of the price grid loop: rows belonging
# to the target UPC and its non-price characteristics.
index_acc = x1.UPC .== upc_acc
N_acc = sum(index_acc)
W_acc = W[index_acc, 2:end]

global min_t = min_k

for k = 1:K

    # Candidate price for this grid point (grid starts at min_k + diff_k).
    pr = min_t + diff_k

    # BUG FIX: the original `exp_δ2 = exp_δ` aliased the matrix, so the
    # target column of exp_δ itself was overwritten in place each iteration;
    # copy() leaves the estimated exp_δ intact for later use.
    exp_δ2 = copy(exp_δ)

    # Recompute exp(δ) for the target UPC at the counterfactual price pr.
    # NOTE(review): this assigns N_acc values into the UPC's column, which
    # assumes the UPC appears in every market (N_acc == number of rows of
    # exp_δ) — confirm against the data.
    W_temp = hcat(ones(N_acc), fill(pr, N_acc), W_acc)

    exp_δ2[1:end, j_acc] = exp.(W_temp*γ̂)

    # Average exp(δ) per product across markets, then convert to shares
    # including the outside option (the +1 in the denominator).
    ave_share = Array{Union{Missing, Float64}}(missing, length(UPCs), 2)

    for i = 1:length(UPCs)
        ave_share[i,1] = UPCs[i]
        ave_share[i,2] = mean(skipmissing(exp_δ2[1:end,i]))
        #println(i)
    end

    ave_share[1:end,2] = ave_share[1:end,2]./(1 + sum(skipmissing(ave_share[1:end,2])))

    # Record (price, predicted share) for this grid point.
    acc_demand[k,1] = pr
    acc_demand[k,2] = ave_share[j_acc,2][1]

    global min_t = min_t + diff_k

    println(k)

end


# Demand curve: predicted share (x-axis) against price (y-axis).
scatter(acc_demand[1:end,2],acc_demand[1:end,1])

# Reference line at the observed price level.
hline!(fill(-6.1,K))



# 7.5.4

# Back-of-envelope surplus/revenue calculation at the 9th grid point:
# 0.5 * (price gap) * predicted share * market size * weeks fraction.
# NOTE(review): the constants -5.2, 267888011, and 52/367 come from the
# text (observed price, market size, annualization) — confirm their meaning.
(0.5*(-5.2-acc_demand[9,1]))*acc_demand[9,2]*267888011*(52/367)

# Note that this is different from results with the R code.

# The data sets are somewhat different and the estimation procedure also differs.