Code

R Code used to present "R" to students

########################## PREMISES ##########################

###Step Minus One###

#Remember to comment as much as possible.

#You should do that more for yourself than for the assignment.

#To remember what has been done in a script is not easy and, maybe, this script would be useful for you even a couple of years from now

###Step Zero###

#Understanding the windows: script, console, environment, help,...

2+1

#try to put the same command inside the console.

#What is the difference?

#What you do directly in the console is not saved!

#(Remember to save often your script. A few things in life are more )

###First Step###

#Some basics

#Creating a vector

# character-entry

x <- c("red", "blue", "green", "no color")

rm(x) #to remove the object "x"

# numeric-entry

a=c(1,2,3) #same for a<-c(1,2,3)

a

a=as.character(a) #We are saying to R that the entries have to be considered characters

a=as.numeric(a) #We are saying to R that the entries have to be considered numbers

b=c(4,5,6)

b

c=c(7,8,9)

c

#Creating a matrix

A1=c(a,b,c)

A1

A2=matrix(c(a,b,c),nrow=3,ncol=3)

A2

A3=rbind(a,b,c)

A3

A4=cbind(a,b,c)

A4

###Second Step###

#Setting a directory

help(setwd)

# to remove all objects

rm(list = ls())

setwd("C:\Users\Massimo Ceraolo\Desktop\Principles_of_economics") #this syntax does not work because2 "\" is considered a command

######

setwd("C:\\Users\\Massimo Ceraolo\\Desktop\\Principles_of_economics") #we can solve this either with a double backward slash

######

setwd("C:/Users/Massimo Ceraolo/Desktop/Principles_of_economics") #or this a forward slash

getwd()

###Third Step###

#Importing a dataset

install.packages(c("tidyverse","readr")) #you need to installa package just once (to install a package you need an Internet connection)

library(tidyverse) #the command "library" ask to R Studio to add to the CURRENT session the commands added by that package.

library(readr) #You need to repeat this command every time you re-open R Studio

tempdata <- read.csv("NH.Ts+dSST.csv", skip = 1, na.strings = "***") #"tempdata" is the name we have chosen for the imported the dataset.

#You can choose whatever name you want. I suggest you to choose something short but that helps you to remember the type of data that are inside.

#Try to import the dataset with the user interface (Inside the Environment window there is "Import Dataset")

#Is R recognising correctly the data? Check it immediatly opening the dataset or using the following commands

head(tempdata)

str(tempdata)

########################## PART 1.1 ##########################

###Drawning a graph line

tempdata$Jan <- ts(tempdata$Jan, start = c(1880), end = c(2017), frequency = 1)

#It does not work. What should we do?

#Let us give a look at the data!

#Our time series goes beyond the 2017

#Let us drop a line

#general command: mydataframe[-c(row_index_1, row_index_2),]

newdata=tempdata[-c(143),] #because we want to drop just the line n.143

tempdata=tempdata[-c(143),] #There is no need to use different names for changes in the dataset (sometimes it helps, sometomes is adds confusion)

rm(newdata)

tempdata$Jan <- ts(tempdata$Jan, start = c(1880), end = c(2021), frequency = 1)

tempdata$DJF <- ts(tempdata$DJF,

start = c(1880), end = c(2021), frequency = 1)

tempdata$MAM <- ts(tempdata$MAM,

start = c(1880), end = c(2021), frequency = 1)

tempdata$JJA <- ts(tempdata$JJA,

start = c(1880), end = c(2021), frequency = 1)

tempdata$SON <- ts(tempdata$SON,

start = c(1880), end = c(2021), frequency = 1)

tempdata$J.D <- ts(tempdata$J.D,

start = c(1880), end = c(2021), frequency = 1)

# Set line width and colour

plot(tempdata$Jan, type = "l", col = "blue", lwd = 2, ylab = "Annual temperature anomalies", xlab = "Year")

#let's put the year on the x axis

plot(x=tempdata$Year,y=tempdata$Jan,, type = "l", col = "blue", lwd = 2, ylab = "Annual temperature anomalies", xlab = "Year")

#Try some variations! https://www.rdocumentation.org/packages/graphics/versions/3.6.2/topics/plot

plot(tempdata$Year,tempdata$Jan, type = "l", col = "green", lwd = 5, ylab = "Annual temperature anomalies", xlab = "Year")

plot(tempdata$Year,tempdata$Jan, type = "p", col = "green", lwd = 5, ylab = "Annual temperature anomalies", xlab = "Year")

#In R, colors can be specified either by name (e.g col = "red") or as a hexadecimal RGB triplet (such as col = "#FFCC00").

#You can also use other color systems such as ones taken from the RColorBrewer package.

#https://html-color-codes.com/

plot(tempdata$Year,tempdata$Jan, type = "p", col = "#009999", lwd = 5, ylab = "Annual temperature anomalies", xlab = "Year")

# Add a title

title("Average temperature anomaly in January in the northern hemisphere (1880-2016)")

# Add a horizontal line (at y = 0)

abline(h = 0, col = "darkorange2", lwd = 2)

# Add a label to the horizontal line

text(2000, -0.1, "1951-1980 average")

########################## PART 1.2 ##########################

###Creating an histogram

tempdata$Period <- factor(NA, levels = c("1921-1950", "1951-1980", "1981-2010"), ordered = TRUE)

#Look at the dataset!

#What is a factor variable in R Language? https://www.stat.berkeley.edu/~s133/factors.html

tempdata$Period[(tempdata$Year > 1920) & (tempdata$Year < 1951)] <- "1921-1950"

tempdata$Period[(tempdata$Year > 1950) & (tempdata$Year < 1981)] <- "1951-1980"

tempdata$Period[(tempdata$Year > 1980) & (tempdata$Year < 2011)] <- "1981-2010"

#Look at the dataset!

tempdata$prova <- tempdata$Jan + tempdata$Feb

tempdata = subset(tempdata, select = -c(prova))

temp_summer <- c(tempdata$Jun, tempdata$Jul, tempdata$Aug)

temp_summer <- unlist(tempdata[,7:9],use.names = FALSE)

temp_Period <- c(tempdata$Period, tempdata$Period, tempdata$Period)

temp_Period <- factor(temp_Period, levels = 1:nlevels(tempdata$Period),labels = levels(tempdata$Period))

hist(temp_summer[(temp_Period == "1951-1980")], plot = FALSE)

#install.packages("mosaic")

library(mosaic)

histogram(~ temp_summer | temp_Period, type = "count",

breaks = seq(-0.5, 1.3, 0.10),

main = "Histogram of Temperature anomalies",

xlab = "Summer temperature distribution")

###Using the "quantile" function

# Select years 1951 to 1980

temp_all_months <- subset(tempdata, (Year >= 1951 & Year <= 1980))

# Columns 2 to 13 contain months Jan to Dec.

temp_51to80 <- unlist(temp_all_months[, 2:13])

temp_51to80

# c(0.3, 0.7) indicates the chosen percentiles.

perc <- quantile(temp_51to80, c(0.3, 0.7))

perc

# The cold threshold

p30 <- perc[1]

p30

# The hot threshold

p70 <- perc[2]

p70

###Using the "mean" function

mean(c(2,3,4))

temp <- temp_51to80 <p30

temp

mean(temp) #it interprets TRUE=1 and FALSE=0

###Calculating and understanding mean and variance

paste("Mean of DJF temperature anomalies across periods")

mean(~DJF|Period,data = tempdata)

#Why this command has this peculiar structure?

#It is due to the mosaic package

#First of all: DO NOT PANIC. In R there are many many different ways to do the same thing

#Second of all: each package has its explanation paper: https://journal.r-project.org/archive/2017/RJ-2017-024/RJ-2017-024.pdf

#here we can read: "Formulas with a single variable correspond to situations where there is no "explanatory" variable

#to be included, for example in simple numerical summaries or depictions of the distribution of a

#variable" (page 81, keyword "mean")

paste("Variance of DJF anomalies across periods")

var(~DJF|Period,data = tempdata)

###Putting 2 time series in the same plot

plot(tempdata$Year,tempdata$DJF, type = "l", col = "blue", lwd = 2,

ylab = "Annual temperature anomalies", xlab = "Year")

# \n creates a line break

title("Average temperature anomaly in DJF and JJA \n in the northern hemisphere (1880-2016)")

# Add a horizontal line (at y = 0)

abline(h = 0, col = "green", lwd = 2)

lines(tempdata$Year,tempdata$JJA, col = "orange", lwd = 2)

# Add a label to the horizontal line

text(1895, 0.1, "1951-1980 average",)

legend(1880, 1.5, legend = c("DJF", "JJA"), col = c("blue", "orange"), lty = 1, cex = 0.8, lwd = 2)

########################## PART 1.3 ##########################

### Scatterplots and the correlation coefficient

#install.packages("readxl")

library(readxl)

CO2data <- read_excel("1_CO2-data.xlsx")

CO2data_june <- CO2data[CO2data$Month == 6,]

?merge

tempCO2data <- merge(tempdata, CO2data_june)

head(tempCO2data)

head(tempCO2data[, c("Year", "Jun", "Trend")])

plot(tempCO2data$Jun, tempCO2data$Trend,

xlab = "Temperature anomaly (degrees Celsius)",

ylab = "CO2 levels (trend, mole fraction)",

pch = 16, col = "blue")

title("Scatterplot for CO2 emissions and temperature anomalies")

cor(tempCO2data$Jun, tempCO2data$Trend)

str(tempCO2data)

plot(tempCO2data$Jun, type = "l", col = "blue", lwd = 2,

ylab = "June temperature anomalies", xlab = "Year")

tempCO2data$Jun <- ts(tempCO2data$Jun, start = c(1958), end = c(2017), frequency = 1)

tempCO2data$Trend <- ts(tempCO2data$Trend, start = c(1958), end = c(2017), frequency = 1)

str(tempCO2data)

plot(tempCO2data$Jun, type = "l", col = "blue", lwd = 2,

ylab = "June temperature anomalies", xlab = "Year")

title("June temperature anomalies and CO2 emissions")

### A graph with double Y axis

# Create extra margins used for the second axis

par(mar = c(5, 5, 2, 5))

plot(tempCO2data$Jun, type = "l", col = "blue", lwd = 2,

ylab = "June temperature anomalies", xlab = "Year")

title("June temperature anomalies and CO2 emissions")

# This puts the next plot into the same picture.

par(new = T)

# No axis, no labels

plot(tempCO2data$Trend, pch = 16, lwd = 2,

axes = FALSE, xlab = NA, ylab = NA, cex = 1.2)

axis(side = 4)

mtext(side = 4, line = 3, 'CO2 emissions')

legend("topleft", legend = c("June temp anom", "CO2 emis"),

lty = c(1, 1), col = c("blue", "black"), lwd = 2)

Stata Code used in the research on Dual Income Tax

*From progressive taxation to DIT - Complete DID2S Analysis for Academic Paper

clear all

set more off

*ssc install did2s, replace

*ssc install coefplot, replace

*ssc install estout, replace

set scheme plotplain, permanently

import excel "C:\Users\Michele\Desktop\DIT paper\Dataset_DIT_relaxed.xlsx", sheet("Sheet1") firstrow clear

sum

********************************** DATA PREPARATION *****************************

*** Premises for DID2S

** Generating Gvar or Cohort (same as CS)

tempvar aux

bysort country_id:egen `aux'=min(year) if tax_reform_dummy==1

bysort country_id:egen gvar=max(`aux')

replace gvar=0 if gvar==.

** Creating event study variables

* This creates the event dummies

gen event=year-gvar

* but shifted because stata doesnt like "negative" factor variables

replace event = event+28

* And we set the never treated as 0

replace event=0 if gvar==0

** Also label them to easy recognize the dynamic effects

forvalues i = 1/56 {

label define event `i' "T`=`i'-28'", modify

}

label define event 0 "base", modify

label values event event

** restricting event2 to window (-5,5) plus never treated

clonevar event2=event if event==0 | inrange(event-28,-5,5)

// START COMPREHENSIVE LOGGING

log using "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\DID2S_academic_analysis_w5.txt", text replace

display "================================================================="

display "COMPLETE DID2S ANALYSIS FOR ACADEMIC PUBLICATION"

display "================================================================="

display "Study: The Effect of Dual Post_tax_income Taxation on Post_tax_income Inequality"

display "Method: Two-Stage Difference-in-Differences (Gardner 2022)"

display "Sample: 15 OECD countries, 1980-2019"

display "Treatment: Introduction of Dual Post_tax_income Tax systems"

display "================================================================="

*************************** CONTROL GROUPS DEFINITION ***********************

display "================================================================="

display "THEORETICAL FRAMEWORK FOR CONTROL VARIABLES"

display "================================================================="

// Based on your H1-H5 theoretical framework

global controls_cycle "real_gdp_growth_pct_IMF unemployment_rate_IMF cpi_inflation_pct_IMF"

display "H1 - Business Cycle: $controls_cycle"

global controls_development "gdp_pc_real_WDI human_capital_idx_PWT population_mil_PWT"

display "H2 - Development Level: $controls_development"

global controls_production "labour_share_PWT tfp_index_PWT capital_services_idx_PWT avg_hours_worked_PWT"

display "H3 - Production Structure: $controls_production"

global controls_capital "real_internal_return_pct_PWT depreciation_rate_pct_PWT investment_pct_gdp_WDI"

display "H4 - Capital Markets: $controls_capital"

global controls_external "current_account_pct_gdp_IMF trade_open_WDI"

display "H5 - External Balance: $controls_external"

// Define main specification (for Table 1)

global main_controls "$controls_cycle $controls_development $controls_production"

display ""

display "MAIN SPECIFICATION (Table 1): $main_controls"

display "Theoretical justification: Captures business cycle effects, development level,"

display "and production structure - key channels affecting post_tax_income distribution"

*************************** OUTCOME VARIABLES ******************************

display "================================================================="

display "OUTCOME VARIABLES DEFINITION"

display "================================================================="

// Primary outcomes for main tables

global primary_outcomes "post_tax_top10 post_tax_top5 post_tax_top1"

global primary_names "Top 10%" "Top 5%" "Top 1%"

// Secondary outcomes for robustness/appendix

global secondary_outcomes "post_tax_next9 post_tax_next4 post_tax_bottom50 post_tax_bottom90"

// Ratio outcomes

global ratio_outcomes "post_tax_ratio_t10_b50 post_tax_ratio_t10_b90 post_tax_ratio_t1_b50 post_tax_ratio_t1_b90 post_tax_ratio_n9_b50 post_tax_ratio_n9_b90 post_tax_ratio_n4_b50 post_tax_ratio_n4_b90"

// All outcomes combined

global all_outcomes "$primary_outcomes $secondary_outcomes $ratio_outcomes"

display "Primary outcomes (main tables): $primary_outcomes"

display "Secondary outcomes (appendix): $secondary_outcomes"

display "Ratio outcomes (inequality measures): $ratio_outcomes"

*************************** MAIN DID2S ANALYSIS - ALL OUTCOMES **************

******************** RUN MAIN SPECIFICATION FOR ALL OUTCOMES **************

display "================================================================="

display "MAIN DID2S ANALYSIS - ALL OUTCOMES"

display "================================================================="

display "Specification: Business cycle + Development + Production structure"

display "Running main specification for ALL outcomes first"

display "================================================================="

// Run main specification for ALL outcomes (primary, secondary, and ratios)

foreach outcome in $all_outcomes {

display ""

display "=== MAIN ANALYSIS: `outcome' ==="

display "Controls: $main_controls"

did2s `outcome', first_stage(i.country_id i.year log_gdp_pc_real_WDI Total_tax_revenue_perc_GDP_OECD labour_share_PWT population_mil_PWT cpi_inflation_pct_IMF real_internal_return_pct_PWT unemployment_rate_IMF) ///

second_stage(i.event2) treatment(tax_reform_dummy) cluster(country_id)

estimates store main_`outcome'

display "Successfully stored: main_`outcome'"

}

*************************** KEY TREATMENT EFFECTS ***************************

******************** CALCULATE EFFECTS FOR PRIMARY OUTCOMES **************

display "================================================================="

display "KEY TREATMENT EFFECTS - PRIMARY OUTCOMES"

display "================================================================="

foreach outcome in $primary_outcomes {

display ""

display "=== KEY EFFECTS: `outcome' ==="

estimates restore main_`outcome'

// Calculate key treatment effects for reporting

display "Key treatment effects:"

// Year 0 effect (immediate)

display "Treatment year (T0): " %8.4f _b[28.event2] " (se=" %6.4f _se[28.event2] ")"

// Average short-term effect (T0 to T+2)

lincom (28.event2 + 29.event2 + 30.event2)/3

display "Short-term average (T0-T2): " %8.4f r(estimate) " (se=" %6.4f r(se) " p=" %5.3f r(p) ")"

// Average medium-term effect (T+3 to T+5)

lincom (31.event2 + 32.event2 + 33.event2)/3

display "Medium-term average (T3-T5): " %8.4f r(estimate) " (se=" %6.4f r(se) " p=" %5.3f r(p) ")"

// Overall post-treatment average

lincom (28.event2 + 29.event2 + 30.event2 + 31.event2 + 32.event2 + 33.event2)/6

display "Overall post-treatment: " %8.4f r(estimate) " (se=" %6.4f r(se) " p=" %5.3f r(p) ")"

}

*************************** TABLE 1: MAIN POST_TAX_RESULTS\ ***************************

display "================================================================="

display "CREATING TABLE 1: MAIN POST_TAX_RESULTS\"

display "================================================================="

// TABLE 1: Main Post_Tax_Results\ for Academic Paper

esttab main_post_tax_top10 main_post_tax_top5 main_post_tax_top1 ///

using "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\Table1_main_Post_Tax_Results_w5.tex", ///

replace booktabs ///

b(4) se(4) star(* 0.10 ** 0.05 *** 0.01) ///

title("Effect of Dual Post_tax_income Taxation on Top Post_tax_income Shares\label{tab:main_Post_Tax_Results\}") ///

mtitles("Top 10\%" "Top 5\%" "Top 1\%") ///

keep(*event2) ///

order(23.event2 24.event2 25.event2 26.event2 27.event2 28.event2 29.event2 30.event2 31.event2 32.event2 33.event2) ///

coeflabels(23.event2 "T-5" 24.event2 "T-4" 25.event2 "T-3" 26.event2 "T-2" 27.event2 "T-1" ///

28.event2 "T0" 29.event2 "T+1" 30.event2 "T+2" 31.event2 "T+3" 32.event2 "T+4" 33.event2 "T+5") ///

stats(N r2, fmt(0 3) labels("Observations" "R-squared")) ///

addnotes("Notes: This table reports coefficient estimates from two-stage difference-in-differences regressions." ///

"The dependent variables are post_tax_income shares held by the top 10\%, 5\%, and 1\% of earners." ///

"Treatment is the introduction of dual post_tax_income taxation. Event time T0 indicates the treatment year." ///

"All regressions include country and year fixed effects, and controls for business cycle," ///

"development level, and production structure variables. Standard errors are clustered by country." ///

"Significance levels: * p$<$0.10, ** p$<$0.05, *** p$<$0.01.")

// TABLE 1 Alternative: Summary coefficients only

esttab main_post_tax_top10 main_post_tax_top5 main_post_tax_top1 ///

using "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\Table1_summary_w5.tex", ///

replace booktabs ///

b(4) se(4) star(* 0.10 ** 0.05 *** 0.01) ///

title("Effect of Dual Post_tax_income Taxation on Top Post_tax_income Shares - Summary\label{tab:main_summary}") ///

mtitles("Top 10\%" "Top 5\%" "Top 1\%") ///

keep(28.event2 29.event2 30.event2) ///

coeflabels(28.event2 "Treatment year (T0)" 29.event2 "Year after (T+1)" 30.event2 "Two years after (T+2)") ///

stats(N r2, fmt(0 3) labels("Observations" "R-squared")) ///

addnotes("Notes: This table reports key coefficient estimates from two-stage difference-in-differences regressions." ///

"The dependent variables are post_tax_income shares held by the top 10\%, 5\%, and 1\% of earners." ///

"Treatment is the introduction of dual post_tax_income taxation. All regressions include country and year" ///

"fixed effects, and controls for business cycle, development, and production structure variables." ///

"Standard errors are clustered by country. * p$<$0.10, ** p$<$0.05, *** p$<$0.01.")

*************************** EVENT STUDY FIGURES ****************************

******************** FIGURE 1: MAIN POST_TAX_RESULTS\ PLOTS **********************

display "================================================================="

display "CREATING FIGURE 1: EVENT STUDY PLOTS - PRIMARY OUTCOMES"

display "================================================================="

// Create publication-quality event study plots

foreach outcome in $primary_outcomes {

display "Creating event study plot for `outcome'"

coefplot main_`outcome', ///

keep(*event2) vertical ///

yline(0, lcolor(black) lwidth(thin)) ///

xline(6, lcolor(red) lpattern(dash) lwidth(medium)) ///

levels(95) ///

ciopts(recast(rcap) lwidth(medium)) ///

msymbol(circle) msize(medium) mcolor(navy) ///

xlabel(1 "T-5" 2 "T-4" 3 "T-3" 4 "T-2" 5 "T-1" 6 "T0" 7 "T+1" 8 "T+2" 9 "T+3" 10 "T+4" 11 "T+5", ///

angle(0)) ///

ytitle("Effect on `outcome' (percentage points)", size(medsmall)) ///

xtitle("Years relative to DIT introduction", size(medsmall)) ///

title("") ///

graphregion(color(white)) plotregion(color(white)) ///

legend(off) ///

note("95% confidence intervals. Standard errors clustered by country." ///

"Vertical line indicates treatment year.", size(vsmall))

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\Figure1_`outcome'_event_study_w5.pdf", replace

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\Figure1_`outcome'_event_study_w5.png", replace

}

// Combined event study plot for main paper

coefplot (main_post_tax_top10, label("Top 10%") msymbol(circle) mcolor(navy)) ///

(main_post_tax_top5, label("Top 5%") msymbol(triangle) mcolor(maroon)) ///

(main_post_tax_top1, label("Top 1%") msymbol(square) mcolor(forest_green)), ///

keep(*event2) vertical ///

yline(0, lcolor(black) lwidth(thin)) ///

xline(6, lcolor(red) lpattern(dash) lwidth(medium)) ///

levels(95) ciopts(recast(rcap)) ///

xlabel(1 "T-5" 2 "T-4" 3 "T-3" 4 "T-2" 5 "T-1" 6 "T0" 7 "T+1" 8 "T+2" 9 "T+3" 10 "T+4" 11 "T+5", ///

angle(0)) ///

ytitle("Effect on post_tax_income share (percentage points)", size(medsmall)) ///

xtitle("Years relative to DIT introduction", size(medsmall)) ///

title("") ///

graphregion(color(white)) plotregion(color(white)) ///

legend(rows(1) size(small) position(12)) ///

note("95% confidence intervals. Standard errors clustered by country." ///

"Vertical line indicates treatment year.", size(vsmall))

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\Figure1_combined_event_study_w5.pdf", replace

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\Figure1_combined_event_study_w5.png", replace

*************************** EVENT STUDY FIGURES - SECONDARY OUTCOMES *********

******************** FIGURE A1: SECONDARY OUTCOMES PLOTS ******************

display "================================================================="

display "CREATING FIGURE A1: EVENT STUDY PLOTS - SECONDARY OUTCOMES"

display "================================================================="

// Individual plots for secondary outcomes

foreach outcome in $secondary_outcomes {

display "Creating event study plot for `outcome'"

coefplot main_`outcome', ///

keep(*event2) vertical ///

yline(0, lcolor(black) lwidth(thin)) ///

xline(6, lcolor(red) lpattern(dash) lwidth(medium)) ///

levels(95) ///

ciopts(recast(rcap) lwidth(medium)) ///

msymbol(circle) msize(medium) mcolor(navy) ///

xlabel(1 "T-5" 2 "T-4" 3 "T-3" 4 "T-2" 5 "T-1" 6 "T0" 7 "T+1" 8 "T+2" 9 "T+3" 10 "T+4" 11 "T+5", ///

angle(0)) ///

ytitle("Effect on `outcome' (percentage points)", size(medsmall)) ///

xtitle("Years relative to DIT introduction", size(medsmall)) ///

title("") ///

graphregion(color(white)) plotregion(color(white)) ///

legend(off) ///

note("95% confidence intervals. Standard errors clustered by country." ///

"Vertical line indicates treatment year.", size(vsmall))

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA1_`outcome'_event_study_w5.pdf", replace

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA1_`outcome'_event_study_w5.png", replace

}

// Combined plot for middle post_tax_income groups (Next 9%, Next 4%)

coefplot (main_post_tax_next9, label("Next 9% (P90-P99)") msymbol(circle) mcolor(navy)) ///

(main_post_tax_next4, label("Next 4% (P95-P99)") msymbol(triangle) mcolor(maroon)), ///

keep(*event2) vertical ///

yline(0, lcolor(black) lwidth(thin)) ///

xline(6, lcolor(red) lpattern(dash) lwidth(medium)) ///

levels(95) ciopts(recast(rcap)) ///

xlabel(1 "T-5" 2 "T-4" 3 "T-3" 4 "T-2" 5 "T-1" 6 "T0" 7 "T+1" 8 "T+2" 9 "T+3" 10 "T+4" 11 "T+5", ///

angle(0)) ///

ytitle("Effect on post_tax_income share (percentage points)", size(medsmall)) ///

xtitle("Years relative to DIT introduction", size(medsmall)) ///

title("") ///

graphregion(color(white)) plotregion(color(white)) ///

legend(rows(1) size(small) position(12)) ///

note("95% confidence intervals. Standard errors clustered by country." ///

"Vertical line indicates treatment year.", size(vsmall))

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA1_middle_groups_combined_w5.pdf", replace

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA1_middle_groups_combined_w5.png", replace

// Combined plot for bottom post_tax_income groups

coefplot (main_post_tax_bottom50, label("Bottom 50%") msymbol(circle) mcolor(navy)) ///

(main_post_tax_bottom90, label("Bottom 90%") msymbol(triangle) mcolor(maroon)), ///

keep(*event2) vertical ///

yline(0, lcolor(black) lwidth(thin)) ///

xline(6, lcolor(red) lpattern(dash) lwidth(medium)) ///

levels(95) ciopts(recast(rcap)) ///

xlabel(1 "T-5" 2 "T-4" 3 "T-3" 4 "T-2" 5 "T-1" 6 "T0" 7 "T+1" 8 "T+2" 9 "T+3" 10 "T+4" 11 "T+5", ///

angle(0)) ///

ytitle("Effect on post_tax_income share (percentage points)", size(medsmall)) ///

xtitle("Years relative to DIT introduction", size(medsmall)) ///

title("") ///

graphregion(color(white)) plotregion(color(white)) ///

legend(rows(1) size(small) position(12)) ///

note("95% confidence intervals. Standard errors clustered by country." ///

"Vertical line indicates treatment year.", size(vsmall))

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA1_bottom_groups_combined_w5.pdf", replace

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA1_bottom_groups_combined_w5.png", replace

*************************** EVENT STUDY FIGURES - RATIO OUTCOMES *************

******************** FIGURE A2: INEQUALITY RATIOS PLOTS ******************

display "================================================================="

display "CREATING FIGURE A2: EVENT STUDY PLOTS - INEQUALITY RATIOS"

display "================================================================="

// Individual plots for ratio outcomes

foreach outcome in $ratio_outcomes {

display "Creating event study plot for `outcome'"

coefplot main_`outcome', ///

keep(*event2) vertical ///

yline(0, lcolor(black) lwidth(thin)) ///

xline(6, lcolor(red) lpattern(dash) lwidth(medium)) ///

levels(95) ///

ciopts(recast(rcap) lwidth(medium)) ///

msymbol(circle) msize(medium) mcolor(navy) ///

xlabel(1 "T-5" 2 "T-4" 3 "T-3" 4 "T-2" 5 "T-1" 6 "T0" 7 "T+1" 8 "T+2" 9 "T+3" 10 "T+4" 11 "T+5", ///

angle(0)) ///

ytitle("Effect on `outcome'", size(medsmall)) ///

xtitle("Years relative to DIT introduction", size(medsmall)) ///

title("") ///

graphregion(color(white)) plotregion(color(white)) ///

legend(off) ///

note("95% confidence intervals. Standard errors clustered by country." ///

"Vertical line indicates treatment year.", size(vsmall))

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA2_`outcome'_event_study_w5.pdf", replace

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA2_`outcome'_event_study_w5.png", replace

}

// Combined plot for Top 10% ratios

coefplot (main_post_tax_ratio_t10_b50, label("Top10/Bottom50") msymbol(circle) mcolor(navy)) ///

(main_post_tax_ratio_t10_b90, label("Top10/Bottom90") msymbol(triangle) mcolor(maroon)), ///

keep(*event2) vertical ///

yline(0, lcolor(black) lwidth(thin)) ///

xline(6, lcolor(red) lpattern(dash) lwidth(medium)) ///

levels(95) ciopts(recast(rcap)) ///

xlabel(1 "T-5" 2 "T-4" 3 "T-3" 4 "T-2" 5 "T-1" 6 "T0" 7 "T+1" 8 "T+2" 9 "T+3" 10 "T+4" 11 "T+5", ///

angle(0)) ///

ytitle("Effect on inequality ratio", size(medsmall)) ///

xtitle("Years relative to DIT introduction", size(medsmall)) ///

title("") ///

graphregion(color(white)) plotregion(color(white)) ///

legend(rows(1) size(small) position(12)) ///

note("95% confidence intervals. Standard errors clustered by country." ///

"Vertical line indicates treatment year.", size(vsmall))

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA2_top10_ratios_combined_w5.pdf", replace

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA2_top10_ratios_combined_w5.png", replace

// Combined plot for Top 1% ratios

coefplot (main_post_tax_ratio_t1_b50, label("Top1/Bottom50") msymbol(circle) mcolor(navy)) ///

(main_post_tax_ratio_t1_b90, label("Top1/Bottom90") msymbol(triangle) mcolor(maroon)), ///

keep(*event2) vertical ///

yline(0, lcolor(black) lwidth(thin)) ///

xline(6, lcolor(red) lpattern(dash) lwidth(medium)) ///

levels(95) ciopts(recast(rcap)) ///

xlabel(1 "T-5" 2 "T-4" 3 "T-3" 4 "T-2" 5 "T-1" 6 "T0" 7 "T+1" 8 "T+2" 9 "T+3" 10 "T+4" 11 "T+5", ///

angle(0)) ///

ytitle("Effect on inequality ratio", size(medsmall)) ///

xtitle("Years relative to DIT introduction", size(medsmall)) ///

title("") ///

graphregion(color(white)) plotregion(color(white)) ///

legend(rows(1) size(small) position(12)) ///

note("95% confidence intervals. Standard errors clustered by country." ///

"Vertical line indicates treatment year.", size(vsmall))

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA2_top1_ratios_combined_w5.pdf", replace

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA2_top1_ratios_combined_w5.png", replace

// Combined plot for Next 9% and Next 4% ratios

coefplot (main_post_tax_ratio_n9_b50, label("Next9/Bottom50") msymbol(circle) mcolor(navy)) ///

(main_post_tax_ratio_n4_b50, label("Next4/Bottom50") msymbol(triangle) mcolor(maroon)), ///

keep(*event2) vertical ///

yline(0, lcolor(black) lwidth(thin)) ///

xline(6, lcolor(red) lpattern(dash) lwidth(medium)) ///

levels(95) ciopts(recast(rcap)) ///

xlabel(1 "T-5" 2 "T-4" 3 "T-3" 4 "T-2" 5 "T-1" 6 "T0" 7 "T+1" 8 "T+2" 9 "T+3" 10 "T+4" 11 "T+5", ///

angle(0)) ///

ytitle("Effect on inequality ratio", size(medsmall)) ///

xtitle("Years relative to DIT introduction", size(medsmall)) ///

title("") ///

graphregion(color(white)) plotregion(color(white)) ///

legend(rows(1) size(small) position(12)) ///

note("95% confidence intervals. Standard errors clustered by country." ///

"Vertical line indicates treatment year.", size(vsmall))

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA2_next_ratios_combined_w5.pdf", replace

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA2_next_ratios_combined_w5.png", replace

// Grand combined plot for key inequality ratios

coefplot (main_post_tax_ratio_t10_b50, label("Top10/Bot50") msymbol(circle) mcolor(navy)) ///

(main_post_tax_ratio_t1_b50, label("Top1/Bot50") msymbol(triangle) mcolor(maroon)) ///

(main_post_tax_ratio_n9_b50, label("Next9/Bot50") msymbol(square) mcolor(forest_green)) ///

(main_post_tax_ratio_n4_b50, label("Next4/Bot50") msymbol(diamond) mcolor(orange)), ///

keep(*event2) vertical ///

yline(0, lcolor(black) lwidth(thin)) ///

xline(6, lcolor(red) lpattern(dash) lwidth(medium)) ///

levels(95) ciopts(recast(rcap)) ///

xlabel(1 "T-5" 2 "T-4" 3 "T-3" 4 "T-2" 5 "T-1" 6 "T0" 7 "T+1" 8 "T+2" 9 "T+3" 10 "T+4" 11 "T+5", ///

angle(0)) ///

ytitle("Effect on inequality ratio", size(medsmall)) ///

xtitle("Years relative to DIT introduction", size(medsmall)) ///

title("") ///

graphregion(color(white)) plotregion(color(white)) ///

legend(rows(2) size(small) position(12)) ///

note("95% confidence intervals. Standard errors clustered by country." ///

"Vertical line indicates treatment year. All ratios relative to bottom 50%.", size(vsmall))

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA2_all_ratios_combined_w5.pdf", replace

graph export "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\FigureA2_all_ratios_combined_w5.png", replace

*************************** ROBUSTNESS ANALYSIS *****************************

******************** TABLE 2: ROBUSTNESS TO CONTROLS ********************

display "================================================================="

display "ROBUSTNESS ANALYSIS - TABLE 2"

display "================================================================="

display "Purpose: Show main results robust to alternative control specifications"

display "Selected outcomes: Top 10%, Top 1%, Next 4%, Next9/Bottom90 ratio"

display "================================================================="

// Define robustness specifications

global spec1 "$controls_cycle $controls_development"

global spec2 "$controls_cycle $controls_production"

global spec3 "$controls_cycle $controls_capital"

global spec4 "$controls_cycle $controls_external"

global spec5 "$main_controls" // This is our main specification

global spec6 "$controls_cycle $controls_development $controls_capital"

global spec7 "$controls_cycle $controls_production $controls_capital"

global spec8 "$controls_cycle $controls_development $controls_external"

local spec_names `"Baseline" "Production" "Capital" "External" "Main" "Capital+" "Supply" "Open""'

// MODIFIED: Use specific robustness outcomes instead of primary outcomes

global robustness_outcomes "post_tax_top10 post_tax_top1 post_tax_next4 post_tax_ratio_n9_b90"

display "Selected outcomes for robustness: $robustness_outcomes"

// Store results for robustness table

tempname results_file

postfile `results_file' str30 outcome byte spec double effect double se double pvalue using robustness_results_w5, replace

foreach outcome in $robustness_outcomes {

display ""

display "=== ROBUSTNESS ANALYSIS: `outcome' ==="

forvalues s = 1/8 {

display "--- Specification `s': ${spec`s'} ---"

capture did2s `outcome', first_stage(i.country_id i.year ${spec`s'}) ///

second_stage(i.event2) treatment(tax_reform_dummy) cluster(country_id)

if _rc == 0 {

estimates store rob_`s'_`outcome'

// Calculate average post-treatment effect for robustness comparison

capture lincom (28.event2 + 29.event2 + 30.event2 + 31.event2 + 32.event2 + 33.event2)/6

if _rc == 0 {

local avg_effect = r(estimate)

local avg_se = r(se)

local avg_pval = r(p)

post `results_file' ("`outcome'") (`s') (`avg_effect') (`avg_se') (`avg_pval')

display "Average post-treatment effect: " %8.4f `avg_effect' " (se=" %6.4f `avg_se' " p=" %5.3f `avg_pval' ")"

}

else {

display "ERROR: Could not calculate average effect for specification `s'"

post `results_file' ("`outcome'") (`s') (.) (.) (.)

}

else {

display "ERROR: Regression failed for specification `s'"

post `results_file' ("`outcome'") (`s') (.) (.) (.)

}

postclose `results_file'

*************************** CREATE ROBUSTNESS TABLES IN TXT FORMAT ***************************

// First, check which estimates actually exist

foreach outcome in $robustness_outcomes {

display ""

display "=== Checking stored estimates for `outcome' ==="

forvalues s = 1/4 {

capture estimates describe rob_`s'_`outcome'

if _rc == 0 {

display "rob_`s'_`outcome': FOUND"

}

else {

display "rob_`s'_`outcome': NOT FOUND"

}

// Create individual robustness tables in TXT format (only for existing estimates)

foreach outcome in $robustness_outcomes {

display "Creating robustness table for `outcome'"

// Check if at least the first 4 estimates exist

local has_estimates = 0

forvalues s = 1/4 {

capture estimates describe rob_`s'_`outcome'

if _rc == 0 {

local has_estimates = 1

}

if `has_estimates' == 1 {

// Create a simple text table

capture file open robustness_file using "Table2_robustness_`outcome'_w5.txt", write replace

if _rc == 0 {

file write robustness_file "Robustness to Control Selection: `outcome'" _n

file write robustness_file "========================================" _n _n

file write robustness_file "Specification" _tab "T0 Coeff" _tab "T0 SE" _tab "T+1 Coeff" _tab "T+1 SE" _tab "T+2 Coeff" _tab "T+2 SE" _tab "N" _n

file write robustness_file "-------------" _tab "--------" _tab "-----" _tab "---------" _tab "------" _tab "---------" _tab "------" _tab "---" _n

forvalues s = 1/4 {

capture estimates restore rob_`s'_`outcome'

if _rc == 0 {

// Get coefficients and standard errors

local spec_name : word `s' of "Baseline" "Production" "Capital" "External"

capture local t0_coef = _b[28.event2]

capture local t0_se = _se[28.event2]

capture local t1_coef = _b[29.event2]

capture local t1_se = _se[29.event2]

capture local t2_coef = _b[30.event2]

capture local t2_se = _se[30.event2]

capture local obs = e(N)

// Handle missing values

if "`t0_coef'" == "" local t0_coef = "."

if "`t0_se'" == "" local t0_se = "."

if "`t1_coef'" == "" local t1_coef = "."

if "`t1_se'" == "" local t1_se = "."

if "`t2_coef'" == "" local t2_coef = "."

if "`t2_se'" == "" local t2_se = "."

if "`obs'" == "" local obs = "."

file write robustness_file "`spec_name'" _tab %8.4f (`t0_coef') _tab %6.4f (`t0_se') _tab %8.4f (`t1_coef') _tab %6.4f (`t1_se') _tab %8.4f (`t2_coef') _tab %6.4f (`t2_se') _tab %8.0f (`obs') _n

}

else {

local spec_name : word `s' of "Baseline" "Production" "Capital" "External"

file write robustness_file "`spec_name'" _tab "." _tab "." _tab "." _tab "." _tab "." _tab "." _tab "." _n

}

file write robustness_file _n "Notes:" _n

file write robustness_file "This table shows robustness of main results to alternative control specifications." _n

file write robustness_file "T0 = Treatment year, T+1 = Year after, T+2 = Two years after" _n

file write robustness_file "Baseline: business cycle + development." _n

file write robustness_file "Production: business cycle + production structure." _n

file write robustness_file "Capital: business cycle + capital markets." _n

file write robustness_file "External: business cycle + external balance." _n

file write robustness_file "Standard errors clustered by country." _n

file close robustness_file

display "Created: Table2_robustness_`outcome'_w5.txt"

}

else {

display "ERROR: Could not create file for `outcome'"

}

else {

display "SKIPPED: No valid estimates found for `outcome'"

}

*************************** COEFFICIENT STABILITY TABLE IN TXT FORMAT ***************************

use robustness_results_w5, clear

// Export to TXT format instead of Excel

outsheet using "coefficient_stability_selected_outcomes_w5.txt", replace

// Create a formatted summary table

file open summary_file using "coefficient_stability_summary_w5.txt", write replace

file write summary_file "COEFFICIENT STABILITY RESULTS - SELECTED OUTCOMES" _n

file write summary_file "=================================================" _n _n

file write summary_file "Outcome" _tab "Spec1" _tab "Spec2" _tab "Spec3" _tab "Spec4" _tab "Spec5" _tab "Mean" _tab "StdDev" _tab "CV" _n

file write summary_file "-------" _tab "-----" _tab "-----" _tab "-----" _tab "-----" _tab "-----" _tab "----" _tab "------" _tab "--" _n

foreach outcome in post_tax_top10 post_tax_top1 post_tax_next4 post_tax_ratio_n9_b90 {

preserve

keep if outcome == "`outcome'"

if _N > 0 {

// Initialize locals with missing values

local eff1 = .

local eff2 = .

local eff3 = .

local eff4 = .

local eff5 = .

// Get effects for each specification (proper Stata syntax)

forvalues s = 1/5 {

capture local temp_eff = effect[`s'] if spec[`s'] == `s'

if _rc == 0 & !missing(effect[`s']) {

local eff`s' = effect[`s']

}

// Alternative method: loop through observations

forvalues i = 1/`=_N' {

if spec[`i'] == 1 & !missing(effect[`i']) {

local eff1 = effect[`i']

}

if spec[`i'] == 2 & !missing(effect[`i']) {

local eff2 = effect[`i']

}

if spec[`i'] == 3 & !missing(effect[`i']) {

local eff3 = effect[`i']

}

if spec[`i'] == 4 & !missing(effect[`i']) {

local eff4 = effect[`i']

}

if spec[`i'] == 5 & !missing(effect[`i']) {

local eff5 = effect[`i']

}

// Calculate summary statistics

summarize effect if !missing(effect), detail

if r(N) > 0 {

local mean_val = r(mean)

local sd_val = r(sd)

// Calculate coefficient of variation

if `mean_val' != 0 {

local cv_val = abs(`sd_val'/`mean_val')

}

else {

local cv_val = .

}

else {

local mean_val = .

local sd_val = .

local cv_val = .

}

// Write to file (handle missing values)

if `eff1' == . local eff1_str = "."

else local eff1_str = string(`eff1', "%6.4f")

if `eff2' == . local eff2_str = "."

else local eff2_str = string(`eff2', "%6.4f")

if `eff3' == . local eff3_str = "."

else local eff3_str = string(`eff3', "%6.4f")

if `eff4' == . local eff4_str = "."

else local eff4_str = string(`eff4', "%6.4f")

if `eff5' == . local eff5_str = "."

else local eff5_str = string(`eff5', "%6.4f")

if `mean_val' == . local mean_str = "."

else local mean_str = string(`mean_val', "%6.4f")

if `sd_val' == . local sd_str = "."

else local sd_str = string(`sd_val', "%6.4f")

if `cv_val' == . local cv_str = "."

else local cv_str = string(`cv_val', "%6.4f")

file write summary_file "`outcome'" _tab "`eff1_str'" _tab "`eff2_str'" _tab "`eff3_str'" _tab "`eff4_str'" _tab "`eff5_str'" _tab "`mean_str'" _tab "`sd_str'" _tab "`cv_str'" _n

// Display interpretation

display ""

display "--- `outcome' ---"

display "Mean effect: `mean_str'"

display "Std deviation: `sd_str'"

display "Coefficient of variation: `cv_str'"

if `cv_val' != . {

if `cv_val' < 0.1 {

display "INTERPRETATION: Highly robust (CV < 0.1)"

}

else if `cv_val' < 0.3 {

display "INTERPRETATION: Moderately robust (CV < 0.3)"

}

else {

display "INTERPRETATION: Sensitive to controls (CV >= 0.3)"

}

restore

}

file write summary_file _n "Notes:" _n

file write summary_file "Spec1=Baseline, Spec2=Production, Spec3=Capital, Spec4=External, Spec5=Main" _n

file write summary_file "CV = Coefficient of Variation (StdDev/Mean)" _n

file write summary_file "CV < 0.1: Highly robust" _n

file write summary_file "CV < 0.3: Moderately robust" _n

file write summary_file "CV >= 0.3: Sensitive to controls" _n

file close summary_file

display ""

display "================================================================="

display "FILES CREATED (TXT FORMAT):"

display "- robustness_results_w5.dta: Raw robustness results"

display "- coefficient_stability_selected_outcomes_w5.txt: CSV format"

display "- coefficient_stability_summary_w5.txt: Formatted summary"

display "- Table2_robustness_[outcome]_w5.txt: Individual outcome tables"

display "================================================================="

*************************** ROBUSTNESS STATISTICS FOR ACADEMIC DISCUSSION ****************************

display "================================================================="

display "ROBUSTNESS STATISTICS FOR ACADEMIC DISCUSSION"

display "================================================================="

use robustness_results_w5, clear

foreach outcome in post_tax_top10 post_tax_top1 post_tax_next4 post_tax_ratio_n9_b90 {

display ""

display "--- Robustness Statistics: `outcome' ---"

preserve

keep if outcome == "`outcome'"

drop if missing(effect)

if _N > 0 {

summarize effect, detail

local mean_effect = r(mean)

local min_effect = r(min)

local max_effect = r(max)

local sd_effect = r(sd)

local range_effect = r(max) - r(min)

display "Mean effect across specifications: " %8.4f `mean_effect'

display "Standard deviation across specs: " %8.4f `sd_effect'

display "Range (max - min): " %8.4f `range_effect'

if `mean_effect' != 0 {

local cv_effect = abs(`sd_effect'/`mean_effect')

display "Coefficient of variation: " %8.4f `cv_effect'

// Academic interpretation

if `cv_effect' < 0.1 {

display "INTERPRETATION: Highly robust (CV < 0.1)"

display "TEXT: 'Results are highly robust to control specification (CV = " %5.3f `cv_effect' ")'"

}

else if `cv_effect' < 0.3 {

display "INTERPRETATION: Moderately robust (CV < 0.3)"

display "TEXT: 'Results show moderate robustness to control specification (CV = " %5.3f `cv_effect' ")'"

}

else {

display "INTERPRETATION: Sensitive to controls (CV >= 0.3)"

display "TEXT: 'Results show some sensitivity to control specification (CV = " %5.3f `cv_effect' ")'"

}

// Show range of estimates for academic text

display ""

display "FOR ACADEMIC TEXT:"

display "The estimated effect ranges from " %6.4f `min_effect' " to " %6.4f `max_effect' " percentage points"

display "across different control specifications (mean = " %6.4f `mean_effect' " pp)."

}

restore

}

log close

// Clean up and export robustness data

use robustness_results_w5, clear

export delimited using "C:\Users\Michele\Desktop\DIT paper\Post_Tax_Results\Relaxed\DID2S\window_5\robustness_summary_academic_w5.csv", replace

display "================================================================="

display "ACADEMIC ANALYSIS COMPLETED"

display "All tables and figures ready for academic publication"

display "All files saved to: window_5 subfolder with w5 suffix"

display "================================================================="

Python code used in the research on Taxation on Flour

import os

import time

import PyPDF2

from selenium import webdriver

from selenium.webdriver.common.by import By

from selenium.webdriver.support.ui import Select

from selenium.webdriver.support.ui import WebDriverWait

from selenium.webdriver.support import expected_conditions as EC

def analyze_pdf(file_path, keywords):

with open(file_path, 'rb') as file:

reader = PyPDF2.PdfReader(file)

found_words = []

for page_num, page in enumerate(reader.pages, 1):

text = page.extract_text()

lines = text.split('\n')

for keyword in keywords:

keyword_lower = keyword#.lower()

for line_num, line in enumerate(lines, 1):

if keyword_lower in line:#.lower():

position = f"Page {page_num}, Line {line_num}"

found_words.append((keyword, position))

return found_words

# Setup the webdriver

options = webdriver.ChromeOptions()

download_dir = "C:\\Users\\Michele\\Desktop\\Oxford\\File Prezzi Beni Agricoli"

prefs = {"download.default_directory": download_dir}

options.add_experimental_option("prefs", prefs)

driver = webdriver.Chrome(options=options)

url = "https://www.gazzettaufficiale.it/ricerca/pdf/preRsi/foglio_ordinario1/1/0/0?reset=true"

year = 1868

# Keywords to search for in PDFs

keywords = ["TAB"]

# wait 5 seconds

time.sleep(5)

# Navigate to the page

driver.get(url)

# Wait for and select the year

year_dropdown = WebDriverWait(driver, 10).until(

EC.presence_of_element_located((By.ID, "annoPubblicazione"))

)

Select(year_dropdown).select_by_value(str(year))

# Find and click the "Cerca" button

cerca_button = driver.find_element(By.XPATH, "//input[@type='submit' and @value='Cerca']")

cerca_button.click()

# Wait for the results page to load

WebDriverWait(driver, 10).until(

EC.presence_of_element_located((By.CLASS_NAME, "elenco_pdf"))

)

# Find all download links

download_links = driver.find_elements(By.XPATH, "//a[contains(@class, 'download_pdf')]")

# create a folder to store the log file, called with the current date and time

import datetime

now = datetime.datetime.now()

log_folder = now.strftime("%Y-%m-%d_%H-%M-%S")

os.makedirs(log_folder, exist_ok=True)

# Process each PDF

for i, link in enumerate(download_links):

# Click download link

link.click()

time.sleep(5) # Wait for download to complete

# Get the name of the most recently downloaded file

list_of_files = os.listdir(download_dir)

latest_file = max([os.path.join(download_dir, f) for f in list_of_files], key=os.path.getctime)

print(f"Processing {latest_file}")

# Analyze the PDF

found_words = analyze_pdf(latest_file, keywords)

print(f"Found {len(found_words)} keywords")

if found_words:

# Keep the file and log the findings

log_file_name = "pdf_analysis_log.txt"

log_file_path = os.path.join(log_folder, log_file_name)

with open(log_file_path, 'a') as log_file:

log_file.write(f"File: {latest_file}\n")

for word, location in found_words:

log_file.write(f"Found keyword '{word}' at {location}\n")

log_file.write("\n\n")

else:

# Delete the file if no keywords found

os.remove(latest_file)

print(f"Deleted {latest_file}")

print(f"Processed file {i+1} of {len(download_links)}")

print()

if i == 30:

break

#print("Waiting for 2 seconds")

#time.sleep(2)

print("All PDFs have been processed.")

# Close the browser

driver.quit()

Code

R Code used to present "R" to students

Stata Code used in the research on Dual Income Tax

Python code used in the research on Taxation on Flour

contact me at: m.ceraolo@student.unisi.it