cd "G:\My Drive\Madhav\11th grade\ECON258\minwage"
*********************************************************************************************************************************************************************************************************readin state gdp dataimport excel using "dol_mw_history.xlsx", sheet(state_gdp) first clear //Data source: https://apps.bea.gov/itable/?ReqID=70&step=1&acrdn=1drop if state==""reshape long gdp, i(state) j(year)save state_gdp, replace *********************************************************************************************************************************************************************************************************Read in minimum wage data. Plot changes in effective min wage for states****************************************************************************************************import excel using "dol_mw_history.xlsx", sheet(clean_2000_2022) first clear//The data in the excel file was copied from https://www.dol.gov/agencies/whd/state/minimum-wage/historydrop if state==""reshape long mw, i(state) j(year)destring mw, gen (minw) force gen xx=minw if state_id=="US"egen fmin=max(xx), by(year)gen eminw=max(minw, fmin) //Effective minimum wage is higher of state minimum wage or the federal minimum wagedrop xxdrop if state_c==. //Guam and Virgin Islands are dropped; no data in QWI anyway for these locationsdrop if state_id=="US"keep if year>=2010/**/
// Plotting graph of minimum wage for all 50 states + 2 other regions (DC and Puerto Rico)twoway connected eminw year, by(state, title("Effective Minimum Wage: 2010-2022", size(small)) graphregion(fcolor(white)) note("Source: https://www.dol.gov/agencies/whd/state/minimum-wage/history" "Note: 21 States have binding Federal Minimum Wage, which did not change between 2010 and 2022.", size(vsmall) span)) scale(0.9) ytitle("", size(small)) xtitle("") xlabel(2010(4)2022) name(allmw, replace)graph export all, as(png) replace
//Define top states with the largest growth in minimum wage between 2010 and 2022, and also idetify states with no changeegen maxemw=max(eminw), by(state)egen minemw=min(eminw), by(state)gen agr_emw=(max-mine)/mineegen tag=tag(state)
egen xx=rank(agr) if tag==1egen yy=group(xx)egen rank_ch=max(yy), by(state)tabstat agr if tag==1, by(rank) stats(N min max)//This shows that 21 states have zero growth. Also growth of 0.60 is agood break point. This gives 14 top changing states.
gen nochange=(rank<=1)gen topchange=(rank>=18)gen top_vs_none=(topchange) if topchange==1|nochange==1 //To plot graphs of Top vs no changerskeep state* year eminw mine maxe agr rank nochange topch top_vs_nonemerge 1:1 state year using state_gdpdrop _msave data_minw_short, replace**********************************************************************************************************************************************************************************************************Read in industry codes********************************************************************************************************************************************************************************************************import excel using dol_mw_history.xlsx, sheet(ind_codes) first clear //The industry codes are taken from the source data webpage https://www.dol.gov/agencies/whd/state/minimum-wage/historysort industrysave ind_codes, replace**********************************************************************************************************************************************************************************************************Import QWI data with Industry X Sex X Age X Firmsize split to Stata**Downloaded as CSV from Quarterly Workforce Indicators (https://ledextract.ces.census.gov/qwi/all)********************************************************************************************************************************************************************************************************import delimited "qwi_firmsize_sex_age.csv", clearlabel var earns "Incumbents Average Monthly Earnings"**EarnS: "Full Quarter Employment (Stable): Average Monthly Earnings"; description: Average monthly earnings of employees with stable jobs (i.e., worked with the same firm throughout the quarter).compresslabel define labelfirmsize 0 "All Firm Sizes" 1 "0-19 Employees" 2 "20-49 Employees" 3 "50-249 Employees" 4 "250-499 Employees" 5 "500+ Employees", replace //From https://ledextract.ces.census.gov/qwi/alllabel define labelsex 0 "All Sexes" 1 "Male" 2 "Female"egen agegroup=group(agegrp)replace agegroup=agegroup-1 // Changes A00 to 0, A01 to 1, etcdrop agegrplabel define labelage 0 "All Ages (14-99)" 1 "14-18" 2 "19-21" 3 "22-24" 4 "25-34" 5 "35-44" 6 "45-54" 7 "55-64" 8 "65-99", replace // From https://ledextract.ces.census.gov/qwi/alllabel values firmsize labelfirmsize, nofixlabel values sex labelsex, nofixlabel values age labelage, nofix**Tagging quartiles of industry by average wagekeep if year>=2010save qwi_firmsize_sex_age, replace**********************************************************************************************************************************************************************************************************Exploring which sector has the highest vs lowest average monthly wages for the 2010 to 2021, to reduce the data to those industries********************************************************************************************************************************************************************************************************use industry year earns earnhiras sex firmsize age geography using qwi_firmsize_sex_age if sex==0 & firmsize==0 & age==0 & geography ==0, clear collapse (mean) earns, by(industry)drop if industry=="00"save temp_earn, replacemerge 1:1 industry using ind_codeskeep if _m==3capture ssc install grstyle // This gives a nice style to the graphsgrstyle initgrstyle set symbol tuftegen xx=" ("gen yy=")"egen inddc=concat(ind_desc xx industry yy) //This combines the industry number to the namegraph hbar earns,over(inddc,sort(earns) label(labsize(small))) scale(0.8) graphregion(fcolor(white)) ytitle("") title("Average Monthly Earnings (in Dollars, 2010-2022)", size(small)) blabel(industry) name(b1, replace) note("Source: Quarterly Workforce Indicators (https://ledextract.ces.census.gov/qwi/all)", size(small) span)graph export meanearn, as(png) replace//Accommodation and Food services (NAICS72) and Retail Trade (44-45) have the lowest wage****Check if these are big in terms of employment by industryuse industry year emp sex firmsize age geography quarter using qwi_firmsize_sex_age if sex==0 & firmsize==0 & age==0 & geography ==0, clearcollapse (mean) emp, by(industry)replace emp=emp/10^6drop if industry=="00"merge 1:1 industry using ind_codeskeep if _m==3capture ssc install grstyle // This gives a nice style to the graphsgrstyle initgrstyle set symbol tuftegen xx=" ("gen yy=")"egen inddc=concat(ind_desc xx industry yy) //This combines the industry number to the namegraph hbar emp,over(inddc,sort(emp) label(labsize(small))) scale(0.8) graphregion(fcolor(white)) ytitle("") title("Average Monthly Employment (in Mns, 2010-2022)", size(small)) blabel(industry) name(b1, replace) note("Source: Quarterly Workforce Indicators (https://ledextract.ces.census.gov/qwi/all)", size(small) span) //Retail is second biggest and Acco & Food is the 4th biggest 2-digit NAICS sector in employment termsgraph export indemp, as(png) replacecapture drop _mmerge 1:1 industry using temp_earnscatter earns emp, ml(ind_short) ytitle("Average Monthly Earnings (in dollars)", size(small)) xtitle("Average Monthly Employment (in Mns)", size(small)) graphregion(fcolor(white))graph export emp_vs_earn, as(png) replace// Acco (72) and retail (44-45) have lowest wages and are pretty big. So will focus on those.
**********************************************************************************************************************************************************************************************************Some overall exploratory graphs for by SEX X AGE for Acco(72) and Retail (44-45)********************************************************************************************************************************************************************************************************use qwi_firmsize_sex_age if (industry=="72"||industry=="44-45") & year>=2010 & geography ~=0, clear rename geography state_codemerge m:1 state_code year using data_minw_short/**tab state if _m==2
state | Freq. Percent Cum.---------------------+----------------------------------- Alaska | 6 40.00 40.00 Arkansas | 4 26.67 66.67 Michigan | 1 6.67 73.33 Mississippi | 4 26.67 100.00---------------------+----------------------------------- Total | 15 100.00
**6 years missing for Alaska (2017-22), 4 for Arkansas and Mississippi (19-22) and 1 for Michigan. Michigan also has only 3 quarters for 2021. But not dropping it as I have only 2 quarters missing from 49 possible quarters***/drop if state=="Alaska"|state=="Arkansas"|state=="Mississippi"keep if _m==3collapse (sum) emp (mean) earns (mean) eminw nochange topch top_vs_none gdp, by(state year quarter sex age firmsize ) // This gives sum of employment for Acco+ Retail, and average earns for the two sectors
collapse (mean) emp earns (mean) eminw nochange topch top_vs_none gdp, by(state year sex age firmsize) //This gives mean emp and earns for the year. This deals with some misisng quarter data. Also it smoothes out seasonality.gen learns=log(earns)gen lemp=log(emp)gen lgdp=log(gdp)save temp_sex_age_fsize, replace
***********************************************************************************************************************************************************************************Exploring variation by Age (all ages vs Age 14-18); no break by firm size********************************************************************************************************************************************************************************use temp_sex_age_fsize if firmsize==0 & (age==0|age==1),cleargen date=year //I had tried also quarter foreach i in 0 1 { egen mles0a`i'=mean(learns) if sex==0 & age==`i', by(top_vs_none date) egen mes0a`i'=mean(earns) if sex==0 & age==`i', by(top_vs_none date) egen mlemps0a`i'=mean(lemp) if sex==0 & age==`i', by(top_vs_none date) egen memps0a`i'=mean(emp) if sex==0 & age==`i', by(top_vs_none date)
}// Define a tag variable to retain one observation for each group (top and nochange) by date egen taga0=tag(top_vs date) if mles0a0!=.egen taga1=tag(top_vs date) if mles0a1!=.
//This is repeated in every graph, so put into global macroglobal grcommon legend(order(1 "Top Changers" 2 "No change")) graphregion(fcolor(white)) ytitle("") xtitle("") scale(0.8) xlabel(2010(3)2022)
//The employment had too many zeros, so made it 000sreplace memps0a0=memps0a0/1000replace memps0a1=memps0a1/1000 //Overall average monthly earnings by min wage change groups twoway (connected mes0a0 date if top_vs==1 & taga0==1) (connected mes0a0 date if top_vs==0 & taga0==1), title("Average Monthly Earnings: Full Sample", size(medium)) $grcommon name(s0a0es, replace)//Overall average monthly emp by min wage change groupstwoway (connected memps0a0 date if top_vs==1 & taga0==1) (connected memps0a0 date if top_vs==0 & taga0==1) , title("Average Monthly Employment (000s): Full Sample", size(medium)) $grcommon name(s0a0em, replace)
//Average earnings for teenagers ( age 14-18) by min wage change groups twoway (connected mes0a1 date if top_vs==1 & taga1==1) (connected mes0a1 date if top_vs==0 & taga1==1), title("Average Monthly Earnings: 14-18 year-olds", size(medium)) $grcommon name(s0a1es, replace)//Average monthly employment for teenagers ( age 14-18) by min wage change groups twoway (connected memps0a1 date if top_vs==1 & taga1==1) (connected memps0a1 date if top_vs==0 & taga1==1) , title("Average Monthly Employment (000s): 14-18 year-olds", size(medium)) $grcommon name(s0a1em, replace)
graph combine s0a0es s0a0em s0a1es s0a1em , graphregion(fcolor(white)) title("Firms: All, Sectors: Accommodation and Food Services (72) & Retail Trade (44-45)", size(small) ) note("Note: The figure compares trends in the top 14 states with the biggest change in the effective minimum wage to the 21 states with no change" "in the effective minimum wage in the 2010-2022 period", size(vsmall) span) scale(0.9)// Earnings gap widens between high change and no change; employment difference at end seems to suggest employment went up more in no change statesgraph export trendsall_level, as(png) replace
********************************************************************************************************************************************************************************
use temp_sex_age_fsize if firmsize==0,clearlabel var eminw "Effective Minimum Wage ($/hr)"label var lgdp "Log State Nominal GDP (in $ mn)"
su earns if sex==0 & age==0 local mean=int(1000*r(mean))/1000local sd=int(1000*r(sd))/1000//reghdfe with absorb(state year) is same as reg with i.state and i.year, except the state/year coefficients are suppressedreghdfe earns eminw if sex==0 & age==0 , absorb(state year) cluster(state)outreg2 using base_table, replace addtext(DVmean, `mean', DVsd, `sd', Sex, All, Age, All, Firmsize, All, State FE, Yes, Year FE, Yes) dec(4) label excelreghdfe earns eminw lgdp if sex==0 & age==0 , absorb(state year) cluster(state)outreg2 using base_table, append addtext(DVmean, `mean', DVsd, `sd', Sex, All, Age, All, Firmsize, All, State FE, Yes, Year FE, Yes) dec(4) label excel
foreach xx in learns lemp { su `xx' if sex==0 & age==0 local mean=int(1000*r(mean))/1000 local sd=int(1000*r(sd))/1000
reghdfe `xx' eminw if sex==0 & age==0 , absorb(state year) cluster(state) outreg2 using base_table, append addtext(DVmean, `mean', DVsd, `sd', Sex, All, Age, All, Firmsize, All, State FE, Yes, Year FE, Yes) dec(4) label excel reghdfe `xx' eminw lgdp if sex==0 & age==0 , absorb(state year) cluster(state) outreg2 using base_table, append addtext(DVmean, `mean', DVsd, `sd', Sex, All, Age, All, Firmsize, All, State FE, Yes, Year FE, Yes) dec(4) label excel}**************************************************************************************************Check to excluding Covid yearskeep if year<2020su earns if sex==0 & age==0 local mean=int(1000*r(mean))/1000local sd=int(1000*r(sd))/1000reghdfe earns eminw if sex==0 & age==0 , absorb(state year) cluster(state)outreg2 using base_table, append addtext(DVmean, `mean', DVsd, `sd', Sex, All, Age, All, Firmsize, All, State FE, Yes, Year FE, Yes, Excovid, Yes) dec(4) label excelreghdfe earns eminw lgdp if sex==0 & age==0 , absorb(state year) cluster(state)outreg2 using base_table, append addtext(DVmean, `mean', DVsd, `sd', Sex, All, Age, All, Firmsize, All, State FE, Yes, Year FE, Yes, Excovid, Yes) dec(4) label excel
foreach xx in learns lemp { su `xx' if sex==0 & age==0 local mean=int(1000*r(mean))/1000 local sd=int(1000*r(sd))/1000
reghdfe `xx' eminw if sex==0 & age==0 , absorb(state year) cluster(state) outreg2 using base_table, append addtext(DVmean, `mean', DVsd, `sd', Sex, All, Age, All, Firmsize, All, State FE, Yes, Year FE, Yes, Excovid, Yes) dec(4) label excel reghdfe `xx' eminw lgdp if sex==0 & age==0 , absorb(state year) cluster(state) outreg2 using base_table, append addtext(DVmean, `mean', DVsd, `sd', Sex, All, Age, All, Firmsize, All, State FE, Yes, Year FE, Yes, Excovid, Yes) dec(4) label excel}
***********************************************************************************************************************************************************************************Plotting Graphs from Regression Analysis, with Splits by Sex and Age ********************************************************************************************************************************************************************************capture program drop reggraphsprogram define reggraphs foreach xx in all male female { foreach yy in earns learns lemp { matrix `xx'_`yy'=J(8,3,.) // Matrix to store the coefficients and standard errors matrix coln `xx'_`yy' = `xx'_`yy'_agec `xx'_`yy'_mcoef `xx'_`yy'_mse //Column names
} } //These macros help to use strings all/male/female for loop though data has 0/1/2. This helps to keep the string description for the matrices global dall 0 global dmale 1 global dfemale 2 forv i=1/8{ foreach xx in all male female { foreach yy in earns learns lemp { //reghdfe with absorb(state year) does the same as i.state and i.year without showing those reghdfe `yy' eminw lgdp if sex==${d`xx'} & age==`i' , absorb(state year) cluster(state) matrix `xx'_`yy'[`i',1]=`i' matrix `xx'_`yy'[`i',2]=_b[eminw] matrix `xx'_`yy'[`i',3]=_se[eminw] } } }
label define labelage 0 "All Ages (14-99)" 1 "14-18" 2 "19-21" 3 "22-24" 4 "25-34" 5 "35-44" 6 "45-54" 7 "55-64" 8 "65-99", replace foreach xx in all male female { foreach yy in earns learns lemp{ svmat `xx'_`yy', names(col) gen `xx'_`yy'_up=`xx'_`yy'_mc+1.96*`xx'_`yy'_mse gen `xx'_`yy'_down=`xx'_`yy'_mc-1.96*`xx'_`yy'_mse label value `xx'_`yy'_agec labelage } }
global t_all "Full Sample (Male and Female)" global t_All "Full Sample (Male and Female)" global t_Large "Large (>500 emp) Firms" global t_Small "Small (<20 emp) Firms"
global tt_earns "Incumbents Average Monthly Earnings" global tt_learns "Log Incumbents Average Monthly Earnings" global tt_lemp "Log Average Monthly Employment"
twoway (bar all_learns_mcoef all_learns_agec)(rcap all_learns_up all_learns_down all_learns_agec), graphregion(fcolor(white)) xlabel(1 "14-18" 2 "19-21" 3 "22-24" 4 "25-34" 5 "35-44" 6 "45-54" 7 "55-64" 8 "65-99") xtitle("Age groups") title("Effect on Log Average Monthly Earnings", size(medium) ) t2(${t_`1'}, size(small)) scale(0.8) name(`1'_les, replace) legend(order(1 "Coefficient" 2 "95% CI"))
twoway (bar all_lemp_mcoef all_lemp_agec)(rcap all_lemp_up all_lemp_down all_lemp_agec), graphregion(fcolor(white)) xlabel(1 "14-18" 2 "19-21" 3 "22-24" 4 "25-34" 5 "35-44" 6 "45-54" 7 "55-64" 8 "65-99") xtitle("Age groups") title("Effect on Log Average Monthly Employment", size(medium) ) t2(${t_`1'}, size(small)) scale(0.8) name(`1'_lem, replace) legend(order(1 "Coefficient" 2 "95% CI"))
graph combine `1'_les `1'_lem, graphregion(fcolor(white)) title("Coefficient on Minimum Wage", size(medium))t2("Firms: `1', Sectors: Accommodation and Food Services (72) & Retail Trade (44-45)", size(small) ) note("Note: The figure reports coeficient on effective minimum wage (in dollars), in regressions run separately by age group (with state and year fixed effects).", size(vsmall) span) scale(0.9) graph export reggraph_`1', as(png) replace foreach yy in earns learns lemp{ twoway (connected male_`yy'_mcoef male_earns_agec)(rcap male_`yy'_up male_`yy'_down male_`yy'_agec) (connected female_`yy'_mcoef male_`yy'_agec)(rcap female_`yy'_up female_`yy'_down male_`yy'_agec), graphregion(fcolor(white)) xlabel(1 "14-18" 2 "19-21" 3 "22-24" 4 "25-34" 5 "35-44" 6 "45-54" 7 "55-64" 8 "65-99") xtitle("Age groups") title("${tt_`yy'}", size(medium)) scale(0.8) legend(order(1 "Male" 2 "95% CI Male" 3 "Female" 4 "95% CI Female")) name(g_`yy'_`1', replace) }
graph combine g_learns_`1' g_lemp_`1' , graphregion(fcolor(white)) title("Coefficient on Minimum Wage", size(medium))t2("Firms: `1', Sectors: Accommodation and Food Services (72) & Retail Trade (44-45)", size(small) ) note("Note: The figure reports coeficient on effective minimum wage (in dollars), in regressions run separately by age group (with state and year fixed effects).", size(vsmall) span) scale(0.9) name(byagesex_`1', replace) graph export reggraph_byage_`1', as(png) replace end use temp_sex_age_fsize if firmsize==0,clearreggraphs All
use temp_sex_age_fsize if sex==0,cleartable firmsize age, c(mean earns)
use temp_sex_age_fsize if firmsize==1,clearreggraphs Small
use temp_sex_age_fsize if firmsize==5,clearreggraphs Large
graph combine Large_les Large_lem Small_les Small_lem, graphregion(fcolor(white)) title("Coefficient on Minimum Wage", size(medium)) t2("Firms: Large and Small, Sectors: Accommodation and Food Services (72) & Retail Trade (44-45)", size(small) ) note("Note: The figure reports coeficient on effective minimum wage (in dollars), in regressions run separately by education group (with state and year fixed effects).", size(vsmall) span) scale(0.9)graph export reggraph_large_small, as(png) replace