import pandas as pd
import numpy as np
from linearmodels import FamaMacBeth
from decimal import Decimal
def fm_single_summary(fm_single_df):
    """
    Flatten a single Fama-MacBeth regression into one display column.

    This function is an auxiliary of the core function `fm_summary`.

    :param `fm_single_df`: a dataframe of a single fama-macbeth regression. The columns are
        all the variables' names, the indexes are 'parameter' and 'tstat'.
    :return: a pd.Series (object dtype) holding two entries per variable, in column order:
        the parameter value (labelled with the variable name) followed by the t-stat
        formatted with 2 decimals in parentheses, like '(1.96)' (labelled with ''). A missing
        t-stat is rendered as the empty string. The input dataframe is not modified.
    """
    parameters = fm_single_df.loc['parameter']
    tstats = fm_single_df.loc['tstat']

    labels = []
    values = []
    for variable, parameter, tstat in zip(fm_single_df.columns, parameters, tstats):
        # Each variable takes two rows: the coefficient, then its t-stat underneath it.
        labels.extend([variable, ''])
        values.append(parameter)
        values.append('' if pd.isna(tstat) else '(%.2f)' % float(tstat))

    # Build the mixed float/str column directly instead of writing strings into the float
    # 'tstat' row of a DataFrame, which pandas deprecates (incompatible-dtype setitem).
    return pd.Series(values, index=labels, dtype=object)
def fm_summary(reg_list):
    """
    Combine several regressions into one summary table.

    :param `reg_list`: the list of regressions that need to be combined and summarized. Each
        element must provide `params`, `tstats`, `nobs`, `time_info['mean']` and either
        `avg_adj_rsquared` or `rsquared`.
    :return: the summary of all the regressions in `reg_list`, which conforms to academic
        norms: one column per regression (named '1', '2', ...), two rows per variable
        (parameter, then t-stat in parentheses; blank where a regression lacks the variable),
        followed by the rows 'Avg. R Square', 'Total Obs.' and 'Avg. Obs.'.
    :raises ValueError: if `reg_list` is empty.
    """
    if len(reg_list) == 0:
        raise ValueError("reg_list must contain at least one regression")

    reg_frames = []
    r2_values = []
    for reg in reg_list:
        # Two rows per regression: parameters first, t-stats second.
        reg_frames.append(pd.DataFrame([reg.params, reg.tstats]))
        try:
            r2_values.append(reg.avg_adj_rsquared)
        except AttributeError:
            # Fall back to the plain R-squared when no averaged adjusted one is available.
            r2_values.append(reg.rsquared)

    # A single concat aligns every regression on the union of variable names; variables a
    # regression does not use become NaN in its two rows.
    total_df = pd.concat(reg_frames)

    # For each reg, get its single summary from its own two-row block, then combine them.
    summaries = [
        fm_single_summary(total_df.iloc[2 * i:2 * i + 2, :])
        for i in range(len(reg_list))
    ]
    total_summ = pd.concat(summaries, axis=1)
    total_summ = total_summ.fillna('')  # fill the nan with empty term

    ## the column names are positive integers starting from 1
    total_summ.columns = [str(i + 1) for i in range(len(reg_list))]

    ## the footer rows are 'Avg. R Square', 'Total Obs.', 'Avg. Obs.'
    ## can be adjusted if more statistics are needed
    footer = pd.DataFrame([
        pd.Series(r2_values, name='Avg. R Square', index=total_summ.columns),
        pd.Series([reg.nobs for reg in reg_list], name='Total Obs.', index=total_summ.columns),
        pd.Series([reg.time_info['mean'] for reg in reg_list], name='Avg. Obs.',
                  index=total_summ.columns),
    ])
    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the footer rows instead.
    return pd.concat([total_summ, footer])
# Example usage: print the combined summary table of three regressions.
# NOTE(review): `reg`, `reg1` and `reg2` are not defined anywhere in the visible file;
# presumably they are fitted regression results created elsewhere. Confirm they exist
# before this line runs, otherwise it raises NameError.
print(fm_summary([reg, reg1, reg2]))