import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model

# Widen console output so wide DataFrames/arrays print on a single row
# instead of being wrapped or truncated.
desired_width = 320
pd.set_option('display.width', desired_width)
np.set_printoptions(linewidth=desired_width)
pd.set_option('display.max_columns', 100)

# Load the Better Life dataset; the CSV is fetched over HTTP, so this
# step needs network access.
url = 'https://raw.githubusercontent.com/nyp-sit/data/master/Better_Life.csv'
df = pd.read_csv(url)
print(df.head())
# Output that was pasted into the source after the print above (appears to
# be from an earlier run; column alignment was lost in the paste):
#   LOCATION Country INDICATOR Indicator MEASURE Measure INEQUALITY Inequality
#   0 AUS Australia JE_LMIS Labour market insecurity L Value TOT Total
#   1 AUT Austria JE_LMIS Labour market insecurity L Value TOT Total
#   2 BEL Belgium JE_LMIS Labour market insecurity L Value TOT Total
#   3 CAN Canada JE_LMIS Labour market insecurity L Value TOT Total
#   4 CZE Czech Republic JE_LMIS Labour market insecurity L Value TOT Total
# Keep only the rows whose 'Indicator' is 'Life satisfaction' and whose
# 'Inequality' is 'Total'; both conditions must hold for a row to be kept.
is_life_satisfaction = df['Indicator'] == 'Life satisfaction'
is_total = df['Inequality'] == 'Total'
life_df = df.loc[is_life_satisfaction & is_total]
print(life_df.head())
# Output that was pasted into the source after the print above (appears to
# be from an earlier run; column alignment was lost in the paste):
#   LOCATION Country INDICATOR Indicator MEASURE Measure INEQUALITY Inequality
#   2859 AUS Australia SW_LIFS Life satisfaction L Value TOT Total
#   2860 AUT Austria SW_LIFS Life satisfaction L Value TOT Total
#   2861 BEL Belgium SW_LIFS Life satisfaction L Value TOT Total
#   2862 CAN Canada SW_LIFS Life satisfaction L Value TOT Total
#   2863 CZE Czech Republic SW_LIFS Life satisfaction L Value TOT Total
# Load the WEO GDP dataset (fetched over HTTP). The file is read as
# latin-1, and thousands=',' makes pandas parse comma-grouped figures
# as numbers rather than strings.
url1 = 'https://raw.githubusercontent.com/nyp-sit/data/master/WEO_Data.csv'
gdp_df = pd.read_csv(url1, encoding='latin-1', thousands=',')
print(gdp_df.head())
# Output that was pasted into the source after the print above (appears to
# be from an earlier run; column alignment was lost in the paste):
#   Country Subject Descriptor Units Scale 2015 Estimates Start After
#   0 Afghanistan Gross domestic product per capita, current prices U.S. dollars Units 599.994 2013.0
#   1 Albania Gross domestic product per capita, current prices U.S. dollars Units 3995.380 2010.0
#   2 Algeria Gross domestic product per capita, current prices U.S. dollars Units 4318.140 2014.0
#   3 Angola Gross domestic product per capita, current prices U.S. dollars Units 4100.320 2014.0
#   4 Antigua and Barbuda Gross domestic product per capita, current prices U.S. dollars Units 14414.300 2011.0
# Join the two datasets on 'Country'. The inner join keeps only countries
# present in both frames. Only the '2015' column of the GDP data and the
# 'Value' column of the life-satisfaction data are carried over.
pdf = pd.merge(
    gdp_df[['Country', '2015']],
    life_df[['Country', 'Value']],
    on='Country',
    how='inner',
)
# Give the merged columns descriptive names for plotting and modelling.
pdf = pdf.rename(columns={'Value': 'Life satisfaction', '2015': 'GDP per capita'})
print(pdf.head())
# Output that was pasted into the source after the print above (appears to
# be from an earlier run; column alignment was lost in the paste):
#   Country GDP per capita Life satisfaction
#   0 Australia 50961.87 7.3
#   1 Austria 43724.03 7.0
#   2 Belgium 40106.63 6.9
#   3 Brazil 8670.00 6.6
#   4 Canada 43331.96 7.3
import matplotlib.pyplot as plt

# Scatter plot of life satisfaction against GDP per capita, with the
# y-axis fixed to the range 0-10.
ax1 = pdf.plot.scatter(
    x='GDP per capita',
    y='Life satisfaction',
    c='blue',
    ylim=[0, 10],
)
plt.show()

# np.c_ turns each column into a 2-D column array, the (n_samples, 1)
# layout passed to LinearRegression.fit below.
X = np.c_[pdf['GDP per capita']]
y = np.c_[pdf['Life satisfaction']]

# Fit a simple linear model: life satisfaction as a function of GDP per capita.
lin_reg_model = sklearn.linear_model.LinearRegression()
lin_reg_model.fit(X, y)

# Predict life satisfaction for a single GDP-per-capita value.
X_new = [[22587]]
print(lin_reg_model.predict(X_new))
# Output that was pasted into the source after the print above (appears to
# be from an earlier run):
#   [[6.24626326]]