import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model
# Widen console output so wide DataFrames and NumPy arrays are not wrapped
# when printed, and allow up to 100 columns to be shown.
desired_width = 320
np.set_printoptions(linewidth=desired_width)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', desired_width)

# Download the Better Life CSV from GitHub and preview its first five rows.
url = 'https://raw.githubusercontent.com/nyp-sit/data/master/Better_Life.csv'
df = pd.read_csv(url)
print(df.head(5))
# Output of print(df.head()):
#   LOCATION Country INDICATOR Indicator MEASURE Measure INEQUALITY Inequality
#   0 AUS Australia JE_LMIS Labour market insecurity L Value TOT Total
#   1 AUT Austria JE_LMIS Labour market insecurity L Value TOT Total
#   2 BEL Belgium JE_LMIS Labour market insecurity L Value TOT Total
#   3 CAN Canada JE_LMIS Labour market insecurity L Value TOT Total
#   4 CZE Czech Republic JE_LMIS Labour market insecurity L Value TOT Total
# Keep only the rows whose 'Indicator' is 'Life satisfaction' and whose
# 'Inequality' is 'Total', then preview the first five of them.
life_df = df.loc[
    df['Indicator'].eq('Life satisfaction') & df['Inequality'].eq('Total')
]
print(life_df.head(5))
# Output of print(life_df.head()):
#   LOCATION Country INDICATOR Indicator MEASURE Measure INEQUALITY Inequality
#   2859 AUS Australia SW_LIFS Life satisfaction L Value TOT Total
#   2860 AUT Austria SW_LIFS Life satisfaction L Value TOT Total
#   2861 BEL Belgium SW_LIFS Life satisfaction L Value TOT Total
#   2862 CAN Canada SW_LIFS Life satisfaction L Value TOT Total
#   2863 CZE Czech Republic SW_LIFS Life satisfaction L Value TOT Total
# Download the WEO CSV from GitHub. It is read as Latin-1 text, and ','
# inside numbers is treated as a thousands separator so those columns
# parse as numeric values.
url1 = 'https://raw.githubusercontent.com/nyp-sit/data/master/WEO_Data.csv'
gdp_df = pd.read_csv(url1, thousands=',', encoding='latin-1')
print(gdp_df.head(5))
# Output of print(gdp_df.head()):
#   Country Subject Descriptor Units Scale 2015 Estimates Start After
#   0 Afghanistan Gross domestic product per capita, current prices U.S. dollars Units 599.994 2013.0
#   1 Albania Gross domestic product per capita, current prices U.S. dollars Units 3995.380 2010.0
#   2 Algeria Gross domestic product per capita, current prices U.S. dollars Units 4318.140 2014.0
#   3 Angola Gross domestic product per capita, current prices U.S. dollars Units 4100.320 2014.0
#   4 Antigua and Barbuda Gross domestic product per capita, current prices U.S. dollars Units 14414.300 2011.0
# Join the '2015' column of the GDP table with the 'Value' column of the
# life-satisfaction table on country name. The inner join keeps only
# countries present in both tables. The two data columns are then given
# descriptive names.
pdf = (
    gdp_df[['Country', '2015']]
    .merge(life_df[['Country', 'Value']], on='Country', how='inner')
    .rename(columns={'2015': 'GDP per capita', 'Value': 'Life satisfaction'})
)
print(pdf.head(5))
# Output of print(pdf.head()):
#   Country GDP per capita Life satisfaction
#   0 Australia 50961.87 7.3
#   1 Austria 43724.03 7.0
#   2 Belgium 40106.63 6.9
#   3 Brazil 8670.00 6.6
#   4 Canada 43331.96 7.3
import matplotlib.pyplot as plt
# Scatter plot of life satisfaction against GDP per capita, with the y-axis
# fixed to the range 0-10.
ax1 = pdf.plot.scatter(
    x='GDP per capita',
    y='Life satisfaction',
    c='blue',
    ylim=[0, 10],
)
plt.show()

# Reshape both columns into (n, 1) arrays: one feature column for X and a
# 2-D target for y, so predictions also come back as a 2-D array.
X = pdf['GDP per capita'].to_numpy().reshape(-1, 1)
y = pdf['Life satisfaction'].to_numpy().reshape(-1, 1)

# Fit a linear regression of life satisfaction on GDP per capita;
# fit() returns the fitted estimator itself.
lin_reg_model = sklearn.linear_model.LinearRegression().fit(X, y)

# Predict life satisfaction for a single GDP-per-capita value.
X_new = [[22587]]
print(lin_reg_model.predict(X_new))
# Output of print(lin_reg_model.predict(X_new)):
#   [[6.24626326]]