統計分析(二)

分析

  • 探討失業率與 GDP 能指引公共政策方向。
  • 兩年度指標資料儲存於 NoSQL (Cassandra)。
  • 兩指標年度範圍並不一致,GDP 資料多於失業率。
  • 失業率與 GDP 的走勢呈反向(負相關)關係。
  • 兩指標振幅比例不同,放在同一圖表中,容易解讀失真。
  • 因此將兩指標各自依其振幅(最大值 - 最小值)換算為 0 到 1 之間的比例後,再放在同一圖表中比較。

分析輸出

失業率與 GDP 相關係數=-0.1948
當 GDP 增長為正 (7) 時,失業率卻提高 (2) 之機率=28.57%
當 GDP 增長為負 (8) 時,失業率卻降低 (4) 之機率=50.00%
GDP 與失業率增長發生乖離 
        (GDP 貢獻與勞動力增長相背離) 之機率=40.00%

分析圖

未調整圖:
指數振幅比例圖:

分析圖

指數成長率圖:
指數成長率累積圖:

程式碼

# -*- coding: utf-8 -*-

from cassandra.cluster import Cluster
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager

class LABOR_INDICES_IDX_02:
	"""Compare two yearly index series loaded from Cassandra.

	Rows of the tables LABOR_INDICES_IDX_01 and LABOR_INDICES_IDX_02 are
	joined on their first column and kept in ``XYZ`` as an array whose
	columns are [key, IDX_01 value, IDX_02 value].  The report texts and
	chart labels treat the key as the year, column 1 as the unemployment
	rate and column 2 as GDP.
	"""

	# Keyspace that Open() connects to.
	nameOfKeyspace = 'emprogria'
	# Cassandra nodes that Open() contacts.
	contactPoints = ('127.0.0.1',)
	# Font family used for the (Chinese) chart labels.
	fontFamily = 'STHeiti'
	# Cluster handle; kept so that Close() can release its connections.
	dbCluster = None
	# Session bound to nameOfKeyspace, or None while disconnected.
	dbSession = None
	# Joined table (n x 3 array), or None until LoadData() succeeds.
	XYZ = None

	def __init__(self):
		pass

	def Open(self):
		"""Connect to Cassandra and open a session on the keyspace."""
		# Release a previous connection instead of silently overwriting it.
		self.Close()
		self.dbCluster = Cluster(contact_points=list(self.contactPoints))
		self.dbSession = self.dbCluster.connect(self.nameOfKeyspace)

	def Close(self):
		"""Shut down the session and the cluster; safe to call repeatedly."""
		try:
			if self.dbSession is not None:
				self.dbSession.shutdown()
		finally:
			self.dbSession = None
			# The cluster keeps its own connections open until it is shut
			# down explicitly, so it has to be released as well.
			cluster, self.dbCluster = self.dbCluster, None
			if cluster is not None:
				cluster.shutdown()

	def LoadTable(self, SQL):
		"""Run SQL and return {first column: second column}, ordered by key.

		When several rows share a key, the last one returned wins.
		"""
		table = {}
		for row in self.dbSession.execute(SQL):
			table[row[0]] = row[1]
		return {key: table[key] for key in sorted(table)}

	def LoadData(self):
		"""Load both tables and store their inner join in ``XYZ``.

		Only keys that have a non-None value in both tables are kept, in
		ascending key order.  Does nothing when no session is open.
		"""
		if self.dbSession is None:
			return

		first = self.LoadTable("SELECT * FROM LABOR_INDICES_IDX_01")
		second = self.LoadTable("SELECT * FROM LABOR_INDICES_IDX_02")

		rows = []
		for key in sorted(first):
			if key not in second:
				continue
			if first[key] is None or second[key] is None:
				continue
			rows.append([key, first[key], second[key]])

		# reshape keeps the array two-dimensional even when the join is
		# empty, so that column slicing keeps working for every caller.
		self.XYZ = np.array(rows).reshape(-1, 3)

	def _RequireData(self):
		"""Return ``XYZ``, loading it on first use.

		Raises RuntimeError when nothing could be loaded (no open session).
		"""
		if self.XYZ is None:
			self.LoadData()
		if self.XYZ is None:
			raise RuntimeError("no data loaded: call Open() before using this method")
		return self.XYZ

	def _YearlyChanges(self):
		"""Return (keys, column-1 changes, column-2 changes) between consecutive rows.

		Each change is paired with the key of the later row.
		"""
		data = self._RequireData()
		return data[1:, 0], np.diff(data[:, 1]), np.diff(data[:, 2])

	def _Rescale(self, values):
		"""Map values linearly so that the minimum is 0 and the maximum is 1.

		NOTE(review): a constant series has a zero range; the division is
		left unguarded, as in the original code.
		"""
		values = np.asarray(values)
		low = values.min()
		return (values - low) / (values.max() - low)

	def _PlotScaled(self, years, uep, gdp, ylabel, filename):
		"""Plot ``1 - rescaled uep`` against ``rescaled gdp`` and save to filename.

		The first series is flipped so that both lines can be compared
		directly; their correlation coefficient is written onto the chart.
		"""
		flipped = 1 - self._Rescale(uep)
		scaled = self._Rescale(gdp)

		plt.rcParams["font.family"] = self.fontFamily

		corr = np.corrcoef(flipped, scaled)
		label = u"相關係數 = %.4f" % (corr[0][1])
		plt.text(years.mean(), 0.1, label, ha='center', va='center')

		plt.plot(years, flipped, linewidth=2.0, color='red', label=u'1 - 失業率')
		plt.plot(years, scaled, linewidth=2.0, label='GDP')

		plt.legend(loc='upper left')

		plt.xlabel(u'年份')
		plt.ylabel(ylabel)
		plt.title(u'失業率 - GDP')

		plt.savefig(filename)
		plt.close()

	def List(self, YYYY=None):
		"""Print the row whose key equals YYYY, or every row otherwise.

		All rows are printed when YYYY is None and also when no row
		matches YYYY.
		"""
		data = self._RequireData()

		for row in data:
			if row[0] == YYYY:
				print("%d: %.2f %.2f" % (row[0], row[1], row[2]))
				break
		else:
			for row in data:
				print("%d: %.2f %.2f" % (row[0], row[1], row[2]))

	def Stat(self):
		"""Print the correlation of the two series, then the Stat2() report."""
		data = self._RequireData()

		corr = np.corrcoef(data[:, 1], data[:, 2])
		print(u"失業率與 GDP 相關係數=%.4f" % (corr[0][1]))

		self.Stat2()

	def Stat2(self):
		"""Print how often both series move in the same direction.

		Three figures are reported, each only when its denominator is
		non-zero: the share of rising-GDP intervals in which unemployment
		also rose, the share of falling-GDP intervals in which unemployment
		also fell, and the share of all intervals with same-sign changes.
		"""
		_, uepChanges, gdpChanges = self._YearlyChanges()
		changes = list(zip(uepChanges, gdpChanges))

		uepWhenGdpUp = [uep for uep, gdp in changes if gdp > 0]
		if uepWhenGdpUp:
			rising = sum(1 for uep in uepWhenGdpUp if uep > 0)
			print('當 GDP 增長為正 (%d) 時,失業率卻提高 (%d) 之機率=%.2f%%' % (len(uepWhenGdpUp), rising, (float(rising) / len(uepWhenGdpUp)) * 100.0))

		uepWhenGdpDown = [uep for uep, gdp in changes if gdp < 0]
		if uepWhenGdpDown:
			falling = sum(1 for uep in uepWhenGdpDown if uep < 0)
			print('當 GDP 增長為負 (%d) 時,失業率卻降低 (%d) 之機率=%.2f%%' % (len(uepWhenGdpDown), falling, (float(falling) / len(uepWhenGdpDown)) * 100.0))

		# Fewer than two rows give no interval at all; skip the figure
		# instead of dividing by zero.
		if changes:
			sameSign = sum(1 for uep, gdp in changes if uep * gdp > 0)
			print('GDP 與失業率增長發生乖離 (GDP 貢獻與勞動力增長相背離) 之機率=%.2f%%' % ((float(sameSign) / len(changes)) * 100.0))

	def PlotLines(self):
		"""Save a chart of both raw series to '失業率-GDP 指數圖.png'."""
		data = self._RequireData()

		plt.rcParams["font.family"] = self.fontFamily

		plt.plot(data[:, 0], data[:, 1], linewidth=2.0, color='red', label=u'失業率')
		plt.plot(data[:, 0], data[:, 2], linewidth=2.0, label='GDP')

		plt.legend(loc='upper left')

		plt.xlabel(u'年份')
		plt.ylabel(u'指數')
		plt.title(u'失業率 - GDP')

		plt.savefig(u'失業率-GDP 指數圖.png')
		plt.close()

	def PlotLines2(self):
		"""Save a chart of both series rescaled to their own 0..1 range."""
		data = self._RequireData()
		self._PlotScaled(data[:, 0], data[:, 1], data[:, 2], u'指數振幅比', u'失業率-GDP 指數振幅比圖.png')

	def PlotLines3(self):
		"""Save a chart of the rescaled row-to-row changes of both series."""
		years, uepChanges, gdpChanges = self._YearlyChanges()
		self._PlotScaled(years, uepChanges, gdpChanges, u'指數成長率振幅比', u'失業率-GDP 指數成長率振幅比圖.png')

	def PlotLines4(self):
		"""Save a chart of the rescaled running totals of the row-to-row changes."""
		years, uepChanges, gdpChanges = self._YearlyChanges()
		self._PlotScaled(years, np.cumsum(uepChanges), np.cumsum(gdpChanges), u'指數成長率累積振幅比', u'失業率-GDP 指數成長率累積振幅比圖.png')

	def ListFonts(self):
		"""Print the name of every TrueType font known to matplotlib."""
		for font in font_manager.fontManager.ttflist:
			print(font.name)
	

if __name__ == '__main__':
	# One switch per task, in the order in which the tasks are run below:
	# list all, list 2010, statistics, then the four charts.
	jobTasks = [False, False, True, False, False, False, False]

	emprogria = LABOR_INDICES_IDX_02()

	# Close() must run even when connecting or a task fails, otherwise the
	# database connection stays open.
	try:
		emprogria.Open()

		if jobTasks[0]:
			emprogria.List()

		if jobTasks[1]:
			emprogria.List(2010)

		if jobTasks[2]:
			emprogria.Stat()

		if jobTasks[3]:
			emprogria.PlotLines()

		if jobTasks[4]:
			emprogria.PlotLines2()

		if jobTasks[5]:
			emprogria.PlotLines3()

		if jobTasks[6]:
			emprogria.PlotLines4()
	finally:
		emprogria.Close()
附件:
LABOR_INDICES_IDX_01_02.py
(7k)
李智,
2015年5月26日 上午2:09