讀取 Excel 檔轉換為 MongoDB 後再處理

程式碼

# -*- coding: utf-8 -*- from xlrd import open_workbook from pymongo import MongoClient def getSegment(xlsx, rowStart, rowNum, colStart, colNum): global dbCursor fieldName = xlsx.cell(rowStart - 1, 0).value fieldName = fieldName.strip('\n') fieldName = "".join(fieldName.split()) for rowData in range(rowStart, rowStart + rowNum): fieldYear = xlsx.cell(rowData, 0).value if type(fieldYear) is not int: fieldYear = int(fieldYear) for colData in range(colStart, colStart + colNum): fieldCity = xlsx.cell(1, colData).value fieldValue = xlsx.cell(rowData, colData).value if type(fieldValue) is not float: fieldValue = 0.00 dbSQL = {} dbSQL['YEAR'] = fieldYear dbSQL['CITY'] = fieldCity dbSQL['FNAME'] = fieldName dbSQL['FVALUE'] = fieldValue dbCursor.MYDATA3.insert(dbSQL) def query1(rowList): global dbCursor fieldNameList = [u'年份', u'城市', u'欄位', u'值'] for col in fieldNameList: print '%s\t' % (col.encode('big5')), print for row in rowList: print '%s\t%s\t%d\t%12.2f' % (row['FNAME'].encode('big5'), row['CITY'].encode('big5'), row['YEAR'], row['FVALUE']) def query2(rowList): global dbCursor fieldNameList = [u'欄位', u'平均值'] for col in fieldNameList: print '%s\t' % (col.encode('big5')), print for row in rowList['result']: print '%s\t%12.2f' % (row['_id']['FNAME'].encode('big5'), row['FVALUE']) def query3(rowList): global dbCursor fieldNameList = [u'年份', u'欄位', u'平均值'] for col in fieldNameList: print '%s\t' % (col.encode('big5')), print for row in rowList['result']: print '%d\t%s\t%12.2f' % (row['_id']['YEAR'], row['_id']['FNAME'].encode('big5'), row['FVALUE']) def getStat(id): if id == 1: # 詳細 rowList = dbCursor.MYDATA3.find().sort('FNAME, CITY, YEAR') query1(rowList) elif id == 2: # 各統計量平均值 dbSQL = [ {'$group':{'_id':{'FNAME':'$FNAME'}, 'FVALUE':{'$avg':'$FVALUE'}}}, {"$sort": {'FNAME':1}} ] rowList = dbCursor.MYDATA3.aggregate(dbSQL) query2(rowList) elif id == 3: # 各年度所有統計量平均值 dbSQL = [ {'$group':{'_id':{'YEAR':'$YEAR', 'FNAME':'$FNAME'}, 'FVALUE':{'$avg':'$FVALUE'}}}, {'$sort': {'YEAR':1, 'FNAME':1}} ] rowList = dbCursor.MYDATA3.aggregate(dbSQL) query3(rowList) def getStatByCity(cityName, id): if id == 1: # 詳細 rowList = dbCursor.MYDATA3.find({'CITY': cityName}).sort('FNAME, CITY, YEAR') query1(rowList) elif id == 2: # 各統計量平均值 dbSQL = [ {'$match':{'CITY':cityName}}, {'$group':{'_id':{'FNAME':'$FNAME'}, 'FVALUE':{'$avg':'$FVALUE'}}}, {"$sort": {'FNAME':1}} ] rowList = dbCursor.MYDATA3.aggregate(dbSQL) query2(rowList) elif id == 3: # 各年度所有統計量平均值 dbSQL = [ {'$match':{'CITY':cityName}}, {'$group':{'_id':{'YEAR':'$YEAR', 'FNAME':'$FNAME'}, 'FVALUE':{'$avg':'$FVALUE'}}}, {'$sort':{'YEAR':1, 'FNAME':1}} ] rowList = dbCursor.MYDATA3.aggregate(dbSQL) query3(rowList) #-- main --# dbConn = MongoClient() dbCursor = dbConn['test'] taskControl = '002' if taskControl[0] == '1': filePath = '/Users/Emprogria/Documents/Birdy' xlsxFile = '3.xlsx' wb = open_workbook('%s/%s' % (filePath, xlsxFile)) xlsx = wb.sheet_by_index(0) dbCursor.MYDATA3.drop() for i in range(3, 394, 17): getSegment(xlsx, i, 16, 3, 22) idQuery = int(taskControl[1]) if idQuery > 0: getStat(idQuery) idQuery = int(taskControl[2]) if idQuery > 0: getStatByCity(u'臺北市', idQuery) dbConn.close()