讀取 Excel 檔轉換為 MongoDB 後再處理

程式碼

# -*- coding: utf-8 -*-

from xlrd import open_workbook
from pymongo import MongoClient


def getSegment(xlsx, rowStart, rowNum, colStart, colNum):
    global dbCursor

    fieldName = xlsx.cell(rowStart - 1, 0).value
    fieldName = fieldName.strip('\n')
    fieldName = "".join(fieldName.split())

    for rowData in range(rowStart, rowStart + rowNum):
        fieldYear = xlsx.cell(rowData, 0).value
        if type(fieldYear) is not int:
            fieldYear = int(fieldYear)

        for colData in range(colStart, colStart + colNum):
            fieldCity = xlsx.cell(1, colData).value

            fieldValue = xlsx.cell(rowData, colData).value
            if type(fieldValue) is not float:
                fieldValue = 0.00

            dbSQL = {}

            dbSQL['YEAR'] = fieldYear
            dbSQL['CITY'] = fieldCity
            dbSQL['FNAME'] = fieldName
            dbSQL['FVALUE'] = fieldValue

            dbCursor.MYDATA3.insert(dbSQL)


def query1(rowList):
    global dbCursor

    fieldNameList = [u'年份', u'城市', u'欄位', u'值']
    for col in fieldNameList:
        print '%s\t' % (col.encode('big5')),

    print

    for row in rowList:
        print '%s\t%s\t%d\t%12.2f' % (row['FNAME'].encode('big5'),
                                      row['CITY'].encode('big5'),
                                      row['YEAR'], row['FVALUE'])


def query2(rowList):
    global dbCursor

    fieldNameList = [u'欄位', u'平均值']
    for col in fieldNameList:
        print '%s\t' % (col.encode('big5')),

    print

    for row in rowList['result']:
        print '%s\t%12.2f' % (row['_id']['FNAME'].encode('big5'), row['FVALUE'])


def query3(rowList):
    global dbCursor

    fieldNameList = [u'年份', u'欄位', u'平均值']
    for col in fieldNameList:
        print '%s\t' % (col.encode('big5')),

    print

    for row in rowList['result']:
        print '%d\t%s\t%12.2f' % (row['_id']['YEAR'],
                                  row['_id']['FNAME'].encode('big5'),
                                  row['FVALUE'])


def getStat(id):
    if id == 1:
        # 詳細
        rowList = dbCursor.MYDATA3.find().sort('FNAME, CITY, YEAR')
        query1(rowList)
    elif id == 2:
        # 各統計量平均值
        dbSQL = [
                 {'$group':{'_id':{'FNAME':'$FNAME'}, 'FVALUE':{'$avg':'$FVALUE'}}},
                 {"$sort": {'FNAME':1}}
                ]
        rowList = dbCursor.MYDATA3.aggregate(dbSQL)
        query2(rowList)
    elif id == 3:
        # 各年度所有統計量平均值
        dbSQL = [
                 {'$group':{'_id':{'YEAR':'$YEAR', 'FNAME':'$FNAME'}, 'FVALUE':{'$avg':'$FVALUE'}}},
                 {'$sort': {'YEAR':1, 'FNAME':1}}
                ]
        rowList = dbCursor.MYDATA3.aggregate(dbSQL)
        query3(rowList)


def getStatByCity(cityName, id):
    if id == 1:
        # 詳細
        rowList = dbCursor.MYDATA3.find({'CITY': cityName}).sort('FNAME, CITY, YEAR')
        query1(rowList)
    elif id == 2:
        # 各統計量平均值
        dbSQL = [
                 {'$match':{'CITY':cityName}},
                 {'$group':{'_id':{'FNAME':'$FNAME'}, 'FVALUE':{'$avg':'$FVALUE'}}},
                 {"$sort": {'FNAME':1}}
                ]
        rowList = dbCursor.MYDATA3.aggregate(dbSQL)
        query2(rowList)
    elif id == 3:
        # 各年度所有統計量平均值
        dbSQL = [
                 {'$match':{'CITY':cityName}},
                 {'$group':{'_id':{'YEAR':'$YEAR', 'FNAME':'$FNAME'}, 'FVALUE':{'$avg':'$FVALUE'}}},
                 {'$sort':{'YEAR':1, 'FNAME':1}}
                ]
        rowList = dbCursor.MYDATA3.aggregate(dbSQL)
        query3(rowList)


#-- main --#
dbConn = MongoClient()
dbCursor = dbConn['test']

taskControl = '002'
if taskControl[0] == '1':
    filePath = '/Users/Emprogria/Documents/Birdy'
    xlsxFile = '3.xlsx'

    wb = open_workbook('%s/%s' % (filePath, xlsxFile))
    xlsx = wb.sheet_by_index(0)

    dbCursor.MYDATA3.drop()
    for i in range(3, 394, 17):
        getSegment(xlsx, i, 16, 3, 22)

idQuery = int(taskControl[1])
if  idQuery > 0:
    getStat(idQuery)

idQuery = int(taskControl[2])
if  idQuery > 0:
    getStatByCity(u'臺北市', idQuery)

dbConn.close()