Cassandra 與 XML

Cassandra 資料結構(回首頁)

-- Product information table: one row per product, keyed by a UUID.
-- Single-valued attributes are stored as text; multi-valued attributes
-- (cat, comments, description, features, keywords) are stored as set<text>.
CREATE TABLE smarthome.productinfo (
    id          uuid PRIMARY KEY,
    author      text,
    cat         set<text>,
    comments    set<text>,
    description set<text>,
    features    set<text>,
    keywords    set<text>,
    name        text,
    sku         text,
    url         text
)
WITH bloom_filter_fp_chance = 0.01
    AND caching = '{"keys":"ALL", "rows_per_partition":"NONE"}'
    AND comment = ''
    AND compaction = {
        'min_threshold': '4',
        'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy',
        'max_threshold': '32'
    }
    AND compression = {
        'sstable_compression': 'org.apache.cassandra.io.compress.LZ4Compressor'
    }
    AND dclocal_read_repair_chance = 0.1
    AND default_time_to_live = 0
    AND gc_grace_seconds = 864000
    AND max_index_interval = 2048
    AND memtable_flush_period_in_ms = 0
    AND min_index_interval = 128
    AND read_repair_chance = 0.0
    AND speculative_retry = '99.0PERCENTILE';

XML 資料結構

<add> <doc> <field name="id"></field> <field name="author">瑞軒科技</field> <field name="comments">Seeing Better World ~ 瑞軒科技,看見更美好的視界~瑞軒的本質是「TV」,我們是一家以「顯示器」為主的公司。</field> <field name="comments">創立於 1994 年的瑞軒科技,是由一群在顯示器產業中深耕已久的優質團隊組成,初期以代工起家,至今,為年營收約 580 億、全球員工數超過 3500 人的上市公司。</field> <field name="comments">2009 年我們位居台灣製造業第 42 名,2011 年榮獲數位時代雜誌評比亞洲科技前 100 強企業,2012 年更是取得天下雜誌家電業第1名的殊榮。</field> <field name="comments">即使在早期專業代工時期,瑞軒就一直以「成為 No.1 顯示器製造商」為使命。</field> <field name="comments">我們的產品線由廣而深且物美價廉,超越 500 項已認證的專利,功能、設計與技術年年推陳出新。</field> <field name="comments">近年來的 LED 液晶電視、智慧型電視、3D 電視等產品,更榮獲 CES 消費性電子大展與工業設計獎 IF 及 RedDot Design 的肯定。</field> <field name="comments">在紮實的研發能力及供應鏈的貫穿整合管理,有效的降低成本並能同時鞏固品質的基礎下,我們的客戶都是來自世界各地引領科技的顯示器品牌大廠,成為 No.1 顯示器製造商。</field> <field name="features">OEM/ODM Display MFG</field> <field name="features">LCD Monitors / TVs</field> <field name="features">Audio Devices</field> <field name="features">Smart Remote Control</field> <field name="sku">智慧電視</field> <field name="name">智慧電視</field> <field name="description">智慧電視生產 組裝</field> <field name="url">http://www.amtran.com.tw/</field> <field name="keywords">多媒體分享</field> <field name="cat">社交互動</field> </doc> </add> 

程式碼

# -*- coding: utf-8 -*-

import xml.dom.minidom
from cassandra.cluster import Cluster
import sys


class SmartHome(object):
    """Load product information from an XML file into Cassandra.

    Typical use: connectDB() -> loadXML() -> insertDB() -> closeDB().
    The XML is expected to contain <field name="..."> elements; the text
    of each recognised field is copied into the matching column of
    SmartHome.ProductInfo.
    """

    # Active driver session; None until connectDB() has been called.
    session = None

    # Fields stored as a single text value (a later occurrence overwrites
    # an earlier one).
    _SCALAR_FIELDS = ('author', 'sku', 'name', 'url')
    # Fields that may repeat; every occurrence becomes one set element.
    _SET_FIELDS = ('comments', 'description', 'keywords', 'features', 'cat')

    def connectDB(self, nodes):
        """Connect to the cluster and keep the session on the instance.

        :param nodes: value handed to ``Cluster(...)``; the script passes
            a list of host addresses.
        """
        cluster = Cluster(nodes)
        self.session = cluster.connect()

    def insertDB(self, cql):
        """Echo the CQL statement to stdout and execute it.

        :param cql: complete CQL statement as a string.
        """
        # print() with a single argument behaves the same on Python 2 and 3.
        print(cql)
        self.session.execute(cql)

    def closeDB(self):
        """Shut down the cluster behind the session; no-op if never connected."""
        if self.session is not None:
            self.session.cluster.shutdown()

    def getString(self, data):
        """Join already-quoted CQL literals with commas.

        :param data: iterable of strings.
        :returns: the items joined by ``,`` (empty string for no items).
        """
        return ','.join(data)

    @staticmethod
    def _escapeCql(value):
        """Escape a value for use inside a single-quoted CQL string literal."""
        # CQL escapes a single quote inside a string literal by doubling it.
        return value.replace("'", "''")

    def loadXML(self, nameFile):
        """Parse an XML file and build the INSERT statement for its fields.

        :param nameFile: file name or open file object accepted by
            ``xml.dom.minidom.parse``.
        :returns: a CQL INSERT statement for SmartHome.ProductInfo with a
            server-generated ``uuid()`` as the id.

        Every <field> element under the document root is read, so fields
        of all <doc> elements in the file end up in the same statement.
        Fields without content and fields with unknown names are ignored.
        """
        cqlTemplate = ("INSERT INTO SmartHome.ProductInfo(id, author, comments, sku, name, "
                       "description, url, keywords, features, cat) "
                       "VALUES(uuid(), '%s', {%s}, '%s', '%s', {%s}, '%s', {%s}, {%s}, {%s})")

        scalars = dict((key, '') for key in self._SCALAR_FIELDS)
        sets = dict((key, []) for key in self._SET_FIELDS)

        DOMTree = xml.dom.minidom.parse(nameFile)
        xmlCollection = DOMTree.documentElement

        for productInfo in xmlCollection.getElementsByTagName("field"):
            fieldName = productInfo.getAttribute("name")
            isScalar = fieldName in scalars
            if not isScalar and fieldName not in sets:
                continue  # e.g. the "id" field: the id is generated by uuid()
            if not productInfo.childNodes:
                continue  # empty element: keep the default value
            # Only the first child node is used as the field text.
            value = self._escapeCql(productInfo.childNodes[0].data)
            if isScalar:
                scalars[fieldName] = value
            else:
                sets[fieldName].append("'%s'" % value)

        cql = cqlTemplate % (scalars['author'], self.getString(sets['comments']),
                             scalars['sku'], scalars['name'],
                             self.getString(sets['description']), scalars['url'],
                             self.getString(sets['keywords']),
                             self.getString(sets['features']),
                             self.getString(sets['cat']))
        return cql


def main(listHost, nameFile):
    """Import one XML product file into Cassandra.

    :param listHost: hosts passed to ``SmartHome.connectDB``.
    :param nameFile: path of the XML file to load.

    Any exception raised while parsing or inserting propagates to the
    caller, but the connection is closed first.
    """
    clientSmartHome = SmartHome()
    clientSmartHome.connectDB(listHost)

    # Always release the cluster connection, even when parsing the XML or
    # executing the INSERT fails.
    try:
        cql = clientSmartHome.loadXML(nameFile)
        clientSmartHome.insertDB(cql)
    finally:
        clientSmartHome.closeDB()

# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    # The XML file path is taken from the first command-line argument;
    # when no argument is given the script does nothing.
    if len(sys.argv) > 1:
        # The Cassandra host is hard-coded to the local machine.
        main(['127.0.0.1'], sys.argv[1])
ċ
Cassandra-XML.py
(3k)
李智,
2015年4月6日 上午9:13
ċ
Cassandra-XML.xml
(2k)
李智,
2015年4月6日 上午9:14