Cassandra 與 XML

Cassandra 資料結構

回首頁

-- Product table: one row per product, keyed by a uuid `id`.
-- Multi-valued attributes (cat, comments, description, features, keywords)
-- are stored as set<text> collections; the rest are single text values.
CREATE TABLE smarthome.productinfo (
    id uuid PRIMARY KEY,
    author text,
    cat set<text>,
    comments set<text>,
    description set<text>,
    features set<text>,
    keywords set<text>,
    name text,
    sku text,
    url text
) WITH bloom_filter_fp_chance = 0.01
    AND caching = '{"keys":"ALL", "rows_per_partition":"NONE"}'
    AND comment = ''
    AND compaction = {
        'min_threshold': '4',
        'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy',
        'max_threshold': '32'
    }
    AND compression = {
        'sstable_compression': 'org.apache.cassandra.io.compress.LZ4Compressor'
    }
    AND dclocal_read_repair_chance = 0.1
    AND default_time_to_live = 0
    AND gc_grace_seconds = 864000  -- 864000 s = 10 days
    AND max_index_interval = 2048
    AND memtable_flush_period_in_ms = 0
    AND min_index_interval = 128
    AND read_repair_chance = 0.0
    AND speculative_retry = '99.0PERCENTILE';

XML 資料結構

<!-- Sample product document: one <doc> made of <field name="..."> entries.
     A field name may repeat (comments, features) to carry several values. -->
<add>
  <doc>
    <field name="id"></field>
    <field name="author">瑞軒科技</field>
    <field name="comments">Seeing Better World ~ 瑞軒科技,看見更美好的視界~瑞軒的本質是「TV」,我們是一家以「顯示器」為主的公司。</field>
    <field name="comments">創立於 1994 年的瑞軒科技,是由一群在顯示器產業中深耕已久的優質團隊組成,初期以代工起家,至今,為年營收約 580 億、全球員工數超過 3500 人的上市公司。</field>
    <field name="comments">2009 年我們位居台灣製造業第 42 名,2011 年榮獲數位時代雜誌評比亞洲科技前 100 強企業,2012 年更是取得天下雜誌家電業第1名的殊榮。</field>
    <field name="comments">即使在早期專業代工時期,瑞軒就一直以「成為 No.1 顯示器製造商」為使命。</field>
    <field name="comments">我們的產品線由廣而深且物美價廉,超越 500 項已認證的專利,功能、設計與技術年年推陳出新。</field>
    <field name="comments">近年來的 LED 液晶電視、智慧型電視、3D 電視等產品,更榮獲 CES 消費性電子大展與工業設計獎 IF 及 RedDot Design 的肯定。</field>
    <field name="comments">在紮實的研發能力及供應鏈的貫穿整合管理,有效的降低成本並能同時鞏固品質的基礎下,我們的客戶都是來自世界各地引領科技的顯示器品牌大廠,成為 No.1 顯示器製造商。</field>
    <field name="features">OEM/ODM Display MFG</field>
    <field name="features">LCD Monitors / TVs</field>
    <field name="features">Audio Devices</field>
    <field name="features">Smart Remote Control</field>
    <field name="sku">智慧電視</field>
    <field name="name">智慧電視</field>
    <field name="description">智慧電視生產 組裝</field>
    <field name="url">http://www.amtran.com.tw/</field>
    <field name="keywords">多媒體分享</field>
    <field name="cat">社交互動</field>
  </doc>
</add>

程式碼

# -*- coding: utf-8 -*-
"""Load a product description from an XML file and insert it into Cassandra.

Usage: python <this script> <file.xml>
"""
import xml.dom.minidom
import sys


class SmartHome(object):
    """Converts <field name="..."> XML entries into one CQL INSERT and runs it."""

    # Driver session; set by connectDB(), cleared by closeDB().
    session = None

    # Columns that hold a single text value. If a field name occurs more than
    # once, the last non-empty occurrence wins.
    _SCALAR_FIELDS = ('author', 'sku', 'name', 'url')
    # Columns written as CQL set literals; every occurrence adds one element.
    _SET_FIELDS = ('comments', 'description', 'keywords', 'features', 'cat')

    def connectDB(self, nodes):
        """Connect to the Cassandra cluster reachable through `nodes`.

        :param nodes: list of contact points passed to ``Cluster``.
        """
        # Imported here rather than at module level so that loadXML() can be
        # used (and tested) on machines without the Cassandra driver.
        from cassandra.cluster import Cluster
        cluster = Cluster(nodes)
        self.session = cluster.connect()

    def insertDB(self, cql):
        """Echo the CQL statement to stdout, then execute it on the session.

        :param cql: complete CQL statement as a string.
        """
        # Parenthesised form behaves the same on Python 2 and Python 3.
        print(cql)
        self.session.execute(cql)

    def closeDB(self):
        """Shut down the cluster behind the session; no-op if not connected."""
        if self.session is not None:
            self.session.cluster.shutdown()
            self.session = None

    def getString(self, data):
        """Return the items of `data` joined with commas (no spaces)."""
        ret = ','.join(data)
        return (ret)

    @staticmethod
    def _escape(value):
        """Escape `value` for use inside a single-quoted CQL string literal."""
        # CQL represents a literal single quote by doubling it.
        return value.replace("'", "''")

    @staticmethod
    def _fieldText(node):
        """Return the concatenated text/CDATA content directly under `node`."""
        return ''.join(
            child.data for child in node.childNodes
            if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE))

    def loadXML(self, nameFile):
        """Parse `nameFile` and build the INSERT statement for ProductInfo.

        :param nameFile: path (or file object) accepted by
            ``xml.dom.minidom.parse``.
        :returns: the CQL INSERT statement as a string.

        Fields with no content and fields whose name is not a known column
        are ignored. The row id is always generated with ``uuid()``.

        NOTE: every <field> element in the file is collected, so a file with
        several <doc> elements is merged into a single row.
        """
        cqlTemplate = "INSERT INTO SmartHome.ProductInfo(id, author, comments, sku, name, description, url, keywords, features, cat) " \
                      "VALUES(uuid(), '%s', {%s}, '%s', '%s', {%s}, '%s', {%s}, {%s}, {%s})"

        scalars = dict((key, '') for key in self._SCALAR_FIELDS)
        sets = dict((key, []) for key in self._SET_FIELDS)

        root = xml.dom.minidom.parse(nameFile).documentElement
        for field in root.getElementsByTagName("field"):
            text = self._fieldText(field)
            if not text:
                continue
            fieldName = field.getAttribute("name")
            value = self._escape(text)
            if fieldName in scalars:
                scalars[fieldName] = value
            elif fieldName in sets:
                # Each field goes to the set of the same name; previously
                # 'features' values were appended to 'keywords' by mistake.
                sets[fieldName].append("'%s'" % value)

        cql = cqlTemplate % (
            scalars['author'],
            self.getString(sets['comments']),
            scalars['sku'],
            scalars['name'],
            self.getString(sets['description']),
            scalars['url'],
            self.getString(sets['keywords']),
            self.getString(sets['features']),
            self.getString(sets['cat']))
        return (cql)


def main(listHost, nameFile):
    """Insert the product described by `nameFile` into the cluster at `listHost`.

    :param listHost: list of Cassandra contact points.
    :param nameFile: path of the XML file to load.
    """
    clientSmartHome = SmartHome()
    clientSmartHome.connectDB(listHost)
    try:
        cql = clientSmartHome.loadXML(nameFile)
        clientSmartHome.insertDB(cql)
    finally:
        # Always release the connection, even when parsing or the insert fails.
        clientSmartHome.closeDB()


if __name__ == "__main__":
    if len(sys.argv) > 1:
        main(['127.0.0.1'], sys.argv[1])