https://github.com/vividvilla/csvtotable
https://github.com/dbohdan/structured-text-tools
https://news.ycombinator.com/item?id=16784850
https://habrahabr.ru/post/305370/ JSON from command line
https://habrahabr.ru/post/306140/ JSON validation from command line
https://habrahabr.ru/post/319722/ JSON filter from command line
python -m json.tool filename
http://stackabuse.com/reading-and-writing-json-to-a-file-in-python/
https://news.ycombinator.com/item?id=11649142
https://news.ycombinator.com/item?id=11272678
http://tabula.technology/ extracting tables from pdf
http://blog.ionelmc.ro/2015/11/22/memory-use-and-speed-of-json-parsers/
https://news.ycombinator.com/item?id=10715012
https://news.ycombinator.com/item?id=9670470 in MySQL
https://news.ycombinator.com/item?id=8232019
http://jeroenjanssens.com/2013/09/19/seven-command-line-tools-for-data-science.html
CSV
https://github.com/harelba/q run SQL on csv/tsv
https://news.ycombinator.com/item?id=13438495
http://www.wise.io/tech/paratext
https://github.com/slott56/introduction-python-csv
http://blog.districtdatalabs.com/simple-csv-data-wrangling-with-python
C++
https://www.reddit.com/r/cpp/comments/3lrsbt/whats_your_go_to_modern_c_csv_reader/
Javascript
https://code.google.com/p/jquery-csv/
http://code.tutsplus.com/tutorials/parsing-a-csv-file-with-javascript--cms-25626
http://www.kalzumeus.com/2015/01/28/design-and-implementation-of-csvexcel-upload-for-saas/
http://www.amcharts.com/tutorials/using-data-loader-plugin/
https://github.com/dinedal/textql
Export from JS
https://www.churchwood.at/posts/javascript-csv-export/
https://halistechnology.com/2015/05/28/use-javascript-to-export-your-data-as-csv/
JSON-LD
http://manu.sporny.org/2014/json-ld-origins-2/
https://news.ycombinator.com/item?id=9670239
https://gist.github.com/jorin-vogel/2e43ffa981a97bc17259
https://news.ycombinator.com/item?id=10066742
http://csvkit.readthedocs.org/en/0.9.1/#
https://www.dataquest.io/blog/data-cleaning-command-line/
- csvlook
- Convert Excel to CSV: in2csv data.xls > data.csv
- Convert JSON to CSV: in2csv data.json > data.csv
- Print column names: csvcut -n data.csv
- Select a subset of columns: csvcut -c column_a,column_c data.csv > new.csv
- Reorder columns: csvcut -c column_c,column_a data.csv > new.csv
- Find rows with matching cells: csvgrep -c phone_number -r "555-555-\d{4}" data.csv > matching.csv
- Convert to JSON: csvjson data.csv > data.json
- Generate summary statistics: csvstat data.csv
- Query with SQL: csvsql --query "select name from data where age > 30" data.csv > old_folks.csv
- Import into PostgreSQL: csvsql --db postgresql:///database --insert data.csv
- Extract data from PostgreSQL: sql2csv --db postgresql:///database --query "select * from data" > extract.csv
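The column selection/reordering that csvcut -c does can also be sketched with Python's stdlib csv module (cut_columns and the sample data here are just for illustration, not part of csvkit):

```python
import csv
import io

def cut_columns(in_stream, out_stream, columns):
    """Select (and reorder) named columns, like `csvcut -c col_c,col_a`."""
    reader = csv.DictReader(in_stream)
    # extrasaction='ignore' drops any column not listed in `columns`
    writer = csv.DictWriter(out_stream, fieldnames=columns, extrasaction='ignore')
    writer.writeheader()
    for row in reader:
        writer.writerow(row)

src = io.StringIO('column_a,column_b,column_c\n1,2,3\n4,5,6\n')
dst = io.StringIO()
cut_columns(src, dst, ['column_c', 'column_a'])
print(dst.getvalue())
```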
Python CSV
https://tablib.readthedocs.org/en/latest/
https://spectraldifferences.wordpress.com/2014/01/18/reading-tabular-data/
https://www.reddit.com/r/Python/comments/3wa22v/120gb_csv_is_this_something_i_can_handle_in_python/
http://neilb.bitbucket.org/csvfix/
https://csvkit.readthedocs.org/en/0.8.0/
https://gist.github.com/hlawrenz/1ef3490cb9aa021297a0 CSV in python
https://gist.github.com/jorin-vogel/2e43ffa981a97bc17259
https://news.ycombinator.com/item?id=7175830
import csv

max_fields_per_row = 0
field_lengths = [0] * 200  # 200 is just a big number, more than the expected fields per row
with open('yourfile.txt', 'r', encoding='utf-8', errors='replace', newline='') as fh:
    # csv breaks if it encounters a null character, hence the generator
    reader = csv.reader((l.replace('\0', ' ') for l in fh), delimiter=',')
    for row in reader:
        if len(row) > max_fields_per_row:
            max_fields_per_row = len(row)
        for index, field in enumerate(row):
            if len(field) > field_lengths[index]:
                field_lengths[index] = len(field)
print(max_fields_per_row)
print(field_lengths[:max_fields_per_row])
import csv
# Open file (newline='' is the mode the csv module expects; 'rU' is deprecated)
f = open('soil_moisture.csv', 'r', newline='')
csvfile = csv.reader(f, delimiter=',')
# Skip first line (header)
headerline = next(csvfile)
# Iterate through file
for line in csvfile:
    print('First col = ', line[0])
f.close()
import csv

def load_csv_data(stream):
    result = []
    for row in csv.reader(stream):
        if len(row) != 2:
            raise ValueError('Rows must have two entries')
        point = float(row[0]), float(row[1])
        result.append(point)
    return result
import io

data = '1.0,2.5\n3.5,4.1\n7.5,2.2\n6.9,1.1\n'
rows = load_csv_data(io.StringIO(data))
for i, row in enumerate(rows):
    print('Row %d is %r' % (i, row))
Here, I've rewritten the earlier function as a generator by removing the result list and changing the result.append call into a yield expression:
def load_csv_data_streaming(stream):
    for row in csv.reader(stream):
        if len(row) != 2:
            raise ValueError('Rows must have two entries')
        point = float(row[0]), float(row[1])
        yield point
Calling a generator function immediately returns an iterator and doesn't actually execute the function's body. Each time the iterator is advanced (e.g., by the next built-in function), the generator function will execute until the next yield statement is reached or the function exits. Python will interleave the generator's execution with the execution of the code that consumes the generator, in the same way you'd expect cooperative threads to work together.
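A tiny sketch of that lazy execution (count_up is a toy generator, just for illustration):

```python
def count_up(n):
    # Nothing below runs until the iterator is advanced
    for i in range(n):
        print('yielding', i)
        yield i

it = count_up(3)      # returns an iterator immediately; no 'yielding' printed yet
first = next(it)      # body runs up to the first yield, printing 'yielding 0'
print('got', first)
print('rest:', list(it))  # draining the iterator runs the remaining yields
```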
The generator version of a function is often a drop-in replacement for the version that returns a list. Python's looping constructs interact with any type of iterable (lists, dictionaries, iterators, etc.) in the same way. The only change required to use the generator function is the call to load_csv_data_streaming:
import io

data = '1.0,2.5\n3.5,4.1\n7.5,2.2\n6.9,1.1\n'
rows = load_csv_data_streaming(io.StringIO(data))
for i, row in enumerate(rows):
    print('Row %d is %r' % (i, row))
Pandas
import pandas as pd

df = pd.read_csv('THECSV', sep=';')  # DataFrame.from_csv is deprecated
# apply over rows needs axis=1
df['valid_name'] = df.apply(lambda x: '.'.join([x['first'], x['last']]), axis=1)
# .ix is removed in modern pandas; string methods live under .str
df0 = df.loc[df['valid_name'].str.lower() != df['user_name'].str.lower(), :]
df0.to_csv('out.csv')
import csv

with open('...') as in_f, open('...', 'w') as out_f:
    reader = csv.DictReader(in_f, delimiter=';')
    writer = csv.writer(out_f)
    for record in reader:
        if record['UserName'].lower() == '{}.{}'.format(record['FirstName'], record['LastName']).lower():
            writer.writerow([record['UserName'], record['FirstName'], record['LastName']])
JSON
https://news.ycombinator.com/item?id=13090604
https://news.ycombinator.com/item?id=7895076 XML tools on bottom
If you just need pretty printing and don't have jq installed, you can use the python command line with the built in JSON module:
wget -qO- http://reddit.com/.json | python -m json.tool
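Inside a script, the same pretty printing comes from json.dumps (the dict here is a made-up example):

```python
import json

obj = {'b': 2, 'a': 1}
# indent=4 and sort_keys mirror what `python -m json.tool` prints
print(json.dumps(obj, indent=4, sort_keys=True))
```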
Java Json
https://habrahabr.ru/company/luxoft/blog/280782/
http://engineering.doubledutch.me/h/i/275207067-the-curious-case-of-the-very-lazy-but-blazingly-fast-json-parser
http://stedolan.github.io/jq/ command-line json processor written in C
http://jgrep.org/ written in RUBY
http://goessner.net/articles/JsonPath/ JS and Ruby
https://github.com/qnectar/pipeline
https://github.com/jb55/dot-lens
JSON editor in browser
https://github.com/kevinburke/hulk
https://github.com/josdejong/jsoneditor/
JSON
http://freeformatter.com/ JSON TOOLS
Read/write json script
import json

with open('data.json') as input_file:
    data = json.load(input_file)
data is now an object that you can manipulate as you normally would with any Python object:
print(data['cat']['lives'])  # output: 9
To write a Python dictionary to a JSON file, you can use the dump method:
import json

data = {'dog': {'legs': 4, 'breeds': ['Border Collie', 'Pit Bull', 'Huskie']},
        'cat': {'legs': 4, 'breeds': ['Siamese', 'Persian', 'Sphynx']}}
with open('data.json', 'w') as output_file:
    json.dump(data, output_file, indent=4)
----------------------
import json
import sys

def put(data, filename):
    try:
        jsondata = json.dumps(data, indent=4, skipkeys=True, sort_keys=True)
        with open(filename, 'w') as fd:
            fd.write(jsondata)
    except (TypeError, OSError):
        print('ERROR writing', filename)

def get(filename):
    returndata = {}
    try:
        with open(filename, 'r') as fd:
            # json.read does not exist; json.loads is the correct call
            returndata = json.loads(fd.read())
    except (ValueError, OSError):
        print('COULD NOT LOAD:', filename)
    return returndata

if __name__ == '__main__':
    o = get(sys.argv[1])
    if o:
        put(o, sys.argv[1])
========================
#!/usr/bin/env python
import json
import sys

def main(data):
    json_data = json.loads(data)
    if not json_data:
        print('no data')
        sys.exit(1)
    if not isinstance(json_data, list):
        json_data = [json_data]

    def print_keys(obj, indentation=''):
        for key, value in obj.items():  # iteritems is Python 2 only
            if isinstance(value, list):
                key = '%s[]' % key
            print(indentation, key)
            if isinstance(value, dict):
                print_keys(value, indentation + '  ')
            # only recurse into lists whose first element is a dict
            elif isinstance(value, list) and value and isinstance(value[0], dict):
                print_keys(value[0], indentation + '  ')

    for item in json_data:
        print_keys(item)

if __name__ == '__main__':
    main(sys.stdin.read())