import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
# Build a small pandas DataFrame with a float column, a string column and a
# boolean column, labelled by the index values 'a', 'b' and 'c'.
df = pd.DataFrame(
    {
        'one': [-1, np.nan, 2.5],
        'two': ['foo', 'bar', 'baz'],
        'three': [True, False, True],
    },
    index=['a', 'b', 'c'],
)

# Convert the DataFrame into an Arrow table.
table = pa.Table.from_pandas(df)

# Write the Arrow table out as a single Parquet file.
pq.write_table(table, 'c:/temp/example.parquet')

# pandas can also write Parquet directly from the DataFrame.
df.to_parquet('c:/temp/example2.parquet')

# Read the first Parquet file back into an Arrow table.
table2 = pq.read_table('c:/temp/example.parquet')

# Write a partitioned dataset (multiple files) under 'dataset_name',
# partitioned by the values of the 'one' and 'two' columns.
# NOTE(review): 'one' contains NaN -- confirm how the installed pyarrow
# version handles null partition values.
pq.write_to_dataset(
    table,
    root_path='dataset_name',
    partition_cols=['one', 'two'],
)