DuckDB
mamba install python-duckdb
https://duckdb.org/docs/api/python/overview.html
Read a Parquet file from S3
import boto3
import duckdb
# Open an in-memory DuckDB connection and enable the httpfs extension,
# which is what lets DuckDB read s3:// URLs.
con = duckdb.connect()
con.install_extension("httpfs")
con.load_extension("httpfs")

# Resolve AWS credentials through boto3's normal lookup chain.
session = boto3.session.Session()
credentials = session.get_credentials()
if credentials is None:
    # get_credentials() returns None when nothing is configured; fail with a
    # clear message instead of an AttributeError further down.
    raise RuntimeError("No AWS credentials found for the current boto3 session")

# Take one consistent snapshot so the key, secret and token belong together
# even if the underlying credentials are refreshable.
frozen_credentials = credentials.get_frozen_credentials()
s3_settings = {
    "s3_access_key_id": frozen_credentials.access_key,
    "s3_secret_access_key": frozen_credentials.secret_key,
    # Only present for temporary credentials (STS / SSO / assumed roles).
    "s3_session_token": frozen_credentials.token,
}
for setting_name, setting_value in s3_settings.items():
    if not setting_value:
        continue
    # Pass the value as a quoted SQL string literal: secret keys can contain
    # characters such as "/" and "+" that do not parse when left unquoted.
    escaped_value = setting_value.replace("'", "''")
    con.execute(f"SET {setting_name}='{escaped_value}';")

# Run the query; the result stays attached to `con` and can be retrieved
# afterwards with e.g. con.fetchall() or con.df().
# Single quotes mark the path as a string literal (double quotes denote
# identifiers in SQL).
con.execute("""
SELECT * FROM read_parquet('s3://dtn-dsci-prd-sia/data_fusion/processed/data_fusion_customers_out_sample.parquet')
""")