DuckDB

mamba install -c conda-forge python-duckdb

https://duckdb.org/docs/api/python/overview.html 

Read a Parquet file from S3

import boto3

import duckdb

# Open a DuckDB connection and enable the httpfs extension, which supplies
# the s3:// filesystem used by the query below.
con = duckdb.connect()
con.install_extension("httpfs")
con.load_extension("httpfs")

# Resolve AWS credentials through boto3's default credential chain.
session = boto3.session.Session()
credentials = session.get_credentials()
if credentials is None:
    raise RuntimeError("boto3 could not find any AWS credentials")

# Take one consistent snapshot so the key, secret and token all belong to
# the same refresh cycle (matters for auto-refreshing credentials).
frozen = credentials.get_frozen_credentials()


def _sql_literal(value):
    """Return *value* as a single-quoted SQL string literal."""
    return "'" + str(value).replace("'", "''") + "'"


# SET values must be quoted string literals: secret keys routinely contain
# characters such as '/' and '+' that are not valid in a bare token.
s3_settings = [
    f"SET s3_access_key_id={_sql_literal(frozen.access_key)};",
    f"SET s3_secret_access_key={_sql_literal(frozen.secret_key)};",
]
if frozen.token:
    # Temporary credentials (SSO, assumed roles, instance profiles) are
    # rejected by S3 unless the session token accompanies them.
    s3_settings.append(f"SET s3_session_token={_sql_literal(frozen.token)};")
con.execute("\n".join(s3_settings))

# The path is a string literal, so it takes single quotes; double quotes
# denote identifiers in SQL.
con.execute("""
SELECT *
FROM read_parquet('s3://dtn-dsci-prd-sia/data_fusion/processed/data_fusion_customers_out_sample.parquet')
""")