Intake
Python
from intake import open_catalog
cat = open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/atmosphere.yaml")
cat = open_catalog("https://raw.githubusercontent.com/aaronspring/remote_climate_data/master/master.yaml")
cat = open_catalog("s3://BUCKET/test.yaml")
cat = open_catalog("test.yaml")
list(cat)
cat.walk()
cat.entry.read()
intake-stac
from intake import open_stac_catalog
catalog_url = "https://www.planet.com/data/stac/catalog.json"
cat = open_stac_catalog(catalog_url)
collection = cat["planet-disaster-data"]["hurricane-harvey"]["hurricane-harvey-0831"]
item = collection["Houston-East-20170831-103f-100d-0f4f-RGB"]
entry = item["thumbnail"]
YAML
test.yaml:
metadata:
  version: 1
sources:
  entry:
    description: description
    driver: zarr
    args:
      urlpath: s3://BUCKET/file.zarr
      consolidated: True
test.yaml:
metadata:
  version: 1
sources:
  entry:
    description: description
    driver: pattern_cat
    args:
      urlpath: s3://BUCKET/{datetime:%Y%m%d}.zarr
      driver: zarr
      driver_kwargs:
        consolidated: True
      listable: false
    metadata:
      contributor: contributor
Geoparquet
metadata:
  version: 1
sources:
  entry:
    driver: pattern_cat
    metadata:
      source: source
      contributor: contributor
    description: description
    args:
      driver: geoparquet
      urlpath: s3://BUCKET/year_{year}_month_{month}.parquet
      driver_kwargs:
        use_fsspec: True
cat.entry.get_entry_kwarg_sets()
Parquet
entry:
  driver: pattern_cat
  metadata:
    source: source
    contributor: contributor
  description: description
  args:
    driver: parquet
    driver_kwargs:
      engine: pyarrow-dataset
    urlpath: s3://BUCKET/year_{year}_month_{month}.parquet
CSV
entry:
  driver: pattern_cat
  metadata:
    source: source
    contributor: contributor
  description: description
  args:
    listable: false
    recursive_glob: true
    driver: csv
    urlpath: s3://BUCKET/year_{year}_month_{month}.csv
Zarr
entry:
  description: description
  metadata:
    source: source
    contributor: contributor
  driver: pattern_cat
  args:
    urlpath: s3://BUCKET/{datetime:%Y%m%d}.zarr
    driver: zarr
    driver_kwargs:
      consolidated: True
    listable: false
sources:
  GFS_Global_0p25deg_20210913_1800.grib2:
    args:
      chunks: {}
      urlpath: https://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg/GFS_Global_0p25deg_20210913_1800.grib2
      xarray_kwargs: {"engine": "netcdf4"}
    description: THREDDS data
    driver: intake_xarray.opendap.OpenDapSource
    metadata:
      catalog_dir: null
JSON
from intake.source.jsonfiles import JSONFileSource