Intake

Python

from intake import open_catalog


cat = open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/atmosphere.yaml")

cat = open_catalog("https://raw.githubusercontent.com/aaronspring/remote_climate_data/master/master.yaml")

cat = open_catalog("s3://BUCKET/test.yaml")


cat = open_catalog("test.yaml")


list(cat)


cat.walk()


cat.entry.read()

intake-stac

from intake import open_stac_catalog


catalog_url = "https://www.planet.com/data/stac/catalog.json"

cat = open_stac_catalog(catalog_url)

collection = cat["planet-disaster-data"]["hurricane-harvey"]["hurricane-harvey-0831"]

item = collection["Houston-East-20170831-103f-100d-0f4f-RGB"]

entry = item["thumbnail"]

YAML

test.yaml:

metadata:

  version: 1

sources:

  entry:

    description: description

    driver: zarr

    args:

      urlpath: s3://BUCKET/file.zarr

      consolidated: True

test.yaml:

metadata:

  version: 1

sources:

  entry:

    description: description

    driver: pattern_cat

    args:

      urlpath: s3://BUCKET/{datetime:%Y%m%d}.zarr

      driver: zarr

      driver_kwargs:

        consolidated: True

      listable: false

    metadata:

      contributor: contributor

Geoparquet

metadata:

version: 1

entry:

driver: pattern_cat

metadata:

source: source

contributor: contributor

description: description

args:

driver: geoparquet

urlpath: s3://BUCKET/year_{year}_month_{month}.parquet

driver_kwargs:

use_fsspec: True


cat.entry.get_entry_kwarg_sets()

Parquet

entry:

driver: pattern_cat

metadata:

source: source

contributor: contributor

description: description

args:

driver: parquet

driver_kwargs:

engine: pyarrow-dataset

urlpath: s3://BUCKET/year_{year}_month_{month}.parquet

CSV

entry:

driver: pattern_cat

metadata:

source: source

contributor: contributor

description: description

args:

listable: false

recursive_glob: true

driver: csv

urlpath: s3://BUCKET/year_{year}_month_{month}.csv

Zarr

entry:

  description: description

  metadata:

    source: source

    contributor: contributor

  driver: pattern_cat

  args:

    urlpath: s3://BUCKET/{datetime:%Y%m%d}.zarr

    driver: zarr

    driver_kwargs:

      consolidated: True

    listable: false

sources:

  GFS_Global_0p25deg_20210913_1800.grib2:

    args:

      chunks: {}

      urlpath: https://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg/GFS_Global_0p25deg_20210913_1800.grib2

      xarray_kwargs: {"engine": "netcdf4"}

    description: THREDDS data

    driver: intake_xarray.opendap.OpenDapSource

    metadata:

      catalog_dir: null

JSON

from intake.source.jsonfiles import JSONFileSource