# VirtualiZarr quickstart notes — docs: https://virtualizarr.readthedocs.io/en/latest/index.html
# Quickstart: build a "virtual" xarray dataset containing byte-range
# references into a local NetCDF file, instead of loading the data itself.
from virtualizarr import open_virtual_dataset
vds = open_virtual_dataset('air.nc')
# Usage guide: https://virtualizarr.readthedocs.io/en/stable/usage.html
# Environment setup (shell, not Python):
#   pixi add "s3fs>=2024.2.0" boto3 virtualizarr kerchunk h5py h5netcdf fastparquet bottleneck
# Combine two consecutive hourly forecast NetCDF files from S3 into one
# virtual dataset along `time`, then persist the kerchunk references in
# three formats (S3 parquet, local JSON, local parquet) and reopen each
# via xarray's kerchunk engine to sanity-check the round trip.
from virtualizarr import open_virtual_dataset
import xarray as xr

url_t0 = "s3://mg-fcst-nwp-dtn-onefx-prod-euw1/outgoing/netcdf/0.1x0.1-global/20241122T170000Z/20241122T170000Z-air_temperature-20241122T170000Z.nc"
url_t1 = "s3://mg-fcst-nwp-dtn-onefx-prod-euw1/outgoing/netcdf/0.1x0.1-global/20241122T170000Z/20241122T170000Z-air_temperature-20241122T180000Z.nc"

# indexes={} skips building in-memory pandas indexes, which is required
# before concatenating virtual datasets with compat="override".
vds1 = open_virtual_dataset(url_t0, indexes={})
vds2 = open_virtual_dataset(url_t1, indexes={})
vds = xr.concat([vds1, vds2], dim="time", coords="minimal", compat="override")

# Same write/reopen cycle for each target; `ds` ends up holding the last one.
for ref_path, ref_format in [
    ("s3://dtn-dsci-prd-eda/ray.bell/combined.parquet", "parquet"),
    ("/tmp/combined.json", "json"),
    ("/tmp/combined.parquet", "parquet"),
]:
    vds.virtualize.to_kerchunk(ref_path, format=ref_format)
    ds = xr.open_dataset(ref_path, engine="kerchunk")
# Optional tweak (currently disabled): overwrite the first time coordinate
# with the init time parsed from the filename — would need `numpy`,
# `datetime`, and an `init_time` string in scope. TODO confirm before enabling.
#ds["time"].values[0] = np.datetime64(datetime.strptime(init_time, "%Y%m%dT%H%M%SZ"))
# Forward explicit AWS credentials to fsspec/s3fs when the default
# credential chain isn't available to the reader.
import boto3  # fix: boto3 was used here but never imported in this file

aws_creds = boto3.Session().get_credentials()
# s3fs expects 'key'/'secret'; also forward 'token' so temporary/session
# credentials (STS, SSO, instance roles) work — token is None for long-lived
# keys, which s3fs accepts.
aws_credentials = {
    "key": aws_creds.access_key,
    "secret": aws_creds.secret_key,
    "token": aws_creds.token,
}
vds = open_virtual_dataset(
    "s3://mg-fcst-nwp-dtn-onefx-prod-euw1/outgoing/netcdf/0.1x0.1-global/20241122T170000Z/20241122T170000Z-air_temperature-20241122T170000Z.nc",
    reader_options={"storage_options": aws_credentials},
)