# List all known fsspec filesystem implementations
from fsspec.registry import known_implementations
known_implementations
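For example, to print just the registered protocol names (a minimal sketch):
for protocol in sorted(known_implementations):
    print(protocol)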
import fsspec
fs = fsspec.filesystem('s3', anon=True, skip_instance_cache=True)  # skip_instance_cache: always create a fresh instance
fsspec.config.conf['simplecache'] = {'cache_storage': '/tmp', 'same_names': True}  # default options for the simplecache protocol
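With that config in place, cached reads can be expressed by URL chaining; a minimal sketch, with placeholder bucket and key:
# simplecache:: keeps a local copy under the cache_storage configured above
with fsspec.open("simplecache::s3://BUCKET/file.txt", s3={"anon": True}) as f:
    data = f.read()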
from fsspec.implementations.local import LocalFileSystem
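LocalFileSystem exposes the same API against local disk, handy for testing code paths without a remote store; a minimal sketch:
local = LocalFileSystem()
local.ls(".")  # same ls/glob/get/put/open interface as the remote filesystems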
from adlfs import AzureBlobFileSystem
storage_options = {"account_name": "ACCOUNT_NAME", "account_key": "ACCOUNT_KEY"}
fs = AzureBlobFileSystem(**storage_options)
fs.get("file.parquet/*", "file.parquet")
import s3fs
fs = s3fs.S3FileSystem(anon=True)  # anonymous access for public buckets
fs = s3fs.S3FileSystem(client_kwargs={"endpoint_url": "http://localhost:4566"})  # custom endpoint, e.g. LocalStack
get_time = "2020/11/15/0300"  # YYYY/MM/DD/HHMM
fs.ls('noaa-himawari8/AHI-L1b-FLDK/{}'.format(get_time))
fs.glob('noaa-himawari8/AHI-L1b-FLDK/{}'.format(get_time))
fs.get("noaa-nbm-pds/blendv4.0/conus/2021/02/14/0000/windspd/blendv4.0_conus_windspd_2021-02-14T00:00_2021-02-14T01:00.tif", "blendv4.0_conus_windspd_2021-02-14T00:00_2021-02-14T01:00.tif")
Init
import s3fs
fs = s3fs.S3FileSystem()  # default credential chain (env vars, config files, IAM role)
fs = s3fs.S3FileSystem(use_listings_cache=False)  # don't cache directory listings
fs = s3fs.S3FileSystem(anon=True)  # anonymous access
fs = s3fs.S3FileSystem(profile=PROFILE)  # named profile from the AWS config
fs = s3fs.S3FileSystem(requester_pays=True)  # requester-pays buckets
fs = s3fs.S3FileSystem(version_aware=True)  # enable object version operations
fs = s3fs.S3FileSystem(config_kwargs={"read_timeout": 7200})  # botocore Config options, e.g. long reads
List files
files = ["s3://" + f for f in fs.glob(f"{PATH}2020*.zarr")]
fs.ls('noaa-himawari8/AHI-L1b-FLDK/YYYY/MM/DD/HHMM/')
fs.ls('noaa-himawari8/AHI-L1b-FLDK/2022/01/01/0000/', detail=True)
fs.ls("s3://era5-pds/zarr/2020/12/data/")
fs.ls("s3://era5-pds/zarr/2020/12/data/air_temperature_at_2_metres.zarr")
fs.ls('fmi-opendata-rcrhirlam-surface-grib/YYYY/MM/DD/HH/')
fs.ls('fmi-opendata-rcrhirlam-surface-grib/2021/02/03/00/numerical-hirlam74-forecast-MaximumWind-20210203T000000Z.grb2')
Download files
fs.get("s3://BUCKET/", ".", recursive=True)
fs.get('fmi-opendata-rcrhirlam-surface-grib/2021/02/03/00/numerical-hirlam74-forecast-MaximumWind-20210203T000000Z.grb2', 'out.grb2')
fs.get('fmi-opendata-rcrhirlam-surface-grib/2021/02/03/00/numerical-hirlam74-forecast-WindGust-20210203T000000Z.grb2', 'out2.grb2')
Upload files
fs.put("LOCAL_FILE", "s3://BUCKET/")
See info, e.g. whether a file is archived in Glacier
fs.info("s3://BUCKET/FILE.txt")  # may show StorageClass: DEEP_ARCHIVE
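To find every archived object under a prefix, the detailed listing can be filtered; a sketch, assuming the listing entries carry a StorageClass field:
archived = [f["name"] for f in fs.ls("s3://BUCKET/", detail=True)
            if f.get("StorageClass") == "DEEP_ARCHIVE"]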
Check if exists
fs.exists("s3://BUCKET/FILE.txt")
Read file
with fs.open("s3://BUCKET/file.txt", "r") as f:
s = f.read().rstrip("\n")
Read JSON
import json
import s3fs
with s3fs.S3FileSystem().open("s3://BUCKET/file.json", "r") as f:
    d = json.load(f)
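Writing JSON back works the same way; a minimal sketch:
with s3fs.S3FileSystem().open("s3://BUCKET/file.json", "w") as f:
    json.dump(d, f)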
Write file
with fs.open("s3://BUCKET/file.html", "w") as f:
f.write(profile_html)
import pickle
with fs.open("s3://BUCKET/obj.pkl", "wb") as f:
    pickle.dump(obj, f)  # closing the file flushes the upload
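Loading the object back; a minimal sketch:
with fs.open("s3://BUCKET/obj.pkl", "rb") as f:
    obj = pickle.load(f)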
Bucket size
from dask.utils import format_bytes
format_bytes(fs.du("s3://BUCKET"))
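du can also return per-file sizes instead of a total; a minimal sketch:
sizes = fs.du("s3://BUCKET", total=False)  # dict mapping path -> size in bytes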
Delete files
fs.rm("s3://BUCKET/FILE.txt")
fs.rm("s3://BUCKET/", recursive=True)
Move files
fs.mv("s3://BUCKET/source", "s3://BUCKET/destination", recursive=True)
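To copy rather than move, fsspec filesystems also provide copy; a minimal sketch:
fs.copy("s3://BUCKET/source", "s3://BUCKET/destination", recursive=True)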
See versions
fs.object_version_info("s3://BUCKET/FILE.zarr")  # requires version_aware=True
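With version_aware=True, a specific version can be opened; a sketch, assuming VERSION_ID comes from the call above:
with fs.open("s3://BUCKET/FILE.txt", "rb", version_id="VERSION_ID") as f:
    data = f.read()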
df["files2"].apply(fs.info)
df.join(df["info"].apply(pd.Series))
from fsspec.implementations.ftp import FTPFileSystem
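A minimal sketch with placeholder credentials:
ftp = FTPFileSystem(host="HOST", username="USER", password="PASSWORD")
ftp.ls("/")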