https://opendata.maryland.gov/
https://opendata.maryland.gov/Demographic/Population-Estimates-2010-2022/r7ky-rq9s/about_data
https://opendata.maryland.gov/resource/uxq4-6wxf.json
import pandas as pd

# Maryland Population Estimates 2010-2022 (Socrata dataset r7ky-rq9s), full CSV export.
population_url = "https://opendata.maryland.gov/api/views/r7ky-rq9s/rows.csv"
df = pd.read_csv(population_url)
https://opendata.maryland.gov/Energy-and-Environment/Power-Outages-County/uxq4-6wxf/about_data
import pandas as pd
# Two alternative readers for the Power Outages - County dataset (uxq4-6wxf).
# NOTE(review): df is immediately overwritten below — these are alternatives; keep one.
# The Socrata resource endpoint is paginated (see the $limit used for gdzy-2fen below).
df = pd.read_json("https://opendata.maryland.gov/resource/uxq4-6wxf.json", storage_options={'User-Agent': 'Mozilla/5.0'})
# Full CSV export of the same dataset.
df = pd.read_csv("https://opendata.maryland.gov/api/views/uxq4-6wxf/rows.csv")
gemini -s -y -m "gemini-3-flash-preview" -i "What is the latest outages in Anne Arundel and when was the maximum outages? The data can be found at https://opendata.maryland.gov/resource/uxq4-6wxf.json"
gemini -s -y -m "gemini-3-flash-preview" -i "What is the latest outages in Anne Arundel and when was the maximum outages? The data can be found at https://opendata.maryland.gov/api/views/uxq4-6wxf/rows.csv"
https://opendata.maryland.gov/Government/PLC-Data-Catalog/gdzy-2fen/about_data - permits, licenses, and certificates data catalog
import pandas as pd
# PLC Data Catalog (gdzy-2fen); $limit=50000 raises the Socrata default row cap.
df = pd.read_json(
"https://opendata.maryland.gov/resource/gdzy-2fen.json?$limit=50000",
storage_options={'User-Agent': 'Mozilla/5.0'},
)
# CSV alternative for the same resource (overwrites df above — keep one).
df = pd.read_csv(
"https://opendata.maryland.gov/resource/gdzy-2fen.csv?$limit=50000",
storage_options={'User-Agent': 'Mozilla/5.0'},
)
https://opendata.maryland.gov/Energy-and-Environment/Power-Outages-Zipcode/nktk-ei6p/about_data
import pandas as pd
# Power Outages - Zipcode (nktk-ei6p), full CSV export.
df = pd.read_csv("https://opendata.maryland.gov/api/views/nktk-ei6p/rows.csv")
import pandas as pd
# Dataset mfac-nzpe — presumably another opendata.maryland.gov table; TODO identify.
df = pd.read_csv("https://opendata.maryland.gov/api/views/mfac-nzpe/rows.csv")
import pandas as pd
# Dataset su2w-hm7s — TODO identify.
df = pd.read_csv("https://opendata.maryland.gov/api/views/su2w-hm7s/rows.csv")
https://opendata.maryland.gov/Transportation/Baltimore-Port-overview/j5by-ruxq - invalid
import pandas as pd
# Baltimore Port overview (j5by-ruxq) — the about link above is marked invalid.
df = pd.read_csv("https://opendata.maryland.gov/api/views/j5by-ruxq/rows.csv")
import pandas as pd
# Dataset 2ir4-626w — TODO identify.
df = pd.read_csv("https://opendata.maryland.gov/api/views/2ir4-626w/rows.csv")
import pandas as pd
# Dataset ief7-i74z — TODO identify.
df = pd.read_csv("https://opendata.maryland.gov/api/views/ief7-i74z/rows.csv")
import pandas as pd
# Dataset 7syw-q4cy — TODO identify.
df = pd.read_csv("https://opendata.maryland.gov/api/views/7syw-q4cy/rows.csv")
https://opendata.maryland.gov/Budget/Maryland-Operating-Budget-FY2017-2024-/yu65-jmmv/about_data
import pandas as pd
# Maryland Operating Budget FY2017-2024 (yu65-jmmv), full CSV export.
df = pd.read_csv("https://opendata.maryland.gov/api/views/yu65-jmmv/rows.csv")
https://operatingbudget.maryland.gov/#!/year/default
https://www.weather.gov/gis/Counties
from io import BytesIO
import requests
import geopandas as gpd

# NWS AWIPS county shapefile (c_19se23 = the 19 Sep 2023 release).
response = requests.get("https://www.weather.gov/source/gis/Shapefiles/County/c_19se23.zip")
# Alternative: Census TIGER 2024 county boundaries.
# response = requests.get("https://www2.census.gov/geo/tiger/TIGER2024/COUNTY/tl_2024_us_county.zip")
# BUG FIX: the read was pasted un-indented outside the `with` body; it must run
# while the buffer is open.
with BytesIO(response.content) as file_obj:
    gdf = gpd.read_file(file_obj)
import geopandas as gpd

# US county polygons keyed by FIPS id, from plotly's sample datasets.
counties_url = (
    "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
)
gdf = gpd.read_file(counties_url)
gdf = gdf.astype({"id": "int32", "STATE": "int32"})
gdf = gdf.rename(columns={"id": "fips", "STATE": "state", "NAME": "county_name"})
state = 24  # MD
# Keep only Maryland counties (notebook-style trailing expression).
gdf[gdf["state"] == state]
https://www2.census.gov/programs-surveys/popest/datasets/
https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-total.html
12 - FL; 24 - MD
# Census county population totals 2020-2024; the state FIPS code selects the spreadsheet.
state = 24 # MD
import requests
# Browser-like headers — census.gov appears to reject plain requests (TODO confirm).
headers = {
'Referer': 'https://www.census.gov/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
}
response = requests.get(
f'https://www2.census.gov/programs-surveys/popest/tables/2020-2024/counties/totals/co-est2024-pop-{state}.xlsx',
headers=headers,
)
from io import BytesIO
import pandas as pd
#df = pd.read_excel(BytesIO(response.content), skiprows=3)
# Skip the title/header rows (skiprows=5) and trailing footnotes (skipfooter=6);
# column 1 is dropped — presumably the estimates base. Verify these offsets
# against the current spreadsheet layout.
df = pd.read_excel(
BytesIO(response.content),
header=None,
skiprows=5,
skipfooter=6,
usecols=[0, 2, 3, 4, 5, 6],
names=["county", "2020", "2021", "2022", "2023", "2024"],
)
https://sedac.ciesin.columbia.edu/data/collection/gpw-v4
https://explore.overturemaps.org/#15.07/41.89731/12.47626
aws s3 ls s3://overturemaps-us-west-2/release/2024-09-18.0/theme=buildings/ --no-sign-request
aws s3 ls s3://overturemaps-us-west-2/release/2024-09-18.0/theme=places/type=place/ --no-sign-request
import geopandas as gpd
# Natural Earth 10m North America roads; the pyogrio engine reads the zipped shapefile straight from the URL.
gdf = gpd.read_file("https://naciscdn.org/naturalearth/10m/cultural/ne_10m_roads_north_america.zip", engine="pyogrio")
aws s3 ls s3://overturemaps-us-west-2/release/2024-11-13.0/theme=transportation/type=segment/ --no-sign-request
from overturemaps import core
# Overture transportation segments; bbox is a placeholder — presumably
# (xmin, ymin, xmax, ymax); confirm with the overturemaps-py docs before running.
gdf = core.geodataframe("segment", bbox=...)
https://maps.cteco.uconn.edu/ - Connecticut
LPdaac
Satellite: MODIS Terra+Aqua (MOTA), Product: MCD64A1.061, Grid (sinusoidal). https://lpdaac.usgs.gov/products/mcd64a1v061/
500 m MODIS Surface Reflectance coupled with 1 km MODIS active fire observations. Burn-sensitive Vegetation Index. Date of burn. SIN grid (https://modis-land.gsfc.nasa.gov/MODLAND_grid.html)
Google Earth
https://developers.google.com/earth-engine/datasets/catalog/MODIS_061_MCD64A1
2022-01-01 to Present. Global. Daily.
https://github.com/OPERA-Cal-Val/OPERA_Applications/tree/main
LPdaac
https://lpdaac.usgs.gov/products/opera_l3_dist-alert-hls_v1v001/
https://registry.opendata.aws/sentinel-1/
https://documentation.dataspace.copernicus.eu/Usecase.html
VH - linear gamma0 - This script displays a grayscale visualization of the gamma0 of the VH polarization. The values for the cross polarization (VH) are generally lower (darker visualization) than for the co-polarization (HH, VV). The VH polarization has higher values for surfaces characterized by volume scattering, e.g., branches, dry soil bodies, or canopies (lighter color in the visualization) and lower for surfaces with little to no scattering (darker color in the visualization).
This is useful for finding vessels.
Bottom of Atmosphere values with atmospheric corrections applied
Top of Atmosphere values
https://huggingface.co/datasets/stateofmaryland/Google-Alpha-Earth-MD
https://developers.google.com/earth-engine/guides/aef_on_gcs_readme
gcloud storage ls gs://alphaearth_foundations/satellite_embedding/v1/annual/ --billing-project=$env:GCLOUD_PROJECT_ID
https://colab.research.google.com/drive/19352BbohJumX6RJThit5Zzk_aMU8ZNjO?usp=sharing
# AlphaEarth Maryland overview embeddings on Hugging Face (zarr via the hf:// fsspec protocol).
url = "hf://datasets/stateofmaryland/Google-Alpha-Earth-MD-data/google-alpha-earth-md-overview-9.zarr"
# NOTE(review): assumes `import xarray as xr` ran earlier in these notes.
ds = xr.open_zarr(url)
import os
import geopandas as gpd

# Authenticate when running inside Google Colab; outside Colab the google.colab
# import fails and we fall through without credentials.
try:
    from google.colab import auth, userdata
    auth.authenticate_user()
    os.environ["PROJECT_ID"] = userdata.get("PROJECT_ID")
except Exception:  # BUG FIX: was a bare `except:` (also caught KeyboardInterrupt/SystemExit)
    pass
# US counties keyed by FIPS; dissolve Maryland (state FIPS 24) into one geometry.
gdf = (
    gpd.read_file(
        "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
    )
    .astype({"id": "int32", "STATE": "int32"})
    .rename(columns={"id": "fips", "STATE": "state", "NAME": "county_name"})
)
md_gdf = gdf[gdf["state"] == 24].dissolve()
# Spatial index of AlphaEarth satellite-embedding tiles (requester-pays GCS bucket,
# billed to PROJECT_ID).
gdf_index = gpd.read_parquet(
    "gs://alphaearth_foundations/satellite_embedding/v1/annual/aef_index.parquet",
    storage_options={
        "project": os.environ.get("PROJECT_ID"),
        "requester_pays": True,
    },
)
# Reproject MD to the index CRS, clip the index to MD, and order deterministically.
md_gdf = md_gdf.to_crs(gdf_index.crs)
gdf_md = gpd.clip(gdf_index, md_gdf).sort_values(["year", "utm_zone", "fid"])
gdf_md
https://datafusionsolutions.com/
https://datafusionsolutions.com/ToolPages/PowerOutagePro/OutageTrackerPro
https://smc-datachallenge.ornl.gov/eagle/
https://www.ncdc.noaa.gov/stormevents/ftp.jsp
import pandas as pd
# NCEI Storm Events locations file for 2024 — pandas handles both the FTP URL and the gzip.
pd.read_csv("ftp://ftp.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/StormEvents_locations-ftp_v1.0_d2024_c20241121.csv.gz")
https://www.spc.noaa.gov/climo/reports/today.html
https://www.spc.noaa.gov/exper/archive/events/
https://www.spc.noaa.gov/archive/
import pandas as pd
# Show full cell contents (the report remarks are long).
pd.set_option("display.max_colwidth", None)
# SPC same-day tornado reports; the second read overwrites the first — keep one.
df = pd.read_csv("https://www.spc.noaa.gov/climo/reports/today_raw_torn.csv")
df = pd.read_csv("https://www.spc.noaa.gov/climo/reports/today_filtered_torn.csv") # deduplicated
# NWS point forecast for a city: geocode it, resolve the api.weather.gov
# gridpoint, then tabulate the forecast periods.
city = "Annapolis"
import requests
import pandas as pd
from geopy.geocoders import Photon, Nominatim
from geopy.adapters import GeocoderUnavailable

# BUG FIX: the try/except bodies were pasted un-indented (SyntaxError).
# Geocode with Photon; fall back to Nominatim when Photon is unreachable.
try:
    location = Photon().geocode(city)
except GeocoderUnavailable:
    location = Nominatim(user_agent="_").geocode(city)
# The points endpoint maps lat/lon to the responsible office's forecast URL.
response = requests.get(f"https://api.weather.gov/points/{location.point.latitude},{location.point.longitude}")
grid_id_forecast_url = response.json()["properties"]["forecast"]
response = requests.get(grid_id_forecast_url)
forecast = response.json()
forecasts = pd.DataFrame(forecast["properties"]["periods"])
# Suffix column names with their units.
forecasts = forecasts.rename(
    columns={
        "temperature": "temperature_degF",
        "probabilityOfPrecipitation": "probabilityOfPrecipitation_percent",
        # "dewpoint": "dewpoint_degC",
        # "relativeHumidity": "relativeHumidity_percent",
        "windSpeed": "windSpeed_mph",
    }
)
forecasts = forecasts.drop(columns=["temperatureUnit"])
# probabilityOfPrecipitation arrives as a dict with a "value" key; value may be null.
forecasts['probabilityOfPrecipitation_percent'] = forecasts['probabilityOfPrecipitation_percent'].apply(lambda x: x.get('value')).fillna(0).astype(int)
# forecasts['dewpoint_degC'] = forecasts['dewpoint_degC'].apply(lambda x: x.get('value')).round(1)
# forecasts['relativeHumidity_percent'] = forecasts['relativeHumidity_percent'].apply(lambda x: x.get('value'))
# Strip the trailing " mph" (4 chars) from strings like "10 mph" / "5 to 10 mph".
forecasts["windSpeed_mph"] = forecasts["windSpeed_mph"].str[0:-4]
forecasts[["name", "detailedForecast"]]
https://www.ncei.noaa.gov/products/international-best-track-archive
Nexrad
s3://noaa-nexrad-level2
National Gridded Snowfall Analysis
https://www.nohrsc.noaa.gov/snowfall_v2/data/202410/sfav2_CONUS_6h_2024100106.nc
US
https://www.wpc.ncep.noaa.gov/archives/sfc/2022/namussfc2022010100.gif
UK
https://www.wetter3.de/Archiv/UKMet/22010100_UKMet_Analyse.gif
s3://noaa-rtma-pds/rtma2p5.YYYYMMDD{20160101:}/rtma2ph.tHHz.2dvaranl_ndfd.grb2[_wexp]
import fsspec
import pandas as pd
import xarray as xr

# NOAA RTMA 2.5 km analysis on AWS Open Data, cached locally via simplecache.
# FIX: use the lowercase "1h" alias — "1H" is deprecated since pandas 2.2.
time = pd.Timestamp("now").floor("1h")
# Opening without a filter skips u10, v10, wdir10, si10, i10fg (they live on a
# different GRIB hypercube, so cfgrib cannot merge them into one dataset).
ds = xr.open_dataset(
    fsspec.open_local(
        f"simplecache::s3://noaa-rtma-pds/rtma2p5.{time:%Y%m%d}/rtma2p5.t{time:%H}z.2dvaranl_ndfd.grb2_wexp"
    ),
    engine="cfgrib",
)
# To read one of the skipped variables, filter by its cfVarName instead.
variable = "u10"
ds = xr.open_dataset(
    fsspec.open_local(
        f"simplecache::s3://noaa-rtma-pds/rtma2p5.{time:%Y%m%d}/rtma2p5.t{time:%H}z.2dvaranl_ndfd.grb2_wexp"
    ),
    engine="cfgrib",
    backend_kwargs=dict(filter_by_keys={"cfVarName": variable}),
)
Planetary data store
s3://era5-pds/zarr/YYYY{1979:2013}/MM{01:12}/data/[air_pressure_at_mean_sea_level/air_temperature_at_2_metres/air_temperature_at_2_metres_1hour_Maximum/air_temperature_at_2_metres_1hour_Minimum/dew_point_temperature_at_2_metres/eastward_wind_at_100_metres/eastward_wind_at_10_metres/integral_wrt_time_of_surface_direct_downwelling_shortwave_flux_in_air_1hour_Accumulation/lwe_thickness_of_surface_snow_amount/northward_wind_at_100_metres/northward_wind_at_10_metres/precipitation_amount_1hour_Accumulation/sea_surface_temperature/snow_density/surface_air_pressure].zarr
import xarray as xr

# ERA5 on the AWS Public Dataset bucket (era5-pds): one zarr store per variable per month.
var = "eastward_wind_at_10_metres"
# BUG FIX: the URL was missing the f-prefix, so "{var}" was never interpolated.
ds = xr.open_zarr(f"s3://era5-pds/zarr/2023/01/data/{var}.zarr")
gsutil ls -l gs://gcp-public-data-arco-era5/ar/
https://github.com/google-research/arco-era5/blob/main/docs/0-Surface-Reanalysis-Walkthrough.ipynb
import xarray as xr
# ARCO-ERA5 (Google Research analysis-ready ERA5 on GCS), anonymous access;
# chunks=None avoids dask and reads lazily with plain zarr.
ds = xr.open_zarr("gs://gcp-public-data-arco-era5/ar/model-level-1h-0p25deg.zarr-v2", chunks=None,
storage_options=dict(token='anon'),
)
# Alternative store: single-level surface data (overwrites ds above — keep one).
ds = xr.open_zarr("gs://gcp-public-data-arco-era5/co/single-level-surface.zarr-v2", chunks=None,
storage_options=dict(token='anon'),
)
import metview as mv  # NOTE(review): unused in this snippet
import xarray as xr
# ARCO-ERA5 single-level reanalysis, anonymous GCS access.
ds = xr.open_zarr(
"gs://gcp-public-data-arco-era5/co/single-level-reanalysis.zarr-v2",
chunks=None,
storage_options=dict(token="anon"),
)
# The store advertises its valid period in attrs.
valid_time_start = ds.attrs["valid_time_start"]
valid_time_stop = ds.attrs["valid_time_stop"]
# single_level_reanalysis = ds.sel(time=slice(valid_time_start, valid_time_stop))
# Pull a single day into memory instead of the full record.
single_level_reanalysis = ds.sel(time=slice("2024-01-01", "2024-01-01")).compute()
longitude = single_level_reanalysis["longitude"]
latitude = single_level_reanalysis["latitude"]
# CONUS-ish box in the dataset's [0, 360) longitudes (235 ≈ -125, 293 ≈ -67).
single_level_reanalysis_us = single_level_reanalysis.where(
(longitude > 235.0) & (latitude > 25.0) &
(longitude < 293.0) & (latitude < 50.0),
drop=True
)
# ARCO-ERA5 single-level surface store, anonymous GCS access.
ds = xr.open_zarr(
"gs://gcp-public-data-arco-era5/co/single-level-surface.zarr-v2/",
chunks=None,
storage_options=dict(token="anon"),
)
# Restrict to the period the store marks valid via its attrs.
single_level_surface = ds.sel(time=slice(ds.attrs["valid_time_start"], ds.attrs["valid_time_stop"]))
# Filter bounding box
# NOTE(review): bbox is defined but unused below — the filter uses hard-coded CONUS bounds.
bbox = [171.79, 18.91, -66.96, 71.35] # E, S, W, N
def lon_to_360(lon: float) -> float:
    """Map a longitude in degrees to the [0, 360) convention used by ERA5 grids.

    Python's ``%`` already returns a value in [0, 360) for any real input, so a
    single modulo replaces the original ``(360 + (lon % 360)) % 360``.
    (Also fixes the pasted body, which had lost its indentation.)
    """
    return lon % 360
# BUG FIX: this filtered an undefined name `single_level_forecasts`; the dataset
# sliced above is bound to `single_level_surface`.
# Hard-coded CONUS bounds, converted to the dataset's [0, 360) longitudes.
ds = single_level_surface.where(
    (single_level_surface.longitude > lon_to_360(-125.0)) & (single_level_surface.latitude > 25.0) &
    (single_level_surface.longitude < lon_to_360(-67.0)) & (single_level_surface.latitude < 50.0),
    drop=True
)
# Full 37-pressure-level ARCO-ERA5 store (v3 chunking), anonymous access.
ds = xr.open_zarr("gs://gcp-public-data-arco-era5/ar/full_37-1h-0p25deg-chunk-1.zarr-v3", chunks=None,
storage_options=dict(token='anon'),
)
# WeatherBench2 ERA5 mirrors at various periods/resolutions/chunkings.
# Each line overwrites ds — these are alternatives; keep the one you need.
ds = xr.open_zarr("gs://weatherbench2/datasets/era5/1959-2022-6h-1440x721.zarr")
ds = xr.open_zarr("gs://weatherbench2/datasets/era5/1959-2022-full_37-1h-0p25deg-chunk-1.zarr-v2")
ds = xr.open_zarr("gs://weatherbench2/datasets/era5/1959-2022-full_37-6h-0p25deg-chunk-1.zarr-v2")
ds = xr.open_zarr("gs://weatherbench2/datasets/era5/1959-2022-full_37-6h-0p25deg_derived.zarr")
ds = xr.open_zarr("gs://weatherbench2/datasets/era5/1959-2022-wb13-6h-0p25deg-chunk-1.zarr-v2")
ds = xr.open_zarr("gs://weatherbench2/datasets/era5/1959-2023_01_10-full_37-1h-0p25deg-chunk-1.zarr")
ds = xr.open_zarr("gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721.zarr")
ds = xr.open_zarr("gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr")
RDA
https://rda.ucar.edu/datasets/ds633.0/
https://thredds.rda.ucar.edu/thredds/catalog/files/g/ds633.0/e5.oper.an.sfc/catalog.html
https://thredds.rda.ucar.edu/thredds/dodsC/files/g/ds633.0/e5.oper.an.sfc/YYYY{1940:}MM{01:12}/e5.oper.an.sfc.VAR.ll025sc.YYYYMM0101_YYYYMMDD31.nc
from calendar import monthrange

import xarray as xr

# ERA5 hourly surface analyses (NCAR RDA ds633.0) via THREDDS/OPeNDAP.
# Each assignment below overwrites `var` — keep the one you want.
var = "128_015_aluvp" # ?
var = "128_016_aluvd" # ?
var = "128_141_sd" # snow depth
var = "128_164_tcc" # Total cloud cover
var = "128_167_2t" # 2m air temperature
year = 2023
month = 1
# Files span a whole month: day 01 hour 00 through the last day's hour 23.
dates = f"{year}{month:02d}0100_{year}{month:02d}{monthrange(year, month)[1]}23"
ds = xr.open_dataset(f"https://thredds.rda.ucar.edu/thredds/dodsC/files/g/ds633.0/e5.oper.an.sfc/{year}{month:02d}/e5.oper.an.sfc.{var}.ll025sc.{dates}.nc")
# BUG FIX: the `with` body was pasted un-indented (SyntaxError).
with xr.set_options(keep_attrs=True):
    # Shift longitudes from [0, 360) to [-180, 180), re-sort, and flip latitude ascending.
    ds = ds.assign_coords({"longitude": (((ds["longitude"] + 180) % 360) - 180)})
    ds = ds.sortby("longitude")
    ds = ds.reindex({"latitude": ds["latitude"][::-1]})
# Select the second data variable (idiomatic replacement for the join/split trick).
da = ds[list(ds.data_vars)[1]]
CDS
import cdsapi

# ERA5 single-level download via the Copernicus Climate Data Store API.
# Placeholders (VAR/YYYY/MM/DD/HH/UUID) must be filled in before running.
c = cdsapi.Client()
c.retrieve(
    "reanalysis-era5-single-levels",
    {
        "product_type": "reanalysis",
        "variable": "VAR",  # e.g. "2m_temperature"
        "year": "YYYY",  # e.g. "2023"
        "month": "MM",  # e.g. "01"
        "day": "DD",  # e.g. "01"
        "time": "HH:00",  # e.g. "00:00"
        "format": "netcdf",
    },
    "UUID.nc",
)
# BUG FIX: open the file retrieve() actually wrote (was "download.nc",
# which does not match the "UUID.nc" target above).
ds = xr.open_dataset("UUID.nc")
https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land?tab=overview
Earth Engine
https://developers.google.com/earth-engine/datasets/catalog/ECMWF_ERA5_LAND_HOURLY
import ee
import xarray as xr
# Interactive Earth Engine auth (opens a notebook prompt).
ee.Authenticate(auth_mode="notebook")
# Replace PROJECT with a GCP project id; the high-volume endpoint suits bulk reads.
ee.Initialize(project="PROJECT", opt_url="https://earthengine-highvolume.googleapis.com")
# engine="ee" — presumably the xee backend; scale is in degrees here (0.1°). TODO confirm.
ds = xr.open_dataset("ee://ECMWF/ERA5_LAND/HOURLY", engine="ee", scale=0.1)
enfo - ensemble forecast, scda - short cut-off high-res
s3://ecmwf-forecasts/YYYYMMDD/[00/06/12/18]z/0p4-beta/[enfo/scda/scwv/waef]/YYYYMMDDHH0000-[0:3:90]h-[enfo/scda/scwv/waef]-fc.grib2
Weatherbench
ds = xr.open_zarr("gs://weatherbench2/datasets/hres/2016-2022-0012-1440x721.zarr")
ds = xr.open_zarr("gs://weatherbench2/datasets/hres/2016-2022-12h-6h-0p25deg-chunk-1.zarr")
ds = xr.open_zarr("gs://weatherbench2/datasets/hres_t0/2016-2022-6h-1440x721.zarr")
ds = xr.open_zarr("gs://weatherbench2/datasets/hres_t0/forecasts_t0_analysis_2016-2022-6h-0p25deg-chunk-1.zarr")
2020-2022 data from weatherbench
ds = xr.open_zarr("gs://weatherbench2/datasets/ens/2020-2022-0p25.zarr")
https://confluence.ecmwf.int/display/TIGGE
https://confluence.ecmwf.int/display/TIGGE/Models
type: cf (control forecast), fc (forecast (high-res option)), pf (perturbed forecast (ensemble)) controlled by "number": "1/2/N/50"
from ecmwfapi import ECMWFDataServer
# TIGGE retrieval (see the type/number notes above): ECMWF origin, surface level,
# one week of 00Z/12Z runs regridded to 0.25°, written to a local GRIB file.
server = ECMWFDataServer()
server.retrieve({
"class": "ti",
"dataset": "tigge",
"date": "2023-01-01/to/2023-01-07",
"expver": "prod",
"grid": "0.25/0.25",
"levtype": "sfc",
"origin": "ecmf",
"param": "167",  # presumably 2 m temperature (ECMWF parameter 167) — confirm in the param DB
"step": "0/6/12/18/24",
"time": "00:00:00/12:00:00",
"type": "cf",  # control forecast; see the type notes above
"target": "output.grib"
})
https://www.ecmwf.int/en/research/climate-reanalysis/ocean-reanalysis
Now -> ~18 days ahead. 3-hourly steps for near-term forecasts and 12-hourly steps at longer lead times. ~180 variables
import xarray as xr
# Latest GFS 0.25° "Best" time series via Unidata THREDDS (OPeNDAP).
ds = xr.open_dataset("https://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg/Best")
# 2 m temperature near Annapolis, MD (lon in 0-360: 283.5 ≈ -76.5°W).
ds["Temperature_height_above_ground"].sel(height_above_ground3=2.0, lat=39, lon=283.5)
Archive source
s3://noaa-gfs-bdp-pds/gfs.YYYYMMDD/[00/06/12/18]/atmos/gfs.t[00/06/12/18]z.pgrb2.0p25.f[000:3:384]
s3://hrrrzarr/sfc/20240321/20240321_00z_anl.zarr/2m_above_ground/TMP/
from sklearn.datasets import fetch_openml
# Bike Sharing Demand dataset from OpenML, returned as a pandas DataFrame.
df = fetch_openml("Bike_Sharing_Demand", version=2, as_frame=True).frame