Geopandas
import geopandas as gpd
import pandas as pd
DataFrame
Datasets
# Natural Earth 1:110m cultural "admin 0 countries" layer, read straight from the zipped archive URL.
gdf = gpd.read_file("https://naturalearth.s3.amazonaws.com/110m_cultural/ne_110m_admin_0_countries.zip")
high res land
# Natural Earth 1:10m physical "land" layer (the higher-resolution counterpart of the 110m data sets).
gdf = gpd.read_file("https://naturalearth.s3.amazonaws.com/10m_physical/ne_10m_land.zip")
IO
US states from a GeoJSON file
# Load the state boundaries from the remote JSON file, then keep only the
# row(s) whose NAME column equals "Florida".
url = "https://eric.clst.org/assets/wiki/uploads/Stuff/gz_2010_us_040_00_20m.json"
gdf = gpd.read_file(url)
gdf = gdf.loc[gdf["NAME"] == "Florida"]
Read GeoJSON from S3
# Read a GeoJSON object from S3 through an s3fs file handle.
# The import uses the `gpd` alias because that is the name the call below relies on.
import geopandas as gpd
import s3fs

fs = s3fs.S3FileSystem()
# The context manager closes the S3 handle once the file has been parsed.
with fs.open("s3://BUCKET/file.geojson") as f:
    gdf = gpd.read_file(f)
Read a zipped shapefile from S3
# Option 1: pass a zip+s3:// URL straight to read_file.
gpd.read_file("zip+s3://BUCKET/file.zip")
# Option 2: open the object with s3fs and hand the file object to geopandas.
import s3fs
fs = s3fs.S3FileSystem()
# The body of the with-statement must be indented; the handle is closed on exit.
with fs.open("s3://BUCKET/file.zip") as f:
    gdf = gpd.read_file(f)
Read parquet
# Read a GeoParquet file from a local path ...
gpd.read_parquet("file.parquet")
# ... or from an opened file object (`fs` is a filesystem object such as s3fs.S3FileSystem()).
gpd.read_parquet(fs.open("file.parquet"))
To parquet
import s3fs
# Write the frame as Parquet, passing an S3 filesystem object via the `filesystem` keyword.
# NOTE(review): presumably the path is then resolved on S3 rather than locally — confirm against the geopandas docs.
gdf.to_parquet("file.parquet", filesystem=s3fs.S3FileSystem())
To csv
# Write the frame to CSV without the index column.
# NOTE(review): the geometry column is presumably written as WKT text — confirm.
gdf.to_csv("file.csv", index=False)
To GeoJSON
# Write the frame to a GeoJSON file; the output driver is named explicitly.
gdf.to_file("file.geojson", driver="GeoJSON")
To shapefile
# Write the frame as an ESRI Shapefile. The driver must match the target format;
# with driver="GeoJSON" the .shp file would actually contain GeoJSON.
gdf.to_file("file.shp", driver="ESRI Shapefile")
points
# Point must be imported before it is used to build the geometry column.
from shapely.geometry import Point

# One-row GeoDataFrame holding a single point given as (x, y) in EPSG:4326.
gdf = gpd.GeoDataFrame(
    {"geometry": [Point(-83.12, 42.38)]}, geometry="geometry", crs="epsg:4326"
)
# Three daily timestamps starting 2000-01-01, each with matching lon/lat values 1..3.
df = pd.DataFrame(
    {
        "time": pd.date_range(start="2000", periods=3, freq="D"),
        "lon": list(range(1, 4)),
        "lat": list(range(1, 4)),
    }
)
# Three daily timestamps starting 2000-01-04, each with matching lon/lat values 4..6.
# Note: this rebinds `df`, replacing any frame previously stored under that name.
df = pd.DataFrame(
    {
        "time": pd.date_range(start="2000-01-04", periods=3, freq="D"),
        "lon": list(range(4, 7)),
        "lat": list(range(4, 7)),
    }
)
# Turn the lon/lat columns of `df` into point geometries in EPSG:4326.
gdf = gpd.GeoDataFrame(
    data=df,
    crs="epsg:4326",
    # Alternative when the frame carries WKT strings instead of coordinates:
    # geometry=gpd.GeoSeries.from_wkt(df["point_wkt"]),
    geometry=gpd.points_from_xy(x=df["lon"], y=df["lat"]),
)
Transform
linestring
from shapely.geometry import LineString

# Two two-vertex lines; each vertex is given as (lon, lat).
line1 = LineString([(0, 0), (1, 1)])
line2 = LineString([(2, 0), (3, 1)])
gdf = gpd.GeoDataFrame(
    {"geometry": [line1, line2]},
    geometry="geometry",
    crs="epsg:4326",
)
geodataframe of points to line string
from shapely.geometry import LineString

# Connect the point geometries, in row order, into a single LineString.
LineString(list(gdf["geometry"]))
geodataframe of line string to points
# Explode the LineString of the first row into one point per vertex.
# list() copies the whole coordinate sequence at once, replacing the manual
# append loop. Assumes 2D coordinates, since only two column names are given.
l = list(gdf.iloc[0]["geometry"].coords)
_df = pd.DataFrame(l, columns=["lon", "lat"])
_gdf = gpd.GeoDataFrame(
    _df.copy(),
    geometry=gpd.points_from_xy(_df["lon"], _df["lat"]),
    crs="epsg:4326",
).reset_index()
# reset_index() adds an "index" column; colouring by it shows the vertex order.
_gdf.plot(aspect=1, column="index")
First point of a linestring
from shapely.geometry import Point
# For every row, build a Point from the first coordinate tuple of its line geometry
# and store it in a new "first_point" column.
gdf["first_point"] = gdf['geometry'].apply(lambda line: Point(line.coords[0]))
Longitudes from a linestring
Reverse linestrings
from shapely.ops import substring
# Take the sub-line from normalized position 1 (end) to 0 (start); because the start
# lies after the end, the result is presumably the same line with reversed vertex order
# — confirm against the shapely.ops.substring docs.
gdf["geometry"] = gdf["geometry"].apply(lambda x: substring(x, 1, 0, normalized=True))
Metadata
Get total bounds of all geometries
# total_bounds is unpacked as (minx, miny, maxx, maxy) for the whole geometry column.
lon_min, lat_min, lon_max, lat_max = gdf["geometry"].total_bounds
print(f"lon_min: {lon_min}")
print(f"lat_min: {lat_min}")
print(f"lon_max: {lon_max}")
print(f"lat_max: {lat_max}")
Get bounds for rows
# .bounds is a DataFrame with one row per geometry and columns minx/miny/maxx/maxy.
# Tuple-unpacking a DataFrame iterates over its column *labels*, which would fill
# each target column with a constant string, so assign the four columns as a block.
gdf[["minx", "miny", "maxx", "maxy"]] = gdf["geometry"].bounds
Get lon and lat centroid
# Compute the centroids once and reuse them for both coordinate columns.
# NOTE(review): centroids computed in a geographic CRS may be inaccurate — confirm
# the CRS (or reproject first) before relying on these values.
_centroid = gdf.centroid
gdf["latitude"] = _centroid.y
gdf["longitude"] = _centroid.x
Set crs
# Declare the CRS of the frame (this labels the data; it does not reproject it).
# NOTE: the return value is discarded here. Per the geopandas docs set_crs returns a
# new object unless inplace=True, so assign the result to keep it.
gdf.set_crs("epsg:4326") # https://epsg.io/4326 (geodetic)
Transform crs
# Reproject to EPSG:3395. The two calls are alternative spellings of the same target
# CRS (authority string vs. `epsg` keyword); neither result is assigned here.
gdf.to_crs("EPSG:3395") # https://epsg.io/3395 (mercator)
gdf.to_crs(epsg=3395)
Merge multiple polygons into one (dissolve)
# Group rows by the CONTINENT column and merge each group's geometries into one.
gdf.dissolve(by='CONTINENT')
Geometry
Get wkt
# Store the WKT (well-known text) representation of each geometry in a "wkt" column.
gdf['wkt'] = gdf["geometry"].to_wkt()
Geocode (city to lon-lat)
See geopy
Indexing
gdf.iloc[[0]] # list indexer -> one-row GeoDataFrame (a scalar indexer, gdf.iloc[0], gives a Series instead)
Joins
Field (attribute join)
# Attribute join: combine gdf with the DataFrame `df` on the shared column "COL"
# ("COL" is a placeholder column name).
gdf.merge(df, on="COL")
nearest spatial join
# For each row of `gdf_points` (a point GeoDataFrame defined elsewhere), attach the
# nearest feature of `gdf`; the distance goes into the "distances_crs" column
# (presumably expressed in CRS units — confirm).
gdf_points.sjoin_nearest(gdf, distance_col="distances_crs")
Smoothing
# Simplify the geometries with a tolerance of 1 (presumably in CRS units — TODO confirm).
gdf.simplify(1)
Remove some points
Plotting
# Outline-only plot: purple edges, no fill, on a 5x5 figure.
gdf.plot(figsize=(5, 5), edgecolor="purple", facecolor="None")
# Colour the geometries by the values of column "COL" (placeholder column name).
gdf.plot(column="COL")
# Force an equal aspect ratio between the x and y axes.
gdf.plot(aspect=1)