I've got 2 geodataframes:
import geopandas as gpd
from shapely.geometry import Point
# People to locate: one row per person, with an ID and a point location.
gpd1 = gpd.GeoDataFrame(
    data=[
        ['John', 1, Point(1, 1)],
        ['Smith', 1, Point(2, 2)],
        ['Soap', 1, Point(0, 2)],
    ],
    columns=['Name', 'ID', 'geometry'],
)

# Candidate places: each row pairs a place name with its point location.
gpd2 = gpd.GeoDataFrame(
    data=[
        ['Work', Point(0, 1.1)],
        ['Shops', Point(2.5, 2)],
        ['Home', Point(1, 1.1)],
    ],
    columns=['Place', 'geometry'],
)
and I want to find the name of the nearest point in gpd2 for each row in gpd1:
desired_output =
Name ID geometry Nearest
0 John 1 POINT (1 1) Home
1 Smith 1 POINT (2 2) Shops
2 Soap 1 POINT (0 2) Work
I've been trying to get this working using a lambda function:
def _nearest_place(row):
    """Return the 'Place' entry of whatever min_dist yields for this row's geometry."""
    return min_dist(row.geometry, gpd2)['Place']


# Row-wise apply: one min_dist lookup per row of gpd1.
gpd1['Nearest'] = gpd1.apply(_nearest_place, axis=1)
with
def min_dist(point, gpd2):
    """Placeholder from the question: look up the gpd2 entry nearest to *point*.

    point: geometry of the gpd1 row being processed.
    gpd2: GeoDataFrame of candidate places; the caller indexes the result
        with ['Place'].
    """
    # some_function() stands in for the unknown nearest-neighbour lookup
    # that the question is asking for.
    geoseries = some_function()
    return geoseries
Answer
You can use Shapely's nearest_points function directly (the geometries in a GeoSeries are Shapely geometries):
from shapely.ops import nearest_points

# Unary union of the gpd2 geometries: merges all candidate places into one
# geometry so nearest_points can search them in a single call.
# NOTE(review): recent GeoPandas releases deprecate unary_union in favour of
# union_all() -- confirm against the installed version.
pts3 = gpd2.geometry.unary_union


def near(point, pts=pts3):
    """Return the Place value of the gpd2 geometry nearest to *point*.

    point: Shapely geometry to search from.
    pts: union of the candidate geometries; defaults to the union of gpd2.
    """
    # nearest_points returns a pair (nearest point on `point`, nearest point
    # on `pts`); element [1] is the one that belongs to gpd2.
    nearest = gpd2.geometry == nearest_points(point, pts)[1]
    # Series.get_values() was removed in pandas 1.0; .iloc[0] takes the
    # first matching Place without it.
    return gpd2.loc[nearest, 'Place'].iloc[0]


gpd1['Nearest'] = gpd1.apply(lambda row: near(row.geometry), axis=1)
gpd1
Name ID geometry Nearest
0 John 1 POINT (1 1) Home
1 Smith 1 POINT (2 2) Shops
2 Soap 1 POINT (0 2) Work
Explanation
# Show each gpd1 point next to the gpd2 point that nearest_points pairs it with.
for point in gpd1.geometry:
    # nearest_points returns (nearest point on the first geometry,
    # nearest point on the second); compute the pair once per row.
    origin, closest = nearest_points(point, pts3)
    print(origin, closest)
POINT (1 1) POINT (1 1.1)
POINT (2 2) POINT (2.5 2)
POINT (0 2) POINT (0 1.1)
No comments:
Post a Comment