diff --git a/scripts/0.1_run_osmox.py b/scripts/0.1_run_osmox.py index 67af4ea..02c3689 100644 --- a/scripts/0.1_run_osmox.py +++ b/scripts/0.1_run_osmox.py @@ -1,8 +1,9 @@ import os import subprocess +import geopandas as gpd import requests -from pyrosm import get_data +from pyrosm import OSM, get_data from acbm.cli import acbm_cli from acbm.config import load_and_setup_config @@ -50,6 +51,53 @@ def main(config_file): ], check=False, ) + logger.info("osmox run complete") + + logger.info("Assigning linkID to facilities") + # Add linkID column (closest road to the facility). See https://github.com/Urban-Analytics-Technology-Platform/acbm/issues/109 + + logger.info("Step 1: reading the poi data prepared by osmox") + # Read in the poi data + poi_fp = os.path.join( + config.osmox_path, f"{config.region}_epsg_{config.output_crs}.parquet" + ) + pois = gpd.read_parquet(poi_fp) + + logger.info("Step 2: reading the osm road network data from the pbf file") + # Read in the road data + osm = OSM(fp) + osm_roads = osm.get_network(network_type="driving") + osm_roads = osm_roads.to_crs(crs_value) + + logger.info( + "Step 3: Addign a road linkID to each facility (based on the closest osm link)" + ) + # Find the nearest road to each facility + pois_with_links = gpd.sjoin_nearest( + pois, + osm_roads[ + ["id", "geometry"] + ], # Only include the 'id' and 'geometry' columns from osm_roads + how="left", + max_distance=1000, + lsuffix="", # No suffix for pois + # rsuffix="roads" # No suffix for osm_roads + ).drop(columns=["index_right"]) + + # Rename the columns to remove the trailing _ created by the sjoin + pois_with_links = pois_with_links.rename( + columns={"id_": "id", "id_right": "linkId"} + ) + + pois_with_links = pois_with_links.reset_index(drop=True) + + logger.info( + "Step 4: Saving the POIs with links, overwriting the original pois file" + ) + # Save the file (overwrite the original pois file) + pois_with_links.to_parquet(poi_fp, index=False) + + 
logger.info(f"pois_with_links saved to: {poi_fp}") if __name__ == "__main__": diff --git a/scripts/3.3_assign_facility_all.py b/scripts/3.3_assign_facility_all.py index 2f48de5..97b2648 100644 --- a/scripts/3.3_assign_facility_all.py +++ b/scripts/3.3_assign_facility_all.py @@ -230,6 +230,9 @@ def main(config_file): activity_chains_all["start_location_id"] = activity_chains_all.groupby("pid")[ "end_location_id" ].shift(1) + activity_chains_all["start_location_link_id"] = activity_chains_all.groupby("pid")[ + "end_location_link_id" + ].shift(1) activity_chains_all["start_location_geometry"] = activity_chains_all.groupby("pid")[ "end_location_geometry" ].shift(1) @@ -243,6 +246,9 @@ def main(config_file): activity_chains_all.loc[mask, "start_location_id"] = activity_chains_all.loc[ mask, "hid" ].map(activity_chains_home_agg.set_index("hid")["end_location_id"]) + activity_chains_all.loc[mask, "start_location_link_id"] = activity_chains_all.loc[ + mask, "hid" + ].map(activity_chains_home_agg.set_index("hid")["end_location_link_id"]) activity_chains_all.loc[mask, "start_location_geometry"] = activity_chains_all.loc[ mask, "hid" ].map(activity_chains_home_agg.set_index("hid")["end_location_geometry"]) @@ -267,8 +273,10 @@ def main(config_file): "tet", "duration", "start_location_id", + "start_location_link_id", "start_location_geometry", "end_location_id", + "end_location_link_id", "end_location_geometry", ] ] diff --git a/src/acbm/assigning/select_facility.py b/src/acbm/assigning/select_facility.py index 923d714..628e898 100644 --- a/src/acbm/assigning/select_facility.py +++ b/src/acbm/assigning/select_facility.py @@ -73,7 +73,7 @@ def _select_facility( logger.debug(f"Activity {row.name}: Destination zone is NA") # return {"id": np.nan, "geometry": np.nan} # TODO: check this replacement is correct - return {row[unique_id_col]: (np.nan, np.nan)} + return {row[unique_id_col]: (np.nan, np.nan, np.nan)} # Filter facilities within the specified destination zone 
facilities_in_zone = facilities_gdf[ @@ -134,7 +134,7 @@ def _select_facility( logger.debug( f"Activity {row.name}: No facilities in zone {destination_zone} with {gdf_facility_type_col} '{fallback_type or row[row_activity_type_col]}'" ) - return {row[unique_id_col]: (np.nan, np.nan)} + return {row[unique_id_col]: (np.nan, np.nan, np.nan)} # ----- Step 2. Sample a facility from the valid facilities @@ -158,7 +158,11 @@ def _select_facility( # Return the id and geometry of the selected facility return { - row[unique_id_col]: (facility["id"].values[0], facility["geometry"].values[0]) + row[unique_id_col]: ( + facility["id"].values[0], + facility["linkId"].values[0], + facility["geometry"].values[0], + ) } @@ -202,8 +206,9 @@ def select_facility( Returns ------- - dict[str, Tuple[str, Point ] | Tuple[float, float]]: Unique ID column as - keys with selected facility ID and facility ID's geometry, or (np.nan, np.nan) + dict[str, Tuple[str, str, Point ] | Tuple[float, float, float]]: Unique ID column as + keys with selected facility ID, linkID (nearest road link), and facility ID's geometry, + or (np.nan, np.nan, np.nan) """ # TODO: check if this is deterministic for a given seed (or pass seed to pool) with Pool(n_processes) as p: @@ -255,15 +260,24 @@ def map_activity_locations( ------- pd.DataFrame DataFrame with mapped activity locations. 
+ It adds the following columns: + - 'end_location_id': The ID of the facility + - 'end_location_link_id': The ID of the nearest road link to the facility + - 'end_location_geometry': The geometry of the facility """ activity_chains_df["end_location_id"] = activity_chains_df[id_col].map( lambda pid: activity_locations_dict[pid][0] if pid in activity_locations_dict else pd.NA ) - activity_chains_df["end_location_geometry"] = activity_chains_df[id_col].map( + activity_chains_df["end_location_link_id"] = activity_chains_df[id_col].map( lambda pid: activity_locations_dict[pid][1] if pid in activity_locations_dict else pd.NA ) + activity_chains_df["end_location_geometry"] = activity_chains_df[id_col].map( + lambda pid: activity_locations_dict[pid][2] + if pid in activity_locations_dict + else pd.NA + ) return activity_chains_df