Urban-Analytics-Technology-Platform · Hussein-Mahfouz · Apr 1, 2025 · Apr 1, 2025 · Apr 1, 2025
diff --git a/scripts/0.1_run_osmox.py b/scripts/0.1_run_osmox.py
@@ -1,8 +1,9 @@
 import os
 import subprocess
 
+import geopandas as gpd
 import requests
-from pyrosm import get_data
+from pyrosm import OSM, get_data
 
 from acbm.cli import acbm_cli
 from acbm.config import load_and_setup_config
@@ -50,6 +51,53 @@ def main(config_file):
         ],
         check=False,
     )
+    logger.info("osmox run complete")
+
+    logger.info("Assigning linkID to facilities")
+    # Add linkID column (closest road to the facility). See https://github.com/Urban-Analytics-Technology-Platform/acbm/issues/109
+
+    logger.info("Step 1: reading the poi data prepared by osmox")
+    # Read in the poi data
+    poi_fp = os.path.join(
+        config.osmox_path, f"{config.region}_epsg_{config.output_crs}.parquet"
+    )
+    pois = gpd.read_parquet(poi_fp)
+
+    logger.info("Step 2: reading the osm road network data from the pbf file")
+    # Read in the road data
+    osm = OSM(fp)
+    osm_roads = osm.get_network(network_type="driving")
+    osm_roads = osm_roads.to_crs(crs_value)
+
+    logger.info(
+        "Step 3: Addign a road linkID to each facility (based on the closest osm link)"
+    )
+    # Find the nearest road to each facility
+    pois_with_links = gpd.sjoin_nearest(
+        pois,
+        osm_roads[
+            ["id", "geometry"]
+        ],  # Only include the 'id' and 'geometry' columns from osm_roads
+        how="left",
+        max_distance=1000,
+        lsuffix="",  # Empty left suffix: the overlapping pois column "id" comes out as "id_"
+        # rsuffix is left at its default ("right"), so the road "id" comes out as "id_right"
+    ).drop(columns=["index_right"])
+
+    # Rename the columns to remove the trailing _ created by the sjoin
+    pois_with_links = pois_with_links.rename(
+        columns={"id_": "id", "id_right": "linkId"}
+    )
+
+    pois_with_links = pois_with_links.reset_index(drop=True)
+
+    logger.info(
+        "Step 4: Saving the POIs with links, overwriting the original pois file"
+    )
+    # Save the file (overwrite the original pois file)
+    pois_with_links.to_parquet(poi_fp, index=False)
+
+    logger.info(f"pois_with_links saved to: {poi_fp}")
 
 
 if __name__ == "__main__":

diff --git a/scripts/3.3_assign_facility_all.py b/scripts/3.3_assign_facility_all.py
@@ -230,6 +230,9 @@ def main(config_file):
     activity_chains_all["start_location_id"] = activity_chains_all.groupby("pid")[
         "end_location_id"
     ].shift(1)
+    activity_chains_all["start_location_link_id"] = activity_chains_all.groupby("pid")[
+        "end_location_link_id"
+    ].shift(1)
     activity_chains_all["start_location_geometry"] = activity_chains_all.groupby("pid")[
         "end_location_geometry"
     ].shift(1)
@@ -243,6 +246,9 @@ def main(config_file):
     activity_chains_all.loc[mask, "start_location_id"] = activity_chains_all.loc[
         mask, "hid"
     ].map(activity_chains_home_agg.set_index("hid")["end_location_id"])
+    activity_chains_all.loc[mask, "start_location_link_id"] = activity_chains_all.loc[
+        mask, "hid"
+    ].map(activity_chains_home_agg.set_index("hid")["end_location_link_id"])
     activity_chains_all.loc[mask, "start_location_geometry"] = activity_chains_all.loc[
         mask, "hid"
     ].map(activity_chains_home_agg.set_index("hid")["end_location_geometry"])
@@ -267,8 +273,10 @@ def main(config_file):
             "tet",
             "duration",
             "start_location_id",
+            "start_location_link_id",
             "start_location_geometry",
             "end_location_id",
+            "end_location_link_id",
             "end_location_geometry",
         ]
     ]

diff --git a/src/acbm/assigning/select_facility.py b/src/acbm/assigning/select_facility.py
@@ -73,7 +73,7 @@ def _select_facility(
         logger.debug(f"Activity {row.name}: Destination zone is NA")
         # return {"id": np.nan, "geometry": np.nan}
         # TODO: check this replacement is correct
-        return {row[unique_id_col]: (np.nan, np.nan)}
+        return {row[unique_id_col]: (np.nan, np.nan, np.nan)}
 
     # Filter facilities within the specified destination zone
     facilities_in_zone = facilities_gdf[
@@ -134,7 +134,7 @@ def _select_facility(
         logger.debug(
             f"Activity {row.name}: No facilities in zone {destination_zone} with {gdf_facility_type_col} '{fallback_type or row[row_activity_type_col]}'"
         )
-        return {row[unique_id_col]: (np.nan, np.nan)}
+        return {row[unique_id_col]: (np.nan, np.nan, np.nan)}
 
     # ----- Step 2. Sample a facility from the valid facilities
 
@@ -158,7 +158,11 @@ def _select_facility(
 
     # Return the id and geometry of the selected facility
     return {
-        row[unique_id_col]: (facility["id"].values[0], facility["geometry"].values[0])
+        row[unique_id_col]: (
+            facility["id"].values[0],
+            facility["linkId"].values[0],
+            facility["geometry"].values[0],
+        )
     }
 
 
@@ -202,8 +206,9 @@ def select_facility(
 
     Returns
     -------
-    dict[str, Tuple[str, Point ] | Tuple[float, float]]: Unique ID column as
-        keys with selected facility ID and facility ID's geometry, or (np.nan, np.nan)
+    dict[str, Tuple[str, str, Point ] | Tuple[float, float, float]]: Unique ID column as
+        keys with selected facility ID, linkID (nearest road link), and facility ID's geometry,
+        or (np.nan, np.nan, np.nan)
     """
     # TODO: check if this is deterministic for a given seed (or pass seed to pool)
     with Pool(n_processes) as p:
@@ -255,15 +260,24 @@ def map_activity_locations(
     -------
     pd.DataFrame
         DataFrame with mapped activity locations.
+        It adds the following columns:
+        - 'end_location_id': The ID of the facility
+        - 'end_location_link_id': The id of the nearest road link to the facility
+        - 'end_location_geometry': The geometry of the facility
     """
     activity_chains_df["end_location_id"] = activity_chains_df[id_col].map(
         lambda pid: activity_locations_dict[pid][0]
         if pid in activity_locations_dict
         else pd.NA
     )
-    activity_chains_df["end_location_geometry"] = activity_chains_df[id_col].map(
+    activity_chains_df["end_location_link_id"] = activity_chains_df[id_col].map(
         lambda pid: activity_locations_dict[pid][1]
         if pid in activity_locations_dict
         else pd.NA
     )
+    activity_chains_df["end_location_geometry"] = activity_chains_df[id_col].map(
+        lambda pid: activity_locations_dict[pid][2]
+        if pid in activity_locations_dict
+        else pd.NA
+    )
     return activity_chains_df