Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 49 additions & 1 deletion scripts/0.1_run_osmox.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import os
import subprocess

import geopandas as gpd
import requests
from pyrosm import get_data
from pyrosm import OSM, get_data

from acbm.cli import acbm_cli
from acbm.config import load_and_setup_config
Expand Down Expand Up @@ -50,6 +51,53 @@ def main(config_file):
],
check=False,
)
logger.info("osmox run complete")

logger.info("Assigning linkID to facilities")
# Add a linkId column (the OSM road link closest to each facility). See https://github.com/Urban-Analytics-Technology-Platform/acbm/issues/109

logger.info("Step 1: reading the poi data prepared by osmox")
# Read in the poi data
poi_fp = os.path.join(
config.osmox_path, f"{config.region}_epsg_{config.output_crs}.parquet"
)
pois = gpd.read_parquet(poi_fp)

logger.info("Step 2: reading the osm road network data from the pbf file")
# Read in the road data
osm = OSM(fp)
osm_roads = osm.get_network(network_type="driving")
osm_roads = osm_roads.to_crs(crs_value)

logger.info(
"Step 3: Addign a road linkID to each facility (based on the closest osm link)"
)
# Find the nearest road to each facility
pois_with_links = gpd.sjoin_nearest(
pois,
osm_roads[
["id", "geometry"]
], # Only include the 'id' and 'geometry' columns from osm_roads
how="left",
max_distance=1000,
lsuffix="", # Empty left suffix: the clashing pois column becomes "id_" (renamed below)
# rsuffix="roads" # No suffix for osm_roads
).drop(columns=["index_right"])

# Rename the columns to remove the trailing _ created by the sjoin
pois_with_links = pois_with_links.rename(
columns={"id_": "id", "id_right": "linkId"}
)

pois_with_links = pois_with_links.reset_index(drop=True)

logger.info(
"Step 4: Saving the POIs with links, overwriting the original pois file"
)
# Save the file (overwrite the original pois file)
pois_with_links.to_parquet(poi_fp, index=False)

logger.info(f"pois_with_links saved to: {poi_fp}")


if __name__ == "__main__":
Expand Down
8 changes: 8 additions & 0 deletions scripts/3.3_assign_facility_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,9 @@ def main(config_file):
activity_chains_all["start_location_id"] = activity_chains_all.groupby("pid")[
"end_location_id"
].shift(1)
activity_chains_all["start_location_link_id"] = activity_chains_all.groupby("pid")[
"end_location_link_id"
].shift(1)
activity_chains_all["start_location_geometry"] = activity_chains_all.groupby("pid")[
"end_location_geometry"
].shift(1)
Expand All @@ -243,6 +246,9 @@ def main(config_file):
activity_chains_all.loc[mask, "start_location_id"] = activity_chains_all.loc[
mask, "hid"
].map(activity_chains_home_agg.set_index("hid")["end_location_id"])
activity_chains_all.loc[mask, "start_location_link_id"] = activity_chains_all.loc[
mask, "hid"
].map(activity_chains_home_agg.set_index("hid")["end_location_link_id"])
activity_chains_all.loc[mask, "start_location_geometry"] = activity_chains_all.loc[
mask, "hid"
].map(activity_chains_home_agg.set_index("hid")["end_location_geometry"])
Expand All @@ -267,8 +273,10 @@ def main(config_file):
"tet",
"duration",
"start_location_id",
"start_location_link_id",
"start_location_geometry",
"end_location_id",
"end_location_link_id",
"end_location_geometry",
]
]
Expand Down
26 changes: 20 additions & 6 deletions src/acbm/assigning/select_facility.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _select_facility(
logger.debug(f"Activity {row.name}: Destination zone is NA")
# return {"id": np.nan, "geometry": np.nan}
# TODO: check this replacement is correct
return {row[unique_id_col]: (np.nan, np.nan)}
return {row[unique_id_col]: (np.nan, np.nan, np.nan)}

# Filter facilities within the specified destination zone
facilities_in_zone = facilities_gdf[
Expand Down Expand Up @@ -134,7 +134,7 @@ def _select_facility(
logger.debug(
f"Activity {row.name}: No facilities in zone {destination_zone} with {gdf_facility_type_col} '{fallback_type or row[row_activity_type_col]}'"
)
return {row[unique_id_col]: (np.nan, np.nan)}
return {row[unique_id_col]: (np.nan, np.nan, np.nan)}

# ----- Step 2. Sample a facility from the valid facilities

Expand All @@ -158,7 +158,11 @@ def _select_facility(

# Return the id and geometry of the selected facility
return {
row[unique_id_col]: (facility["id"].values[0], facility["geometry"].values[0])
row[unique_id_col]: (
facility["id"].values[0],
facility["linkId"].values[0],
facility["geometry"].values[0],
)
}


Expand Down Expand Up @@ -202,8 +206,9 @@ def select_facility(

Returns
-------
dict[str, Tuple[str, Point ] | Tuple[float, float]]: Unique ID column as
keys with selected facility ID and facility ID's geometry, or (np.nan, np.nan)
dict[str, Tuple[str, str, Point ] | Tuple[float, float, float]]: Unique ID column as
keys with selected facility ID, linkID (nearest road link), and facility ID's geometry,
or (np.nan, np.nan, np.nan)
"""
# TODO: check if this is deterministic for a given seed (or pass seed to pool)
with Pool(n_processes) as p:
Expand Down Expand Up @@ -255,15 +260,24 @@ def map_activity_locations(
-------
pd.DataFrame
DataFrame with mapped activity locations.
It adds the following columns:
- 'end_location_id': The ID of the facility
- 'end_location_link_id': The ID of the road link nearest to the facility
- 'end_location_geometry': The geometry of the facility
"""
activity_chains_df["end_location_id"] = activity_chains_df[id_col].map(
lambda pid: activity_locations_dict[pid][0]
if pid in activity_locations_dict
else pd.NA
)
activity_chains_df["end_location_geometry"] = activity_chains_df[id_col].map(
activity_chains_df["end_location_link_id"] = activity_chains_df[id_col].map(
lambda pid: activity_locations_dict[pid][1]
if pid in activity_locations_dict
else pd.NA
)
activity_chains_df["end_location_geometry"] = activity_chains_df[id_col].map(
lambda pid: activity_locations_dict[pid][2]
if pid in activity_locations_dict
else pd.NA
)
return activity_chains_df
Loading