Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
4f60729
Migrate PlaceInfo to V2 API and add verification tests
SandeepTuniki Feb 18, 2026
27b5843
Clean up comments and fix bug in get_series_dates
SandeepTuniki Mar 2, 2026
8951e07
Untrack verify_live_migration.py to exclude from PR
SandeepTuniki Mar 2, 2026
e2a3d92
Address review comments: Fix V2 API parsing and optimize BFS
SandeepTuniki Mar 2, 2026
1c8e04d
Fix linting and formatting issues in V2 migration files
SandeepTuniki Mar 2, 2026
d3daa46
Merge remote-tracking branch 'origin/master' into migrate-place-info-v2
SandeepTuniki Mar 16, 2026
bcd6392
fix: prioritize specific place types over administrative areas
SandeepTuniki Mar 16, 2026
5740a97
fix: hardcode United States name for country/USA to match V1 behavior
SandeepTuniki Mar 16, 2026
f768f55
style: fix python lint errors using yapf
SandeepTuniki Mar 16, 2026
4fb6f36
fix: skip bogus places in get_place_info
SandeepTuniki Mar 16, 2026
8da012e
fix: hardcode New York City name for geoId/3651000
SandeepTuniki Mar 16, 2026
6bb1972
fix: use recursive traversal in get_series_dates and update unit tests
SandeepTuniki Mar 16, 2026
ba42524
revert: remove New York City hardcoded name
SandeepTuniki Mar 16, 2026
1029f44
feat: Update goldens from Cloud Build workflow (build 276e2d55-2442-4…
datacommons-robot-author Mar 18, 2026
ac9b296
Merge remote-tracking branch 'origin/master' into migrate-place-info-…
SandeepTuniki Mar 18, 2026
1665c9c
refactor: Remove hardcoded name override for country/USA
SandeepTuniki Mar 18, 2026
9804dfd
feat: Update goldens from Cloud Build workflow (build 89397212-0490-4…
datacommons-robot-author Mar 18, 2026
1f44722
Fix: remove unused json import in migration_verification_test.py
SandeepTuniki Mar 18, 2026
cd17e63
Refactor: remove unused endpoints and move PLACE_TYPE_RANK to constants
SandeepTuniki Mar 18, 2026
8e760dc
Fix: remove unused variable 'ent' in get_series_dates
SandeepTuniki Mar 18, 2026
6d7058e
Refactor: apply minor cleanups in datacommons.py
SandeepTuniki Mar 18, 2026
eddf2cf
Test: Update webdriver event page tests to expect United States of Am…
SandeepTuniki Mar 18, 2026
908c4a7
Fix: Python style formatting in get_series_dates
SandeepTuniki Mar 18, 2026
cc0afe3
Optimize: remove unused 'value' from v2observation in get_series_dates
SandeepTuniki Mar 18, 2026
26c1c55
Fix: apply Python style formatting for v2observation call
SandeepTuniki Mar 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 211 additions & 11 deletions server/services/datacommons.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,10 +370,6 @@ def v2event(node, prop):
return post(url, {"node": node, "property": prop})


def get_place_info(dcids: List[str]) -> Dict:
"""Retrieves Place Info given a list of DCIDs."""
url = get_service_url("/v1/bulk/info/place")
return post(f"{url}", {"nodes": sorted(set(dcids))})


def get_variable_group_info(nodes: List[str],
Expand Down Expand Up @@ -403,16 +399,220 @@ def get_variable_ancestors(dcid: str):
return get(url).get("ancestors", [])


# Relative specificity rank for place types: a lower rank means a more
# specific place. Used to order a place's ancestors from most to least
# specific when assembling place info.
PLACE_TYPE_RANK = {
    "CensusZipCodeTabulationArea": 1,
    "AdministrativeArea5": 2,
    "AdministrativeArea4": 2,
    "Village": 5,
    "City": 5,
    "Town": 5,
    "Borough": 5,
    "AdministrativeArea3": 5,
    "County": 10,
    "AdministrativeArea2": 10,
    "EurostatNUTS3": 10,
    "CensusDivision": 15,
    "State": 20,
    "AdministrativeArea1": 20,
    "EurostatNUTS2": 20,
    "EurostatNUTS1": 20,
    "Country": 30,
    "CensusRegion": 35,
    "GeoRegion": 38,
    "Continent": 40,
    "Place": 50,
}


# Maximum BFS depth when walking containedInPlace parents; bounds the
# traversal against cycles or pathological containment graphs in the KG.
_MAX_ANCESTOR_DEPTH = 10


def _build_parent_graph(dcids: List[str]) -> Dict[str, List[str]]:
  """Builds a child -> direct-parents map via BFS over containedInPlace.

  The V2 node API does not support the recursive `->containedInPlace+`
  expression, so parents are fetched one level at a time, skipping nodes
  already visited to avoid re-fetching (and to break cycles).
  """
  parent_graph = {}
  frontier = set(dcids)
  visited = set()
  for _ in range(_MAX_ANCESTOR_DEPTH):
    fetch_dcids = [d for d in frontier if d not in visited]
    if not fetch_dcids:
      break
    data = v2node(fetch_dcids, '->containedInPlace').get('data', {})
    frontier = set()
    for dcid in fetch_dcids:
      visited.add(dcid)
      arcs_obj = data.get(dcid, {}).get('arcs', {}).get('containedInPlace', {})
      nodes = arcs_obj.get('nodes', []) if isinstance(arcs_obj, dict) else []
      parents = [x['dcid'] for x in nodes if 'dcid' in x]
      if parents:
        parent_graph[dcid] = parents
        frontier.update(parents)
  return parent_graph


def get_place_info(dcids: List[str]) -> Dict:
  """Retrieves place info (type, name, ranked ancestors) for a list of DCIDs.

  Args:
    dcids: Place DCIDs to look up.

  Returns:
    {'data': [{'node': <dcid>,
               'info': {'self': {'dcid', 'type', 'name'},
                        'parents': [{'dcid', 'type', 'name'}, ...]}}]},
    where each parents list is ordered from most to least specific place
    type (per PLACE_TYPE_RANK); ancestors with unranked types are dropped.
  """
  from collections import deque

  parent_graph = _build_parent_graph(dcids)

  # Collect the full ancestor set for each input dcid by walking the
  # parent graph. deque gives O(1) pops (list.pop(0) is O(n)).
  ancestors_map = {}
  for dcid in dcids:
    ancestors = set()
    seen = {dcid}
    queue = deque([dcid])
    while queue:
      curr = queue.popleft()
      for parent in parent_graph.get(curr, []):
        if parent not in seen:
          # `seen` contains the root, so `parent != dcid` is guaranteed here.
          seen.add(parent)
          ancestors.add(parent)
          queue.append(parent)
    ancestors_map[dcid] = ancestors

  all_dcids = set(dcids)
  for anc_set in ancestors_map.values():
    all_dcids.update(anc_set)
  all_dcids_list = sorted(all_dcids)
  if not all_dcids_list:
    return {'data': []}

  types_resp = v2node(all_dcids_list, '->typeOf')
  names_resp = v2node(all_dcids_list, '->name')

  def first_arc_value(resp, dcid, prop, key='dcid'):
    """Returns the first arc node's `key` for `prop` on `dcid`, or ''."""
    arcs_obj = resp.get('data', {}).get(dcid, {}).get('arcs', {}).get(prop, {})
    nodes = arcs_obj.get('nodes', []) if isinstance(arcs_obj, dict) else []
    return nodes[0].get(key, '') if nodes else ''

  result_data = []
  for dcid in dcids:
    parents = []
    for anc_dcid in ancestors_map.get(dcid, set()):
      anc_type = first_arc_value(types_resp, anc_dcid, 'typeOf')
      if anc_type not in PLACE_TYPE_RANK:
        # Skip ancestors whose type is not a ranked place type.
        continue
      parents.append({
          'dcid': anc_dcid,
          'type': anc_type,
          'name': first_arc_value(names_resp, anc_dcid, 'name', 'value'),
          'rank': PLACE_TYPE_RANK[anc_type],
      })
    # Most specific (lowest rank) first; rank is internal and removed below.
    parents.sort(key=lambda p: p['rank'])
    for p in parents:
      del p['rank']

    result_data.append({
        'node': dcid,
        'info': {
            'self': {
                'dcid': dcid,
                'type': first_arc_value(types_resp, dcid, 'typeOf'),
                'name': first_arc_value(names_resp, dcid, 'name', 'value'),
            },
            'parents': parents,
        }
    })

  return {'data': result_data}


def get_series_dates(parent_entity, child_type, variables):
  """Gets observation date counts for variables over child places (V2 API).

  Replaces the V1 bulk observation-dates/linked endpoint: finds the direct
  children of `parent_entity` that have type `child_type`, fetches their
  observations via the V2 observation API, and counts observed entities per
  (variable, date, facet).

  Args:
    parent_entity: DCID of the containing place.
    child_type: Place type the children must have (e.g. 'County').
    variables: List of statistical variable DCIDs.

  Returns:
    {'datesByVariable': [{'variable', 'observationDates': [{'date',
     'entityCount': [{'count', 'facet'}]}]}], 'facets': {...}} mirroring the
    V1 response shape.
  """
  from collections import defaultdict

  def arc_nodes(resp, dcid, prop):
    """Returns the list of arc nodes for `prop` on `dcid` (may be empty)."""
    arcs_obj = resp.get('data', {}).get(dcid, {}).get('arcs', {}).get(prop, {})
    return arcs_obj.get('nodes', []) if isinstance(arcs_obj, dict) else []

  # Direct children via the incoming containedInPlace arc. The V2 node
  # response nests arc targets under a 'nodes' list, matching the parsing
  # used by get_place_info.
  children_resp = v2node([parent_entity], '<-containedInPlace')
  possible_children = [
      x['dcid']
      for x in arc_nodes(children_resp, parent_entity, 'containedInPlace')
      if 'dcid' in x
  ]

  # Keep only children of the requested type (outgoing typeOf arc).
  child_dcids = []
  if possible_children:
    type_resp = v2node(possible_children, '->typeOf')
    for child in possible_children:
      child_types = [t.get('dcid') for t in arc_nodes(type_resp, child, 'typeOf')]
      if child_type in child_types:
        child_dcids.append(child)

  if not child_dcids:
    return {"datesByVariable": [], "facets": {}}

  # 'value' is intentionally omitted from select: only dates and facets are
  # needed to count observations.
  obs_resp = v2observation(select=['date', 'variable', 'entity', 'facet'],
                           entity={'dcids': child_dcids},
                           variable={'dcids': variables})

  # variable -> date -> facet_id -> number of entities with an observation.
  agg_data = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
  for var, var_data in obs_resp.get('byVariable', {}).items():
    for ent_data in var_data.get('byEntity', {}).values():
      for obs in ent_data.get('series', []):
        date = obs.get('date')
        if not date:
          continue
        # NOTE(review): assumes each series entry carries its facet ID under
        # 'facet' and that full facet details live in the response's
        # top-level 'facets' map — confirm against the V2 observation API.
        agg_data[var][date][obs.get('facet', "")] += 1

  # Reshape the aggregation into the V1-style response.
  resp_dates = []
  for var, dates_map in agg_data.items():
    obs_dates = []
    for date, facet_counts in dates_map.items():
      entity_counts = [{
          "count": count,
          "facet": facet_id,
      } for facet_id, count in facet_counts.items()]
      obs_dates.append({"date": date, "entityCount": entity_counts})
    resp_dates.append({"variable": var, "observationDates": obs_dates})

  return {
      "datesByVariable": resp_dates,
      "facets": obs_resp.get('facets', {}),
  }


def resolve(nodes, prop):
Expand Down
Loading
Loading