From 6698c76aa947715b1314b199a9e89bbc8cc1548e Mon Sep 17 00:00:00 2001 From: Nick Blumberg Date: Mon, 9 Mar 2026 11:38:08 -0700 Subject: [PATCH 01/14] Implement initial metadata endpoint, moving functionality from V1 calls made directly via the frontend to a single collated Flask endpoint that returns the same data. Entities, now required, and now plumbed up from the various tile sources (except for the highlight). --- server/__init__.py | 3 + server/routes/shared_api/metadata.py | 375 ++++++++++++++++++ static/js/components/tiles/bar_tile.tsx | 1 + static/js/components/tiles/chart_tile.tsx | 4 + static/js/components/tiles/line_tile.tsx | 1 + static/js/components/tiles/map_tile.tsx | 11 + static/js/components/tiles/ranking_tile.tsx | 11 + static/js/components/tiles/scatter_tile.tsx | 1 + .../js/components/tiles/sv_ranking_units.tsx | 5 + static/js/place/chart_embed.tsx | 33 +- .../tools/shared/metadata/metadata_fetcher.ts | 57 +++ .../shared/metadata/tile_metadata_modal.tsx | 33 +- .../js/tools/shared/metadata/tile_sources.tsx | 4 + 13 files changed, 523 insertions(+), 16 deletions(-) create mode 100644 server/routes/shared_api/metadata.py diff --git a/server/__init__.py b/server/__init__.py index f8827cce3d..d0452c9da4 100644 --- a/server/__init__.py +++ b/server/__init__.py @@ -274,6 +274,9 @@ def register_routes_common(app): from server.routes.shared_api import variable_group as shared_variable_group app.register_blueprint(shared_variable_group.bp) + from server.routes.shared_api import metadata as shared_metadata + app.register_blueprint(shared_metadata.bp) + from server.routes.shared_api.observation import date as observation_date app.register_blueprint(observation_date.bp) diff --git a/server/routes/shared_api/metadata.py b/server/routes/shared_api/metadata.py new file mode 100644 index 0000000000..d8a0cc07a3 --- /dev/null +++ b/server/routes/shared_api/metadata.py @@ -0,0 +1,375 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 
2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +import collections +import logging + +from flask import Blueprint +from flask import jsonify +from flask import request + +from server.services import datacommons as dc + +bp = Blueprint("metadata", __name__, url_prefix='/api/shared/metadata') + +MEASUREMENT_METHODS_SUPPRESSION_PROVENANCES = {"WikipediaStatsData"} + + +def title_case(string): + return " ".join([word.capitalize() for word in string.split("_")]) + + +def _get_arc_nodes(data_dict, node_id, arc_name): + """Extracts nodes for a given arc from a v2node response dictionary.""" + return data_dict.get('data', {}).get(node_id, + {}).get('arcs', + {}).get(arc_name, + {}).get('nodes', []) + + +def _get_node_name(node_list, linked_names_map): + """Helper to resolve a node's display name from either a literal value or linked reference.""" + if not node_list: + return None + node = node_list[0] + if 'value' in node: + return node['value'] + if 'dcid' in node: + return linked_names_map.get(node['dcid']) + return None + + +def _extract_active_facets(sv, obs_resp, stat_var_to_facets): + """Extracts active facets for a given stat var.""" + active_facets = stat_var_to_facets.get(sv, []) + if not active_facets: + by_entity = obs_resp.get('byVariable', {}).get(sv, + {}).get('byEntity', {}) + for ent_data in by_entity.values(): + for f in ent_data.get('orderedFacets', []): + active_facets.append(f.get('facetId')) + return list(set(active_facets)) + + +async def fetch_categories_async(stat_vars): + 
"""Traverses the category hierarchy tree up to top-level topics.""" + parent_map = collections.defaultdict(list) + current_nodes = set(stat_vars) + visited = set() + depth = 0 + + while current_nodes and depth < 10: + visited.update(current_nodes) + + member_task = asyncio.to_thread(dc.v2node, list(current_nodes), + '->memberOf') + spec_task = asyncio.to_thread(dc.v2node, list(current_nodes), + '->specializationOf') + resp_member, resp_spec = await asyncio.gather(member_task, spec_task) + + next_nodes = set() + for node in current_nodes: + parents = set() + parents.update([ + n.get('dcid') + for n in _get_arc_nodes(resp_member, node, 'memberOf') + if n.get('dcid') + ]) + parents.update([ + n.get('dcid') + for n in _get_arc_nodes(resp_spec, node, 'specializationOf') + if n.get('dcid') + ]) + + parent_list = list(parents) + parent_map[node].extend(parent_list) + + for p in parent_list: + # Use visited set to prevent graph cycles + if p != 'dc/g/Root' and p not in visited: + next_nodes.add(p) + + current_nodes = next_nodes + depth += 1 + + sv_top_levels = collections.defaultdict(list) + all_top_level_dcids = set() + + for sv in stat_vars: + tops = set() + + def traverse(n, curr_visited): + if n in curr_visited: + return + curr_visited.add(n) + parents = parent_map.get(n, []) + valid_parents = [p for p in parents if p != 'dc/g/Root'] + + if not valid_parents: + if n != sv: + tops.add(n) + else: + for p in valid_parents: + traverse(p, curr_visited) + + traverse(sv, set()) + sv_top_levels[sv] = list(tops) + all_top_level_dcids.update(tops) + + category_map = {} + if all_top_level_dcids: + parent_name_resp = await asyncio.to_thread(dc.v2node, + list(all_top_level_dcids), + '->name') + parent_name_map = {} + for pid in all_top_level_dcids: + nodes = _get_arc_nodes(parent_name_resp, pid, 'name') + if nodes: + parent_name_map[pid] = nodes[0].get('value') + + for sv in stat_vars: + category_map[sv] = [ + parent_name_map.get(p) or p.split('/')[-1] + for p in 
sv_top_levels.get(sv, []) + ] + else: + category_map = {sv: [] for sv in stat_vars} + + return category_map + + +def _build_metadata_payload(stat_vars, stat_var_names, category_map, + sv_active_facets, v2_facets, facet_date_ranges, + prov_map, linked_names_map, mm_map, unit_map): + """Constructs the final aggregated metadata dictionary.""" + metadata_map = collections.defaultdict(list) + + for sv in stat_vars: + active_facets = sv_active_facets.get(sv, []) + + for fid in active_facets: + finfo = v2_facets.get(fid, {}) + import_name = finfo.get('importName') + if not import_name: + continue + + prov_id = f"dc/base/{import_name}" + pdata = prov_map.get(prov_id) + if not pdata: + continue + + date_ranges = facet_date_ranges.get(fid, {}) + unit = finfo.get('unit') + mm = finfo.get('measurementMethod') + + source_name = _get_node_name(pdata['source'], linked_names_map) + prov_name = _get_node_name(pdata['isPartOf'], linked_names_map) or \ + _get_node_name(pdata['name'], linked_names_map) or import_name + + mm_desc = None + if mm and prov_name not in MEASUREMENT_METHODS_SUPPRESSION_PROVENANCES: + mm_desc = mm_map.get(mm) or title_case(mm) + + resolved_unit = (unit_map.get(unit) or + unit.replace('_', ' ')) if unit else unit + license_name = _get_node_name(pdata['licenseType'], linked_names_map) + license_dcid = pdata['licenseType'][0].get('dcid') if pdata.get( + 'licenseType') else None + + metadata_map[sv].append({ + 'statVarId': + sv, + 'statVarName': + stat_var_names.get(sv, sv), + 'categories': + category_map.get(sv, []), + 'sourceName': + source_name, + 'provenanceUrl': + pdata.get('url')[0].get('value') if pdata.get('url') else None, + 'provenanceName': + prov_name, + 'dateRangeStart': + date_ranges.get('earliestDate'), + 'dateRangeEnd': + date_ranges.get('latestDate'), + 'unit': + resolved_unit, + 'observationPeriod': + finfo.get('observationPeriod'), + 'license': + license_name, + 'licenseDcid': + license_dcid, + 'measurementMethod': + mm, + 
'measurementMethodDescription': + mm_desc + }) + + return metadata_map + + +@bp.route('', methods=['POST']) +async def get_metadata(): + # Input Validation + req_data = request.get_json(silent=True) + if not req_data: + return jsonify({'error': 'Must provide a valid JSON body'}), 400 + + entities = req_data.get('entities', []) + stat_vars = req_data.get('statVars', []) + stat_var_to_facets = req_data.get('statVarToFacets', {}) + frontend_facets = req_data.get('facets', {}) + + if not isinstance(entities, list) or not isinstance(stat_vars, list): + return jsonify({'error': 'entities and statVars must be lists'}), 400 + + if not entities or not stat_vars: + return jsonify({'metadata': {}, 'statVarList': []}) + + # Initial Data Fetching + try: + name_resp, obs_resp, category_map = await asyncio.gather( + asyncio.to_thread(dc.v2node, stat_vars, '->name'), + asyncio.to_thread(dc.v2observation, + select=['entity', 'variable', 'facet'], + entity={'dcids': entities}, + variable={'dcids': stat_vars}), + fetch_categories_async(stat_vars)) + except Exception as e: + logging.error(f"Failed to fetch primary metadata from DC: {e}") + return jsonify({'error': 'Failed to communicate with Data Commons service' + }), 502 + + # Process Stat Var Names into a lookup dictionary + stat_var_names = {} + stat_var_list = [] + if 'data' in name_resp: + for sv in stat_vars: + nodes = _get_arc_nodes(name_resp, sv, 'name') + name = nodes[0].get('value') if nodes else sv + stat_var_names[sv] = name + stat_var_list.append({"dcid": sv, "name": name}) + + # Collate active facets per stat var + sv_active_facets = { + sv: _extract_active_facets(sv, obs_resp, stat_var_to_facets) + for sv in stat_vars + } + + # Process Observations to determine dates and measurement methods/units/import names + v2_facets = obs_resp.get('facets', {}) + + # Merge the frontend's date-accurate facets so importNames are never dropped + for key, val in frontend_facets.items(): + if isinstance(val, dict) and 'importName' not 
in val: + v2_facets.update(val) + else: + v2_facets[key] = val + + facet_date_ranges = collections.defaultdict(dict) + provenance_endpoints, measurement_methods, units = set(), set(), set() + + for sv in stat_vars: + for fid in sv_active_facets[sv]: + # Aggregate measurement methods, units and import names + finfo = v2_facets.get(fid, {}) + if finfo.get('unit'): + units.add(finfo['unit']) + if finfo.get('measurementMethod'): + measurement_methods.add(finfo['measurementMethod']) + if finfo.get('importName'): + provenance_endpoints.add(f"dc/base/{finfo['importName']}") + + # Aggregate Date Ranges + by_entity = obs_resp.get('byVariable', {}).get(sv, + {}).get('byEntity', {}) + for ent_data in by_entity.values(): + for f in ent_data.get('orderedFacets', []): + if f.get('facetId') != fid: + continue + + earliest, latest = f.get('earliestDate'), f.get('latestDate') + if earliest and (not facet_date_ranges[fid].get('earliestDate') or + earliest < facet_date_ranges[fid]['earliestDate']): + facet_date_ranges[fid]['earliestDate'] = earliest + if latest and (not facet_date_ranges[fid].get('latestDate') or + latest > facet_date_ranges[fid]['latestDate']): + facet_date_ranges[fid]['latestDate'] = latest + + # Look up names and descriptions of provenances, measurement methods and units + try: + prov_res, mm_res, unit_res = await asyncio.gather( + asyncio.to_thread(dc.v2node, list(provenance_endpoints), '->*') + if provenance_endpoints else asyncio.sleep(0, result={}), + asyncio.to_thread(dc.v2node, list(measurement_methods), '->description') + if measurement_methods else asyncio.sleep(0, result={}), + asyncio.to_thread(dc.v2node, list(units), '->name') + if units else asyncio.sleep(0, result={})) + except Exception as e: + logging.error(f"Failed to fetch secondary metadata from DC: {e}") + return jsonify({'error': 'Failed to resolve secondary node data'}), 502 + + # Process secondary lookups + prov_map = {} + linked_prov_dcids = set() + + if 'data' in prov_res: + for dcid, 
node_data in prov_res['data'].items(): + prov_map[dcid] = { + 'source': _get_arc_nodes(prov_res, dcid, 'source'), + 'isPartOf': _get_arc_nodes(prov_res, dcid, 'isPartOf'), + 'name': _get_arc_nodes(prov_res, dcid, 'name'), + 'url': _get_arc_nodes(prov_res, dcid, 'url'), + 'licenseType': _get_arc_nodes(prov_res, dcid, 'licenseType'), + } + # Collect DCIDs of linked entities for human-readable resolution + for n in prov_map[dcid]['source'] + prov_map[dcid]['isPartOf'] + prov_map[ + dcid]['licenseType']: + if 'dcid' in n: + linked_prov_dcids.add(n['dcid']) + + linked_names_map = {} + if linked_prov_dcids: + try: + linked_names_resp = await asyncio.to_thread(dc.v2node, + list(linked_prov_dcids), + '->name') + for n_dcid in linked_prov_dcids: + n_arcs = _get_arc_nodes(linked_names_resp, n_dcid, 'name') + if n_arcs: + linked_names_map[n_dcid] = n_arcs[0].get('value') + except Exception as e: + logging.error(f"Failed to resolve linked provenance names: {e}") + + mm_map = { + mm: _get_arc_nodes(mm_res, mm, 'description')[0].get('value') + for mm in measurement_methods + if _get_arc_nodes(mm_res, mm, 'description') + } + unit_map = { + u: _get_arc_nodes(unit_res, u, 'name')[0].get('value') + for u in units + if _get_arc_nodes(unit_res, u, 'name') + } + + # Assemble and return the final response + metadata_map = _build_metadata_payload(stat_vars, stat_var_names, + category_map, sv_active_facets, + v2_facets, facet_date_ranges, prov_map, + linked_names_map, mm_map, unit_map) + + return jsonify({'metadata': metadata_map, 'statVarList': stat_var_list}) \ No newline at end of file diff --git a/static/js/components/tiles/bar_tile.tsx b/static/js/components/tiles/bar_tile.tsx index 2e20cb45ff..430317b5d4 100644 --- a/static/js/components/tiles/bar_tile.tsx +++ b/static/js/components/tiles/bar_tile.tsx @@ -269,6 +269,7 @@ export function BarTile(props: BarTilePropType): ReactElement { facets={barChartData?.facets} statVarToFacets={barChartData?.statVarToFacets} 
subtitle={props.subtitle} + entities={"places" in props ? props.places : [props.parentPlace]} title={props.title} statVarSpecs={props.variables} forwardRef={containerRef} diff --git a/static/js/components/tiles/chart_tile.tsx b/static/js/components/tiles/chart_tile.tsx index a562eaeef1..b51e669d9c 100644 --- a/static/js/components/tiles/chart_tile.tsx +++ b/static/js/components/tiles/chart_tile.tsx @@ -53,6 +53,8 @@ interface ChartTileContainerProp { statVarToFacets?: StatVarFacetMap; // A map of stat var dcids to their specific min and max date range from the chart statVarDateRanges?: Record; + // A list of entities used within the chart + entities?: string[]; children: React.ReactNode; replacementStrings: ReplacementStrings; // Whether or not to allow chart embedding action. @@ -126,6 +128,7 @@ export function ChartTileContainer( {showSources && ( + mapChartData + ? mapChartData.layerData.flatMap((layer) => + Object.keys(layer.dataValues || {}) + ) + : [], + [mapChartData] + ); + useEffect(() => { if (props.lazyLoad && !shouldLoad) { return; @@ -378,6 +388,7 @@ export function MapTile(props: MapTilePropType): ReactElement { return ( + rankingData + ? 
Object.values(rankingData).flatMap((svData) => + svData.points.map((p) => p.placeDcid) + ) + : [], + [rankingData] + ); + /** * Opens export modal window */ @@ -342,6 +352,7 @@ export function RankingTile(props: RankingTilePropType): ReactElement { facets={allFacets} statVarToFacets={allStatVarToFacets} apiRoot={props.apiRoot} + entities={entities} /> ); diff --git a/static/js/components/tiles/scatter_tile.tsx b/static/js/components/tiles/scatter_tile.tsx index d80b2850d3..29e2ca93a9 100644 --- a/static/js/components/tiles/scatter_tile.tsx +++ b/static/js/components/tiles/scatter_tile.tsx @@ -250,6 +250,7 @@ export function ScatterTile(props: ScatterTilePropType): ReactElement { getObservationSpecs={getObservationSpecs} errorMsg={scatterChartData && scatterChartData.errorMsg} id={props.id} + entities={[props.place.dcid]} isInitialLoading={_.isNull(scatterChartData)} isLoading={isLoading} replacementStrings={getReplacementStrings(props, scatterChartData)} diff --git a/static/js/components/tiles/sv_ranking_units.tsx b/static/js/components/tiles/sv_ranking_units.tsx index 66f6036c39..a1e2eb373a 100644 --- a/static/js/components/tiles/sv_ranking_units.tsx +++ b/static/js/components/tiles/sv_ranking_units.tsx @@ -370,6 +370,10 @@ export function getRankingUnit( rankingGroup, enableScroll ); + const entities = [ + ...(topPoints || []).map((p) => p.placeDcid), + ...(bottomPoints || []).map((p) => p.placeDcid), + ]; const title = getRankingUnitTitle( tileConfigTitle, rankingMetadata, @@ -401,6 +405,7 @@ export function getRankingUnit( containerRef={containerRef} sources={sources || rankingGroup.sources} facets={rankingGroup.facets} + entities={entities} statVarToFacets={rankingGroup.statVarToFacets} statVarSpecs={statVarSpecs} surface={surface} diff --git a/static/js/place/chart_embed.tsx b/static/js/place/chart_embed.tsx index 9acc0ac7ed..0a70a3ebc0 100644 --- a/static/js/place/chart_embed.tsx +++ b/static/js/place/chart_embed.tsx @@ -55,7 +55,10 @@ import { 
buildCitationParts, CitationPart, } from "../tools/shared/metadata/citations"; -import { fetchMetadata } from "../tools/shared/metadata/metadata_fetcher"; +import { + fetchMetadata, + fetchMetadataV2, +} from "../tools/shared/metadata/metadata_fetcher"; import { getDataCommonsClient } from "../utils/data_commons_client"; // SVG adjustment related constants @@ -70,6 +73,7 @@ interface ChartEmbedPropsType { container?: HTMLElement; statVarSpecs?: StatVarSpec[]; facets?: Record; + entities?: string[]; statVarToFacets?: StatVarFacetMap; // A map of stat var dcids to their specific min and max date range from the chart statVarDateRanges?: Record; @@ -415,13 +419,26 @@ class ChartEmbed extends React.Component< return []; } const dataCommonsClient = getDataCommonsClient(apiRoot, surface); - const metadataResp = await fetchMetadata( - statVarSet, - facets, - dataCommonsClient, - statVarToFacets, - apiRoot - ); + + let metadataResp; + if (this.props.entities && this.props.entities.length > 0) { + metadataResp = await fetchMetadataV2( + this.props.entities, + statVarSet, + statVarToFacets, + apiRoot, + facets + ); + } else { + metadataResp = await fetchMetadata( + statVarSet, + facets, + dataCommonsClient, + statVarToFacets, + apiRoot + ); + } + return buildCitationParts( metadataResp.statVarList, metadataResp.metadata, diff --git a/static/js/tools/shared/metadata/metadata_fetcher.ts b/static/js/tools/shared/metadata/metadata_fetcher.ts index 6c8f8fe181..5256f93630 100644 --- a/static/js/tools/shared/metadata/metadata_fetcher.ts +++ b/static/js/tools/shared/metadata/metadata_fetcher.ts @@ -528,3 +528,60 @@ export async function fetchMetadata( return { metadata, statVarList }; } + +//TODO (nick-nlb): Once metadata migration is complete remove old endpoint and remove "V2" from this one. + +/** + * Function to fetch comprehensive metadata for a list of entities and stat vars. + * This version utilizes a consolidated backend API endpoint that contains no + * V1 calls. 
+ * + * @param entities - Array of entity DCIDs to fetch metadata for + * @param statVarSet - Set of stat var DCIDs to fetch metadata for + * @param statVarToFacets - Optional mapping of stat vars to their facets + * @param apiRoot - Optional API root URL for requests + * @param facets - Optional map of the facet id to StatMetadata + * @returns Promise resolving to an object containing two attributes, metadata and statVarList. + * The metadata attribute is a mapping of stat var ids to metadata. + * The statVarList is list of stat var nodes containing full names. + */ +export async function fetchMetadataV2( + entities: string[], + statVarSet: Set, + statVarToFacets?: StatVarFacetMap, + apiRoot?: string, + facets?: Record +): Promise<{ + metadata: Record; + statVarList: NamedNode[]; +}> { + const statVars = [...statVarSet]; + if (!statVars.length) return { metadata: {}, statVarList: [] }; + + const convertedStatVarToFacets: Record = {}; + if (statVarToFacets) { + for (const [sv, facetSet] of Object.entries(statVarToFacets)) { + convertedStatVarToFacets[sv] = Array.isArray(facetSet) + ? 
facetSet + : Array.from(facetSet); + } + } + + const response = await fetch(`${apiRoot || ""}/api/shared/metadata`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + entities, + statVars, + statVarToFacets: convertedStatVarToFacets, + facets, + }), + }); + + if (!response.ok) { + console.error("Failed to fetch metadata", await response.text()); + return { metadata: {}, statVarList: [] }; + } + + return response.json(); +} diff --git a/static/js/tools/shared/metadata/tile_metadata_modal.tsx b/static/js/tools/shared/metadata/tile_metadata_modal.tsx index cd1f274c6a..b374212a8c 100644 --- a/static/js/tools/shared/metadata/tile_metadata_modal.tsx +++ b/static/js/tools/shared/metadata/tile_metadata_modal.tsx @@ -42,7 +42,7 @@ import { NamedNode, StatVarFacetMap, StatVarSpec } from "../../../shared/types"; import { getDataCommonsClient } from "../../../utils/data_commons_client"; import { buildCitationParts, citationToPlainText } from "./citations"; import { StatVarMetadata } from "./metadata"; -import { fetchMetadata } from "./metadata_fetcher"; +import { fetchMetadata, fetchMetadataV2 } from "./metadata_fetcher"; import { TileMetadataModalContent } from "./tile_metadata_modal_content"; interface TileMetadataModalPropType { @@ -57,6 +57,8 @@ interface TileMetadataModalPropType { containerRef?: React.RefObject; // root URL used to generate stat var explorer and license links apiRoot?: string; + // array of entity dcids to use for fetching + entities?: string[]; // used in mixer usage logs. Indicates which surface (website, web components, etc) is making the call. 
surface: string; } @@ -104,13 +106,27 @@ export function TileMetadataModal( setLoading(true); setError(false); - fetchMetadata( - statVarSet, - props.facets, - dataCommonsClient, - props.statVarToFacets, - props.apiRoot - ) + + let fetchPromise; + if (props.entities && props.entities.length > 0) { + fetchPromise = fetchMetadataV2( + props.entities, + statVarSet, + props.statVarToFacets, + props.apiRoot, + props.facets + ); + } else { + fetchPromise = fetchMetadata( + statVarSet, + props.facets, + dataCommonsClient, + props.statVarToFacets, + props.apiRoot + ); + } + + fetchPromise .then((resp) => { // Sort stat vars: non-denominators first, then denominators. // Secondary sort is alphabetical. @@ -143,6 +159,7 @@ export function TileMetadataModal( props.apiRoot, props.statVarToFacets, props.facets, + props.entities, denomStatVarDcids, ]); diff --git a/static/js/tools/shared/metadata/tile_sources.tsx b/static/js/tools/shared/metadata/tile_sources.tsx index bf463e7356..c85dd7d79f 100644 --- a/static/js/tools/shared/metadata/tile_sources.tsx +++ b/static/js/tools/shared/metadata/tile_sources.tsx @@ -47,6 +47,8 @@ export function TileSources(props: { // the detailed metadata modal. If not supplied, we fall back to a simple // modal display using the sources. facets?: Record; + // Array of entity dcids to use for fetching + entities?: string[]; // A mapping of which stat var used which facets statVarToFacets?: StatVarFacetMap; // If available, the stat vars to link to. @@ -67,6 +69,7 @@ export function TileSources(props: { surface: string; }): ReactElement { const { + entities, facets, statVarToFacets, statVarSpecs, @@ -129,6 +132,7 @@ export function TileSources(props: { {facets && statVarToFacets ? ( Date: Mon, 9 Mar 2026 11:43:39 -0700 Subject: [PATCH 02/14] Entities now being passed up from the highlight tile. 
--- static/js/components/tiles/highlight_tile.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/static/js/components/tiles/highlight_tile.tsx b/static/js/components/tiles/highlight_tile.tsx index a05b99416a..5ef7aa001f 100644 --- a/static/js/components/tiles/highlight_tile.tsx +++ b/static/js/components/tiles/highlight_tile.tsx @@ -197,6 +197,7 @@ export function HighlightTile(props: HighlightTilePropType): ReactElement { statVarSpecs={[props.statVarSpec]} getObservationSpecs={getObservationSpecs} surface={props.surface} + entities={[props.place.dcid]} /> )} From 4c4e1d04caad683ffdb61625711f97bc09f4dfa8 Mon Sep 17 00:00:00 2001 From: Nick Blumberg Date: Thu, 12 Mar 2026 16:08:22 -0700 Subject: [PATCH 03/14] Update facet handling so that the frontend metadata entries sent to the backend are used in a filter applied directly to the V2 call. --- server/routes/shared_api/metadata.py | 33 +++++++++++++--------------- server/services/datacommons.py | 11 ++++++---- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/server/routes/shared_api/metadata.py b/server/routes/shared_api/metadata.py index d8a0cc07a3..d76e55762c 100644 --- a/server/routes/shared_api/metadata.py +++ b/server/routes/shared_api/metadata.py @@ -95,7 +95,7 @@ async def fetch_categories_async(stat_vars): parent_list = list(parents) parent_map[node].extend(parent_list) - + for p in parent_list: # Use visited set to prevent graph cycles if p != 'dc/g/Root' and p not in visited: @@ -151,7 +151,7 @@ def traverse(n, curr_visited): def _build_metadata_payload(stat_vars, stat_var_names, category_map, - sv_active_facets, v2_facets, facet_date_ranges, + sv_active_facets, facets, facet_date_ranges, prov_map, linked_names_map, mm_map, unit_map): """Constructs the final aggregated metadata dictionary.""" metadata_map = collections.defaultdict(list) @@ -160,7 +160,7 @@ def _build_metadata_payload(stat_vars, stat_var_names, category_map, active_facets = sv_active_facets.get(sv, []) for fid in 
active_facets: - finfo = v2_facets.get(fid, {}) + finfo = facets.get(fid, {}) import_name = finfo.get('importName') if not import_name: continue @@ -241,13 +241,18 @@ async def get_metadata(): return jsonify({'metadata': {}, 'statVarList': []}) # Initial Data Fetching + v2obs_kwargs = { + 'select': ['entity', 'variable', 'facet'], + 'entity': {'dcids': entities}, + 'variable': {'dcids': stat_vars} + } + if frontend_facets: + v2obs_kwargs['filter'] = {'facetIds': list(frontend_facets.keys())} + try: name_resp, obs_resp, category_map = await asyncio.gather( asyncio.to_thread(dc.v2node, stat_vars, '->name'), - asyncio.to_thread(dc.v2observation, - select=['entity', 'variable', 'facet'], - entity={'dcids': entities}, - variable={'dcids': stat_vars}), + asyncio.to_thread(dc.v2observation, **v2obs_kwargs), fetch_categories_async(stat_vars)) except Exception as e: logging.error(f"Failed to fetch primary metadata from DC: {e}") @@ -270,15 +275,7 @@ async def get_metadata(): for sv in stat_vars } - # Process Observations to determine dates and measurement methods/units/import names - v2_facets = obs_resp.get('facets', {}) - - # Merge the frontend's date-accurate facets so importNames are never dropped - for key, val in frontend_facets.items(): - if isinstance(val, dict) and 'importName' not in val: - v2_facets.update(val) - else: - v2_facets[key] = val + facets = obs_resp.get('facets', {}) facet_date_ranges = collections.defaultdict(dict) provenance_endpoints, measurement_methods, units = set(), set(), set() @@ -286,7 +283,7 @@ async def get_metadata(): for sv in stat_vars: for fid in sv_active_facets[sv]: # Aggregate measurement methods, units and import names - finfo = v2_facets.get(fid, {}) + finfo = facets.get(fid, {}) if finfo.get('unit'): units.add(finfo['unit']) if finfo.get('measurementMethod'): @@ -369,7 +366,7 @@ async def get_metadata(): # Assemble and return the final response metadata_map = _build_metadata_payload(stat_vars, stat_var_names, category_map, 
sv_active_facets, - v2_facets, facet_date_ranges, prov_map, + facets, facet_date_ranges, prov_map, linked_names_map, mm_map, unit_map) return jsonify({'metadata': metadata_map, 'statVarList': stat_var_list}) \ No newline at end of file diff --git a/server/services/datacommons.py b/server/services/datacommons.py index 72f753e307..5cfd201559 100644 --- a/server/services/datacommons.py +++ b/server/services/datacommons.py @@ -268,13 +268,13 @@ def point_within_facet(parent_entity, child_type, variables, date): }) -def v2observation(select, entity, variable): +def v2observation(select, entity, variable, filter=None): """ Args: select: A list of select props. entity: A dict in the form of {'dcids':, 'expression':} variable: A dict in the form of {'dcids':, 'expression':} - + filter: Optional dict in the form of {'facetIds': [...]} etc. """ # Remove None from dcids and sort them. Note do not sort in place to avoid # changing the original input. @@ -283,11 +283,14 @@ def v2observation(select, entity, variable): if "dcids" in variable: variable["dcids"] = sorted([x for x in variable["dcids"] if x]) url = get_service_url("/v2/observation") - return post(url, { + req = { "select": select, "entity": entity, "variable": variable, - }) + } + if filter: + req["filter"] = filter + return post(url, req) def v2node(nodes, prop): From 21c4618136ad009b5723e35aae0ccd90c03ead13 Mon Sep 17 00:00:00 2001 From: Nick Blumberg Date: Thu, 12 Mar 2026 16:17:59 -0700 Subject: [PATCH 04/14] Update the metadata endpoint to accept a list of facet ids rather than the full facet to attribute mapping (as the endpoint is just discarding that information). 
--- server/routes/shared_api/metadata.py | 4 ++-- static/js/tools/shared/metadata/metadata_fetcher.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/server/routes/shared_api/metadata.py b/server/routes/shared_api/metadata.py index d76e55762c..1b34f1f47a 100644 --- a/server/routes/shared_api/metadata.py +++ b/server/routes/shared_api/metadata.py @@ -232,7 +232,7 @@ async def get_metadata(): entities = req_data.get('entities', []) stat_vars = req_data.get('statVars', []) stat_var_to_facets = req_data.get('statVarToFacets', {}) - frontend_facets = req_data.get('facets', {}) + frontend_facets = req_data.get('facets', []) if not isinstance(entities, list) or not isinstance(stat_vars, list): return jsonify({'error': 'entities and statVars must be lists'}), 400 @@ -247,7 +247,7 @@ async def get_metadata(): 'variable': {'dcids': stat_vars} } if frontend_facets: - v2obs_kwargs['filter'] = {'facetIds': list(frontend_facets.keys())} + v2obs_kwargs['filter'] = {'facetIds': frontend_facets} try: name_resp, obs_resp, category_map = await asyncio.gather( diff --git a/static/js/tools/shared/metadata/metadata_fetcher.ts b/static/js/tools/shared/metadata/metadata_fetcher.ts index 5256f93630..a705cf7ca0 100644 --- a/static/js/tools/shared/metadata/metadata_fetcher.ts +++ b/static/js/tools/shared/metadata/metadata_fetcher.ts @@ -574,7 +574,7 @@ export async function fetchMetadataV2( entities, statVars, statVarToFacets: convertedStatVarToFacets, - facets, + facets: facets ? Object.keys(facets) : undefined, }), }); From c27b0ba93ebb7a94af176393c74860819a5365ab Mon Sep 17 00:00:00 2001 From: Nick Blumberg Date: Thu, 12 Mar 2026 16:18:24 -0700 Subject: [PATCH 05/14] Cleanup of some of the entity collation code to dedup. 
--- static/js/components/tiles/map_tile.tsx | 8 ++++++-- static/js/components/tiles/ranking_tile.tsx | 10 +++++++--- static/js/components/tiles/sv_ranking_units.tsx | 10 ++++++---- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/static/js/components/tiles/map_tile.tsx b/static/js/components/tiles/map_tile.tsx index 1289ff997e..9c709597ff 100644 --- a/static/js/components/tiles/map_tile.tsx +++ b/static/js/components/tiles/map_tile.tsx @@ -246,8 +246,12 @@ export function MapTile(props: MapTilePropType): ReactElement { const entities = useMemo( () => mapChartData - ? mapChartData.layerData.flatMap((layer) => - Object.keys(layer.dataValues || {}) + ? Array.from( + new Set( + mapChartData.layerData.flatMap((layer) => + Object.keys(layer.dataValues || {}) + ) + ) ) : [], [mapChartData] diff --git a/static/js/components/tiles/ranking_tile.tsx b/static/js/components/tiles/ranking_tile.tsx index bd71be4f53..9244762535 100644 --- a/static/js/components/tiles/ranking_tile.tsx +++ b/static/js/components/tiles/ranking_tile.tsx @@ -242,9 +242,13 @@ export function RankingTile(props: RankingTilePropType): ReactElement { const entities = useMemo( () => rankingData - ? Object.values(rankingData).flatMap((svData) => - svData.points.map((p) => p.placeDcid) - ) + ? 
Array.from( + new Set( + Object.values(rankingData).flatMap((svData) => + svData.points.map((p) => p.placeDcid) + ) + ) + ) : [], [rankingData] ); diff --git a/static/js/components/tiles/sv_ranking_units.tsx b/static/js/components/tiles/sv_ranking_units.tsx index a1e2eb373a..f89a6412c4 100644 --- a/static/js/components/tiles/sv_ranking_units.tsx +++ b/static/js/components/tiles/sv_ranking_units.tsx @@ -370,10 +370,12 @@ export function getRankingUnit( rankingGroup, enableScroll ); - const entities = [ - ...(topPoints || []).map((p) => p.placeDcid), - ...(bottomPoints || []).map((p) => p.placeDcid), - ]; + const entities = Array.from( + new Set([ + ...(topPoints || []).map((p) => p.placeDcid), + ...(bottomPoints || []).map((p) => p.placeDcid), + ]) + ); const title = getRankingUnitTitle( tileConfigTitle, rankingMetadata, From b77fa0d49d0a2dcea19c1f3296aac15d9c1382f6 Mon Sep 17 00:00:00 2001 From: Nick Blumberg Date: Thu, 12 Mar 2026 16:27:57 -0700 Subject: [PATCH 06/14] Apply lint to metadata endpoint and small improvements. 
--- server/routes/shared_api/metadata.py | 38 +++++++++++++++------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/server/routes/shared_api/metadata.py b/server/routes/shared_api/metadata.py index 1b34f1f47a..f038fcf5ed 100644 --- a/server/routes/shared_api/metadata.py +++ b/server/routes/shared_api/metadata.py @@ -53,10 +53,9 @@ def _get_node_name(node_list, linked_names_map): def _extract_active_facets(sv, obs_resp, stat_var_to_facets): """Extracts active facets for a given stat var.""" - active_facets = stat_var_to_facets.get(sv, []) + active_facets = list(stat_var_to_facets.get(sv, [])) if not active_facets: - by_entity = obs_resp.get('byVariable', {}).get(sv, - {}).get('byEntity', {}) + by_entity = obs_resp.get('byVariable', {}).get(sv, {}).get('byEntity', {}) for ent_data in by_entity.values(): for f in ent_data.get('orderedFacets', []): active_facets.append(f.get('facetId')) @@ -95,7 +94,7 @@ async def fetch_categories_async(stat_vars): parent_list = list(parents) parent_map[node].extend(parent_list) - + for p in parent_list: # Use visited set to prevent graph cycles if p != 'dc/g/Root' and p not in visited: @@ -243,19 +242,23 @@ async def get_metadata(): # Initial Data Fetching v2obs_kwargs = { 'select': ['entity', 'variable', 'facet'], - 'entity': {'dcids': entities}, - 'variable': {'dcids': stat_vars} + 'entity': { + 'dcids': entities + }, + 'variable': { + 'dcids': stat_vars + } } if frontend_facets: - v2obs_kwargs['filter'] = {'facetIds': frontend_facets} + v2obs_kwargs['filter'] = {'facetIds': frontend_facets} try: name_resp, obs_resp, category_map = await asyncio.gather( asyncio.to_thread(dc.v2node, stat_vars, '->name'), asyncio.to_thread(dc.v2observation, **v2obs_kwargs), fetch_categories_async(stat_vars)) - except Exception as e: - logging.error(f"Failed to fetch primary metadata from DC: {e}") + except Exception: + logging.exception("Failed to fetch primary metadata from DC") return jsonify({'error': 'Failed to communicate 
with Data Commons service' }), 502 @@ -292,8 +295,7 @@ async def get_metadata(): provenance_endpoints.add(f"dc/base/{finfo['importName']}") # Aggregate Date Ranges - by_entity = obs_resp.get('byVariable', {}).get(sv, - {}).get('byEntity', {}) + by_entity = obs_resp.get('byVariable', {}).get(sv, {}).get('byEntity', {}) for ent_data in by_entity.values(): for f in ent_data.get('orderedFacets', []): if f.get('facetId') != fid: @@ -316,8 +318,8 @@ async def get_metadata(): if measurement_methods else asyncio.sleep(0, result={}), asyncio.to_thread(dc.v2node, list(units), '->name') if units else asyncio.sleep(0, result={})) - except Exception as e: - logging.error(f"Failed to fetch secondary metadata from DC: {e}") + except Exception: + logging.exception("Failed to fetch secondary metadata from DC") return jsonify({'error': 'Failed to resolve secondary node data'}), 502 # Process secondary lookups @@ -349,8 +351,8 @@ async def get_metadata(): n_arcs = _get_arc_nodes(linked_names_resp, n_dcid, 'name') if n_arcs: linked_names_map[n_dcid] = n_arcs[0].get('value') - except Exception as e: - logging.error(f"Failed to resolve linked provenance names: {e}") + except Exception: + logging.exception("Failed to resolve linked provenance names") mm_map = { mm: _get_arc_nodes(mm_res, mm, 'description')[0].get('value') @@ -365,8 +367,8 @@ async def get_metadata(): # Assemble and return the final response metadata_map = _build_metadata_payload(stat_vars, stat_var_names, - category_map, sv_active_facets, - facets, facet_date_ranges, prov_map, + category_map, sv_active_facets, facets, + facet_date_ranges, prov_map, linked_names_map, mm_map, unit_map) - return jsonify({'metadata': metadata_map, 'statVarList': stat_var_list}) \ No newline at end of file + return jsonify({'metadata': metadata_map, 'statVarList': stat_var_list}) From fc2ece2a6227c9cec4fe0a71b35fff3882617b69 Mon Sep 17 00:00:00 2001 From: Nick Blumberg Date: Thu, 12 Mar 2026 16:31:47 -0700 Subject: [PATCH 07/14] Remove need 
for sleep via a wrapper function --- server/routes/shared_api/metadata.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/server/routes/shared_api/metadata.py b/server/routes/shared_api/metadata.py index f038fcf5ed..914b583639 100644 --- a/server/routes/shared_api/metadata.py +++ b/server/routes/shared_api/metadata.py @@ -221,6 +221,13 @@ def _build_metadata_payload(stat_vars, stat_var_names, category_map, return metadata_map +async def _fetch_node_data(dcids, prop): + """Helper to fetch node data only if the list of DCIDs is not empty.""" + if not dcids: + return {} + return await asyncio.to_thread(dc.v2node, list(dcids), prop) + + @bp.route('', methods=['POST']) async def get_metadata(): # Input Validation @@ -312,12 +319,9 @@ async def get_metadata(): # Look up names and descriptions of provenances, measurement methods and units try: prov_res, mm_res, unit_res = await asyncio.gather( - asyncio.to_thread(dc.v2node, list(provenance_endpoints), '->*') - if provenance_endpoints else asyncio.sleep(0, result={}), - asyncio.to_thread(dc.v2node, list(measurement_methods), '->description') - if measurement_methods else asyncio.sleep(0, result={}), - asyncio.to_thread(dc.v2node, list(units), '->name') - if units else asyncio.sleep(0, result={})) + _fetch_node_data(provenance_endpoints, '->*'), + _fetch_node_data(measurement_methods, '->description'), + _fetch_node_data(units, '->name')) except Exception: logging.exception("Failed to fetch secondary metadata from DC") return jsonify({'error': 'Failed to resolve secondary node data'}), 502 From bcc0c07fe4daffddccd0f4bc750cc53d8d14e4fe Mon Sep 17 00:00:00 2001 From: Nick Blumberg Date: Thu, 12 Mar 2026 16:58:07 -0700 Subject: [PATCH 08/14] Add type hints to metadata.py --- server/routes/shared_api/metadata.py | 67 +++++++++++++++++----------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/server/routes/shared_api/metadata.py b/server/routes/shared_api/metadata.py index 
914b583639..97fabbfd74 100644 --- a/server/routes/shared_api/metadata.py +++ b/server/routes/shared_api/metadata.py @@ -15,23 +15,26 @@ import asyncio import collections import logging +from typing import Any from flask import Blueprint from flask import jsonify from flask import request +from flask import Response from server.services import datacommons as dc bp = Blueprint("metadata", __name__, url_prefix='/api/shared/metadata') -MEASUREMENT_METHODS_SUPPRESSION_PROVENANCES = {"WikipediaStatsData"} +MEASUREMENT_METHODS_SUPPRESSION_PROVENANCES: set[str] = {"WikipediaStatsData"} -def title_case(string): +def title_case(string: str) -> str: return " ".join([word.capitalize() for word in string.split("_")]) -def _get_arc_nodes(data_dict, node_id, arc_name): +def _get_arc_nodes(data_dict: dict[str, Any], node_id: str, + arc_name: str) -> list[dict[str, Any]]: """Extracts nodes for a given arc from a v2node response dictionary.""" return data_dict.get('data', {}).get(node_id, {}).get('arcs', @@ -39,7 +42,8 @@ def _get_arc_nodes(data_dict, node_id, arc_name): {}).get('nodes', []) -def _get_node_name(node_list, linked_names_map): +def _get_node_name(node_list: list[dict[str, Any]], + linked_names_map: dict[str, str]) -> str | None: """Helper to resolve a node's display name from either a literal value or linked reference.""" if not node_list: return None @@ -51,7 +55,9 @@ def _get_node_name(node_list, linked_names_map): return None -def _extract_active_facets(sv, obs_resp, stat_var_to_facets): +def _extract_active_facets( + sv: str, obs_resp: dict[str, Any], + stat_var_to_facets: dict[str, list[str]]) -> list[str]: """Extracts active facets for a given stat var.""" active_facets = list(stat_var_to_facets.get(sv, [])) if not active_facets: @@ -62,7 +68,7 @@ def _extract_active_facets(sv, obs_resp, stat_var_to_facets): return list(set(active_facets)) -async def fetch_categories_async(stat_vars): +async def fetch_categories_async(stat_vars: list[str]) -> dict[str, 
list[str]]: """Traverses the category hierarchy tree up to top-level topics.""" parent_map = collections.defaultdict(list) current_nodes = set(stat_vars) @@ -109,7 +115,7 @@ async def fetch_categories_async(stat_vars): for sv in stat_vars: tops = set() - def traverse(n, curr_visited): + def traverse(n: str, curr_visited: set[str]) -> None: if n in curr_visited: return curr_visited.add(n) @@ -127,7 +133,7 @@ def traverse(n, curr_visited): sv_top_levels[sv] = list(tops) all_top_level_dcids.update(tops) - category_map = {} + category_map: dict[str, list[str]] = {} if all_top_level_dcids: parent_name_resp = await asyncio.to_thread(dc.v2node, list(all_top_level_dcids), @@ -149,9 +155,14 @@ def traverse(n, curr_visited): return category_map -def _build_metadata_payload(stat_vars, stat_var_names, category_map, - sv_active_facets, facets, facet_date_ranges, - prov_map, linked_names_map, mm_map, unit_map): +def _build_metadata_payload( + stat_vars: list[str], stat_var_names: dict[str, str], + category_map: dict[str, list[str]], sv_active_facets: dict[str, list[str]], + facets: dict[str, Any], facet_date_ranges: dict[str, dict[str, str]], + prov_map: dict[str, dict[str, Any]], linked_names_map: dict[str, str], + mm_map: dict[str, + str], unit_map: dict[str, + str]) -> dict[str, list[dict[str, Any]]]: """Constructs the final aggregated metadata dictionary.""" metadata_map = collections.defaultdict(list) @@ -221,7 +232,7 @@ def _build_metadata_payload(stat_vars, stat_var_names, category_map, return metadata_map -async def _fetch_node_data(dcids, prop): +async def _fetch_node_data(dcids: set[str], prop: str) -> dict[str, Any]: """Helper to fetch node data only if the list of DCIDs is not empty.""" if not dcids: return {} @@ -229,16 +240,16 @@ async def _fetch_node_data(dcids, prop): @bp.route('', methods=['POST']) -async def get_metadata(): +async def get_metadata() -> tuple[Response, int] | Response: # Input Validation req_data = request.get_json(silent=True) if not req_data: 
return jsonify({'error': 'Must provide a valid JSON body'}), 400 - entities = req_data.get('entities', []) - stat_vars = req_data.get('statVars', []) - stat_var_to_facets = req_data.get('statVarToFacets', {}) - frontend_facets = req_data.get('facets', []) + entities: list[str] = req_data.get('entities', []) + stat_vars: list[str] = req_data.get('statVars', []) + stat_var_to_facets: dict[str, list[str]] = req_data.get('statVarToFacets', {}) + frontend_facets: list[str] = req_data.get('facets', []) if not isinstance(entities, list) or not isinstance(stat_vars, list): return jsonify({'error': 'entities and statVars must be lists'}), 400 @@ -270,8 +281,8 @@ async def get_metadata(): }), 502 # Process Stat Var Names into a lookup dictionary - stat_var_names = {} - stat_var_list = [] + stat_var_names: dict[str, str] = {} + stat_var_list: list[dict[str, str]] = [] if 'data' in name_resp: for sv in stat_vars: nodes = _get_arc_nodes(name_resp, sv, 'name') @@ -280,15 +291,17 @@ async def get_metadata(): stat_var_list.append({"dcid": sv, "name": name}) # Collate active facets per stat var - sv_active_facets = { + sv_active_facets: dict[str, list[str]] = { sv: _extract_active_facets(sv, obs_resp, stat_var_to_facets) for sv in stat_vars } facets = obs_resp.get('facets', {}) - facet_date_ranges = collections.defaultdict(dict) - provenance_endpoints, measurement_methods, units = set(), set(), set() + facet_date_ranges: dict[str, dict[str, str]] = collections.defaultdict(dict) + provenance_endpoints: set[str] = set() + measurement_methods: set[str] = set() + units: set[str] = set() for sv in stat_vars: for fid in sv_active_facets[sv]: @@ -327,8 +340,8 @@ async def get_metadata(): return jsonify({'error': 'Failed to resolve secondary node data'}), 502 # Process secondary lookups - prov_map = {} - linked_prov_dcids = set() + prov_map: dict[str, dict[str, Any]] = {} + linked_prov_dcids: set[str] = set() if 'data' in prov_res: for dcid, node_data in prov_res['data'].items(): @@ -345,7 
+358,7 @@ async def get_metadata(): if 'dcid' in n: linked_prov_dcids.add(n['dcid']) - linked_names_map = {} + linked_names_map: dict[str, str] = {} if linked_prov_dcids: try: linked_names_resp = await asyncio.to_thread(dc.v2node, @@ -358,12 +371,12 @@ async def get_metadata(): except Exception: logging.exception("Failed to resolve linked provenance names") - mm_map = { + mm_map: dict[str, str] = { mm: _get_arc_nodes(mm_res, mm, 'description')[0].get('value') for mm in measurement_methods if _get_arc_nodes(mm_res, mm, 'description') } - unit_map = { + unit_map: dict[str, str] = { u: _get_arc_nodes(unit_res, u, 'name')[0].get('value') for u in units if _get_arc_nodes(unit_res, u, 'name') From 73e04fc0f5c8622a0ca194920c9d45a909814d55 Mon Sep 17 00:00:00 2001 From: Nick Blumberg Date: Thu, 12 Mar 2026 17:05:17 -0700 Subject: [PATCH 09/14] Update the scatter tile so that all plotted entities are collated into entities, rather than just the parent --- static/js/components/tiles/scatter_tile.tsx | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/static/js/components/tiles/scatter_tile.tsx b/static/js/components/tiles/scatter_tile.tsx index 29e2ca93a9..b9c5dfc1d0 100644 --- a/static/js/components/tiles/scatter_tile.tsx +++ b/static/js/components/tiles/scatter_tile.tsx @@ -155,6 +155,19 @@ export function ScatterTile(props: ScatterTilePropType): ReactElement { >(null); const [isLoading, setIsLoading] = useState(true); const { shouldLoad, containerRef } = useLazyLoad(props.lazyLoadMargin); + + const entities = useMemo( + () => + scatterChartData + ? Array.from( + new Set( + Object.values(scatterChartData.points).map((p) => p.place.dcid) + ) + ) + : [], + [scatterChartData] + ); + /* TODO: (nick-next) destructure the props similarly to highlight to allow a complete dependency array. 
@@ -250,7 +263,7 @@ export function ScatterTile(props: ScatterTilePropType): ReactElement { getObservationSpecs={getObservationSpecs} errorMsg={scatterChartData && scatterChartData.errorMsg} id={props.id} - entities={[props.place.dcid]} + entities={entities} isInitialLoading={_.isNull(scatterChartData)} isLoading={isLoading} replacementStrings={getReplacementStrings(props, scatterChartData)} From 6663c89e90416926328185dc3271599942bc75d3 Mon Sep 17 00:00:00 2001 From: Nick Blumberg Date: Thu, 12 Mar 2026 17:15:18 -0700 Subject: [PATCH 10/14] Move the max depth stopgap to a constant and increased it (10 seemed a little too small). --- server/routes/shared_api/metadata.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/routes/shared_api/metadata.py b/server/routes/shared_api/metadata.py index 97fabbfd74..fbefc5ec72 100644 --- a/server/routes/shared_api/metadata.py +++ b/server/routes/shared_api/metadata.py @@ -26,6 +26,8 @@ bp = Blueprint("metadata", __name__, url_prefix='/api/shared/metadata') +MAX_CATEGORY_DEPTH = 50 + MEASUREMENT_METHODS_SUPPRESSION_PROVENANCES: set[str] = {"WikipediaStatsData"} @@ -75,7 +77,7 @@ async def fetch_categories_async(stat_vars: list[str]) -> dict[str, list[str]]: visited = set() depth = 0 - while current_nodes and depth < 10: + while current_nodes and depth < MAX_CATEGORY_DEPTH: visited.update(current_nodes) member_task = asyncio.to_thread(dc.v2node, list(current_nodes), From aa405cff91098943be445e3b453117a96929ee13 Mon Sep 17 00:00:00 2001 From: Nick Blumberg Date: Fri, 13 Mar 2026 13:15:05 -0700 Subject: [PATCH 11/14] Add safety check on license type get. 
--- server/routes/shared_api/metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/routes/shared_api/metadata.py b/server/routes/shared_api/metadata.py index fbefc5ec72..181edba457 100644 --- a/server/routes/shared_api/metadata.py +++ b/server/routes/shared_api/metadata.py @@ -197,8 +197,8 @@ def _build_metadata_payload( resolved_unit = (unit_map.get(unit) or unit.replace('_', ' ')) if unit else unit license_name = _get_node_name(pdata['licenseType'], linked_names_map) - license_dcid = pdata['licenseType'][0].get('dcid') if pdata.get( - 'licenseType') else None + license_dcid = pdata['licenseType'][0].get( + 'dcid') if pdata.get('licenseType') and pdata['licenseType'] else None metadata_map[sv].append({ 'statVarId': From 6940c22308ddf73f82b824e8d271449a832d36e2 Mon Sep 17 00:00:00 2001 From: Nick Blumberg Date: Fri, 13 Mar 2026 13:18:02 -0700 Subject: [PATCH 12/14] Remove unused deconstructed variable. --- server/routes/shared_api/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/routes/shared_api/metadata.py b/server/routes/shared_api/metadata.py index 181edba457..242d467065 100644 --- a/server/routes/shared_api/metadata.py +++ b/server/routes/shared_api/metadata.py @@ -346,7 +346,7 @@ async def get_metadata() -> tuple[Response, int] | Response: linked_prov_dcids: set[str] = set() if 'data' in prov_res: - for dcid, node_data in prov_res['data'].items(): + for dcid in prov_res['data'].items(): prov_map[dcid] = { 'source': _get_arc_nodes(prov_res, dcid, 'source'), 'isPartOf': _get_arc_nodes(prov_res, dcid, 'isPartOf'), From 963800e1c951e93cfa61c9ddc7cbd4adca365d66 Mon Sep 17 00:00:00 2001 From: Nick Blumberg Date: Sun, 15 Mar 2026 20:58:42 -0700 Subject: [PATCH 13/14] Remove stray items that was causing issues. 
--- server/routes/shared_api/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/routes/shared_api/metadata.py b/server/routes/shared_api/metadata.py index 242d467065..6ee9c6d660 100644 --- a/server/routes/shared_api/metadata.py +++ b/server/routes/shared_api/metadata.py @@ -346,7 +346,7 @@ async def get_metadata() -> tuple[Response, int] | Response: linked_prov_dcids: set[str] = set() if 'data' in prov_res: - for dcid in prov_res['data'].items(): + for dcid in prov_res['data']: prov_map[dcid] = { 'source': _get_arc_nodes(prov_res, dcid, 'source'), 'isPartOf': _get_arc_nodes(prov_res, dcid, 'isPartOf'), From 434746077da39cdbed9f083dbaa40a24e0e2889c Mon Sep 17 00:00:00 2001 From: Nick Blumberg Date: Mon, 16 Mar 2026 13:18:42 -0700 Subject: [PATCH 14/14] Remove the "shared" from the route, refactor the traverse to top function out from a closure to a separate top-level function, and more thoroughly document the more complex areas of the metadata endpoint. --- server/routes/shared_api/metadata.py | 76 ++++++++++++++----- .../tools/shared/metadata/metadata_fetcher.ts | 4 +- 2 files changed, 60 insertions(+), 20 deletions(-) diff --git a/server/routes/shared_api/metadata.py b/server/routes/shared_api/metadata.py index 6ee9c6d660..71444a62db 100644 --- a/server/routes/shared_api/metadata.py +++ b/server/routes/shared_api/metadata.py @@ -24,10 +24,14 @@ from server.services import datacommons as dc -bp = Blueprint("metadata", __name__, url_prefix='/api/shared/metadata') +bp = Blueprint("metadata", __name__, url_prefix='/api/metadata') +# Limits the recursion when traversing parent hierarchies (memberOf/specializationOf) +# to prevent infinite loops or excessive API calls in deep graphs. MAX_CATEGORY_DEPTH = 50 +# A list of specific provenance DCIDs where the 'measurementMethod' attribute +# should be hidden, because it is flawed or not meaningful. 
MEASUREMENT_METHODS_SUPPRESSION_PROVENANCES: set[str] = {"WikipediaStatsData"} @@ -70,13 +74,57 @@ def _extract_active_facets( return list(set(active_facets)) +def _traverse_to_top_category(node: str, parent_map: dict[str, list[str]], + visited: set[str], top_nodes: set[str], + original_sv: str) -> None: + """Recursively traces paths from a node to its top-level ancestors.""" + if node in visited: + return + visited.add(node) + + parents = parent_map.get(node, []) + valid_parents = [p for p in parents if p != 'dc/g/Root'] + + if not valid_parents: + # If the node is not the starting SV, it's a top-level category + # This is for the case where a stat var does not have a category, where + # it should not itself be considered a category. + if node != original_sv: + top_nodes.add(node) + else: + for p in valid_parents: + _traverse_to_top_category(p, parent_map, visited, top_nodes, original_sv) + + async def fetch_categories_async(stat_vars: list[str]) -> dict[str, list[str]]: - """Traverses the category hierarchy tree up to top-level topics.""" + """Traverses the category hierarchy tree up to top-level topics. + + This function identifies the categories (top-level topics) associated with a list + of Statistical Variables. It returns a mapping where each key is a stat_var + DCID and the value is a list of human-readable names of its top-level parents. + + The implementation uses a two-stage traversal: + 1. Breadth-First Search (BFS): Iteratively climbs the 'memberOf' and + 'specializationOf' arcs across all input variables simultaneously to + map the parent hierarchy. + 2. Depth-First Search (DFS): Performed locally on the resulting parent_map + to trace individual paths from each stat_var to its root-level ancestors + (excluding the generic 'dc/g/Root'). + + Args: + stat_vars: A list of Statistical Variable DCIDs. + + Returns: + A dictionary mapping stat_var DCIDs to a list of display names for their + top-level categories. 
+   """ parent_map = collections.defaultdict(list) current_nodes = set(stat_vars) visited = set() depth = 0 + # Progressively fetch parent nodes level-by-level (BFS). + # This batches v2node calls by depth to minimize network round-trips. while current_nodes and depth < MAX_CATEGORY_DEPTH: visited.update(current_nodes) @@ -114,27 +162,19 @@ async def fetch_categories_async(stat_vars: list[str]) -> dict[str, list[str]]: sv_top_levels = collections.defaultdict(list) all_top_level_dcids = set() + # Traverse individual paths from each stat_var to its top-level categories. + # Using the parent_map built above, we resolve which top-level topics + # each variable eventually rolls up to. for sv in stat_vars: tops = set() - def traverse(n: str, curr_visited: set[str]) -> None: - if n in curr_visited: - return - curr_visited.add(n) - parents = parent_map.get(n, []) - valid_parents = [p for p in parents if p != 'dc/g/Root'] - - if not valid_parents: - if n != sv: - tops.add(n) - else: - for p in valid_parents: - traverse(p, curr_visited) - - traverse(sv, set()) + _traverse_to_top_category(sv, parent_map, set(), tops, sv) sv_top_levels[sv] = list(tops) all_top_level_dcids.update(tops) + # Resolve human-readable names for the top-level categories. + # If a name isn't found in the Knowledge Graph, we fall back to a + # simplified version of the DCID. 
category_map: dict[str, list[str]] = {} if all_top_level_dcids: parent_name_resp = await asyncio.to_thread(dc.v2node, @@ -148,6 +188,8 @@ def traverse(n: str, curr_visited: set[str]) -> None: for sv in stat_vars: category_map[sv] = [ + # Use the official name if available; otherwise, extract the last + # chunk of the DCID (if it contains multiple parts delimited by slashes) parent_name_map.get(p) or p.split('/')[-1] for p in sv_top_levels.get(sv, []) ] diff --git a/static/js/tools/shared/metadata/metadata_fetcher.ts b/static/js/tools/shared/metadata/metadata_fetcher.ts index a705cf7ca0..e250a05fc0 100644 --- a/static/js/tools/shared/metadata/metadata_fetcher.ts +++ b/static/js/tools/shared/metadata/metadata_fetcher.ts @@ -533,8 +533,6 @@ export async function fetchMetadata( /** * Function to fetch comprehensive metadata for a list of entities and stat vars. - * This version utilizes a consolidated backend API endpoint that contains no - * V1 calls. * * @param entities - Array of entity DCIDs to fetch metadata for * @param statVarSet - Set of stat var DCIDs to fetch metadata for @@ -567,7 +565,7 @@ export async function fetchMetadataV2( } } - const response = await fetch(`${apiRoot || ""}/api/shared/metadata`, { + const response = await fetch(`${apiRoot || ""}/api/metadata`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({