|
| 1 | +""" |
| 2 | +Generate files of global ID descriptors for testing and development |
| 3 | +""" |
| 4 | + |
| 5 | +import os |
| 6 | +from typing import Optional |
| 7 | +from pprint import pformat |
| 8 | +import logging |
| 9 | + |
| 10 | +import pandas |
| 11 | + |
| 12 | +from d3b_api_client_cli.config import ( |
| 13 | + config, FhirResourceType, ROOT_FAKE_DATA_DIR |
| 14 | +) |
| 15 | + |
# Mapping loaded from the faker section of the project config. It is looked up
# by FHIR resource type name; each entry is expected to expose `resource_type`
# and `id_prefix` attributes (both are read by generate_global_id_file).
FHIR_RESOURCE_TYPES: dict = config["faker"]["global_id"]["fhir_resource_types"]
# Resource type name used when the caller does not pass one.
DEFAULT_FHIR_RESOURCE_TYPE: str = "DocumentReference"

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
| 20 | + |
| 21 | + |
def generate_global_id_file(
    fhir_resource_type: Optional[str] = DEFAULT_FHIR_RESOURCE_TYPE,
    with_global_ids: Optional[bool] = True,
    total_rows: Optional[int] = 10,
    output_dir: Optional[str] = None
) -> str:
    """
    Generate a csv file with global IDs and descriptors

    Options:
        - fhir_resource_type: the FHIR resource type and global ID prefix
        to populate the file with. Must be a key of FHIR_RESOURCE_TYPES

        - with_global_ids: Whether or not to include a column for global IDs
        if global IDs are not included and this file is used in
        upsert_global_descriptors, then new global IDs will be created by
        Dewrangle

        - total_rows: Number of rows to generate. When 0, a file containing
        only the header row is written

        - output_dir: Directory to write the file to. Created if it does not
        exist. Defaults to ROOT_FAKE_DATA_DIR when not provided

    Returns:
        Path to file

    Raises:
        ValueError: if fhir_resource_type is not a key of FHIR_RESOURCE_TYPES
    """
    logger.info(
        "🏭 Generating %s rows for fake global ID descriptors file",
        total_rows
    )

    # Validate before touching the filesystem so that a bad resource type
    # fails with a clear message instead of an AttributeError on None.
    resource_config = FHIR_RESOURCE_TYPES.get(fhir_resource_type)
    if resource_config is None:
        supported = ", ".join(str(key) for key in FHIR_RESOURCE_TYPES)
        raise ValueError(
            f"Unsupported FHIR resource type {fhir_resource_type!r}. "
            f"Supported types: {supported}"
        )

    if not output_dir:
        output_dir = ROOT_FAKE_DATA_DIR
    os.makedirs(output_dir, exist_ok=True)

    # These values are the same for every row, so look them up once.
    resource_type = resource_config.resource_type
    id_prefix = resource_config.id_prefix

    # Explicit column order (same as the per-row dict key order) so the CSV
    # header is written even when no rows are generated.
    columns = ["fhirResourceType", "descriptor"]
    if with_global_ids:
        columns.append("globalId")

    data = []
    for i in range(total_rows):
        row = {
            "fhirResourceType": resource_type,
            "descriptor": f"{resource_type}-{i}",
        }
        if with_global_ids:
            row["globalId"] = f"{id_prefix}-{i}000"
        data.append(row)

        # Logged per row, inside the loop, so that `row` is always bound
        # (it would be undefined after the loop when total_rows is 0).
        logger.info("Wrote %s to file", pformat(row))

    df = pandas.DataFrame(data, columns=columns)

    filepath = os.path.join(output_dir, "fake_global_descriptors.csv")
    df.to_csv(filepath, index=False)

    logger.info(
        "✅ Completed writing global ID descriptors to %s", filepath
    )

    return filepath
0 commit comments