Skip to content

Commit 933c189

Browse files
committed
🌱 Generate fake descriptors file for testing
1 parent cee9287 commit 933c189

7 files changed

Lines changed: 186 additions & 0 deletions

File tree

d3b_api_client_cli/cli/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@
77
import click
88
from d3b_api_client_cli.cli.dewrangle import *
99
from d3b_api_client_cli.cli.postgres import *
10+
from d3b_api_client_cli.cli.faker import *
11+
12+
13+
# Command group that the fake data generation commands are attached to.
# The docstring below is rendered by click as the group's help text.
@click.group(name="faker")
def faker():
    """
    Group of lower level CLI commands related to generating fake data
    """
1018

1119

1220
@click.group()
@@ -35,6 +43,9 @@ def main():
3543
"""
3644

3745

46+
# Fake data commands
47+
faker.add_command(generate_global_id_file)
48+
3849
# Postgres API commands
3950
postgres.add_command(save_file_to_db)
4051

@@ -61,3 +72,4 @@ def main():
6172
# Add command groups to the root CLI
6273
main.add_command(dewrangle)
6374
main.add_command(postgres)
75+
main.add_command(faker)

d3b_api_client_cli/cli/dewrangle/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@
1010
from d3b_api_client_cli.cli.dewrangle.volume_commands import *
1111
from d3b_api_client_cli.cli.dewrangle.job_commands import *
1212
from d3b_api_client_cli.cli.dewrangle.billing_group_commands import *
13+
from d3b_api_client_cli.cli.dewrangle.global_id_commands import *
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""
2+
Package containing commands for fake data generation
3+
"""
4+
5+
from d3b_api_client_cli.cli.faker.global_id_commands import *
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
"""
2+
Commands to generate fake global ID descriptors
3+
"""
4+
import os
5+
import logging
6+
import click
7+
8+
from d3b_api_client_cli.config import (
9+
log, FHIR_RESOURCE_TYPES, FhirResourceType
10+
)
11+
from d3b_api_client_cli.faker.global_id import (
12+
generate_global_id_file as _generate_global_id_file
13+
)
14+
15+
logger = logging.getLogger(__name__)
16+
17+
DEFAULT_FHIR_RESOURCE_TYPE: FhirResourceType = FHIR_RESOURCE_TYPES["DocumentReference"]
18+
19+
20+
@click.command()
@click.option(
    "--output-dir",
    type=click.Path(exists=True, file_okay=False, dir_okay=True),
    help="Where the output file will be written",
)
@click.option(
    "--fhir-resource-type",
    default=DEFAULT_FHIR_RESOURCE_TYPE.resource_type,
    # Materialize the choices: a generator can only be iterated once, so it
    # would be empty the second time click needs the list of choices
    type=click.Choice(list(FHIR_RESOURCE_TYPES)),
    help="What the fhirResourceType column will be populated with",
)
@click.option(
    # Paired on/off flag: a lone flag that defaults to True can never be
    # switched off from the command line
    "--with-global-ids/--no-global-ids",
    default=True,
    help="Whether or not to generate a globalId column",
)
@click.option(
    "--total-rows",
    type=int,
    default=10,
    help="Total number of rows to generate",
)
def generate_global_id_file(
    total_rows, with_global_ids, fhir_resource_type, output_dir
):
    """
    Generate a CSV file of fake global ID descriptors for development
    and testing.

    The file always has the columns:
        fhirResourceType, descriptor

    Unless --no-global-ids is given, the file also has the column:
        globalId

    If --output-dir is not provided, the file is written to the default
    fake data directory.
    """
    # Configure logging before delegating so the generator's log output is
    # emitted
    log.init_logger()

    return _generate_global_id_file(
        fhir_resource_type,
        total_rows=total_rows,
        with_global_ids=with_global_ids,
        output_dir=output_dir,
    )

d3b_api_client_cli/config/__init__.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33
"""
44

55
import os
6+
from dataclasses import dataclass
67

78
from dotenv import find_dotenv, load_dotenv
89

910
# File paths and directories
1011
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname((__file__))))
1112
ROOT_DATA_DIR = os.path.join(ROOT_DIR, "data")
13+
ROOT_FAKE_DATA_DIR = os.path.join(ROOT_DATA_DIR, "fake_data")
1214
LOG_DIR = os.path.join(ROOT_DATA_DIR, "logs")
1315

1416
DOTENV_PATH = find_dotenv()
@@ -27,6 +29,23 @@
2729
DB_USER_PW = os.environ.get("DB_USER_PW")
2830

2931

32+
@dataclass
class FhirResourceType:
    """
    Pairs the name of a FHIR resource type with the prefix used for
    global IDs of that resource type
    """

    # Name of the FHIR resource type, e.g. "DocumentReference"
    resource_type: str
    # Global ID prefix for the resource type, e.g. "dr"
    id_prefix: str
40+
41+
42+
# Supported FHIR resource types, keyed by resource type name
FHIR_RESOURCE_TYPES: dict = {
    name: FhirResourceType(name, id_prefix)
    for name, id_prefix in (("DocumentReference", "dr"),)
}
47+
48+
3049
class SECRETS:
3150
"""
3251
Used in logger initialization to obfuscate sensitive env variables
@@ -69,6 +88,12 @@ def check_dewrangle_http_config():
6988
"credential_type": "AWS",
7089
"billing_group_id": os.environ.get("CAVATICA_BILLING_GROUP_ID"),
7190
},
91+
"faker": {
92+
"global_id": {
93+
"fhir_resource_types": FHIR_RESOURCE_TYPES
94+
}
95+
96+
},
7297
"aws": {
7398
"region": os.environ.get("AWS_DEFAULT_REGION") or "us-east-1",
7499
"s3": {
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""
2+
Package dedicated to generating fake data needed for development and testing
3+
"""
4+
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
"""
2+
Generate files of global ID descriptors for testing and development
3+
"""
4+
5+
import os
6+
from typing import Optional
7+
from pprint import pformat
8+
import logging
9+
10+
import pandas
11+
12+
from d3b_api_client_cli.config import (
13+
config, FhirResourceType, ROOT_FAKE_DATA_DIR
14+
)
15+
16+
FHIR_RESOURCE_TYPES: dict = config["faker"]["global_id"]["fhir_resource_types"]
17+
DEFAULT_FHIR_RESOURCE_TYPE: str = "DocumentReference"
18+
19+
logger = logging.getLogger(__name__)
20+
21+
22+
def generate_global_id_file(
    fhir_resource_type: Optional[str] = DEFAULT_FHIR_RESOURCE_TYPE,
    with_global_ids: Optional[bool] = True,
    total_rows: Optional[int] = 10,
    output_dir: Optional[str] = None
) -> str:
    """
    Generate a csv file with global IDs and descriptors

    Options:
        - fhir_resource_type: name of the FHIR resource type to populate
        the file with. It is also used to look up the global ID prefix.
        None falls back to the default resource type

        - with_global_ids: Whether or not to include a column for global IDs
        if global IDs are not included and this file is used in
        upsert_global_descriptors, then new global IDs will be created by
        Dewrangle

        - total_rows: Number of rows to generate

        - output_dir: directory to write the file to. It is created if it
        does not exist. Defaults to ROOT_FAKE_DATA_DIR

    Returns:
        Path to file

    Raises:
        ValueError if fhir_resource_type is not a supported resource type
    """
    if fhir_resource_type is None:
        fhir_resource_type = DEFAULT_FHIR_RESOURCE_TYPE

    # Validate before touching the filesystem so that bad input fails with
    # a clear message instead of an AttributeError on None further down
    resource = FHIR_RESOURCE_TYPES.get(fhir_resource_type)
    if resource is None:
        raise ValueError(
            f"Unknown FHIR resource type {fhir_resource_type!r}. "
            f"Supported types: {sorted(FHIR_RESOURCE_TYPES)}"
        )

    logger.info(
        "🏭 Generating %s rows for fake global ID descriptors file",
        total_rows
    )
    if not output_dir:
        output_dir = ROOT_FAKE_DATA_DIR
    os.makedirs(output_dir, exist_ok=True)

    # Fix the columns up front so the header is written even when zero
    # rows are requested
    columns = ["fhirResourceType", "descriptor"]
    if with_global_ids:
        columns.append("globalId")

    data = []
    for i in range(total_rows):
        row = {
            "fhirResourceType": resource.resource_type,
            "descriptor": f"{resource.resource_type}-{i}",
        }
        if with_global_ids:
            row["globalId"] = f"{resource.id_prefix}-{i}000"
        data.append(row)

        logger.info("Wrote %s to file", pformat(row))

    df = pandas.DataFrame(data, columns=columns)

    filepath = os.path.join(output_dir, "fake_global_descriptors.csv")
    df.to_csv(filepath, index=False)

    logger.info(
        "✅ Completed writing global ID descriptors to %s", filepath
    )

    return filepath

0 commit comments

Comments
 (0)