Skip to content

Commit e39e671

Browse files
committed
Update categorization of geo_types
1 parent 41a1f52 commit e39e671

File tree

3 files changed

+30
-15
lines changed

3 files changed

+30
-15
lines changed

src/acquisition/rvdss/run.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import argparse
99
from datetime import datetime
1010

11-
from delphi.epidata.acquisition.rvdss.utils import fetch_current_dashboard_data, check_most_recent_update_date,get_dashboard_update_date, combine_tables, duplicate_provincial_detections,expand_detections_columns
11+
from delphi.epidata.acquisition.rvdss.utils import create_geo_types, abbreviate_geo, fetch_current_dashboard_data, check_most_recent_update_date,get_dashboard_update_date, combine_tables, duplicate_provincial_detections,expand_detections_columns
1212
from delphi.epidata.acquisition.rvdss.constants import DASHBOARD_BASE_URL, RESP_DETECTIONS_OUTPUT_FILE, POSITIVE_TESTS_OUTPUT_FILE,UPDATE_DATES_FILE
1313
from delphi.epidata.acquisition.rvdss.pull_historic import fetch_report_data,fetch_historical_dashboard_data
1414
from delphi.epidata.acquisition.rvdss.database import update
@@ -88,6 +88,8 @@ def patch_seasons(season_start_years,logger):
8888

8989
if start >=2024:
9090
data = pd.read_csv(f"{start}_{end}_respiratory_detections.csv")
91+
data['geo_value'] = [abbreviate_geo(g) for g in data['geo_value']]
92+
data["geo_type"] = [create_geo_types(g,"lab") for g in data["geo_value"]]
9193

9294
# current dashboard only needs one table
9395
new_data = expand_detections_columns(data)
@@ -98,7 +100,10 @@ def patch_seasons(season_start_years,logger):
98100
update(new_data,logger)
99101
else:
100102
resp_data = pd.read_csv(f"{start}_{end}_respiratory_detections.csv")
103+
resp_data['geo_value'] = [abbreviate_geo(g) for g in resp_data['geo_value']]
104+
101105
pos_data = pd.read_csv(f"{start}_{end}_positive_tests.csv")
106+
102107
data_dict={"positive":pos_data,"respiratory_detection":resp_data}
103108

104109
# Combine all tables into a single table

src/acquisition/rvdss/utils.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ def create_geo_types(geo,default_geo):
5151
lowercase_geo = geo.lower()
5252
if lowercase_geo in NATION:
5353
geo_type="nation"
54+
elif geo in PROVINCES:
55+
geo_type="province"
5456
elif geo in REGIONS:
5557
geo_type="region"
5658
else:
@@ -181,7 +183,7 @@ def get_positive_data(base_url,headers,update_date):
181183
df['province'] = [abbreviate_geo(g) for g in df['province']]
182184
df=df.rename(columns={'province':"geo_value",'date':'time_value',"detections":"positivetests"})
183185
df['time_value'] = [check_date_format(d) for d in df['time_value']]
184-
df['geo_type'] = [create_geo_types(g,"province") for g in df['geo_value']]
186+
df['geo_type'] = [create_geo_types(g,"lab") for g in df['geo_value']]
185187
df.insert(1,"issue",update_date)
186188
df['region'] = [abbreviate_geo(g) for g in df['region']]
187189

@@ -304,12 +306,19 @@ def expand_detections_columns(new_data):
304306
return(new_data.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value']))
305307

306308
def duplicate_provincial_detections(data):
309+
'''
310+
The data has geo_type = lab for all labs, geo_type = province for all provinces and territories
311+
and geo_type = region for prairies, territories and atlantic, but ontario, quebec and bc are
312+
both regions and provinces, so duplicate the data for those three geos with the geo_type = region
313+
for completeness
314+
'''
307315
dat = data.copy(deep=True)
308316
dat = dat.reset_index()
309317

310318
# provincial data
311-
dat.loc[dat['geo_value'].isin(PROVINCES),'geo_type'] = "province"
312-
provincial_detections = dat.loc[dat['geo_value'].isin(PROVINCES)]
319+
provincial_regions = ['bc','on','qc']
320+
dat.loc[dat['geo_value'].isin(provincial_regions),'geo_type'] = "region"
321+
provincial_detections = dat.loc[dat['geo_value'].isin(provincial_regions)]
313322

314323
if not provincial_detections.empty:
315324
#provincial_detections['geo_type']="province"
@@ -329,14 +338,14 @@ def combine_tables(data_dict):
329338
positive["epiweek"] = pd.to_numeric(positive["epiweek"],downcast="integer")
330339
positive["time_value"] = pd.to_datetime(positive["time_value"])
331340
positive["issue"] = pd.to_datetime(positive["issue"])
341+
positive['geo_type'] = [create_geo_types(g,'lab') for g in positive['geo_value']]
332342

333343
detections["epiweek"] = pd.to_numeric(detections["epiweek"],downcast="integer")
334344
detections["time_value"] = pd.to_datetime(detections["time_value"])
335345
detections["issue"] = pd.to_datetime(detections["issue"])
346+
detections['geo_type'] = [create_geo_types(g,'lab') for g in detections['geo_value']]
336347

337348
detections = expand_detections_columns(detections)
338-
positive = positive.drop(['geo_type'], axis=1)
339-
positive['geo_type'] = [create_geo_types(g,'lab') for g in positive['geo_value']]
340349
positive=positive.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])
341350

342351
positive = positive.fillna(np.nan)

tests/acquisition/rvdss/test_utils.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,9 @@
281281
'issue': list(np.repeat(np.array([pd.to_datetime("2012-09-10"),
282282
pd.to_datetime("2012-09-11")]),
283283
[13, 3], axis=0)),
284-
'geo_type':["lab","lab","lab","lab","lab","lab","lab","lab","lab","lab","lab",
285-
"lab","lab","lab","lab","lab"],
284+
'geo_type':["province","province","province","province","province","province","province",
285+
"province","province","province","province","province","province","nation",
286+
"lab","region"],
286287
'geo_value':['nl','pe','ns','nb','qc','on','mb','sk','ab','bc','yt','nt','nu','ca','phol-toronto',
287288
'atlantic'],
288289
'adv_positive_tests': [1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9],
@@ -306,12 +307,11 @@
306307
pd.to_datetime("2012-09-11"),
307308
pd.to_datetime("2012-09-10")]),
308309
[13, 3,13], axis=0)),
309-
'geo_type':["lab","lab","lab","lab","lab","lab","lab","lab","lab","lab","lab",
310-
"lab","lab","lab","lab","lab","province","province","province","province",
311-
"province","province","province","province","province","province","province",
312-
"province","province"],
310+
'geo_type':["province","province","province","province","province","province","province",
311+
"province","province","province","province","province","province","nation",
312+
"lab","region",'region','region','region'],
313313
'geo_value':['nl','pe','ns','nb','qc','on','mb','sk','ab','bc','yt','nt','nu','ca','phol-toronto',
314-
'atlantic','nl','pe','ns','nb','qc','on','mb','sk','ab','bc','yt','nt','nu'],
314+
'atlantic','qc','on','bc'],
315315
'adv_positive_tests': [1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,1,1,1,1,1,1,1,1,1,1,1,1,1],
316316
}).set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value']),
317317
pd.DataFrame({'epiweek': [1,2,3],
@@ -338,14 +338,15 @@ def test_abbreviate_geo(self):
338338
assert abbreviate_geo("british columbia") == "bc"
339339
assert abbreviate_geo("québec") == "qc" # recognise accents in provinces
340340
assert abbreviate_geo("Région Nord-Est") == "région nord est" # remove dashes, make lowercase
341-
assert abbreviate_geo("P.H.O.L. - Sault Ste. Marie") == "phol sault ste marie"
341+
assert abbreviate_geo("P.H.O.L. - Sault Ste. Marie") == "sault ste marie phl"
342342
assert abbreviate_geo("random lab") == "random lab" #unknown geos remain unchanged
343343
# only province names on their own should be abbreviated, not as part of a larger name
344344
assert abbreviate_geo("british columbia lab") == "british columbia lab"
345345

346346
def test_create_geo_types(self):
347347
assert create_geo_types("canada","lab") == "nation"
348-
assert create_geo_types("bc","lab") == "region"
348+
assert create_geo_types("bc","lab") == "province"
349+
assert create_geo_types("prairies","lab") == "region"
349350
assert create_geo_types("random lab","lab") == "lab"
350351
assert create_geo_types("Canada","province") == "nation"
351352

0 commit comments

Comments
 (0)