|
| 1 | +""" |
| 2 | +Defines command line interface for the rvdss indicator. Current data (covering the most recent epiweek) and historical data (covering all data before the most recent epiweek) can be generated together or separately. |
| 3 | +
|
| 4 | +Defines top-level functions to fetch data and save to disk or DB. |
| 5 | +""" |
| 6 | + |
1 | 7 | import pandas as pd |
| 8 | +import os |
| 9 | + |
| 10 | +from delphi.epidata.acquisition.rvdss.utils import get_weekly_data, get_revised_data, get_dashboard_update_date |
| 11 | +from delphi.epidata.acquisition.rvdss.constants import DASHBOARD_BASE_URL, RESP_DETECTIONS_OUTPUT_FILE, POSITIVE_TESTS_OUTPUT_FILE, COUNTS_OUTPUT_FILE |
| 12 | + |
| 13 | + |
def update_current_data():
    """Fetch the current dashboard tables and merge each into its CSV on disk.

    For every table type listed below, the table returned by
    ``fetch_dashboard_data`` is written to its output file under the base
    path. If the file does not exist yet it is created from the new table.
    If it already exists, the new rows are appended only when none of their
    index values are already present in the file; otherwise the file is left
    untouched, because the weekly fetch may already have been run.
    """
    ## TODO: what is the base path for these files?
    base_path = "."

    # NOTE(review): `fetch_dashboard_data` is neither defined nor imported in
    # the visible part of this file, and the meaning of the hard-coded 2024
    # argument is not shown here -- confirm both.
    data_dict = fetch_dashboard_data(DASHBOARD_BASE_URL, 2024)

    table_types = {
        "respiratory_detection": RESP_DETECTIONS_OUTPUT_FILE,
        "positive": POSITIVE_TESTS_OUTPUT_FILE,
        # "count": COUNTS_OUTPUT_FILE, # Dashboards don't contain this data.
    }
    index_cols = ['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value']

    for tt, output_file in table_types.items():
        # Look the table up by its type name. Indexing with the whole
        # `table_types` mapping raises TypeError (a dict is unhashable).
        data = data_dict[tt]

        # Write the tables to separate csvs
        path = os.path.join(base_path, output_file)

        # Since this function generates new data weekly, we need to combine it
        # with the existing data, if it exists.
        if not os.path.exists(path):
            data.to_csv(path, index=True)
            continue

        # Assumes `data` is indexed by the same columns as `index_cols`, so
        # the two indexes are comparable -- TODO confirm against the fetcher.
        old_data = pd.read_csv(path).set_index(index_cols)

        # If index already exists in the data on disk, don't add the new data
        # -- we may have already run the weekly data fetch.
        ## TODO: The check on index maybe should be stricter? Although we do
        ## deduplication upstream, so this probably won't find true duplicates
        if not data.index.isin(old_data.index).any():
            old_data = pd.concat([old_data, data], axis=0)
            old_data.to_csv(path, index=True)

        ## TODO: update_database(data)
14 | 45 |
|
15 | | -def update_current_data(start_date, end_date): |
16 | | - data = fetch_current_dashboard_data() |
17 | | - update_database(data) |
18 | 46 |
|
def update_historical_data():
    """Combine historical report and dashboard tables and write one CSV per type.

    ``fetch_report_data`` and ``fetch_historical_dashboard_data`` each return
    a list of dicts keyed by table type. For every table type listed below,
    the matching tables from the reports and then from the dashboards are
    concatenated row-wise and written to that type's output file under the
    base path. A table type for which no source provides a table is skipped,
    so no file is written for it.
    """
    ## TODO: what is the base path for these files?
    base_path = "."

    report_dict_list = fetch_report_data()
    dashboard_dict_list = fetch_historical_dashboard_data()

    table_types = {
        "respiratory_detection": RESP_DETECTIONS_OUTPUT_FILE,
        "positive": POSITIVE_TESTS_OUTPUT_FILE,
        "count": COUNTS_OUTPUT_FILE,
    }
    for tt, output_file in table_types.items():
        # Merge tables together from dashboards and reports for each table type.
        dashboard_data = [elem.get(tt, None) for elem in dashboard_dict_list]
        report_data = [elem.get(tt, None) for elem in report_dict_list]

        # Not every source carries every table type, so drop the missing
        # (None) entries. Reports come first, then dashboards.
        tables = [
            table for table in report_data + dashboard_data if table is not None
        ]
        if not tables:
            # pd.concat raises ValueError when given nothing to concatenate;
            # with no data for this table type there is nothing to write.
            continue

        # Python lists have no `.concat()` method; pandas provides the
        # row-wise concatenation of a list of tables.
        data = pd.concat(tables, axis=0)

        # Write the tables to separate csvs
        data.to_csv(os.path.join(base_path, output_file), index=True)

        ## TODO: update_database(data)
43 | 70 |
|
44 | 71 |
|
45 | 72 | def main(): |
|
0 commit comments