From 2fe689bd007e5aff78a29b80911dc4fef0f9e994 Mon Sep 17 00:00:00 2001 From: Joseph Lemaitre Date: Wed, 14 Dec 2022 17:55:55 +0100 Subject: [PATCH] black --line-length 120 . --exclude renv* --- batch/inference_job.py | 106 +-- batch/inference_job_status.py | 20 +- batch/launch_job.py | 19 +- gempyor_pkg/docs/integration_benchmark.ipynb | 52 +- gempyor_pkg/docs/integration_doc.ipynb | 36 +- .../src/gempyor/NPI/MultiTimeReduce.py | 55 +- gempyor_pkg/src/gempyor/NPI/Reduce.py | 53 +- .../src/gempyor/NPI/ReduceIntervention.py | 62 +- gempyor_pkg/src/gempyor/NPI/ReduceR0.py | 10 +- gempyor_pkg/src/gempyor/NPI/Stacked.py | 20 +- gempyor_pkg/src/gempyor/NPI/helpers.py | 4 +- gempyor_pkg/src/gempyor/compartments.py | 245 ++--- gempyor_pkg/src/gempyor/dev/dev_seir.py | 11 +- gempyor_pkg/src/gempyor/dev/steps.py | 736 ++++---------- gempyor_pkg/src/gempyor/file_paths.py | 14 +- gempyor_pkg/src/gempyor/interface.py | 98 +- gempyor_pkg/src/gempyor/outcomes.py | 218 ++--- gempyor_pkg/src/gempyor/parameters.py | 96 +- gempyor_pkg/src/gempyor/results.py | 24 +- gempyor_pkg/src/gempyor/seeding_ic.py | 83 +- gempyor_pkg/src/gempyor/seir.py | 48 +- gempyor_pkg/src/gempyor/setup.py | 79 +- gempyor_pkg/src/gempyor/simulate_outcome.py | 4 +- gempyor_pkg/src/gempyor/simulate_seir.py | 7 +- gempyor_pkg/src/gempyor/steps_rk4.py | 167 ++-- gempyor_pkg/src/gempyor/steps_source.py | 97 +- gempyor_pkg/src/gempyor/utils.py | 12 +- gempyor_pkg/tests/npi/test_npis.py | 48 +- .../tests/outcomes/make_seir_test_file.py | 8 +- gempyor_pkg/tests/outcomes/test_outcomes.py | 896 +++++------------- gempyor_pkg/tests/seir/dev_new_test.py | 21 +- gempyor_pkg/tests/seir/interface.ipynb | 16 +- gempyor_pkg/tests/seir/test_compartments.py | 4 +- gempyor_pkg/tests/seir/test_new_seir.py | 8 +- gempyor_pkg/tests/seir/test_parameters.py | 99 +- gempyor_pkg/tests/seir/test_seir.py | 146 +-- scripts/clean_s3.py | 4 +- scripts/copy_for_continuation.py | 8 +- scripts/csv_to_csr.py | 16 +- scripts/fast_report.py | 4 +- scripts/merge_past_dynamics.py | 8 +- scripts/quantile_summarize_geoid_level.py | 23 +- slurm_batch/inference_job.py | 62 +- test/run_tests.py | 58 +- 44 files changed, 1048 insertions(+), 2757 deletions(-) diff --git a/batch/inference_job.py b/batch/inference_job.py index 33af4251f..36a2b4994 100755 --- a/batch/inference_job.py +++ b/batch/inference_job.py @@ -167,13 +167,12 @@ @click.option( "--reset-chimerics-on-global-accept", "--reset-chimerics-on-global-accept", - "reset_chimerics", - envvar="COVID_RESET_CHIMERICS", - type=bool, - default=True, - help="Flag determining whether to reset chimeric values on any global acceptances", + "reset_chimerics", + envvar="COVID_RESET_CHIMERICS", + type=bool, + default=True, + help="Flag determining whether to reset chimeric values on any global acceptances", ) - def launch_batch( config_file, run_id, @@ -192,7 +191,7 @@ def launch_batch( resume_discard_seeding, max_stacked_interventions, last_validation_date, - reset_chimerics + reset_chimerics, ): config = None @@ -211,9 +210,7 @@ def launch_batch( if "filtering" in config: config["filtering"]["simulations_per_slot"] = sims_per_job if not os.path.exists(config["filtering"]["data_path"]): - print( - f"ERROR: filtering.data_path path {config['filtering']['data_path']} does not exist!" 
- ) + print(f"ERROR: filtering.data_path path {config['filtering']['data_path']} does not exist!") return 1 else: print(f"WARNING: no filtering section found in {config_file}!") @@ -258,9 +255,7 @@ def autodetect_params(config, *, num_jobs=None, sims_per_job=None, num_blocks=No return (num_jobs, sims_per_job, num_blocks) if "filtering" not in config or "simulations_per_slot" not in config["filtering"]: - raise click.UsageError( - "filtering::simulations_per_slot undefined in config, can't autodetect parameters" - ) + raise click.UsageError("filtering::simulations_per_slot undefined in config, can't autodetect parameters") sims_per_slot = int(config["filtering"]["simulations_per_slot"]) if num_jobs is None: @@ -270,17 +265,10 @@ def autodetect_params(config, *, num_jobs=None, sims_per_job=None, num_blocks=No if sims_per_job is None: if num_blocks is not None: sims_per_job = int(math.ceil(sims_per_slot / num_blocks)) - print( - f"Setting number of blocks to {num_blocks} [via num_blocks (-k) argument]" - ) - print( - f"Setting sims per job to {sims_per_job} [via {sims_per_slot} simulations_per_slot in config]" - ) + print(f"Setting number of blocks to {num_blocks} [via num_blocks (-k) argument]") + print(f"Setting sims per job to {sims_per_job} [via {sims_per_slot} simulations_per_slot in config]") else: - geoid_fname = ( - pathlib.Path(config["spatial_setup"]["base_path"]) - / config["spatial_setup"]["geodata"] - ) + geoid_fname = pathlib.Path(config["spatial_setup"]["base_path"]) / config["spatial_setup"]["geodata"] with open(geoid_fname) as geoid_fp: num_geoids = sum(1 for line in geoid_fp) @@ -298,9 +286,7 @@ def autodetect_params(config, *, num_jobs=None, sims_per_job=None, num_blocks=No if num_blocks is None: num_blocks = int(math.ceil(sims_per_slot / sims_per_job)) - print( - f"Setting number of blocks to {num_blocks} [via {sims_per_slot} simulations_per_slot in config]" - ) + print(f"Setting number of blocks to {num_blocks} [via {sims_per_slot} simulations_per_slot in config]") return (num_jobs, sims_per_job, num_blocks) @@ -312,9 +298,7 @@ def get_job_queues(job_queue_prefix): for q in resp["jobQueues"]: queue_name = q["jobQueueName"] if queue_name.startswith(job_queue_prefix): - job_list_resp = batch_client.list_jobs( - jobQueue=queue_name, jobStatus="PENDING" - ) + job_list_resp = batch_client.list_jobs(jobQueue=queue_name, jobStatus="PENDING") queues_with_jobs[queue_name] = len(job_list_resp["jobSummaryList"]) # Return the least-loaded queues first return sorted(queues_with_jobs, key=queues_with_jobs.get) @@ -363,9 +347,7 @@ def launch(self, job_name, config_file, scenarios, p_death_names, job_queues): manifest["cmd"] = " ".join(sys.argv[:]) manifest["job_name"] = job_name manifest["data_sha"] = subprocess.getoutput("git rev-parse HEAD") - manifest["csp_sha"] = subprocess.getoutput( - "cd COVIDScenarioPipeline; git rev-parse HEAD" - ) + manifest["csp_sha"] = subprocess.getoutput("cd COVIDScenarioPipeline; git rev-parse HEAD") # Prepare to tar up the current directory, excluding any dvc outputs, so it # can be shipped to S3 @@ -386,25 +368,17 @@ def launch(self, job_name, config_file, scenarios, p_death_names, job_queues): elif q == "sample_data": for r in os.listdir("COVIDScenarioPipeline/sample_data"): if r != "united-states-commutes": - tar.add( - os.path.join( - "COVIDScenarioPipeline", "sample_data", r - ) - ) + tar.add(os.path.join("COVIDScenarioPipeline", "sample_data", r)) elif not (p.startswith(".") or p.endswith("tar.gz") or p in self.outputs): tar.add( p, - filter=lambda 
x: None - if os.path.basename(x.name).startswith(".") - else x, + filter=lambda x: None if os.path.basename(x.name).startswith(".") else x, ) tar.close() # Upload the tar'd contents of this directory and the runner script to S3 runner_script_name = f"{job_name}-runner.sh" - local_runner_script = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "inference_runner.sh" - ) + local_runner_script = os.path.join(os.path.dirname(os.path.realpath(__file__)), "inference_runner.sh") s3_client = boto3.client("s3") s3_client.upload_file(local_runner_script, self.s3_bucket, runner_script_name) s3_client.upload_file(tarfile_name, self.s3_bucket, tarfile_name) @@ -413,15 +387,11 @@ def launch(self, job_name, config_file, scenarios, p_death_names, job_queues): # Save the manifest file to S3 with open("manifest.json", "w") as f: json.dump(manifest, f, indent=4) - s3_client.upload_file( - "manifest.json", self.s3_bucket, f"{job_name}/manifest.json" - ) + s3_client.upload_file("manifest.json", self.s3_bucket, f"{job_name}/manifest.json") # Create job to copy output to appropriate places copy_script_name = f"{job_name}-copy.sh" - local_runner_script = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "inference_copy.sh" - ) + local_runner_script = os.path.join(os.path.dirname(os.path.realpath(__file__)), "inference_copy.sh") s3_client.upload_file(local_runner_script, self.s3_bucket, copy_script_name) # Prepare and launch the num_jobs via AWS Batch. @@ -462,15 +432,11 @@ def launch(self, job_name, config_file, scenarios, p_death_names, job_queues): cur_env_vars = base_env_vars.copy() cur_env_vars.append({"name": "COVID_SCENARIOS", "value": s}) cur_env_vars.append({"name": "COVID_DEATHRATES", "value": d}) - cur_env_vars.append( - {"name": "COVID_PREFIX", "value": f"{config['name']}/{s}/{d}"} - ) + cur_env_vars.append({"name": "COVID_PREFIX", "value": f"{config['name']}/{s}/{d}"}) cur_env_vars.append({"name": "COVID_BLOCK_INDEX", "value": "1"}) cur_env_vars.append({"name": "COVID_RUN_INDEX", "value": f"{self.run_id}"}) if not (self.restart_from_s3_bucket is None): - cur_env_vars.append( - {"name": "S3_LAST_JOB_OUTPUT", "value": self.restart_from_s3_bucket} - ) + cur_env_vars.append({"name": "S3_LAST_JOB_OUTPUT", "value": self.restart_from_s3_bucket}) cur_env_vars.append( { "name": "COVID_OLD_RUN_INDEX", @@ -501,24 +467,12 @@ def launch(self, job_name, config_file, scenarios, p_death_names, job_queues): cur_env_vars = base_env_vars.copy() cur_env_vars.append({"name": "COVID_SCENARIOS", "value": s}) cur_env_vars.append({"name": "COVID_DEATHRATES", "value": d}) - cur_env_vars.append( - {"name": "COVID_PREFIX", "value": f"{config['name']}/{s}/{d}"} - ) - cur_env_vars.append( - {"name": "COVID_BLOCK_INDEX", "value": f"{block_idx+1}"} - ) - cur_env_vars.append( - {"name": "COVID_RUN_INDEX", "value": f"{self.run_id}"} - ) - cur_env_vars.append( - {"name": "COVID_OLD_RUN_INDEX", "value": f"{self.run_id}"} - ) - cur_env_vars.append( - {"name": "S3_LAST_JOB_OUTPUT", "value": f"{results_path}/"} - ) - cur_env_vars.append( - {"name": "JOB_NAME", "value": f"{cur_job_name}_block{block_idx}"} - ) + cur_env_vars.append({"name": "COVID_PREFIX", "value": f"{config['name']}/{s}/{d}"}) + cur_env_vars.append({"name": "COVID_BLOCK_INDEX", "value": f"{block_idx+1}"}) + cur_env_vars.append({"name": "COVID_RUN_INDEX", "value": f"{self.run_id}"}) + cur_env_vars.append({"name": "COVID_OLD_RUN_INDEX", "value": f"{self.run_id}"}) + cur_env_vars.append({"name": "S3_LAST_JOB_OUTPUT", "value": f"{results_path}/"}) + 
cur_env_vars.append({"name": "JOB_NAME", "value": f"{cur_job_name}_block{block_idx}"}) cur_job = batch_client.submit_job( jobName=f"{cur_job_name}_block{block_idx}", jobQueue=cur_job_queue, @@ -567,10 +521,8 @@ def launch(self, job_name, config_file, scenarios, p_death_names, job_queues): ) if not (self.restart_from_s3_bucket is None): - print( - f"Resuming from run id is {self.restart_from_run_id} located in {self.restart_from_s3_bucket}" - ) - if (self.resume_discard_seeding): + print(f"Resuming from run id is {self.restart_from_run_id} located in {self.restart_from_s3_bucket}") + if self.resume_discard_seeding: print(f"Discarding seeding results") print(f"Final output will be: {results_path}/model_output/") print(f"Run id is {self.run_id}") diff --git a/batch/inference_job_status.py b/batch/inference_job_status.py index 739fcb027..e73f7fdfb 100755 --- a/batch/inference_job_status.py +++ b/batch/inference_job_status.py @@ -7,9 +7,7 @@ def process_child_jobs(parent_job_id, tracker, next_token=None): if next_token is not None: - child_jobs = batch.list_jobs( - arrayJobId=parent_job_id, jobStatus="RUNNING", nextToken=next_token - ) + child_jobs = batch.list_jobs(arrayJobId=parent_job_id, jobStatus="RUNNING", nextToken=next_token) else: child_jobs = batch.list_jobs(arrayJobId=parent_job_id, jobStatus="RUNNING") tracker["RUNNING"] = tracker.get("RUNNING", 0) + len(child_jobs["jobSummaryList"]) @@ -18,9 +16,7 @@ def process_child_jobs(parent_job_id, tracker, next_token=None): def process_parent_jobs(job_queue, parent_tracker, next_token=None): if next_token is not None: - parent_jobs = batch.list_jobs( - jobQueue=job_queue, jobStatus="PENDING", nextToken=next_token - ) + parent_jobs = batch.list_jobs(jobQueue=job_queue, jobStatus="PENDING", nextToken=next_token) else: parent_jobs = batch.list_jobs(jobQueue=job_queue, jobStatus="PENDING") @@ -29,16 +25,10 @@ def process_parent_jobs(job_queue, parent_tracker, next_token=None): tracker = {} next_child_token = process_child_jobs(job["jobId"], tracker) while next_child_token is not None: - next_child_token = process_child_jobs( - job["jobId"], tracker, next_child_token - ) + next_child_token = process_child_jobs(job["jobId"], tracker, next_child_token) if tracker["RUNNING"]: - print( - f"Parent job {job['jobName']} had {tracker['RUNNING']} running child jobs." 
- ) - parent_tracker["CHILD_JOBS"] = ( - parent_tracker.get("CHILD_JOBS", 0) + tracker["RUNNING"] - ) + print(f"Parent job {job['jobName']} had {tracker['RUNNING']} running child jobs.") + parent_tracker["CHILD_JOBS"] = parent_tracker.get("CHILD_JOBS", 0) + tracker["RUNNING"] return parent_jobs["nextToken"] if "nextToken" in parent_jobs else None diff --git a/batch/launch_job.py b/batch/launch_job.py index e5d90cd2e..b92bb386e 100755 --- a/batch/launch_job.py +++ b/batch/launch_job.py @@ -198,20 +198,13 @@ def launch_job_inner( tarfile_name = f"{job_name}.tar.gz" tar = tarfile.open(tarfile_name, "w:gz") for p in os.listdir("."): - if not ( - p.startswith(".") - or p.endswith("tar.gz") - or p in dvc_outputs - or p == "batch" - ): + if not (p.startswith(".") or p.endswith("tar.gz") or p in dvc_outputs or p == "batch"): tar.add(p, filter=lambda x: None if x.name.startswith(".") else x) tar.close() # Upload the tar'd contents of this directory and the runner script to S3 runner_script_name = f"{job_name}-runner.sh" - local_runner_script = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "runner.sh" - ) + local_runner_script = os.path.join(os.path.dirname(os.path.realpath(__file__)), "runner.sh") s3_client = boto3.client("s3") s3_client.upload_file(local_runner_script, s3_input_bucket, runner_script_name) s3_client.upload_file(tarfile_name, s3_input_bucket, tarfile_name) @@ -228,9 +221,7 @@ def launch_job_inner( {"name": "S3_RESULTS_PATH", "value": results_path}, {"name": "SLOTS_PER_JOB", "value": str(slots_per_job)}, ] - s3_cp_run_script = ( - f"aws s3 cp s3://{s3_input_bucket}/{runner_script_name} $PWD/run-covid-pipeline" - ) + s3_cp_run_script = f"aws s3 cp s3://{s3_input_bucket}/{runner_script_name} $PWD/run-covid-pipeline" command = ["sh", "-c", f"{s3_cp_run_script}; /bin/bash $PWD/run-covid-pipeline"] container_overrides = { "vcpus": vcpu, @@ -257,9 +248,7 @@ def launch_job_inner( containerOverrides=container_overrides, ) - print( - f"Batch job with id {resp['jobId']} launched; output will be written to {results_path}" - ) + print(f"Batch job with id {resp['jobId']} launched; output will be written to {results_path}") def get_dvc_outputs(): diff --git a/gempyor_pkg/docs/integration_benchmark.ipynb b/gempyor_pkg/docs/integration_benchmark.ipynb index d50485d25..e19f784a6 100644 --- a/gempyor_pkg/docs/integration_benchmark.ipynb +++ b/gempyor_pkg/docs/integration_benchmark.ipynb @@ -122,9 +122,7 @@ "config.set_file(config_path)\n", "\n", "spatial_config = config[\"spatial_setup\"]\n", - "spatial_base_path = pathlib.Path(\n", - " \"../../COVID19_USA/\" + spatial_config[\"base_path\"].get()\n", - ")\n", + "spatial_base_path = pathlib.Path(\"../../COVID19_USA/\" + spatial_config[\"base_path\"].get())\n", "scenario = scenario\n", "deathrate = deathrate\n", "stoch_traj_flag = stoch_traj_flag # Truthy: stochastic simulation, Falsy: determnistic mean of the binomial draws\n", @@ -146,9 +144,7 @@ "from functools import wraps\n", "\n", "\n", - "def profile(\n", - " output_file=None, sort_by=\"cumulative\", lines_to_print=None, strip_dirs=False\n", - "):\n", + "def profile(output_file=None, sort_by=\"cumulative\", lines_to_print=None, strip_dirs=False):\n", " \"\"\"A time profiler decorator.\n", " Inspired by and modified the profile decorator of Giampaolo Rodola:\n", " http://code.activestate.com/recipes/577817-profile-decorator/\n", @@ -197,9 +193,7 @@ "logger = logging.getLogger()\n", "handler = logging.StreamHandler()\n", "# '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'\n", - 
"formatter = logging.Formatter(\n", - " \"%(asctime)s [%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s\"\n", - ")\n", + "formatter = logging.Formatter(\"%(asctime)s [%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s\")\n", "\n", "handler.setFormatter(formatter)\n", "print()\n", @@ -288,9 +282,7 @@ " warnings.simplefilter(\"ignore\")\n", " from SEIR.steps import steps_SEIR_nb\n", " except ModuleNotFoundError as e:\n", - " raise RuntimeError(\n", - " \"Missing compiled module, please run `python setup.py install`\"\n", - " ) from e\n", + " raise RuntimeError(\"Missing compiled module, please run `python setup.py install`\") from e\n", "\n", "try:\n", " with warnings.catch_warnings() as w: # ignore DeprecationWarning inside numba\n", @@ -302,9 +294,7 @@ " warnings.simplefilter(\"ignore\")\n", " from SEIR.integrators import rk4_aot\n", " except ModuleNotFoundError as e:\n", - " raise RuntimeError(\n", - " \"Missing compiled module, please run `python setup.py install`\"\n", - " ) from e\n", + " raise RuntimeError(\"Missing compiled module, please run `python setup.py install`\") from e\n", "\n", "\n", "def steps_SEIR(\n", @@ -396,9 +386,7 @@ " f\"with method {s.integration_method}, only deterministic\"\n", " f\"integration is possible (got stoch_straj_flag={stoch_traj_flag}\"\n", " )\n", - " seir_sim = steps_ode.ode_integration(\n", - " *fnct_args, integration_method=s.integration_method\n", - " )\n", + " seir_sim = steps_ode.ode_integration(*fnct_args, integration_method=s.integration_method)\n", " elif s.integration_method == \"rk4.jit1\":\n", " seir_sim = steps_ode.rk4_integration1(*fnct_args)\n", " elif s.integration_method == \"rk4.jit2\":\n", @@ -484,9 +472,7 @@ " proportion_array,\n", " proportion_info,\n", " ) = s.compartments.get_transition_array(parameters, s.parameters.pnames)\n", - " seir.log_debug_parameters(\n", - " parsed_parameters, \"Unique Parameters used by transitions\"\n", - " )" + " seir.log_debug_parameters(parsed_parameters, \"Unique Parameters used by transitions\")" ] }, { @@ -1114,9 +1100,7 @@ " mobility_data_indices,\n", " stoch_traj_flag,\n", " )\n", - " data[f\"legacy | dt={dt} day | jit (pre)\"] = seir.postprocess_and_write(\n", - " sim_id, s, states, p_draw, npi, seeding_data\n", - " )" + " data[f\"legacy | dt={dt} day | jit (pre)\"] = seir.postprocess_and_write(sim_id, s, states, p_draw, npi, seeding_data)" ] }, { @@ -1174,9 +1158,9 @@ " mobility_data_indices,\n", " stoch_traj_flag,\n", " )\n", - " data[\n", - " f\"legacy | dt=1.0 day | alpha={alpha} | jit (pre)\"\n", - " ] = seir.postprocess_and_write(sim_id, s, states, p_draw, npi, seeding_data)\n", + " data[f\"legacy | dt=1.0 day | alpha={alpha} | jit (pre)\"] = seir.postprocess_and_write(\n", + " sim_id, s, states, p_draw, npi, seeding_data\n", + " )\n", " if False:\n", " with Timer(\"onerun_SEIR.compute.legacy, dt=0.5 day\"):\n", " s.integration_method = \"classical\"\n", @@ -1195,9 +1179,9 @@ " mobility_data_indices,\n", " stoch_traj_flag,\n", " )\n", - " data[\n", - " f\"legacy | dt=0.5 day | alpha={alpha} | jit (pre)\"\n", - " ] = seir.postprocess_and_write(sim_id, s, states, p_draw, npi, seeding_data)" + " data[f\"legacy | dt=0.5 day | alpha={alpha} | jit (pre)\"] = seir.postprocess_and_write(\n", + " sim_id, s, states, p_draw, npi, seeding_data\n", + " )" ] }, { @@ -12271,9 +12255,7 @@ "lw = 2 # 0\n", "for met, dat in data.items():\n", " if \"alpha\" not in met:\n", - " df = dat[\n", - " (dat[\"value_type\"] == \"incidence\") & (dat[\"mc_infection_stage\"] == \"E\")\n", - " ]\n", + " 
df = dat[(dat[\"value_type\"] == \"incidence\") & (dat[\"mc_infection_stage\"] == \"E\")]\n", " df = df.reset_index(drop=True).groupby(by=\"date\").sum().sum(axis=1)\n", " ax.plot(df, label=met, ls=\"-.\", alpha=1, lw=lw)\n", " # lw -=2\n", @@ -12396,9 +12378,7 @@ "ref = df.reset_index(drop=True).groupby(by=\"date\").sum().sum(axis=1)\n", "for met, dat in data.items():\n", " if \"legacy\" not in met:\n", - " df = dat[\n", - " (dat[\"value_type\"] == \"incidence\") & (dat[\"mc_infection_stage\"] == \"E\")\n", - " ]\n", + " df = dat[(dat[\"value_type\"] == \"incidence\") & (dat[\"mc_infection_stage\"] == \"E\")]\n", " df = df.reset_index(drop=True).groupby(by=\"date\").sum().sum(axis=1)\n", " df = (df - ref).abs() # .cumsum()\n", " ax.plot(df, label=met, ls=\"-.\", alpha=0.7, lw=lw)\n", diff --git a/gempyor_pkg/docs/integration_doc.ipynb b/gempyor_pkg/docs/integration_doc.ipynb index 9d0c49db2..036dc5cdb 100644 --- a/gempyor_pkg/docs/integration_doc.ipynb +++ b/gempyor_pkg/docs/integration_doc.ipynb @@ -107,17 +107,11 @@ " gempyor_simulator.proportion_array,\n", " gempyor_simulator.proportion_info,\n", ") = gempyor_simulator.s.compartments.get_transition_array()\n", - "npi_seir = seir.build_npi_SEIR(\n", - " s=gempyor_simulator.s, load_ID=load_ID, sim_id2load=sim_id2load, config=config\n", - ")\n", + "npi_seir = seir.build_npi_SEIR(s=gempyor_simulator.s, load_ID=load_ID, sim_id2load=sim_id2load, config=config)\n", "\n", "\n", - "initial_conditions = gempyor_simulator.s.seedingAndIC.draw_ic(\n", - " sim_id2write, setup=gempyor_simulator.s\n", - ")\n", - "seeding_data, seeding_amounts = gempyor_simulator.s.seedingAndIC.draw_seeding(\n", - " sim_id2write, setup=gempyor_simulator.s\n", - ")\n", + "initial_conditions = gempyor_simulator.s.seedingAndIC.draw_ic(sim_id2write, setup=gempyor_simulator.s)\n", + "seeding_data, seeding_amounts = gempyor_simulator.s.seedingAndIC.draw_seeding(sim_id2write, setup=gempyor_simulator.s)\n", "\n", "\n", "p_draw = gempyor_simulator.s.parameters.parameters_quick_draw(\n", @@ -499,9 +493,7 @@ "lw = 5 # 0\n", "for met, dat in data.items():\n", " if not \"stoch\" in met:\n", - " df = dat[\n", - " (dat[\"mc_value_type\"] == \"incidence\") & (dat[\"mc_infection_stage\"] == \"E\")\n", - " ]\n", + " df = dat[(dat[\"mc_value_type\"] == \"incidence\") & (dat[\"mc_infection_stage\"] == \"E\")]\n", " df = df.reset_index(drop=True).groupby(by=\"date\").sum().sum(axis=1)\n", " ax.plot(df.cumsum(), label=met, ls=\"-.\", alpha=1, lw=lw) # , marker='o')\n", " lw -= 1\n", @@ -513,9 +505,7 @@ "lw = 5 # 0\n", "for met, dat in data.items():\n", " if not \"stoch\" in met:\n", - " df = dat[\n", - " (dat[\"mc_value_type\"] == \"incidence\") & (dat[\"mc_infection_stage\"] == \"E\")\n", - " ]\n", + " df = dat[(dat[\"mc_value_type\"] == \"incidence\") & (dat[\"mc_infection_stage\"] == \"E\")]\n", " df = df.reset_index(drop=True).groupby(by=\"date\").sum().sum(axis=1)\n", " ax.plot(df, label=met, alpha=1, lw=lw, marker=\".\", ls=\"\") # ls=\"-.\",\n", " lw -= 1\n", @@ -527,9 +517,7 @@ "lw = 5\n", "for met, dat in data.items():\n", " if not \"stoch\" in met:\n", - " df = dat[\n", - " (dat[\"mc_value_type\"] == \"prevalence\") & (dat[\"mc_infection_stage\"] == \"I1\")\n", - " ]\n", + " df = dat[(dat[\"mc_value_type\"] == \"prevalence\") & (dat[\"mc_infection_stage\"] == \"I1\")]\n", " df = df.reset_index(drop=True).groupby(by=\"date\").sum().sum(axis=1)\n", " ax.plot(df, label=met, ls=\"-.\", alpha=0.5, lw=lw)\n", " lw -= 1\n", @@ -602,9 +590,7 @@ "lw = 2 # 0\n", "for met, dat in 
data.items():\n", " if \"legacy\" in met and \"dt=1\" in met:\n", - " df = dat[\n", - " (dat[\"mc_value_type\"] == \"incidence\") & (dat[\"mc_infection_stage\"] == \"E\")\n", - " ]\n", + " df = dat[(dat[\"mc_value_type\"] == \"incidence\") & (dat[\"mc_infection_stage\"] == \"E\")]\n", " df = df.reset_index(drop=True).groupby(by=\"date\").sum().sum(axis=1)\n", " if \"stoch\" in met:\n", " ax.plot(df, label=\"\", ls=\"-.\", alpha=0.8, lw=lw / 2, c=\"k\")\n", @@ -620,9 +606,7 @@ "lw = 2\n", "for met, dat in data.items():\n", " if \"legacy\" in met and \"dt=1\" in met:\n", - " df = dat[\n", - " (dat[\"mc_value_type\"] == \"prevalence\") & (dat[\"mc_infection_stage\"] == \"I1\")\n", - " ]\n", + " df = dat[(dat[\"mc_value_type\"] == \"prevalence\") & (dat[\"mc_infection_stage\"] == \"I1\")]\n", " df = df.reset_index(drop=True).groupby(by=\"date\").sum().sum(axis=1)\n", " if \"stoch\" in met:\n", " ax.plot(df, label=\"\", ls=\"-.\", alpha=0.8, lw=lw / 2, c=\"k\")\n", @@ -915,9 +899,7 @@ ], "source": [ "states_daily_incid = np.load(\"test.npy\")\n", - "states_i_f = scipy.interpolate.interp1d(\n", - " np.arange(ndays, step=2), states_daily_incid[::2, :, :], axis=0, kind=\"linear\"\n", - ")\n", + "states_i_f = scipy.interpolate.interp1d(np.arange(ndays, step=2), states_daily_incid[::2, :, :], axis=0, kind=\"linear\")\n", "states_daily_incid = states_i_f(np.arange(ndays)) / 2\n", "states_daily_incid.sum()" ] diff --git a/gempyor_pkg/src/gempyor/NPI/MultiTimeReduce.py b/gempyor_pkg/src/gempyor/NPI/MultiTimeReduce.py index c29613476..ef218ef41 100644 --- a/gempyor_pkg/src/gempyor/NPI/MultiTimeReduce.py +++ b/gempyor_pkg/src/gempyor/NPI/MultiTimeReduce.py @@ -20,8 +20,7 @@ def __init__( name=getattr( npi_config, "key", - (npi_config["scenario"].exists() and npi_config["scenario"].get()) - or "unknown", + (npi_config["scenario"].exists() and npi_config["scenario"].get()) or "unknown", ) ) @@ -57,22 +56,14 @@ def __init__( # if parameters are exceeding global start/end dates, index of parameter df will be out of range so check first if self.sanitize: - too_early = ( - min([min(i) for i in self.parameters["start_date"]]) < self.start_date - ) - too_late = ( - max([max(i) for i in self.parameters["end_date"]]) > self.end_date - ) + too_early = min([min(i) for i in self.parameters["start_date"]]) < self.start_date + too_late = max([max(i) for i in self.parameters["end_date"]]) > self.end_date if too_early or too_late: - raise ValueError( - "at least one period start or end date is not between global dates" - ) + raise ValueError("at least one period start or end date is not between global dates") for grp_config in npi_config["groups"]: affected_geoids_grp = self.__get_affected_geoids_grp(grp_config) - for sub_index in range( - len(self.parameters["start_date"][affected_geoids_grp[0]]) - ): + for sub_index in range(len(self.parameters["start_date"][affected_geoids_grp[0]])): period_range = pd.date_range( self.parameters["start_date"][affected_geoids_grp[0]][sub_index], self.parameters["end_date"][affected_geoids_grp[0]][sub_index], @@ -98,9 +89,7 @@ def __checkErrors(self): max_start_date = max([max(i) for i in self.parameters["start_date"]]) min_end_date = min([min(i) for i in self.parameters["end_date"]]) max_end_date = max([max(i) for i in self.parameters["end_date"]]) - if not ( - (self.start_date <= min_start_date) & (max_start_date <= self.end_date) - ): + if not ((self.start_date <= min_start_date) & (max_start_date <= self.end_date)): raise ValueError( f"at least one period_start_date 
[{min_start_date}, {max_start_date}] is not between global dates [{self.start_date}, {self.end_date}]" ) @@ -110,9 +99,7 @@ def __checkErrors(self): ) if not (self.parameters["start_date"] <= self.parameters["end_date"]).all(): - raise ValueError( - f"at least one period_start_date is greater than the corresponding period end date" - ) + raise ValueError(f"at least one period_start_date is greater than the corresponding period end date") for n in self.affected_geoids: if n not in self.geoids: @@ -138,9 +125,7 @@ def __createFromConfig(self, npi_config): self.affected_geoids = self.__get_affected_geoids(npi_config) - self.parameters = self.parameters[ - self.parameters.index.isin(self.affected_geoids) - ] + self.parameters = self.parameters[self.parameters.index.isin(self.affected_geoids)] dist = npi_config["value"].as_random_distribution() self.parameters["npi_name"] = self.name self.parameters["parameter"] = self.param_name @@ -172,9 +157,7 @@ def __get_affected_geoids_grp(self, grp_config): def __createFromDf(self, loaded_df, npi_config): loaded_df.index = loaded_df.geoid loaded_df = loaded_df[loaded_df["npi_name"] == self.name] - self.parameters = loaded_df[ - ["npi_name", "start_date", "end_date", "parameter", "reduction"] - ].copy() + self.parameters = loaded_df[["npi_name", "start_date", "end_date", "parameter", "reduction"]].copy() # self.parameters["start_date"] = [[datetime.date.fromisoformat(date) for date in strdate.split(",")] for strdate in self.parameters["start_date"]] # self.parameters["end_date"] = [[datetime.date.fromisoformat(date) for date in strdate.split(",")] for strdate in self.parameters["end_date"]] # self.affected_geoids = set(self.parameters.index) @@ -183,9 +166,7 @@ def __createFromDf(self, loaded_df, npi_config): if self.sanitize: if len(self.affected_geoids) != len(self.parameters): print(f"loading {self.name} and we got {len(self.parameters)} geoids") - print( - f"getting from config that it affects {len(self.affected_geoids)}" - ) + print(f"getting from config that it affects {len(self.affected_geoids)}") for grp_config in npi_config["groups"]: affected_geoids_grp = self.__get_affected_geoids_grp(grp_config) @@ -229,14 +210,10 @@ def __get_affected_geoids(self, npi_config): if grp_config["affected_geoids"].get() == "all": affected_geoids_grp = self.geoids else: - affected_geoids_grp += [ - str(n.get()) for n in grp_config["affected_geoids"] - ] + affected_geoids_grp += [str(n.get()) for n in grp_config["affected_geoids"]] affected_geoids = set(affected_geoids_grp) if len(affected_geoids) != len(affected_geoids_grp): - raise ValueError( - f"In NPI {self.name}, some geoids belong to several groups. This is unsupported." - ) + raise ValueError(f"In NPI {self.name}, some geoids belong to several groups. 
This is unsupported.") return affected_geoids def getReduction(self, param, default=0.0): @@ -249,11 +226,7 @@ def getReduction(self, param, default=0.0): def getReductionToWrite(self): df = self.parameters df.index.name = "geoid" - df["start_date"] = df["start_date"].apply( - lambda l: ",".join([d.strftime("%Y-%m-%d") for d in l]) - ) - df["end_date"] = df["end_date"].apply( - lambda l: ",".join([d.strftime("%Y-%m-%d") for d in l]) - ) + df["start_date"] = df["start_date"].apply(lambda l: ",".join([d.strftime("%Y-%m-%d") for d in l])) + df["end_date"] = df["end_date"].apply(lambda l: ",".join([d.strftime("%Y-%m-%d") for d in l])) df = df.reset_index() return df diff --git a/gempyor_pkg/src/gempyor/NPI/Reduce.py b/gempyor_pkg/src/gempyor/NPI/Reduce.py index 4bf48d518..a1ec8e6e8 100644 --- a/gempyor_pkg/src/gempyor/NPI/Reduce.py +++ b/gempyor_pkg/src/gempyor/NPI/Reduce.py @@ -19,8 +19,7 @@ def __init__( name=getattr( npi_config, "key", - (npi_config["scenario"].exists() and npi_config["scenario"].get()) - or "unknown", + (npi_config["scenario"].exists() and npi_config["scenario"].get()) or "unknown", ) ) @@ -46,22 +45,15 @@ def __init__( self.__createFromConfig(npi_config) # if parameters are exceeding global start/end dates, index of parameter df will be out of range so check first - if ( - self.parameters["start_date"].min() < self.start_date - or self.parameters["end_date"].max() > self.end_date - ): - raise ValueError( - f"""{self.name} : at least one period start or end date is not between global dates""" - ) + if self.parameters["start_date"].min() < self.start_date or self.parameters["end_date"].max() > self.end_date: + raise ValueError(f"""{self.name} : at least one period start or end date is not between global dates""") # for index in self.parameters.index: # period_range = pd.date_range(self.parameters["start_date"][index], self.parameters["end_date"][index]) ## This the line that does the work # self.npi_old.loc[index, period_range] = np.tile(self.parameters["reduction"][index], (len(period_range), 1)).T - period_range = pd.date_range( - self.parameters["start_date"].iloc[0], self.parameters["end_date"].iloc[0] - ) + period_range = pd.date_range(self.parameters["start_date"].iloc[0], self.parameters["end_date"].iloc[0]) self.npi.loc[self.parameters.index, period_range] = np.tile( self.parameters["reduction"][:], (len(period_range), 1) ).T @@ -73,9 +65,7 @@ def __checkErrors(self): max_start_date = self.parameters["start_date"].max() min_end_date = self.parameters["end_date"].min() max_end_date = self.parameters["end_date"].max() - if not ( - (self.start_date <= min_start_date) & (max_start_date <= self.end_date) - ): + if not ((self.start_date <= min_start_date) & (max_start_date <= self.end_date)): raise ValueError( f"at least one period_start_date [{min_start_date}, {max_start_date}] is not between global dates [{self.start_date}, {self.end_date}]" ) @@ -85,9 +75,7 @@ def __checkErrors(self): ) if not (self.parameters["start_date"] <= self.parameters["end_date"]).all(): - raise ValueError( - f"at least one period_start_date is greater than the corresponding period end date" - ) + raise ValueError(f"at least one period_start_date is greater than the corresponding period end date") for n in self.affected_geoids: if n not in self.geoids: @@ -113,28 +101,19 @@ def __createFromConfig(self, npi_config): # If values of "affected_geoids" is "all" or unspecified, run on all geoids. # Otherwise, run only on geoids specified. 
self.affected_geoids = set(self.geoids) - if ( - npi_config["affected_geoids"].exists() - and npi_config["affected_geoids"].get() != "all" - ): + if npi_config["affected_geoids"].exists() and npi_config["affected_geoids"].get() != "all": self.affected_geoids = {str(n.get()) for n in npi_config["affected_geoids"]} - self.parameters = self.parameters[ - self.parameters.index.isin(self.affected_geoids) - ] + self.parameters = self.parameters[self.parameters.index.isin(self.affected_geoids)] # Create reduction self.dist = npi_config["value"].as_random_distribution() self.parameters["npi_name"] = self.name self.parameters["start_date"] = ( - npi_config["period_start_date"].as_date() - if npi_config["period_start_date"].exists() - else self.start_date + npi_config["period_start_date"].as_date() if npi_config["period_start_date"].exists() else self.start_date ) self.parameters["end_date"] = ( - npi_config["period_end_date"].as_date() - if npi_config["period_end_date"].exists() - else self.end_date + npi_config["period_end_date"].as_date() if npi_config["period_end_date"].exists() else self.end_date ) self.parameters["parameter"] = self.param_name self.parameters["reduction"] = self.dist(size=self.parameters.shape[0]) @@ -142,19 +121,13 @@ def __createFromConfig(self, npi_config): def __createFromDf(self, loaded_df, npi_config): loaded_df.index = loaded_df.geoid loaded_df = loaded_df[loaded_df["npi_name"] == self.name] - self.parameters = loaded_df[ - ["npi_name", "start_date", "end_date", "parameter", "reduction"] - ].copy() + self.parameters = loaded_df[["npi_name", "start_date", "end_date", "parameter", "reduction"]].copy() # dates are picked from config self.parameters["start_date"] = ( - npi_config["period_start_date"].as_date() - if npi_config["period_start_date"].exists() - else self.start_date + npi_config["period_start_date"].as_date() if npi_config["period_start_date"].exists() else self.start_date ) self.parameters["end_date"] = ( - npi_config["period_end_date"].as_date() - if npi_config["period_end_date"].exists() - else self.end_date + npi_config["period_end_date"].as_date() if npi_config["period_end_date"].exists() else self.end_date ) ## This is more legible to me, but if we change it here, we should change it in __createFromConfig as well # if npi_config["period_start_date"].exists(): diff --git a/gempyor_pkg/src/gempyor/NPI/ReduceIntervention.py b/gempyor_pkg/src/gempyor/NPI/ReduceIntervention.py index 2b363eac9..05f51a24a 100644 --- a/gempyor_pkg/src/gempyor/NPI/ReduceIntervention.py +++ b/gempyor_pkg/src/gempyor/NPI/ReduceIntervention.py @@ -44,13 +44,8 @@ def __init__( self.__createFromConfig(npi_config) # if parameters are exceeding global start/end dates, index of parameter df will be out of range so check first - if ( - self.parameters["start_date"].min() < self.start_date - or self.parameters["end_date"].max() > self.end_date - ): - raise ValueError( - f"""{self.name} : at least one period start or end date is not between global dates""" - ) + if self.parameters["start_date"].min() < self.start_date or self.parameters["end_date"].max() > self.end_date: + raise ValueError(f"""{self.name} : at least one period start or end date is not between global dates""") self.param_name = [] self.reductions = {} @@ -77,16 +72,12 @@ def __init__( loaded_df=loaded_df, ) new_params = self.sub_npi.param_name # either a list (if stacked) or a string - new_params = ( - [new_params] if isinstance(new_params, str) else new_params - ) # convert to list + new_params = [new_params] if 
isinstance(new_params, str) else new_params # convert to list # Add each parameter at first encounter for new_p in new_params: if new_p not in self.param_name: self.param_name.append(new_p) - if ( - new_p in pnames_overlap_operation_sum - ): # re.match("^transition_rate [1234567890]+$",new_p): + if new_p in pnames_overlap_operation_sum: # re.match("^transition_rate [1234567890]+$",new_p): self.reductions[new_p] = 0 else: self.reductions[new_p] = 0 @@ -100,9 +91,7 @@ def __init__( for param in self.param_name: reduction = self.sub_npi.getReduction(param, default=0.0) - if ( - param in pnames_overlap_operation_sum - ): # re.match("^transition_rate [1234567890]+$",param): + if param in pnames_overlap_operation_sum: # re.match("^transition_rate [1234567890]+$",param): self.reductions[param] = reduction.copy() else: self.reductions[param] = reduction.copy() @@ -119,9 +108,7 @@ def __init__( self.parameters["start_date"][index], self.parameters["end_date"][index], ) - self.reductions[param].loc[index, period_range] *= ( - 1 - self.parameters["reduction"][index] - ) + self.reductions[param].loc[index, period_range] *= 1 - self.parameters["reduction"][index] # self.__checkErrors() @@ -130,9 +117,7 @@ def __checkErrors(self): max_start_date = self.parameters["start_date"].max() min_end_date = self.parameters["end_date"].min() max_end_date = self.parameters["end_date"].max() - if not ( - (self.start_date <= min_start_date) & (max_start_date <= self.end_date) - ): + if not ((self.start_date <= min_start_date) & (max_start_date <= self.end_date)): raise ValueError( f"at least one period_start_date [{min_start_date}, {max_start_date}] is not between global dates [{self.start_date}, {self.end_date}]" ) @@ -142,9 +127,7 @@ def __checkErrors(self): ) if not (self.parameters["start_date"] <= self.parameters["end_date"]).all(): - raise ValueError( - f"at least one period_start_date is greater than the corresponding period end date" - ) + raise ValueError(f"at least one period_start_date is greater than the corresponding period end date") for n in self.affected_geoids: if n not in self.geoids: @@ -178,19 +161,13 @@ def getReductionToWrite(self): def __createFromDf(self, loaded_df, npi_config): loaded_df.index = loaded_df.geoid loaded_df = loaded_df[loaded_df["npi_name"] == self.name] - self.parameters = loaded_df[ - ["npi_name", "start_date", "end_date", "parameter", "reduction"] - ].copy() + self.parameters = loaded_df[["npi_name", "start_date", "end_date", "parameter", "reduction"]].copy() self.parameters["start_date"] = ( - npi_config["period_start_date"].as_date() - if npi_config["period_start_date"].exists() - else self.start_date + npi_config["period_start_date"].as_date() if npi_config["period_start_date"].exists() else self.start_date ) self.parameters["end_date"] = ( - npi_config["period_end_date"].as_date() - if npi_config["period_end_date"].exists() - else self.end_date + npi_config["period_end_date"].as_date() if npi_config["period_end_date"].exists() else self.end_date ) ## This is more legible to me, but if we change it here, we should change it in __createFromConfig as well @@ -218,28 +195,19 @@ def __createFromConfig(self, npi_config): # If values of "affected_geoids" is "all" or unspecified, run on all geoids. # Otherwise, run only on geoids specified. 
self.affected_geoids = set(self.geoids) - if ( - npi_config["affected_geoids"].exists() - and npi_config["affected_geoids"].get() != "all" - ): + if npi_config["affected_geoids"].exists() and npi_config["affected_geoids"].get() != "all": self.affected_geoids = {str(n.get()) for n in npi_config["affected_geoids"]} - self.parameters = self.parameters[ - self.parameters.index.isin(self.affected_geoids) - ] + self.parameters = self.parameters[self.parameters.index.isin(self.affected_geoids)] # Create reduction self.dist = npi_config["value"].as_random_distribution() self.parameters["npi_name"] = self.name self.parameters["start_date"] = ( - npi_config["period_start_date"].as_date() - if npi_config["period_start_date"].exists() - else self.start_date + npi_config["period_start_date"].as_date() if npi_config["period_start_date"].exists() else self.start_date ) self.parameters["end_date"] = ( - npi_config["period_end_date"].as_date() - if npi_config["period_end_date"].exists() - else self.end_date + npi_config["period_end_date"].as_date() if npi_config["period_end_date"].exists() else self.end_date ) self.parameters["parameter"] = self.param_name self.parameters["reduction"] = self.dist(size=self.parameters.shape[0]) diff --git a/gempyor_pkg/src/gempyor/NPI/ReduceR0.py b/gempyor_pkg/src/gempyor/NPI/ReduceR0.py index 26496b034..d24b255dd 100644 --- a/gempyor_pkg/src/gempyor/NPI/ReduceR0.py +++ b/gempyor_pkg/src/gempyor/NPI/ReduceR0.py @@ -6,15 +6,7 @@ class ReduceR0(Reduce): - def __init__( - self, - *, - npi_config, - global_config, - geoids, - loaded_df=None, - pnames_overlap_operation_sum=[] - ): + def __init__(self, *, npi_config, global_config, geoids, loaded_df=None, pnames_overlap_operation_sum=[]): npi_config["parameter"] = "r0" super().__init__( npi_config=npi_config, diff --git a/gempyor_pkg/src/gempyor/NPI/Stacked.py b/gempyor_pkg/src/gempyor/NPI/Stacked.py index 7d5100b2a..9f55e9abd 100644 --- a/gempyor_pkg/src/gempyor/NPI/Stacked.py +++ b/gempyor_pkg/src/gempyor/NPI/Stacked.py @@ -46,9 +46,7 @@ def __init__( if isinstance(scenario, str): settings = settings_map.get(scenario) if settings is None: - raise RuntimeError( - f"couldn't find scenario in config file [got: {scenario}]" - ) + raise RuntimeError(f"couldn't find scenario in config file [got: {scenario}]") # via profiling: faster to recreate the confuse view than to fetch+resolve due to confuse isinstance # checks scenario_npi_config = confuse.RootView([settings]) @@ -66,25 +64,19 @@ def __init__( ) new_params = sub_npi.param_name # either a list (if stacked) or a string - new_params = ( - [new_params] if isinstance(new_params, str) else new_params - ) # convert to list + new_params = [new_params] if isinstance(new_params, str) else new_params # convert to list # Add each parameter at first encounter for new_p in new_params: if new_p not in self.param_name: self.param_name.append(new_p) - if ( - new_p in pnames_overlap_operation_sum - ): # re.match("^transition_rate [1234567890]+$",new_p): + if new_p in pnames_overlap_operation_sum: # re.match("^transition_rate [1234567890]+$",new_p): self.reductions[new_p] = 0 else: self.reductions[new_p] = 1 for param in self.param_name: reduction = sub_npi.getReduction(param, default=0.0) - if ( - param in pnames_overlap_operation_sum - ): # re.match("^transition_rate [1234567890]+$",param): + if param in pnames_overlap_operation_sum: # re.match("^transition_rate [1234567890]+$",param): self.reductions[param] += reduction else: self.reductions[param] *= 1 - reduction @@ -105,9 +97,7 @@ def 
__init__( self.reduction_params.clear() for param in self.param_name: - if ( - not param in pnames_overlap_operation_sum - ): # re.match("^transition_rate \d+$",param): + if not param in pnames_overlap_operation_sum: # re.match("^transition_rate \d+$",param): self.reductions[param] = 1 - self.reductions[param] # check that no NPI is called several times, and retourn them diff --git a/gempyor_pkg/src/gempyor/NPI/helpers.py b/gempyor_pkg/src/gempyor/NPI/helpers.py index 821f03703..623be31a4 100644 --- a/gempyor_pkg/src/gempyor/NPI/helpers.py +++ b/gempyor_pkg/src/gempyor/NPI/helpers.py @@ -12,9 +12,7 @@ def reduce_parameter( if isinstance(modification, pd.DataFrame): modification = modification.T modification.index = pd.to_datetime(modification.index.astype(str)) - modification = ( - modification.resample("1D").ffill().to_numpy() - ) # Type consistency: + modification = modification.resample("1D").ffill().to_numpy() # Type consistency: if method == "prod": return parameter * (1 - modification) elif method == "sum": diff --git a/gempyor_pkg/src/gempyor/compartments.py b/gempyor_pkg/src/gempyor/compartments.py index 1c5991157..38fa31f14 100644 --- a/gempyor_pkg/src/gempyor/compartments.py +++ b/gempyor_pkg/src/gempyor/compartments.py @@ -42,31 +42,21 @@ def defaultConstruct(self, seir_config): use_parallel = True n_parallel_compartments = 1 if use_parallel: - n_parallel_compartments = len( - seir_config["parameters"]["parallel_structure"]["compartments"].get() - ) - self.compartments = pd.DataFrame( - {"key": 1, "infection_stage": ["S", "E", "I1", "I2", "I3", "R"]} - ) + n_parallel_compartments = len(seir_config["parameters"]["parallel_structure"]["compartments"].get()) + self.compartments = pd.DataFrame({"key": 1, "infection_stage": ["S", "E", "I1", "I2", "I3", "R"]}) parallel_frame = None if use_parallel: parallel_frame = pd.DataFrame( { "key": 1, - "vaccination_stage": seir_config["parameters"][ - "parallel_structure" - ]["compartments"].keys(), + "vaccination_stage": seir_config["parameters"]["parallel_structure"]["compartments"].keys(), } ) else: - parallel_frame = pd.DataFrame( - {"key": 1, "vaccination_stage": ["unvaccinated"]} - ) + parallel_frame = pd.DataFrame({"key": 1, "vaccination_stage": ["unvaccinated"]}) self.compartments = pd.merge(self.compartments, parallel_frame) self.compartments = self.compartments.drop(["key"], axis=1) - self.compartments["name"] = self.compartments.apply( - lambda x: reduce(lambda a, b: a + "_" + b, x), axis=1 - ) + self.compartments["name"] = self.compartments.apply(lambda x: reduce(lambda a, b: a + "_" + b, x), axis=1) if not use_parallel: transitions = [ @@ -110,12 +100,8 @@ def defaultConstruct(self, seir_config): }, ] else: - unique_infections_stages = ( - self.compartments["infection_stage"].unique().tolist() - ) - unique_vaccination_stages = ( - self.compartments["vaccination_stage"].unique().tolist() - ) + unique_infections_stages = self.compartments["infection_stage"].unique().tolist() + unique_vaccination_stages = self.compartments["vaccination_stage"].unique().tolist() transitions = [ { "source": ["S", unique_vaccination_stages], @@ -161,14 +147,10 @@ def defaultConstruct(self, seir_config): "source": [unique_infections_stages, [transition["from"].get()]], "destination": [unique_infections_stages, [transition["to"].get()]], "rate": [f"transition_rate{i}", 1], - "proportional_to": [ - [unique_infections_stages, [transition["from"].get()]] - ], + "proportional_to": [[unique_infections_stages, [transition["from"].get()]]], "proportion_exponent": 
[["1", "1"]], } - for i, transition in enumerate( - seir_config["parameters"]["parallel_structure"]["transitions"] - ) + for i, transition in enumerate(seir_config["parameters"]["parallel_structure"]["transitions"]) ] transitions = transitions + parallel_transitions @@ -187,54 +169,39 @@ def parse_compartments(self, seir_config): else: compartment_frame = pd.merge(compartment_frame, tmp, on="key") compartment_frame = compartment_frame.drop(["key"], axis=1) - compartment_frame["name"] = compartment_frame.apply( - lambda x: reduce(lambda a, b: a + "_" + b, x), axis=1 - ) + compartment_frame["name"] = compartment_frame.apply(lambda x: reduce(lambda a, b: a + "_" + b, x), axis=1) self.compartments = compartment_frame def parse_transitions(self, seir_config, fake_config=False): rc = reduce( - lambda a, b: a.append( - self.parse_single_transition(seir_config, b, fake_config) - ), + lambda a, b: a.append(self.parse_single_transition(seir_config, b, fake_config)), seir_config["transitions"], pd.DataFrame(), ) rc = rc.reset_index(drop=True) return rc - def check_transition_element( - self, single_transition_config, problem_dimension=None - ): + def check_transition_element(self, single_transition_config, problem_dimension=None): return True def check_transition_elements(self, single_transition_config, problem_dimension): return True - def access_original_config_by_multi_index( - self, config_piece, index, dimension=None, encapsulate_as_list=False - ): + def access_original_config_by_multi_index(self, config_piece, index, dimension=None, encapsulate_as_list=False): if dimension is None: dimension = [None for i in index] tmp = [y for y in zip(index, range(len(index)), dimension)] tmp = zip(index, range(len(index)), dimension) - tmp = [ - list_access_element(config_piece[x[1]], x[0], x[2], encapsulate_as_list) - for x in tmp - ] + tmp = [list_access_element(config_piece[x[1]], x[0], x[2], encapsulate_as_list) for x in tmp] return tmp def expand_transition_elements(self, single_transition_config, problem_dimension): - proportion_size = get_list_dimension( - single_transition_config["proportional_to"] - ) + proportion_size = get_list_dimension(single_transition_config["proportional_to"]) new_transition_config = single_transition_config.copy() for p_idx in range(proportion_size): if new_transition_config["proportional_to"][p_idx] == "source": - new_transition_config["proportional_to"][p_idx] = new_transition_config[ - "source" - ] + new_transition_config["proportional_to"][p_idx] = new_transition_config["source"] temp_array = np.zeros(problem_dimension) @@ -242,37 +209,21 @@ def expand_transition_elements(self, single_transition_config, problem_dimension new_transition_config["destination"] = np.zeros(problem_dimension, dtype=object) new_transition_config["rate"] = np.zeros(problem_dimension, dtype=object) - new_transition_config["proportional_to"] = np.zeros( - problem_dimension, dtype=object - ) - new_transition_config["proportion_exponent"] = np.zeros( - problem_dimension, dtype=object - ) + new_transition_config["proportional_to"] = np.zeros(problem_dimension, dtype=object) + new_transition_config["proportion_exponent"] = np.zeros(problem_dimension, dtype=object) it = np.nditer(temp_array, flags=["multi_index"]) for x in it: - new_transition_config["source"][ - it.multi_index - ] = list_recursive_convert_to_string( - self.access_original_config_by_multi_index( - single_transition_config["source"], it.multi_index - ) + new_transition_config["source"][it.multi_index] = list_recursive_convert_to_string( 
+ self.access_original_config_by_multi_index(single_transition_config["source"], it.multi_index) ) - new_transition_config["destination"][ - it.multi_index - ] = list_recursive_convert_to_string( - self.access_original_config_by_multi_index( - single_transition_config["destination"], it.multi_index - ) + new_transition_config["destination"][it.multi_index] = list_recursive_convert_to_string( + self.access_original_config_by_multi_index(single_transition_config["destination"], it.multi_index) ) - new_transition_config["rate"][ - it.multi_index - ] = list_recursive_convert_to_string( - self.access_original_config_by_multi_index( - single_transition_config["rate"], it.multi_index - ) + new_transition_config["rate"][it.multi_index] = list_recursive_convert_to_string( + self.access_original_config_by_multi_index(single_transition_config["rate"], it.multi_index) ) new_transition_config["proportional_to"][it.multi_index] = as_list( @@ -294,9 +245,7 @@ def expand_transition_elements(self, single_transition_config, problem_dimension it.multi_index, problem_dimension, ) - new_transition_config["proportion_exponent"][ - it.multi_index - ] = list_recursive_convert_to_string( + new_transition_config["proportion_exponent"][it.multi_index] = list_recursive_convert_to_string( [ self.access_original_config_by_multi_index( single_transition_config["proportion_exponent"][p_idx], @@ -310,12 +259,7 @@ def expand_transition_elements(self, single_transition_config, problem_dimension return new_transition_config def format_source(self, source_column): - rc = [ - y - for y in map( - lambda x: reduce(lambda a, b: str(a) + "_" + str(b), x), source_column - ) - ] + rc = [y for y in map(lambda x: reduce(lambda a, b: str(a) + "_" + str(b), x), source_column)] return rc def unformat_source(self, source_column): @@ -337,12 +281,7 @@ def unformat_destination(self, destination_column): return rc def format_rate(self, rate_column): - rc = [ - y - for y in map( - lambda x: reduce(lambda a, b: str(a) + "%*%" + str(b), x), rate_column - ) - ] + rc = [y for y in map(lambda x: reduce(lambda a, b: str(a) + "%*%" + str(b), x), rate_column)] return rc def unformat_rate(self, rate_column, compartment_dimension): @@ -362,9 +301,7 @@ def format_proportional_to(self, proportional_to_column): lambda x: reduce( lambda a, b: str(a) + "_" + str(b), map( - lambda x: reduce( - lambda a, b: str(a) + "+" + str(b), as_list(x) - ), + lambda x: reduce(lambda a, b: str(a) + "+" + str(b), as_list(x)), x, ), ), @@ -397,40 +334,26 @@ def format_proportion_exponent(self, proportion_exponent_column): ] return rc - def unformat_proportion_exponent( - self, proportion_exponent_column, compartment_dimension - ): + def unformat_proportion_exponent(self, proportion_exponent_column, compartment_dimension): rc = [x.split("%*%") for x in proportion_exponent_column] for row in range(len(rc)): - rc[row] = [ - x.split("*", maxsplit=compartment_dimension - 1) for x in rc[row] - ] + rc[row] = [x.split("*", maxsplit=compartment_dimension - 1) for x in rc[row]] for elem in rc[row]: while len(elem) < compartment_dimension: elem.append(1) return rc - def parse_single_transition( - self, seir_config, single_transition_config, fake_config=False - ): + def parse_single_transition(self, seir_config, single_transition_config, fake_config=False): ## This method relies on having run parse_compartments if not fake_config: single_transition_config = single_transition_config.get() self.check_transition_element(single_transition_config["source"]) 
self.check_transition_element(single_transition_config["destination"]) - source_dimension = [ - get_list_dimension(x) for x in single_transition_config["source"] - ] - destination_dimension = [ - get_list_dimension(x) for x in single_transition_config["destination"] - ] - problem_dimension = reduce( - lambda x, y: max(x, y), (source_dimension, destination_dimension) - ) + source_dimension = [get_list_dimension(x) for x in single_transition_config["source"]] + destination_dimension = [get_list_dimension(x) for x in single_transition_config["destination"]] + problem_dimension = reduce(lambda x, y: max(x, y), (source_dimension, destination_dimension)) self.check_transition_elements(single_transition_config, problem_dimension) - transitions = self.expand_transition_elements( - single_transition_config, problem_dimension - ) + transitions = self.expand_transition_elements(single_transition_config, problem_dimension) tmp_array = np.zeros(problem_dimension) it = np.nditer(tmp_array, flags=["multi_index"]) @@ -442,12 +365,8 @@ def parse_single_transition( "source": [transitions["source"][it.multi_index]], "destination": [transitions["destination"][it.multi_index]], "rate": [transitions["rate"][it.multi_index]], - "proportional_to": [ - transitions["proportional_to"][it.multi_index] - ], - "proportion_exponent": [ - transitions["proportion_exponent"][it.multi_index] - ], + "proportional_to": [transitions["proportional_to"][it.multi_index]], + "proportion_exponent": [transitions["proportion_exponent"][it.multi_index]], }, index=[0], ) @@ -466,12 +385,8 @@ def toFile(self, compartments_file, transitions_file): out_df["source"] = self.format_source(out_df["source"]) out_df["destination"] = self.format_destination(out_df["destination"]) out_df["rate"] = self.format_rate(out_df["rate"]) - out_df["proportional_to"] = self.format_proportional_to( - out_df["proportional_to"] - ) - out_df["proportion_exponent"] = self.format_proportion_exponent( - out_df["proportion_exponent"] - ) + out_df["proportional_to"] = self.format_proportional_to(out_df["proportional_to"]) + out_df["proportion_exponent"] = self.format_proportion_exponent(out_df["proportion_exponent"]) pa_df = pa.Table.from_pandas(out_df, preserve_index=False) pa.parquet.write_table(pa_df, transitions_file) @@ -482,15 +397,9 @@ def fromFile(self, compartments_file, transitions_file): self.transitions = pq.read_table(transitions_file).to_pandas() compartment_dimension = self.compartments.shape[1] - 1 self.transitions["source"] = self.unformat_source(self.transitions["source"]) - self.transitions["destination"] = self.unformat_destination( - self.transitions["destination"] - ) - self.transitions["rate"] = self.unformat_rate( - self.transitions["rate"], compartment_dimension - ) - self.transitions["proportional_to"] = self.unformat_proportional_to( - self.transitions["proportional_to"] - ) + self.transitions["destination"] = self.unformat_destination(self.transitions["destination"]) + self.transitions["rate"] = self.unformat_rate(self.transitions["rate"], compartment_dimension) + self.transitions["proportional_to"] = self.unformat_proportional_to(self.transitions["proportional_to"]) self.transitions["proportion_exponent"] = self.unformat_proportion_exponent( self.transitions["proportion_exponent"], compartment_dimension ) @@ -504,9 +413,7 @@ def get_comp_idx(self, comp_dict: dict) -> int: :param comp_dict: :return: """ - mask = pd.concat( - [self.compartments[k] == v for k, v in comp_dict.items()], axis=1 - ).all(axis=1) + mask = 
pd.concat([self.compartments[k] == v for k, v in comp_dict.items()], axis=1).all(axis=1) comp_idx = self.compartments[mask].index.values if len(comp_idx) != 1: raise ValueError( @@ -523,9 +430,7 @@ def constructFromConfig(self, seir_config): def get_transition_array(self): with Timer("SEIR.compartments"): - transition_array = np.zeros( - (self.transitions.shape[1], self.transitions.shape[0]), dtype="int" - ) + transition_array = np.zeros((self.transitions.shape[1], self.transitions.shape[0]), dtype="int") for cit, colname in enumerate(("source", "destination")): for it, elem in enumerate(self.transitions[colname]): elem = reduce(lambda a, b: a + "_" + b, elem) @@ -535,9 +440,7 @@ def get_transition_array(self): rc = compartment if rc == -1: print(self.compartments) - raise ValueError( - f"Could find {colname} defined by {elem} in compartments" - ) + raise ValueError(f"Could find {colname} defined by {elem} in compartments") transition_array[cit, it] = rc unique_strings = [] @@ -556,18 +459,10 @@ def get_transition_array(self): if not candidate in unique_strings: unique_strings.append(candidate) - assert reduce( - lambda a, b: a and b, [(x.find("(") == -1) for x in unique_strings] - ) - assert reduce( - lambda a, b: a and b, [(x.find(")") == -1) for x in unique_strings] - ) - assert reduce( - lambda a, b: a and b, [(x.find("%") == -1) for x in unique_strings] - ) - assert reduce( - lambda a, b: a and b, [(x.find(" ") == -1) for x in unique_strings] - ) + assert reduce(lambda a, b: a and b, [(x.find("(") == -1) for x in unique_strings]) + assert reduce(lambda a, b: a and b, [(x.find(")") == -1) for x in unique_strings]) + assert reduce(lambda a, b: a and b, [(x.find("%") == -1) for x in unique_strings]) + assert reduce(lambda a, b: a and b, [(x.find(" ") == -1) for x in unique_strings]) for it, elem in enumerate(self.transitions["rate"]): candidate = reduce(lambda a, b: a + "*" + b, elem) @@ -603,12 +498,8 @@ def get_transition_array(self): # rc = compartment # if rc == -1: # raise ValueError(f"Could not find match for {elem3} in compartments") - proportion_info[0][ - current_proportion_sum_it - ] = current_proportion_sum_start - proportion_info[1][ - current_proportion_sum_it - ] = current_proportion_sum_start + len(elem_tmp) + proportion_info[0][current_proportion_sum_it] = current_proportion_sum_start + proportion_info[1][current_proportion_sum_it] = current_proportion_sum_start + len(elem_tmp) current_proportion_sum_it += 1 current_proportion_sum_start += len(elem_tmp) proportion_compartment_index = 0 @@ -619,9 +510,7 @@ def get_transition_array(self): candidate = candidate.replace("*1", "") if not candidate in unique_strings: raise ValueError("Something went wrong") - rc = [it for it, x in enumerate(unique_strings) if x == candidate][ - 0 - ] + rc = [it for it, x in enumerate(unique_strings) if x == candidate][0] proportion_info[2][proportion_compartment_index] = rc proportion_compartment_index += 1 @@ -645,9 +534,7 @@ def get_transition_array(self): if self.compartments["name"][compartment] == elem3: rc = compartment if rc == -1: - raise ValueError( - f"Could find proportional_to {elem3} in compartments" - ) + raise ValueError(f"Could find proportional_to {elem3} in compartments") proportion_array[proportion_index] = rc proportion_index += 1 @@ -684,9 +571,7 @@ def get_transition_array(self): ) def parse_parameters(self, parameters, parameter_names, unique_strings): - parsed_parameters = self.parse_parameter_strings_to_numpy_arrays( - parameters, parameter_names, unique_strings - ) + 
parsed_parameters = self.parse_parameter_strings_to_numpy_arrays(parameters, parameter_names, unique_strings) return parsed_parameters def parse_parameter_strings_to_numpy_arrays( @@ -718,13 +603,9 @@ def parse_parameter_strings_to_numpy_arrays( is_resolvable = [x[0] or x[1] for x in zip(is_numeric, is_parameter)] is_totally_resolvable = reduce(lambda a, b: a and b, is_resolvable) if not is_totally_resolvable: - not_resolvable_indices = [ - it for it, x in enumerate(is_resolvable) if not x - ] + not_resolvable_indices = [it for it, x in enumerate(is_resolvable) if not x] - tmp_rc[ - not_resolvable_indices - ] = self.parse_parameter_strings_to_numpy_arrays( + tmp_rc[not_resolvable_indices] = self.parse_parameter_strings_to_numpy_arrays( parameters, parameter_names, [string[not is_resolvable]], @@ -733,14 +614,8 @@ def parse_parameter_strings_to_numpy_arrays( ) for numeric_index in [x for x in range(len(is_numeric)) if is_numeric[x]]: tmp_rc[numeric_index] = parameters[0] * 0 + float(string[numeric_index]) - for parameter_index in [ - x for x in range(len(is_parameter)) if is_parameter[x] - ]: - parameter_name_index = [ - it - for it, x in enumerate(parameter_names) - if x == string[parameter_index] - ] + for parameter_index in [x for x in range(len(is_parameter)) if is_parameter[x]]: + parameter_name_index = [it for it, x in enumerate(parameter_names) if x == string[parameter_index]] tmp_rc[parameter_index] = parameters[parameter_name_index] rc[sit] = reduce(operator_reduce_lambdas[operators[0]], tmp_rc) return rc @@ -754,9 +629,7 @@ def get_compartments_explicitDF(self): df = df.rename(columns=rename_dict) return df - def plot( - self, output_file="transition_graph", source_filters=[], destination_filters=[] - ): + def plot(self, output_file="transition_graph", source_filters=[], destination_filters=[]): """ if source_filters is [["age0to17"], ["OMICRON", "WILD"]], it means filter all transitions that have as source age0to17 AND (OMICRON OR WILD). 
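A note on the compartments.py hunks above (illustrative commentary, not part of the patch): black collapses several map/reduce pipelines onto single 120-character lines, which can make it harder to see what they compute. The format_source expression, for instance, joins each compartment's per-dimension values with "_" via reduce, which is equivalent to a plain string join. A minimal stand-alone sketch, using made-up compartment values and assuming only the reduce lambda shown in the hunk:

    from functools import reduce

    # Example data is illustrative; these values are not taken from the patch.
    source_column = [["S", "age0to17", "WILD"], ["E", "age18to64", "OMICRON"]]

    # The collapsed one-liner from format_source, applied per row:
    via_reduce = [reduce(lambda a, b: str(a) + "_" + str(b), x) for x in source_column]

    # Equivalent, more conventional spelling:
    via_join = ["_".join(str(part) for part in x) for x in source_column]

    assert via_reduce == via_join
    print(via_reduce)  # ['S_age0to17_WILD', 'E_age18to64_OMICRON']

The matching unformat_* methods (their bodies are mostly elided in these hunks) presumably split such strings back apart when the parquet files are read in fromFile, so the joined form is only a storage convention.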
diff --git a/gempyor_pkg/src/gempyor/dev/dev_seir.py b/gempyor_pkg/src/gempyor/dev/dev_seir.py index 956853644..1c536ec3a 100644 --- a/gempyor_pkg/src/gempyor/dev/dev_seir.py +++ b/gempyor_pkg/src/gempyor/dev/dev_seir.py @@ -58,9 +58,7 @@ mobility_data_indices = s.mobility.indptr mobility_data = s.mobility.data -npi = NPI.NPIBase.execute( - npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames -) +npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames) params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) params = s.parameters.parameters_reduce(params, npi) @@ -92,12 +90,7 @@ True, ) df = seir.states2Df(s, states) -assert ( - df[(df["mc_value_type"] == "prevalence") & (df["mc_infection_stage"] == "R")].loc[ - str(s.tf), "20002" - ] - > 1 -) +assert df[(df["mc_value_type"] == "prevalence") & (df["mc_infection_stage"] == "R")].loc[str(s.tf), "20002"] > 1 print(df) ts = df cp = "R" diff --git a/gempyor_pkg/src/gempyor/dev/steps.py b/gempyor_pkg/src/gempyor/dev/steps.py index 72b8dd56d..8cc22b2f9 100644 --- a/gempyor_pkg/src/gempyor/dev/steps.py +++ b/gempyor_pkg/src/gempyor/dev/steps.py @@ -54,11 +54,7 @@ def ode_integration( percent_day_away = 0.5 for spatial_node in range(nspatial_nodes): percent_who_move[spatial_node] = min( - mobility_data[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] - ].sum() + mobility_data[mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1]].sum() / population[spatial_node], 1, ) @@ -70,9 +66,7 @@ def ode_integration( def rhs(t, x, today): print("rhs.t", t) states_current = np.reshape(x, (2, ncompartments, nspatial_nodes))[0] - states_diff = np.zeros( - (2, ncompartments, nspatial_nodes) - ) # first dim: 0 -> states_diff, 1: states_cum + states_diff = np.zeros((2, ncompartments, nspatial_nodes)) # first dim: 0 -> states_diff, 1: states_cum for transition_index in range(ntransitions): total_rate = np.ones((nspatial_nodes)) @@ -87,72 +81,52 @@ def rhs(t, x, today): proportion_info[proportion_sum_starts_col][proportion_index], proportion_info[proportion_sum_stops_col][proportion_index], ): - relevant_number_in_comp += states_current[ - transition_sum_compartments[proportion_sum_index] - ] + relevant_number_in_comp += states_current[transition_sum_compartments[proportion_sum_index]] # exponents should not be a proportion, since we don't sum them over sum compartments - relevant_exponent = parameters[ - proportion_info[proportion_exponent_col][proportion_index] - ][today] + relevant_exponent = parameters[proportion_info[proportion_exponent_col][proportion_index]][today] if first_proportion: only_one_proportion = ( - transitions[transition_proportion_start_col][transition_index] - + 1 + transitions[transition_proportion_start_col][transition_index] + 1 ) == transitions[transition_proportion_stop_col][transition_index] first_proportion = False source_number = relevant_number_in_comp if source_number.max() > 0: total_rate[source_number > 0] *= ( - source_number[source_number > 0] - ** relevant_exponent[source_number > 0] + source_number[source_number > 0] ** relevant_exponent[source_number > 0] / source_number[source_number > 0] ) if only_one_proportion: - total_rate *= parameters[ - transitions[transition_rate_col][transition_index] - ][today] + total_rate *= parameters[transitions[transition_rate_col][transition_index]][today] else: for spatial_node in range(nspatial_nodes): - proportion_keep_compartment = ( - 1 - 
percent_day_away * percent_who_move[spatial_node] - ) + proportion_keep_compartment = 1 - percent_day_away * percent_who_move[spatial_node] proportion_change_compartment = ( percent_day_away * mobility_data[ - mobility_data_indices[ - spatial_node - ] : mobility_data_indices[spatial_node + 1] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] / population[spatial_node] ) rate_keep_compartment = ( proportion_keep_compartment - * relevant_number_in_comp[spatial_node] - ** relevant_exponent[spatial_node] + * relevant_number_in_comp[spatial_node] ** relevant_exponent[spatial_node] / population[spatial_node] - * parameters[ - transitions[transition_rate_col][transition_index] - ][today][spatial_node] + * parameters[transitions[transition_rate_col][transition_index]][today][spatial_node] ) visiting_compartment = mobility_row_indices[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] rate_change_compartment = proportion_change_compartment rate_change_compartment *= ( - relevant_number_in_comp[visiting_compartment] - ** relevant_exponent[visiting_compartment] + relevant_number_in_comp[visiting_compartment] ** relevant_exponent[visiting_compartment] ) rate_change_compartment /= population[visiting_compartment] - rate_change_compartment *= parameters[ - transitions[transition_rate_col][transition_index] - ][today][visiting_compartment] - total_rate[spatial_node] *= ( - rate_keep_compartment + rate_change_compartment.sum() - ) + rate_change_compartment *= parameters[transitions[transition_rate_col][transition_index]][ + today + ][visiting_compartment] + total_rate[spatial_node] *= rate_keep_compartment + rate_change_compartment.sum() # compound_adjusted_rate = 1.0 - np.exp(-dt * total_rate) @@ -169,15 +143,9 @@ def rhs(t, x, today): # if number_move[spatial_node] > states_current[transitions[transition_source_col][transition_index]][spatial_node]: # number_move[spatial_node] = states_current[transitions[transition_source_col][transition_index]][spatial_node] # Not possible to enforce this anymore, but it shouldn't be a problem or maybe ? # TODO - states_diff[ - 0, transitions[transition_source_col][transition_index] - ] -= number_move - states_diff[ - 0, transitions[transition_destination_col][transition_index] - ] += number_move - states_diff[ - 1, transitions[transition_destination_col][transition_index], : - ] += number_move # Cumumlative + states_diff[0, transitions[transition_source_col][transition_index]] -= number_move + states_diff[0, transitions[transition_destination_col][transition_index]] += number_move + states_diff[1, transitions[transition_destination_col][transition_index], :] += number_move # Cumumlative # states_current = states_next.copy() return np.reshape(states_diff, states_diff.size) # return a 1D vector @@ -201,24 +169,18 @@ def rhs(t, x, today): this_seeding_amounts = seeding_amounts[seeding_instance_idx] seeding_places = seeding_data["seeding_places"][seeding_instance_idx] seeding_sources = seeding_data["seeding_sources"][seeding_instance_idx] - seeding_destinations = seeding_data["seeding_destinations"][ - seeding_instance_idx - ] + seeding_destinations = seeding_data["seeding_destinations"][seeding_instance_idx] # this_seeding_amounts = this_seeding_amounts < states_next[seeding_sources] ? 
this_seeding_amounts : states_next[seeding_instance_idx] states_next[seeding_sources][seeding_places] -= this_seeding_amounts - states_next[seeding_sources][seeding_places] = states_next[ - seeding_sources - ][seeding_places] * (states_next[seeding_sources][seeding_places] > 0) - states_next[seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_next[seeding_sources][seeding_places] = states_next[seeding_sources][seeding_places] * ( + states_next[seeding_sources][seeding_places] > 0 + ) + states_next[seeding_destinations][seeding_places] += this_seeding_amounts total_seeded += this_seeding_amounts times_seeded += 1 # ADD TO cumulative, this is debatable, - states_daily_incid[today][seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_daily_incid[today][seeding_destinations][seeding_places] += this_seeding_amounts ### Shape @@ -297,11 +259,7 @@ def rk4_integration1( percent_day_away = 0.5 for spatial_node in range(nspatial_nodes): percent_who_move[spatial_node] = min( - mobility_data[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] - ].sum() + mobility_data[mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1]].sum() / population[spatial_node], 1, ) @@ -311,9 +269,7 @@ def rk4_integration1( def rhs(t, x, today): states_current = np.reshape(x, (2, ncompartments, nspatial_nodes))[0] - states_diff = np.zeros( - (2, ncompartments, nspatial_nodes) - ) # first dim: 0 -> states_diff, 1: states_cum + states_diff = np.zeros((2, ncompartments, nspatial_nodes)) # first dim: 0 -> states_diff, 1: states_cum for transition_index in range(ntransitions): total_rate = np.ones((nspatial_nodes)) @@ -328,72 +284,52 @@ def rhs(t, x, today): proportion_info[proportion_sum_starts_col][proportion_index], proportion_info[proportion_sum_stops_col][proportion_index], ): - relevant_number_in_comp += states_current[ - transition_sum_compartments[proportion_sum_index] - ] + relevant_number_in_comp += states_current[transition_sum_compartments[proportion_sum_index]] # exponents should not be a proportion, since we don't sum them over sum compartments - relevant_exponent = parameters[ - proportion_info[proportion_exponent_col][proportion_index] - ][today] + relevant_exponent = parameters[proportion_info[proportion_exponent_col][proportion_index]][today] if first_proportion: only_one_proportion = ( - transitions[transition_proportion_start_col][transition_index] - + 1 + transitions[transition_proportion_start_col][transition_index] + 1 ) == transitions[transition_proportion_stop_col][transition_index] first_proportion = False source_number = relevant_number_in_comp if source_number.max() > 0: total_rate[source_number > 0] *= ( - source_number[source_number > 0] - ** relevant_exponent[source_number > 0] + source_number[source_number > 0] ** relevant_exponent[source_number > 0] / source_number[source_number > 0] ) if only_one_proportion: - total_rate *= parameters[ - transitions[transition_rate_col][transition_index] - ][today] + total_rate *= parameters[transitions[transition_rate_col][transition_index]][today] else: for spatial_node in range(nspatial_nodes): - proportion_keep_compartment = ( - 1 - percent_day_away * percent_who_move[spatial_node] - ) + proportion_keep_compartment = 1 - percent_day_away * percent_who_move[spatial_node] proportion_change_compartment = ( percent_day_away * mobility_data[ - mobility_data_indices[ - spatial_node - ] : mobility_data_indices[spatial_node + 1] + 
mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] / population[spatial_node] ) rate_keep_compartment = ( proportion_keep_compartment - * relevant_number_in_comp[spatial_node] - ** relevant_exponent[spatial_node] + * relevant_number_in_comp[spatial_node] ** relevant_exponent[spatial_node] / population[spatial_node] - * parameters[ - transitions[transition_rate_col][transition_index] - ][today][spatial_node] + * parameters[transitions[transition_rate_col][transition_index]][today][spatial_node] ) visiting_compartment = mobility_row_indices[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] rate_change_compartment = proportion_change_compartment rate_change_compartment *= ( - relevant_number_in_comp[visiting_compartment] - ** relevant_exponent[visiting_compartment] + relevant_number_in_comp[visiting_compartment] ** relevant_exponent[visiting_compartment] ) rate_change_compartment /= population[visiting_compartment] - rate_change_compartment *= parameters[ - transitions[transition_rate_col][transition_index] - ][today][visiting_compartment] - total_rate[spatial_node] *= ( - rate_keep_compartment + rate_change_compartment.sum() - ) + rate_change_compartment *= parameters[transitions[transition_rate_col][transition_index]][ + today + ][visiting_compartment] + total_rate[spatial_node] *= rate_keep_compartment + rate_change_compartment.sum() # compound_adjusted_rate = 1.0 - np.exp(-dt * total_rate) @@ -410,15 +346,9 @@ def rhs(t, x, today): # if number_move[spatial_node] > states_current[transitions[transition_source_col][transition_index]][spatial_node]: # number_move[spatial_node] = states_current[transitions[transition_source_col][transition_index]][spatial_node] # Not possible to enforce this anymore, but it shouldn't be a problem or maybe ? # TODO - states_diff[ - 0, transitions[transition_source_col][transition_index] - ] -= number_move - states_diff[ - 0, transitions[transition_destination_col][transition_index] - ] += number_move - states_diff[ - 1, transitions[transition_destination_col][transition_index], : - ] += number_move # Cumumlative + states_diff[0, transitions[transition_source_col][transition_index]] -= number_move + states_diff[0, transitions[transition_destination_col][transition_index]] += number_move + states_diff[1, transitions[transition_destination_col][transition_index], :] += number_move # Cumumlative # states_current = states_next.copy() return np.reshape(states_diff, states_diff.size) # return a 1D vector @@ -452,24 +382,18 @@ def rk4_integrate(t, x, today): this_seeding_amounts = seeding_amounts[seeding_instance_idx] seeding_places = seeding_data["seeding_places"][seeding_instance_idx] seeding_sources = seeding_data["seeding_sources"][seeding_instance_idx] - seeding_destinations = seeding_data["seeding_destinations"][ - seeding_instance_idx - ] + seeding_destinations = seeding_data["seeding_destinations"][seeding_instance_idx] # this_seeding_amounts = this_seeding_amounts < states_next[seeding_sources] ? 
this_seeding_amounts : states_next[seeding_instance_idx] states_next[seeding_sources][seeding_places] -= this_seeding_amounts - states_next[seeding_sources][seeding_places] = states_next[ - seeding_sources - ][seeding_places] * (states_next[seeding_sources][seeding_places] > 0) - states_next[seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_next[seeding_sources][seeding_places] = states_next[seeding_sources][seeding_places] * ( + states_next[seeding_sources][seeding_places] > 0 + ) + states_next[seeding_destinations][seeding_places] += this_seeding_amounts total_seeded += this_seeding_amounts times_seeded += 1 # ADD TO cumulative, this is debatable, - states_daily_incid[today][seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_daily_incid[today][seeding_destinations][seeding_places] += this_seeding_amounts ### Shape @@ -526,11 +450,7 @@ def rk4_integration2( percent_day_away = 0.5 for spatial_node in range(nspatial_nodes): percent_who_move[spatial_node] = min( - mobility_data[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] - ].sum() + mobility_data[mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1]].sum() / population[spatial_node], 1, ) @@ -541,9 +461,7 @@ def rk4_integration2( @jit(nopython=True) def rhs(t, x, today): states_current = np.reshape(x, (2, ncompartments, nspatial_nodes))[0] - states_diff = np.zeros( - (2, ncompartments, nspatial_nodes) - ) # first dim: 0 -> states_diff, 1: states_cum + states_diff = np.zeros((2, ncompartments, nspatial_nodes)) # first dim: 0 -> states_diff, 1: states_cum for transition_index in range(ntransitions): total_rate = np.ones((nspatial_nodes)) @@ -558,72 +476,52 @@ def rhs(t, x, today): proportion_info[proportion_sum_starts_col][proportion_index], proportion_info[proportion_sum_stops_col][proportion_index], ): - relevant_number_in_comp += states_current[ - transition_sum_compartments[proportion_sum_index] - ] + relevant_number_in_comp += states_current[transition_sum_compartments[proportion_sum_index]] # exponents should not be a proportion, since we don't sum them over sum compartments - relevant_exponent = parameters[ - proportion_info[proportion_exponent_col][proportion_index] - ][today] + relevant_exponent = parameters[proportion_info[proportion_exponent_col][proportion_index]][today] if first_proportion: only_one_proportion = ( - transitions[transition_proportion_start_col][transition_index] - + 1 + transitions[transition_proportion_start_col][transition_index] + 1 ) == transitions[transition_proportion_stop_col][transition_index] first_proportion = False source_number = relevant_number_in_comp if source_number.max() > 0: total_rate[source_number > 0] *= ( - source_number[source_number > 0] - ** relevant_exponent[source_number > 0] + source_number[source_number > 0] ** relevant_exponent[source_number > 0] / source_number[source_number > 0] ) if only_one_proportion: - total_rate *= parameters[ - transitions[transition_rate_col][transition_index] - ][today] + total_rate *= parameters[transitions[transition_rate_col][transition_index]][today] else: for spatial_node in range(nspatial_nodes): - proportion_keep_compartment = ( - 1 - percent_day_away * percent_who_move[spatial_node] - ) + proportion_keep_compartment = 1 - percent_day_away * percent_who_move[spatial_node] proportion_change_compartment = ( percent_day_away * mobility_data[ - mobility_data_indices[ - spatial_node - ] : mobility_data_indices[spatial_node + 1] + 
mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] / population[spatial_node] ) rate_keep_compartment = ( proportion_keep_compartment - * relevant_number_in_comp[spatial_node] - ** relevant_exponent[spatial_node] + * relevant_number_in_comp[spatial_node] ** relevant_exponent[spatial_node] / population[spatial_node] - * parameters[ - transitions[transition_rate_col][transition_index] - ][today][spatial_node] + * parameters[transitions[transition_rate_col][transition_index]][today][spatial_node] ) visiting_compartment = mobility_row_indices[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] rate_change_compartment = proportion_change_compartment rate_change_compartment *= ( - relevant_number_in_comp[visiting_compartment] - ** relevant_exponent[visiting_compartment] + relevant_number_in_comp[visiting_compartment] ** relevant_exponent[visiting_compartment] ) rate_change_compartment /= population[visiting_compartment] - rate_change_compartment *= parameters[ - transitions[transition_rate_col][transition_index] - ][today][visiting_compartment] - total_rate[spatial_node] *= ( - rate_keep_compartment + rate_change_compartment.sum() - ) + rate_change_compartment *= parameters[transitions[transition_rate_col][transition_index]][ + today + ][visiting_compartment] + total_rate[spatial_node] *= rate_keep_compartment + rate_change_compartment.sum() # compound_adjusted_rate = 1.0 - np.exp(-dt * total_rate) @@ -640,15 +538,9 @@ def rhs(t, x, today): # if number_move[spatial_node] > states_current[transitions[transition_source_col][transition_index]][spatial_node]: # number_move[spatial_node] = states_current[transitions[transition_source_col][transition_index]][spatial_node] # Not possible to enforce this anymore, but it shouldn't be a problem or maybe ? # TODO - states_diff[ - 0, transitions[transition_source_col][transition_index] - ] -= number_move - states_diff[ - 0, transitions[transition_destination_col][transition_index] - ] += number_move - states_diff[ - 1, transitions[transition_destination_col][transition_index], : - ] += number_move # Cumumlative + states_diff[0, transitions[transition_source_col][transition_index]] -= number_move + states_diff[0, transitions[transition_destination_col][transition_index]] += number_move + states_diff[1, transitions[transition_destination_col][transition_index], :] += number_move # Cumumlative # states_current = states_next.copy() return np.reshape(states_diff, states_diff.size) # return a 1D vector @@ -683,24 +575,18 @@ def rk4_integrate(t, x, today): this_seeding_amounts = seeding_amounts[seeding_instance_idx] seeding_places = seeding_data["seeding_places"][seeding_instance_idx] seeding_sources = seeding_data["seeding_sources"][seeding_instance_idx] - seeding_destinations = seeding_data["seeding_destinations"][ - seeding_instance_idx - ] + seeding_destinations = seeding_data["seeding_destinations"][seeding_instance_idx] # this_seeding_amounts = this_seeding_amounts < states_next[seeding_sources] ? 
this_seeding_amounts : states_next[seeding_instance_idx] states_next[seeding_sources][seeding_places] -= this_seeding_amounts - states_next[seeding_sources][seeding_places] = states_next[ - seeding_sources - ][seeding_places] * (states_next[seeding_sources][seeding_places] > 0) - states_next[seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_next[seeding_sources][seeding_places] = states_next[seeding_sources][seeding_places] * ( + states_next[seeding_sources][seeding_places] > 0 + ) + states_next[seeding_destinations][seeding_places] += this_seeding_amounts total_seeded += this_seeding_amounts times_seeded += 1 # ADD TO cumulative, this is debatable, - states_daily_incid[today][seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_daily_incid[today][seeding_destinations][seeding_places] += this_seeding_amounts ### Shape @@ -762,11 +648,7 @@ def rk4_integration3( percent_day_away = 0.5 for spatial_node in range(nspatial_nodes): percent_who_move[spatial_node] = min( - mobility_data[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] - ].sum() + mobility_data[mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1]].sum() / population[spatial_node], 1, ) @@ -777,9 +659,7 @@ def rk4_integration3( @jit(nopython=True) def rhs(t, x, today): states_current = np.reshape(x, (2, ncompartments, nspatial_nodes))[0] - states_diff = np.zeros( - (2, ncompartments, nspatial_nodes) - ) # first dim: 0 -> states_diff, 1: states_cum + states_diff = np.zeros((2, ncompartments, nspatial_nodes)) # first dim: 0 -> states_diff, 1: states_cum for transition_index in range(ntransitions): total_rate = np.ones((nspatial_nodes)) @@ -794,72 +674,52 @@ def rhs(t, x, today): proportion_info[proportion_sum_starts_col][proportion_index], proportion_info[proportion_sum_stops_col][proportion_index], ): - relevant_number_in_comp += states_current[ - transition_sum_compartments[proportion_sum_index] - ] + relevant_number_in_comp += states_current[transition_sum_compartments[proportion_sum_index]] # exponents should not be a proportion, since we don't sum them over sum compartments - relevant_exponent = parameters[ - proportion_info[proportion_exponent_col][proportion_index] - ][today] + relevant_exponent = parameters[proportion_info[proportion_exponent_col][proportion_index]][today] if first_proportion: only_one_proportion = ( - transitions[transition_proportion_start_col][transition_index] - + 1 + transitions[transition_proportion_start_col][transition_index] + 1 ) == transitions[transition_proportion_stop_col][transition_index] first_proportion = False source_number = relevant_number_in_comp if source_number.max() > 0: total_rate[source_number > 0] *= ( - source_number[source_number > 0] - ** relevant_exponent[source_number > 0] + source_number[source_number > 0] ** relevant_exponent[source_number > 0] / source_number[source_number > 0] ) if only_one_proportion: - total_rate *= parameters[ - transitions[transition_rate_col][transition_index] - ][today] + total_rate *= parameters[transitions[transition_rate_col][transition_index]][today] else: for spatial_node in range(nspatial_nodes): - proportion_keep_compartment = ( - 1 - percent_day_away * percent_who_move[spatial_node] - ) + proportion_keep_compartment = 1 - percent_day_away * percent_who_move[spatial_node] proportion_change_compartment = ( percent_day_away * mobility_data[ - mobility_data_indices[ - spatial_node - ] : mobility_data_indices[spatial_node + 1] + 
mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] / population[spatial_node] ) rate_keep_compartment = ( proportion_keep_compartment - * relevant_number_in_comp[spatial_node] - ** relevant_exponent[spatial_node] + * relevant_number_in_comp[spatial_node] ** relevant_exponent[spatial_node] / population[spatial_node] - * parameters[ - transitions[transition_rate_col][transition_index] - ][today][spatial_node] + * parameters[transitions[transition_rate_col][transition_index]][today][spatial_node] ) visiting_compartment = mobility_row_indices[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] rate_change_compartment = proportion_change_compartment rate_change_compartment *= ( - relevant_number_in_comp[visiting_compartment] - ** relevant_exponent[visiting_compartment] + relevant_number_in_comp[visiting_compartment] ** relevant_exponent[visiting_compartment] ) rate_change_compartment /= population[visiting_compartment] - rate_change_compartment *= parameters[ - transitions[transition_rate_col][transition_index] - ][today][visiting_compartment] - total_rate[spatial_node] *= ( - rate_keep_compartment + rate_change_compartment.sum() - ) + rate_change_compartment *= parameters[transitions[transition_rate_col][transition_index]][ + today + ][visiting_compartment] + total_rate[spatial_node] *= rate_keep_compartment + rate_change_compartment.sum() # compound_adjusted_rate = 1.0 - np.exp(-dt * total_rate) @@ -876,15 +736,9 @@ def rhs(t, x, today): # if number_move[spatial_node] > states_current[transitions[transition_source_col][transition_index]][spatial_node]: # number_move[spatial_node] = states_current[transitions[transition_source_col][transition_index]][spatial_node] # Not possible to enforce this anymore, but it shouldn't be a problem or maybe ? # TODO - states_diff[ - 0, transitions[transition_source_col][transition_index] - ] -= number_move - states_diff[ - 0, transitions[transition_destination_col][transition_index] - ] += number_move - states_diff[ - 1, transitions[transition_destination_col][transition_index], : - ] += number_move # Cumumlative + states_diff[0, transitions[transition_source_col][transition_index]] -= number_move + states_diff[0, transitions[transition_destination_col][transition_index]] += number_move + states_diff[1, transitions[transition_destination_col][transition_index], :] += number_move # Cumumlative # states_current = states_next.copy() return np.reshape(states_diff, states_diff.size) # return a 1D vector @@ -903,18 +757,16 @@ def rk4_integrate(t, x, today): @jit(nopython=True) def day_wrapper_rk4(today, states_next): x_ = np.zeros((2, ncompartments, nspatial_nodes)) - for seeding_instance_idx in range( - day_start_idx_dict[today], day_start_idx_dict[today + 1] - ): + for seeding_instance_idx in range(day_start_idx_dict[today], day_start_idx_dict[today + 1]): this_seeding_amounts = seeding_amounts[seeding_instance_idx] seeding_places = seeding_places_dict[seeding_instance_idx] seeding_sources = seeding_sources_dict[seeding_instance_idx] seeding_destinations = seeding_destinations_dict[seeding_instance_idx] # this_seeding_amounts = this_seeding_amounts < states_next[seeding_sources] ? 
this_seeding_amounts : states_next[seeding_instance_idx] states_next[seeding_sources][seeding_places] -= this_seeding_amounts - states_next[seeding_sources][seeding_places] = states_next[seeding_sources][ - seeding_places - ] * (states_next[seeding_sources][seeding_places] > 0) + states_next[seeding_sources][seeding_places] = states_next[seeding_sources][seeding_places] * ( + states_next[seeding_sources][seeding_places] > 0 + ) states_next[seeding_destinations][seeding_places] += this_seeding_amounts # ADD TO cumulative, this is debatable, @@ -991,11 +843,7 @@ def rk4_integration4( percent_day_away = 0.5 for spatial_node in range(nspatial_nodes): percent_who_move[spatial_node] = min( - mobility_data[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] - ].sum() + mobility_data[mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1]].sum() / population[spatial_node], 1, ) @@ -1006,9 +854,7 @@ def rk4_integration4( @jit(nopython=True) # , fastmath=True, parallel=True) def rhs(t, x, today): states_current = np.reshape(x, (2, ncompartments, nspatial_nodes))[0] - states_diff = np.zeros( - (2, ncompartments, nspatial_nodes) - ) # first dim: 0 -> states_diff, 1: states_cum + states_diff = np.zeros((2, ncompartments, nspatial_nodes)) # first dim: 0 -> states_diff, 1: states_cum for transition_index in range(ntransitions): total_rate = np.ones((nspatial_nodes)) @@ -1023,72 +869,52 @@ def rhs(t, x, today): proportion_info[proportion_sum_starts_col][proportion_index], proportion_info[proportion_sum_stops_col][proportion_index], ): - relevant_number_in_comp += states_current[ - transition_sum_compartments[proportion_sum_index] - ] + relevant_number_in_comp += states_current[transition_sum_compartments[proportion_sum_index]] # exponents should not be a proportion, since we don't sum them over sum compartments - relevant_exponent = parameters[ - proportion_info[proportion_exponent_col][proportion_index] - ][today] + relevant_exponent = parameters[proportion_info[proportion_exponent_col][proportion_index]][today] if first_proportion: only_one_proportion = ( - transitions[transition_proportion_start_col][transition_index] - + 1 + transitions[transition_proportion_start_col][transition_index] + 1 ) == transitions[transition_proportion_stop_col][transition_index] first_proportion = False source_number = relevant_number_in_comp if source_number.max() > 0: total_rate[source_number > 0] *= ( - source_number[source_number > 0] - ** relevant_exponent[source_number > 0] + source_number[source_number > 0] ** relevant_exponent[source_number > 0] / source_number[source_number > 0] ) if only_one_proportion: - total_rate *= parameters[ - transitions[transition_rate_col][transition_index] - ][today] + total_rate *= parameters[transitions[transition_rate_col][transition_index]][today] else: for spatial_node in range(nspatial_nodes): - proportion_keep_compartment = ( - 1 - percent_day_away * percent_who_move[spatial_node] - ) + proportion_keep_compartment = 1 - percent_day_away * percent_who_move[spatial_node] proportion_change_compartment = ( percent_day_away * mobility_data[ - mobility_data_indices[ - spatial_node - ] : mobility_data_indices[spatial_node + 1] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] / population[spatial_node] ) rate_keep_compartment = ( proportion_keep_compartment - * relevant_number_in_comp[spatial_node] - ** relevant_exponent[spatial_node] + * relevant_number_in_comp[spatial_node] ** 
relevant_exponent[spatial_node] / population[spatial_node] - * parameters[ - transitions[transition_rate_col][transition_index] - ][today][spatial_node] + * parameters[transitions[transition_rate_col][transition_index]][today][spatial_node] ) visiting_compartment = mobility_row_indices[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] rate_change_compartment = proportion_change_compartment rate_change_compartment *= ( - relevant_number_in_comp[visiting_compartment] - ** relevant_exponent[visiting_compartment] + relevant_number_in_comp[visiting_compartment] ** relevant_exponent[visiting_compartment] ) rate_change_compartment /= population[visiting_compartment] - rate_change_compartment *= parameters[ - transitions[transition_rate_col][transition_index] - ][today][visiting_compartment] - total_rate[spatial_node] *= ( - rate_keep_compartment + rate_change_compartment.sum() - ) + rate_change_compartment *= parameters[transitions[transition_rate_col][transition_index]][ + today + ][visiting_compartment] + total_rate[spatial_node] *= rate_keep_compartment + rate_change_compartment.sum() # compound_adjusted_rate = 1.0 - np.exp(-dt * total_rate) @@ -1105,15 +931,9 @@ def rhs(t, x, today): # if number_move[spatial_node] > states_current[transitions[transition_source_col][transition_index]][spatial_node]: # number_move[spatial_node] = states_current[transitions[transition_source_col][transition_index]][spatial_node] # Not possible to enforce this anymore, but it shouldn't be a problem or maybe ? # TODO - states_diff[ - 0, transitions[transition_source_col][transition_index] - ] -= number_move - states_diff[ - 0, transitions[transition_destination_col][transition_index] - ] += number_move - states_diff[ - 1, transitions[transition_destination_col][transition_index], : - ] += number_move # Cumumlative + states_diff[0, transitions[transition_source_col][transition_index]] -= number_move + states_diff[0, transitions[transition_destination_col][transition_index]] += number_move + states_diff[1, transitions[transition_destination_col][transition_index], :] += number_move # Cumumlative # states_current = states_next.copy() return np.reshape(states_diff, states_diff.size) # return a 1D vector @@ -1148,24 +968,18 @@ def rk4_integrate(t, x, today): this_seeding_amounts = seeding_amounts[seeding_instance_idx] seeding_places = seeding_data["seeding_places"][seeding_instance_idx] seeding_sources = seeding_data["seeding_sources"][seeding_instance_idx] - seeding_destinations = seeding_data["seeding_destinations"][ - seeding_instance_idx - ] + seeding_destinations = seeding_data["seeding_destinations"][seeding_instance_idx] # this_seeding_amounts = this_seeding_amounts < states_next[seeding_sources] ? 
this_seeding_amounts : states_next[seeding_instance_idx] states_next[seeding_sources][seeding_places] -= this_seeding_amounts - states_next[seeding_sources][seeding_places] = states_next[ - seeding_sources - ][seeding_places] * (states_next[seeding_sources][seeding_places] > 0) - states_next[seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_next[seeding_sources][seeding_places] = states_next[seeding_sources][seeding_places] * ( + states_next[seeding_sources][seeding_places] > 0 + ) + states_next[seeding_destinations][seeding_places] += this_seeding_amounts total_seeded += this_seeding_amounts times_seeded += 1 # ADD TO cumulative, this is debatable, - states_daily_incid[today][seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_daily_incid[today][seeding_destinations][seeding_places] += this_seeding_amounts ### Shape @@ -1223,11 +1037,7 @@ def rk4_integration5( percent_day_away = 0.5 for spatial_node in range(nspatial_nodes): percent_who_move[spatial_node] = min( - mobility_data[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] - ].sum() + mobility_data[mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1]].sum() / population[spatial_node], 1, ) @@ -1254,24 +1064,18 @@ def rk4_integration5( this_seeding_amounts = seeding_amounts[seeding_instance_idx] seeding_places = seeding_data["seeding_places"][seeding_instance_idx] seeding_sources = seeding_data["seeding_sources"][seeding_instance_idx] - seeding_destinations = seeding_data["seeding_destinations"][ - seeding_instance_idx - ] + seeding_destinations = seeding_data["seeding_destinations"][seeding_instance_idx] # this_seeding_amounts = this_seeding_amounts < states_next[seeding_sources] ? 
this_seeding_amounts : states_next[seeding_instance_idx] states_next[seeding_sources][seeding_places] -= this_seeding_amounts - states_next[seeding_sources][seeding_places] = states_next[ - seeding_sources - ][seeding_places] * (states_next[seeding_sources][seeding_places] > 0) - states_next[seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_next[seeding_sources][seeding_places] = states_next[seeding_sources][seeding_places] * ( + states_next[seeding_sources][seeding_places] > 0 + ) + states_next[seeding_destinations][seeding_places] += this_seeding_amounts total_seeded += this_seeding_amounts times_seeded += 1 # ADD TO cumulative, this is debatable, - states_daily_incid[today][seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_daily_incid[today][seeding_destinations][seeding_places] += this_seeding_amounts ### Shape @@ -1294,9 +1098,7 @@ def rk4_integration5( x = x_ + kx[i - 1] * rk_coefs[i] states_current = np.reshape(x, (2, ncompartments, nspatial_nodes))[0] - states_diff = np.zeros( - (2, ncompartments, nspatial_nodes) - ) # first dim: 0 -> states_diff, 1: states_cum + states_diff = np.zeros((2, ncompartments, nspatial_nodes)) # first dim: 0 -> states_diff, 1: states_cum for transition_index in range(ntransitions): total_rate = np.ones((nspatial_nodes)) @@ -1308,72 +1110,48 @@ def rk4_integration5( relevant_number_in_comp = np.zeros((nspatial_nodes)) relevant_exponent = np.ones((nspatial_nodes)) for proportion_sum_index in range( - proportion_info[proportion_sum_starts_col][ - proportion_index - ], + proportion_info[proportion_sum_starts_col][proportion_index], proportion_info[proportion_sum_stops_col][proportion_index], ): - relevant_number_in_comp += states_current[ - transition_sum_compartments[proportion_sum_index] - ] + relevant_number_in_comp += states_current[transition_sum_compartments[proportion_sum_index]] # exponents should not be a proportion, since we don't sum them over sum compartments - relevant_exponent = parameters[ - proportion_info[proportion_exponent_col][ - proportion_index - ] - ][today] + relevant_exponent = parameters[proportion_info[proportion_exponent_col][proportion_index]][ + today + ] if first_proportion: only_one_proportion = ( - transitions[transition_proportion_start_col][ - transition_index - ] - + 1 - ) == transitions[transition_proportion_stop_col][ - transition_index - ] + transitions[transition_proportion_start_col][transition_index] + 1 + ) == transitions[transition_proportion_stop_col][transition_index] first_proportion = False source_number = relevant_number_in_comp if source_number.max() > 0: total_rate[source_number > 0] *= ( - source_number[source_number > 0] - ** relevant_exponent[source_number > 0] + source_number[source_number > 0] ** relevant_exponent[source_number > 0] / source_number[source_number > 0] ) if only_one_proportion: - total_rate *= parameters[ - transitions[transition_rate_col][transition_index] - ][today] + total_rate *= parameters[transitions[transition_rate_col][transition_index]][today] else: for spatial_node in range(nspatial_nodes): - proportion_keep_compartment = ( - 1 - - percent_day_away * percent_who_move[spatial_node] - ) + proportion_keep_compartment = 1 - percent_day_away * percent_who_move[spatial_node] proportion_change_compartment = ( percent_day_away * mobility_data[ - mobility_data_indices[ - spatial_node - ] : mobility_data_indices[spatial_node + 1] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] / 
population[spatial_node] ) rate_keep_compartment = ( proportion_keep_compartment - * relevant_number_in_comp[spatial_node] - ** relevant_exponent[spatial_node] + * relevant_number_in_comp[spatial_node] ** relevant_exponent[spatial_node] / population[spatial_node] - * parameters[ - transitions[transition_rate_col][ - transition_index - ] - ][today][spatial_node] + * parameters[transitions[transition_rate_col][transition_index]][today][ + spatial_node + ] ) visiting_compartment = mobility_row_indices[ - mobility_data_indices[ - spatial_node - ] : mobility_data_indices[spatial_node + 1] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] rate_change_compartment = proportion_change_compartment @@ -1381,16 +1159,11 @@ def rk4_integration5( relevant_number_in_comp[visiting_compartment] ** relevant_exponent[visiting_compartment] ) - rate_change_compartment /= population[ - visiting_compartment - ] + rate_change_compartment /= population[visiting_compartment] rate_change_compartment *= parameters[ transitions[transition_rate_col][transition_index] ][today][visiting_compartment] - total_rate[spatial_node] *= ( - rate_keep_compartment - + rate_change_compartment.sum() - ) + total_rate[spatial_node] *= rate_keep_compartment + rate_change_compartment.sum() # compound_adjusted_rate = 1.0 - np.exp(-dt * total_rate) @@ -1402,19 +1175,13 @@ def rk4_integration5( # ) # else: if True: - number_move = ( - source_number * total_rate - ) # * compound_adjusted_rate + number_move = source_number * total_rate # * compound_adjusted_rate # for spatial_node in range(nspatial_nodes): # if number_move[spatial_node] > states_current[transitions[transition_source_col][transition_index]][spatial_node]: # number_move[spatial_node] = states_current[transitions[transition_source_col][transition_index]][spatial_node] # Not possible to enforce this anymore, but it shouldn't be a problem or maybe ? 
# TODO - states_diff[ - 0, transitions[transition_source_col][transition_index] - ] -= number_move - states_diff[ - 0, transitions[transition_destination_col][transition_index] - ] += number_move + states_diff[0, transitions[transition_source_col][transition_index]] -= number_move + states_diff[0, transitions[transition_destination_col][transition_index]] += number_move states_diff[ 1, transitions[transition_destination_col][transition_index], : ] += number_move # Cumumlative @@ -1474,11 +1241,7 @@ def rk4_integration2_smart( percent_day_away = 0.5 for spatial_node in range(nspatial_nodes): percent_who_move[spatial_node] = min( - mobility_data[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] - ].sum() + mobility_data[mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1]].sum() / population[spatial_node], 1, ) @@ -1492,9 +1255,7 @@ def rhs(t, x): if (today) > ndays: today = ndays - 1 states_current = np.reshape(x, (2, ncompartments, nspatial_nodes))[0] - states_diff = np.zeros( - (2, ncompartments, nspatial_nodes) - ) # first dim: 0 -> states_diff, 1: states_cum + states_diff = np.zeros((2, ncompartments, nspatial_nodes)) # first dim: 0 -> states_diff, 1: states_cum for transition_index in range(ntransitions): total_rate = np.ones((nspatial_nodes)) @@ -1509,72 +1270,52 @@ def rhs(t, x): proportion_info[proportion_sum_starts_col][proportion_index], proportion_info[proportion_sum_stops_col][proportion_index], ): - relevant_number_in_comp += states_current[ - transition_sum_compartments[proportion_sum_index] - ] + relevant_number_in_comp += states_current[transition_sum_compartments[proportion_sum_index]] # exponents should not be a proportion, since we don't sum them over sum compartments - relevant_exponent = parameters[ - proportion_info[proportion_exponent_col][proportion_index] - ][today] + relevant_exponent = parameters[proportion_info[proportion_exponent_col][proportion_index]][today] if first_proportion: only_one_proportion = ( - transitions[transition_proportion_start_col][transition_index] - + 1 + transitions[transition_proportion_start_col][transition_index] + 1 ) == transitions[transition_proportion_stop_col][transition_index] first_proportion = False source_number = relevant_number_in_comp if source_number.max() > 0: total_rate[source_number > 0] *= ( - source_number[source_number > 0] - ** relevant_exponent[source_number > 0] + source_number[source_number > 0] ** relevant_exponent[source_number > 0] / source_number[source_number > 0] ) if only_one_proportion: - total_rate *= parameters[ - transitions[transition_rate_col][transition_index] - ][today] + total_rate *= parameters[transitions[transition_rate_col][transition_index]][today] else: for spatial_node in range(nspatial_nodes): - proportion_keep_compartment = ( - 1 - percent_day_away * percent_who_move[spatial_node] - ) + proportion_keep_compartment = 1 - percent_day_away * percent_who_move[spatial_node] proportion_change_compartment = ( percent_day_away * mobility_data[ - mobility_data_indices[ - spatial_node - ] : mobility_data_indices[spatial_node + 1] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] / population[spatial_node] ) rate_keep_compartment = ( proportion_keep_compartment - * relevant_number_in_comp[spatial_node] - ** relevant_exponent[spatial_node] + * relevant_number_in_comp[spatial_node] ** relevant_exponent[spatial_node] / population[spatial_node] - * parameters[ - transitions[transition_rate_col][transition_index] 
- ][today][spatial_node] + * parameters[transitions[transition_rate_col][transition_index]][today][spatial_node] ) visiting_compartment = mobility_row_indices[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] rate_change_compartment = proportion_change_compartment rate_change_compartment *= ( - relevant_number_in_comp[visiting_compartment] - ** relevant_exponent[visiting_compartment] + relevant_number_in_comp[visiting_compartment] ** relevant_exponent[visiting_compartment] ) rate_change_compartment /= population[visiting_compartment] - rate_change_compartment *= parameters[ - transitions[transition_rate_col][transition_index] - ][today][visiting_compartment] - total_rate[spatial_node] *= ( - rate_keep_compartment + rate_change_compartment.sum() - ) + rate_change_compartment *= parameters[transitions[transition_rate_col][transition_index]][ + today + ][visiting_compartment] + total_rate[spatial_node] *= rate_keep_compartment + rate_change_compartment.sum() # compound_adjusted_rate = 1.0 - np.exp(-dt * total_rate) @@ -1591,15 +1332,9 @@ def rhs(t, x): # if number_move[spatial_node] > states_current[transitions[transition_source_col][transition_index]][spatial_node]: # number_move[spatial_node] = states_current[transitions[transition_source_col][transition_index]][spatial_node] # Not possible to enforce this anymore, but it shouldn't be a problem or maybe ? # TODO - states_diff[ - 0, transitions[transition_source_col][transition_index] - ] -= number_move - states_diff[ - 0, transitions[transition_destination_col][transition_index] - ] += number_move - states_diff[ - 1, transitions[transition_destination_col][transition_index], : - ] += number_move # Cumumlative + states_diff[0, transitions[transition_source_col][transition_index]] -= number_move + states_diff[0, transitions[transition_destination_col][transition_index]] += number_move + states_diff[1, transitions[transition_destination_col][transition_index], :] += number_move # Cumumlative # states_current = states_next.copy() return np.reshape(states_diff, states_diff.size) # return a 1D vector @@ -1644,32 +1379,20 @@ def rk4_integrate(today, x): seeding_data["day_start_idx"][today + 1], ): this_seeding_amounts = seeding_amounts[seeding_instance_idx] - seeding_places = seeding_data["seeding_places"][ - seeding_instance_idx - ] - seeding_sources = seeding_data["seeding_sources"][ - seeding_instance_idx - ] - seeding_destinations = seeding_data["seeding_destinations"][ - seeding_instance_idx - ] + seeding_places = seeding_data["seeding_places"][seeding_instance_idx] + seeding_sources = seeding_data["seeding_sources"][seeding_instance_idx] + seeding_destinations = seeding_data["seeding_destinations"][seeding_instance_idx] # this_seeding_amounts = this_seeding_amounts < states_next[seeding_sources] ? 
this_seeding_amounts : states_next[seeding_instance_idx] states_next[seeding_sources][seeding_places] -= this_seeding_amounts - states_next[seeding_sources][seeding_places] = states_next[ - seeding_sources - ][seeding_places] * ( + states_next[seeding_sources][seeding_places] = states_next[seeding_sources][seeding_places] * ( states_next[seeding_sources][seeding_places] > 0 ) - states_next[seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_next[seeding_destinations][seeding_places] += this_seeding_amounts total_seeded += this_seeding_amounts times_seeded += 1 # ADD TO cumulative, this is debatable, - states_daily_incid[today][seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_daily_incid[today][seeding_destinations][seeding_places] += this_seeding_amounts ### Shape @@ -1737,8 +1460,7 @@ def rk4_integrate(today, x): ## Dimensions "int32," "int32," "int32," ## ncompartments ## nspatial_nodes ## Number of days ## Parameters - "float64[:, :, :]," ## Parameters [ nparameters x ndays x nspatial_nodes] - "float64," ## dt + "float64[:, :, :]," "float64," ## Parameters [ nparameters x ndays x nspatial_nodes] ## dt ## Transitions "int64[:, :]," ## transitions [ [source, destination, proportion_start, proportion_stop, rate] x ntransitions ] "int64[:, :]," ## proportions_info [ [sum_starts, sum_stops, exponent] x ntransition_proportions ] @@ -1791,11 +1513,7 @@ def rk4_integration_aot( percent_day_away = 0.5 for spatial_node in range(nspatial_nodes): percent_who_move[spatial_node] = min( - mobility_data[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] - ].sum() + mobility_data[mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1]].sum() / population[spatial_node], 1, ) @@ -1806,9 +1524,7 @@ def rk4_integration_aot( def rhs(t, x, today): # states_current = np.reshape(x, (2, ncompartments, nspatial_nodes))[0] states_current = x[0] - states_diff = np.zeros( - (2, ncompartments, nspatial_nodes) - ) # first dim: 0 -> states_diff, 1: states_cum + states_diff = np.zeros((2, ncompartments, nspatial_nodes)) # first dim: 0 -> states_diff, 1: states_cum for transition_index in range(ntransitions): total_rate = np.ones((nspatial_nodes)) @@ -1823,72 +1539,52 @@ def rhs(t, x, today): proportion_info[proportion_sum_starts_col][proportion_index], proportion_info[proportion_sum_stops_col][proportion_index], ): - relevant_number_in_comp += states_current[ - transition_sum_compartments[proportion_sum_index] - ] + relevant_number_in_comp += states_current[transition_sum_compartments[proportion_sum_index]] # exponents should not be a proportion, since we don't sum them over sum compartments - relevant_exponent = parameters[ - proportion_info[proportion_exponent_col][proportion_index] - ][today] + relevant_exponent = parameters[proportion_info[proportion_exponent_col][proportion_index]][today] if first_proportion: only_one_proportion = ( - transitions[transition_proportion_start_col][transition_index] - + 1 + transitions[transition_proportion_start_col][transition_index] + 1 ) == transitions[transition_proportion_stop_col][transition_index] first_proportion = False source_number = relevant_number_in_comp if source_number.max() > 0: total_rate[source_number > 0] *= ( - source_number[source_number > 0] - ** relevant_exponent[source_number > 0] + source_number[source_number > 0] ** relevant_exponent[source_number > 0] / source_number[source_number > 0] ) if only_one_proportion: - total_rate *= parameters[ - 
transitions[transition_rate_col][transition_index] - ][today] + total_rate *= parameters[transitions[transition_rate_col][transition_index]][today] else: for spatial_node in range(nspatial_nodes): - proportion_keep_compartment = ( - 1 - percent_day_away * percent_who_move[spatial_node] - ) + proportion_keep_compartment = 1 - percent_day_away * percent_who_move[spatial_node] proportion_change_compartment = ( percent_day_away * mobility_data[ - mobility_data_indices[ - spatial_node - ] : mobility_data_indices[spatial_node + 1] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] / population[spatial_node] ) rate_keep_compartment = ( proportion_keep_compartment - * relevant_number_in_comp[spatial_node] - ** relevant_exponent[spatial_node] + * relevant_number_in_comp[spatial_node] ** relevant_exponent[spatial_node] / population[spatial_node] - * parameters[ - transitions[transition_rate_col][transition_index] - ][today][spatial_node] + * parameters[transitions[transition_rate_col][transition_index]][today][spatial_node] ) visiting_compartment = mobility_row_indices[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] rate_change_compartment = proportion_change_compartment rate_change_compartment *= ( - relevant_number_in_comp[visiting_compartment] - ** relevant_exponent[visiting_compartment] + relevant_number_in_comp[visiting_compartment] ** relevant_exponent[visiting_compartment] ) rate_change_compartment /= population[visiting_compartment] - rate_change_compartment *= parameters[ - transitions[transition_rate_col][transition_index] - ][today][visiting_compartment] - total_rate[spatial_node] *= ( - rate_keep_compartment + rate_change_compartment.sum() - ) + rate_change_compartment *= parameters[transitions[transition_rate_col][transition_index]][ + today + ][visiting_compartment] + total_rate[spatial_node] *= rate_keep_compartment + rate_change_compartment.sum() # compound_adjusted_rate = 1.0 - np.exp(-dt * total_rate) @@ -1905,15 +1601,9 @@ def rhs(t, x, today): # if number_move[spatial_node] > states_current[transitions[transition_source_col][transition_index]][spatial_node]: # number_move[spatial_node] = states_current[transitions[transition_source_col][transition_index]][spatial_node] # Not possible to enforce this anymore, but it shouldn't be a problem or maybe ? 
# TODO - states_diff[ - 0, transitions[transition_source_col][transition_index] - ] -= number_move - states_diff[ - 0, transitions[transition_destination_col][transition_index] - ] += number_move - states_diff[ - 1, transitions[transition_destination_col][transition_index], : - ] += number_move # Cumumlative + states_diff[0, transitions[transition_source_col][transition_index]] -= number_move + states_diff[0, transitions[transition_destination_col][transition_index]] += number_move + states_diff[1, transitions[transition_destination_col][transition_index], :] += number_move # Cumumlative # states_current = states_next.copy() return states_diff # return a 1D vector @@ -1947,24 +1637,18 @@ def rk4_integrate(t, x, today): this_seeding_amounts = seeding_amounts[seeding_instance_idx] seeding_places = seeding_data["seeding_places"][seeding_instance_idx] seeding_sources = seeding_data["seeding_sources"][seeding_instance_idx] - seeding_destinations = seeding_data["seeding_destinations"][ - seeding_instance_idx - ] + seeding_destinations = seeding_data["seeding_destinations"][seeding_instance_idx] # this_seeding_amounts = this_seeding_amounts < states_next[seeding_sources] ? this_seeding_amounts : states_next[seeding_instance_idx] states_next[seeding_sources][seeding_places] -= this_seeding_amounts - states_next[seeding_sources][seeding_places] = states_next[ - seeding_sources - ][seeding_places] * (states_next[seeding_sources][seeding_places] > 0) - states_next[seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_next[seeding_sources][seeding_places] = states_next[seeding_sources][seeding_places] * ( + states_next[seeding_sources][seeding_places] > 0 + ) + states_next[seeding_destinations][seeding_places] += this_seeding_amounts total_seeded += this_seeding_amounts times_seeded += 1 # ADD TO cumulative, this is debatable, - states_daily_incid[today][seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_daily_incid[today][seeding_destinations][seeding_places] += this_seeding_amounts ### Shape diff --git a/gempyor_pkg/src/gempyor/file_paths.py b/gempyor_pkg/src/gempyor/file_paths.py index 3ffa9e092..f20f7048c 100644 --- a/gempyor_pkg/src/gempyor/file_paths.py +++ b/gempyor_pkg/src/gempyor/file_paths.py @@ -6,15 +6,11 @@ def create_file_name(run_id, prefix, index, ftype, extension, create_directory=T if create_directory: os.makedirs(create_dir_name(run_id, prefix, ftype), exist_ok=True) - fn_no_ext = create_file_name_without_extension( - run_id, prefix, index, ftype, create_directory=create_directory - ) + fn_no_ext = create_file_name_without_extension(run_id, prefix, index, ftype, create_directory=create_directory) return f"{fn_no_ext}.%s" % (extension,) -def create_file_name_without_extension( - run_id, prefix, index, ftype, create_directory=True -): +def create_file_name_without_extension(run_id, prefix, index, ftype, create_directory=True): if create_directory: os.makedirs(create_dir_name(run_id, prefix, ftype), exist_ok=True) return "model_output/%s/%s%09d.%s.%s" % (ftype, prefix, index, run_id, ftype) @@ -25,8 +21,4 @@ def run_id(): def create_dir_name(run_id, prefix, ftype): - return os.path.dirname( - create_file_name_without_extension( - run_id, prefix, 1, ftype, create_directory=False - ) - ) + return os.path.dirname(create_file_name_without_extension(run_id, prefix, 1, ftype, create_directory=False)) diff --git a/gempyor_pkg/src/gempyor/interface.py b/gempyor_pkg/src/gempyor/interface.py index 3776a2a82..b3e417f33 100644 --- 
a/gempyor_pkg/src/gempyor/interface.py +++ b/gempyor_pkg/src/gempyor/interface.py @@ -116,9 +116,7 @@ def __init__( f""" gempyor >> prefix: {in_prefix};""" # ti: {s.ti}; tf: {s.tf}; ) - self.already_built = ( - False # whether we have already build the costly object we just build once. - ) + self.already_built = False # whether we have already build the costly object we just build once. def update_prefix(self, new_prefix, new_out_prefix=None): self.s.in_prefix = new_prefix @@ -134,15 +132,11 @@ def update_run_id(self, new_run_id, new_out_run_id=None): else: self.s.out_run_id = new_out_run_id - def one_simulation_legacy( - self, sim_id2write: int, load_ID: bool = False, sim_id2load: int = None - ): + def one_simulation_legacy(self, sim_id2write: int, load_ID: bool = False, sim_id2load: int = None): sim_id2write = int(sim_id2write) if load_ID: sim_id2load = int(sim_id2load) - with Timer( - f">>> GEMPYOR onesim {'(loading file)' if load_ID else '(from config)'}" - ): + with Timer(f">>> GEMPYOR onesim {'(loading file)' if load_ID else '(from config)'}"): with Timer("onerun_SEIR"): seir.onerun_SEIR( sim_id2write=sim_id2write, @@ -173,21 +167,15 @@ def one_simulation( if load_ID: sim_id2load = int(sim_id2load) - with Timer( - f">>> GEMPYOR onesim {'(loading file)' if load_ID else '(from config)'}" - ): + with Timer(f">>> GEMPYOR onesim {'(loading file)' if load_ID else '(from config)'}"): if not self.already_built: self.outcomes_parameters = outcomes.read_parameters_from_config(self.s) npi_outcomes = None if parallel: with Timer("//things"): - with ProcessPoolExecutor( - max_workers=max(mp.cpu_count(), 3) - ) as executor: - ret_seir = executor.submit( - seir.build_npi_SEIR, self.s, load_ID, sim_id2load, config - ) + with ProcessPoolExecutor(max_workers=max(mp.cpu_count(), 3)) as executor: + ret_seir = executor.submit(seir.build_npi_SEIR, self.s, load_ID, sim_id2load, config) if self.s.npi_config_outcomes: ret_outcomes = executor.submit( outcomes.build_npi_Outcomes, @@ -197,9 +185,7 @@ def one_simulation( config, ) if not self.already_built: - ret_comparments = executor.submit( - self.s.compartments.get_transition_array - ) + ret_comparments = executor.submit(self.s.compartments.get_transition_array) # print("expections:", ret_seir.exception(), ret_outcomes.exception(), ret_comparments.exception()) @@ -223,9 +209,7 @@ def one_simulation( self.proportion_info, ) = self.s.compartments.get_transition_array() self.already_built = True - npi_seir = seir.build_npi_SEIR( - s=self.s, load_ID=load_ID, sim_id2load=sim_id2load, config=config - ) + npi_seir = seir.build_npi_SEIR(s=self.s, load_ID=load_ID, sim_id2load=sim_id2load, config=config) if self.s.npi_config_outcomes: npi_outcomes = outcomes.build_npi_Outcomes( s=self.s, @@ -238,9 +222,7 @@ def one_simulation( ### Run every time: with Timer("SEIR.parameters"): # Draw or load parameters - p_draw = self.get_seir_parameters( - load_ID=load_ID, sim_id2load=sim_id2load - ) + p_draw = self.get_seir_parameters(load_ID=load_ID, sim_id2load=sim_id2load) # reduce them parameters = self.s.parameters.parameters_reduce(p_draw, npi_seir) @@ -252,22 +234,14 @@ def one_simulation( self.debug_p_draw = p_draw self.debug_parameters = parameters self.debug_parsed_parameters = parsed_parameters - + with Timer("onerun_SEIR.seeding"): if load_ID: - initial_conditions = self.s.seedingAndIC.load_ic( - sim_id2load, setup=self.s - ) - seeding_data, seeding_amounts = self.s.seedingAndIC.load_seeding( - sim_id2load, setup=self.s - ) + initial_conditions = 
self.s.seedingAndIC.load_ic(sim_id2load, setup=self.s) + seeding_data, seeding_amounts = self.s.seedingAndIC.load_seeding(sim_id2load, setup=self.s) else: - initial_conditions = self.s.seedingAndIC.draw_ic( - sim_id2write, setup=self.s - ) - seeding_data, seeding_amounts = self.s.seedingAndIC.draw_seeding( - sim_id2write, setup=self.s - ) + initial_conditions = self.s.seedingAndIC.draw_ic(sim_id2write, setup=self.s) + seeding_data, seeding_amounts = self.s.seedingAndIC.draw_seeding(sim_id2write, setup=self.s) self.debug_seeding_date = seeding_data self.debug_seeding_amounts = seeding_amounts @@ -286,9 +260,7 @@ def one_simulation( with Timer("SEIR.postprocess"): if self.s.write_csv or self.s.write_parquet: - out_df = seir.postprocess_and_write( - sim_id2write, self.s, states, p_draw, npi_seir, seeding_data - ) + out_df = seir.postprocess_and_write(sim_id2write, self.s, states, p_draw, npi_seir, seeding_data) self.debug_out_df = out_df loaded_values = None @@ -318,18 +290,14 @@ def one_simulation( ) return 0 - def plot_transition_graph( - self, output_file="transition_graph", source_filters=[], destination_filters=[] - ): + def plot_transition_graph(self, output_file="transition_graph", source_filters=[], destination_filters=[]): self.s.compartments.plot( output_file=output_file, source_filters=source_filters, destination_filters=destination_filters, ) - def get_outcome_npi( - self, load_ID=False, sim_id2load=None, bypass_DF=None, bypass_FN=None - ): + def get_outcome_npi(self, load_ID=False, sim_id2load=None, bypass_DF=None, bypass_FN=None): npi_outcomes = None if self.s.npi_config_outcomes: npi_outcomes = outcomes.build_npi_Outcomes( @@ -342,9 +310,7 @@ def get_outcome_npi( ) return npi_outcomes - def get_seir_npi( - self, load_ID=False, sim_id2load=None, bypass_DF=None, bypass_FN=None - ): + def get_seir_npi(self, load_ID=False, sim_id2load=None, bypass_DF=None, bypass_FN=None): npi_seir = seir.build_npi_SEIR( s=self.s, load_ID=load_ID, @@ -355,9 +321,7 @@ def get_seir_npi( ) return npi_seir - def get_seir_parameters( - self, load_ID=False, sim_id2load=None, bypass_DF=None, bypass_FN=None - ): + def get_seir_parameters(self, load_ID=False, sim_id2load=None, bypass_DF=None, bypass_FN=None): param_df = None if bypass_DF is not None: param_df = bypass_DF @@ -373,14 +337,10 @@ def get_seir_parameters( nnodes=self.s.nnodes, ) else: - p_draw = self.s.parameters.parameters_quick_draw( - n_days=self.s.n_days, nnodes=self.s.nnodes - ) + p_draw = self.s.parameters.parameters_quick_draw(n_days=self.s.n_days, nnodes=self.s.nnodes) return p_draw - def get_seir_parametersDF( - self, load_ID=False, sim_id2load=None, bypass_DF=None, bypass_FN=None - ): + def get_seir_parametersDF(self, load_ID=False, sim_id2load=None, bypass_DF=None, bypass_FN=None): p_draw = self.get_seir_parameters( load_ID=load_ID, sim_id2load=sim_id2load, @@ -435,9 +395,7 @@ def paramred_parallel(run_spec, snpi_fn): scenario="inference", # NPIs scenario to use deathrate="med", # Outcome scenario to use stoch_traj_flag=False, - spatial_path_prefix=run_spec[ - "geodata" - ], # prefix where to find the folder indicated in spatial_setup$ + spatial_path_prefix=run_spec["geodata"], # prefix where to find the folder indicated in spatial_setup$ ) snpi = pq.read_table(snpi_fn).to_pandas() @@ -448,9 +406,7 @@ def paramred_parallel(run_spec, snpi_fn): params_draw_arr = gempyor_simulator.get_seir_parameters( bypass_FN=snpi_fn.replace("snpi", "spar") ) # could also accept (load_ID=True, sim_id2load=XXX) or (bypass_DF=) or (bypass_FN=) - 
param_reduc_from = gempyor_simulator.get_seir_parameter_reduced( - npi_seir=npi_seir, p_draw=params_draw_arr - ) + param_reduc_from = gempyor_simulator.get_seir_parameter_reduced(npi_seir=npi_seir, p_draw=params_draw_arr) return param_reduc_from @@ -465,9 +421,7 @@ def paramred_parallel_config(run_spec, dummy): scenario="inference", # NPIs scenario to use deathrate="med", # Outcome scenario to use stoch_traj_flag=False, - spatial_path_prefix=run_spec[ - "geodata" - ], # prefix where to find the folder indicated in spatial_setup$ + spatial_path_prefix=run_spec["geodata"], # prefix where to find the folder indicated in spatial_setup$ ) npi_seir = gempyor_simulator.get_seir_npi() @@ -475,8 +429,6 @@ def paramred_parallel_config(run_spec, dummy): params_draw_arr = ( gempyor_simulator.get_seir_parameters() ) # could also accept (load_ID=True, sim_id2load=XXX) or (bypass_DF=) or (bypass_FN=) - param_reduc_from = gempyor_simulator.get_seir_parameter_reduced( - npi_seir=npi_seir, p_draw=params_draw_arr - ) + param_reduc_from = gempyor_simulator.get_seir_parameter_reduced(npi_seir=npi_seir, p_draw=params_draw_arr) return param_reduc_from diff --git a/gempyor_pkg/src/gempyor/outcomes.py b/gempyor_pkg/src/gempyor/outcomes.py index 9b60a1c74..e601e384d 100644 --- a/gempyor_pkg/src/gempyor/outcomes.py +++ b/gempyor_pkg/src/gempyor/outcomes.py @@ -21,9 +21,7 @@ def run_parallel_outcomes(s, *, sim_id2write, nsim=1, n_jobs=1): sim_id2writes = np.arange(sim_id2write, sim_id2write + s.nsim) loaded_values = None - if (n_jobs == 1) or ( - s.nsim == 1 - ): # run single process for debugging/profiling purposes + if (n_jobs == 1) or (s.nsim == 1): # run single process for debugging/profiling purposes for sim_offset in np.arange(nsim): onerun_delayframe_outcomes( sim_id2write=sim_id2writes[sim_offset], @@ -98,9 +96,7 @@ def onerun_delayframe_outcomes( npi_outcomes = None if s.npi_config_outcomes: - npi_outcomes = build_npi_Outcomes( - s=s, load_ID=load_ID, sim_id2load=sim_id2load, config=config - ) + npi_outcomes = build_npi_Outcomes(s=s, load_ID=load_ID, sim_id2load=sim_id2load, config=config) loaded_values = None if load_ID: @@ -117,9 +113,7 @@ def onerun_delayframe_outcomes( ) with Timer("onerun_delayframe_outcomes.postprocess"): - postprocess_and_write( - sim_id=sim_id2write, s=s, outcomes=outcomes, hpar=hpar, npi=npi_outcomes - ) + postprocess_and_write(sim_id=sim_id2write, s=s, outcomes=outcomes, hpar=hpar, npi=npi_outcomes) def read_parameters_from_config(s: setup.Setup): @@ -133,9 +127,7 @@ def read_parameters_from_config(s: setup.Setup): branching_file = s.outcomes_config["param_place_file"].as_str() branching_data = pa.parquet.read_table(branching_file).to_pandas() if "relative_probability" not in list(branching_data["quantity"]): - raise ValueError( - f"No 'relative_probability' quantity in {branching_file}, therefor making it useless" - ) + raise ValueError(f"No 'relative_probability' quantity in {branching_file}, therefor making it useless") print( "Loaded geoids in loaded relative probablity file:", @@ -143,9 +135,7 @@ def read_parameters_from_config(s: setup.Setup): "", end="", ) - branching_data = branching_data[ - branching_data["geoid"].isin(s.spatset.nodenames) - ] + branching_data = branching_data[branching_data["geoid"].isin(s.spatset.nodenames)] print( "Intersect with seir simulation: ", len(branching_data.geoid.unique()), @@ -176,30 +166,18 @@ def read_parameters_from_config(s: setup.Setup): parameters[class_name]["source"] = src_name else: if subclasses != [""]: - raise ValueError( - 
"Subclasses not compatible with outcomes from compartments " - ) - elif ("incidence" in src_name.keys()) or ( - "prevalence" in src_name.keys() - ): + raise ValueError("Subclasses not compatible with outcomes from compartments ") + elif ("incidence" in src_name.keys()) or ("prevalence" in src_name.keys()): parameters[class_name]["source"] = dict(src_name) else: raise ValueError( f"unsure how to read outcome {class_name}: not a str, nor an incidence or prevalence: {src_name}" ) - parameters[class_name]["probability"] = outcomes_config[new_comp][ - "probability" - ]["value"] - if outcomes_config[new_comp]["probability"][ - "intervention_param_name" - ].exists(): + parameters[class_name]["probability"] = outcomes_config[new_comp]["probability"]["value"] + if outcomes_config[new_comp]["probability"]["intervention_param_name"].exists(): parameters[class_name]["probability::npi_param_name"] = ( - outcomes_config[new_comp]["probability"][ - "intervention_param_name" - ] - .as_str() - .lower() + outcomes_config[new_comp]["probability"]["intervention_param_name"].as_str().lower() ) logging.debug( f"probability of outcome {new_comp} is affected by intervention " @@ -207,22 +185,12 @@ def read_parameters_from_config(s: setup.Setup): f"instead of {new_comp}::probability" ) else: - parameters[class_name][ - "probability::npi_param_name" - ] = f"{new_comp}::probability".lower() - - parameters[class_name]["delay"] = outcomes_config[new_comp][ - "delay" - ]["value"] - if outcomes_config[new_comp]["delay"][ - "intervention_param_name" - ].exists(): + parameters[class_name]["probability::npi_param_name"] = f"{new_comp}::probability".lower() + + parameters[class_name]["delay"] = outcomes_config[new_comp]["delay"]["value"] + if outcomes_config[new_comp]["delay"]["intervention_param_name"].exists(): parameters[class_name]["delay::npi_param_name"] = ( - outcomes_config[new_comp]["delay"][ - "intervention_param_name" - ] - .as_str() - .lower() + outcomes_config[new_comp]["delay"]["intervention_param_name"].as_str().lower() ) logging.debug( f"delay of outcome {new_comp} is affected by intervention " @@ -230,23 +198,13 @@ def read_parameters_from_config(s: setup.Setup): f"instead of {new_comp}::delay" ) else: - parameters[class_name][ - "delay::npi_param_name" - ] = f"{new_comp}::delay".lower() + parameters[class_name]["delay::npi_param_name"] = f"{new_comp}::delay".lower() if outcomes_config[new_comp]["duration"].exists(): - parameters[class_name]["duration"] = outcomes_config[new_comp][ - "duration" - ]["value"] - if outcomes_config[new_comp]["duration"][ - "intervention_param_name" - ].exists(): + parameters[class_name]["duration"] = outcomes_config[new_comp]["duration"]["value"] + if outcomes_config[new_comp]["duration"]["intervention_param_name"].exists(): parameters[class_name]["duration::npi_param_name"] = ( - outcomes_config[new_comp]["duration"][ - "intervention_param_name" - ] - .as_str() - .lower() + outcomes_config[new_comp]["duration"]["intervention_param_name"].as_str().lower() ) logging.debug( f"duration of outcome {new_comp} is affected by intervention " @@ -254,19 +212,14 @@ def read_parameters_from_config(s: setup.Setup): f"instead of {new_comp}::duration" ) else: - parameters[class_name][ - "duration::npi_param_name" - ] = f"{new_comp}::duration".lower() + parameters[class_name]["duration::npi_param_name"] = f"{new_comp}::duration".lower() if outcomes_config[new_comp]["duration"]["name"].exists(): parameters[class_name]["duration_name"] = ( - 
outcomes_config[new_comp]["duration"]["name"].as_str() - + subclass + outcomes_config[new_comp]["duration"]["name"].as_str() + subclass ) else: - parameters[class_name]["duration_name"] = ( - new_comp + "_curr" + subclass - ) + parameters[class_name]["duration_name"] = new_comp + "_curr" + subclass if s.outcomes_config["param_from_file"].get(): rel_probability = branching_data[ @@ -274,20 +227,12 @@ def read_parameters_from_config(s: setup.Setup): & (branching_data["quantity"] == "relative_probability") ].copy(deep=True) if len(rel_probability) > 0: - logging.debug( - f"Using 'param_from_file' for relative probability in outcome {class_name}" - ) + logging.debug(f"Using 'param_from_file' for relative probability in outcome {class_name}") # Sort it in case the relative probablity file is mispecified - rel_probability.geoid = rel_probability.geoid.astype( - "category" - ) - rel_probability.geoid.cat.set_categories( - s.spatset.nodenames, inplace=True - ) + rel_probability.geoid = rel_probability.geoid.astype("category") + rel_probability.geoid.cat.set_categories(s.spatset.nodenames, inplace=True) rel_probability = rel_probability.sort_values(["geoid"]) - parameters[class_name]["rel_probability"] = rel_probability[ - "value" - ].to_numpy() + parameters[class_name]["rel_probability"] = rel_probability["value"].to_numpy() else: logging.debug( f"*NOT* Using 'param_from_file' for relative probability in outcome {class_name}" @@ -300,13 +245,9 @@ def read_parameters_from_config(s: setup.Setup): if outcomes_config[new_comp]["duration"].exists(): duration_name = new_comp + "_curr" if outcomes_config[new_comp]["duration"]["name"].exists(): - duration_name = outcomes_config[new_comp]["duration"][ - "name" - ].as_str() + duration_name = outcomes_config[new_comp]["duration"]["name"].as_str() parameters[duration_name] = {} - parameters[duration_name]["sum"] = [ - duration_name + c for c in subclasses - ] + parameters[duration_name]["sum"] = [duration_name + c for c in subclasses] elif outcomes_config[new_comp]["sum"].exists(): parameters[new_comp] = {} @@ -357,9 +298,7 @@ def read_seir_sim(s, sim_id): return seir_df -def compute_all_multioutcomes( - *, s, sim_id2write, parameters, loaded_values=None, npi=None -): +def compute_all_multioutcomes(*, s, sim_id2write, parameters, loaded_values=None, npi=None): """Compute delay frame based on temporally varying input. 
We load the seir sim corresponding to sim_id to write""" hpar = pd.DataFrame(columns=["geoid", "quantity", "outcome", "value"]) all_data = {} @@ -390,52 +329,36 @@ def compute_all_multioutcomes( all_data["incidI"] = source_array outcomes = pd.merge( outcomes, - dataframe_from_array( - source_array, s.spatset.nodenames, dates, "incidI" - ), + dataframe_from_array(source_array, s.spatset.nodenames, dates, "incidI"), ) elif isinstance(source_name, dict): - source_array = get_filtered_incidI( - seir_sim, dates, s.spatset.nodenames, source_name - ) + source_array = get_filtered_incidI(seir_sim, dates, s.spatset.nodenames, source_name) # we don't keep source in this cases else: # already defined outcomes source_array = all_data[source_name] - if (loaded_values is not None) and ( - new_comp in loaded_values["outcome"].values - ): + if (loaded_values is not None) and (new_comp in loaded_values["outcome"].values): ## This may be unnecessary probabilities = loaded_values[ - (loaded_values["quantity"] == "probability") - & (loaded_values["outcome"] == new_comp) - ]["value"].to_numpy() - delays = loaded_values[ - (loaded_values["quantity"] == "delay") - & (loaded_values["outcome"] == new_comp) + (loaded_values["quantity"] == "probability") & (loaded_values["outcome"] == new_comp) ]["value"].to_numpy() + delays = loaded_values[(loaded_values["quantity"] == "delay") & (loaded_values["outcome"] == new_comp)][ + "value" + ].to_numpy() else: - probabilities = parameters[new_comp][ - "probability" - ].as_random_distribution()( + probabilities = parameters[new_comp]["probability"].as_random_distribution()( size=len(s.spatset.nodenames) ) # one draw per geoid if "rel_probability" in parameters[new_comp]: - probabilities = ( - probabilities * parameters[new_comp]["rel_probability"] - ) + probabilities = probabilities * parameters[new_comp]["rel_probability"] delays = parameters[new_comp]["delay"].as_random_distribution()( size=len(s.spatset.nodenames) ) # one draw per geoid probabilities[probabilities > 1] = 1 probabilities[probabilities < 0] = 0 - probabilities = np.repeat( - probabilities[:, np.newaxis], len(dates), axis=1 - ).T # duplicate in time - delays = np.repeat( - delays[:, np.newaxis], len(dates), axis=1 - ).T # duplicate in time + probabilities = np.repeat(probabilities[:, np.newaxis], len(dates), axis=1).T # duplicate in time + delays = np.repeat(delays[:, np.newaxis], len(dates), axis=1).T # duplicate in time delays = np.round(delays).astype(int) # write hpar before NPI hpar = pd.concat( @@ -446,8 +369,7 @@ def compute_all_multioutcomes( "geoid": s.spatset.nodenames, "quantity": ["probability"] * len(s.spatset.nodenames), "outcome": [new_comp] * len(s.spatset.nodenames), - "value": probabilities[0] - * np.ones(len(s.spatset.nodenames)), + "value": probabilities[0] * np.ones(len(s.spatset.nodenames)), } ), pd.DataFrame.from_dict( @@ -464,60 +386,41 @@ def compute_all_multioutcomes( if npi is not None: delays = NPI.reduce_parameter( parameter=delays, - modification=npi.getReduction( - parameters[new_comp]["delay::npi_param_name"].lower() - ), + modification=npi.getReduction(parameters[new_comp]["delay::npi_param_name"].lower()), ) delays = np.round(delays).astype(int) probabilities = NPI.reduce_parameter( parameter=probabilities, - modification=npi.getReduction( - parameters[new_comp]["probability::npi_param_name"].lower() - ), + modification=npi.getReduction(parameters[new_comp]["probability::npi_param_name"].lower()), ) # Create new compartment incidence: all_data[new_comp] = 
np.empty_like(source_array) # Draw with from source compartment if s.stoch_traj_flag: - all_data[new_comp] = np.random.binomial( - source_array.astype(np.int32), probabilities - ) + all_data[new_comp] = np.random.binomial(source_array.astype(np.int32), probabilities) else: - all_data[new_comp] = source_array * ( - probabilities * np.ones_like(source_array) - ) + all_data[new_comp] = source_array * (probabilities * np.ones_like(source_array)) # Shift to account for the delay ## stoch_delay_flag is whether to use stochastic delays or not stoch_delay_flag = False - all_data[new_comp] = multishift( - all_data[new_comp], delays, stoch_delay_flag=stoch_delay_flag - ) + all_data[new_comp] = multishift(all_data[new_comp], delays, stoch_delay_flag=stoch_delay_flag) # Produce a dataframe an merge it - df_p = dataframe_from_array( - all_data[new_comp], s.spatset.nodenames, dates, new_comp - ) + df_p = dataframe_from_array(all_data[new_comp], s.spatset.nodenames, dates, new_comp) outcomes = pd.merge(outcomes, df_p) # Make duration if "duration" in parameters[new_comp]: - if (loaded_values is not None) and ( - new_comp in loaded_values["outcome"].values - ): + if (loaded_values is not None) and (new_comp in loaded_values["outcome"].values): durations = loaded_values[ - (loaded_values["quantity"] == "duration") - & (loaded_values["outcome"] == new_comp) + (loaded_values["quantity"] == "duration") & (loaded_values["outcome"] == new_comp) ]["value"].to_numpy() else: - durations = parameters[new_comp][ - "duration" - ].as_random_distribution()( + durations = parameters[new_comp]["duration"].as_random_distribution()( size=len(s.spatset.nodenames) ) # one draw per geoid - durations = np.repeat( - durations[:, np.newaxis], len(dates), axis=1 - ).T # duplicate in time + durations = np.repeat(durations[:, np.newaxis], len(dates), axis=1).T # duplicate in time durations = np.round(durations).astype(int) hpar = pd.concat( @@ -528,8 +431,7 @@ def compute_all_multioutcomes( "geoid": s.spatset.nodenames, "quantity": ["duration"] * len(s.spatset.nodenames), "outcome": [new_comp] * len(s.spatset.nodenames), - "value": durations[0] - * np.ones(len(s.spatset.nodenames)), + "value": durations[0] * np.ones(len(s.spatset.nodenames)), } ), ], @@ -546,9 +448,7 @@ def compute_all_multioutcomes( # print(f"{new_comp}-duration".lower(), npi.getReduction(f"{new_comp}-duration".lower())) durations = NPI.reduce_parameter( parameter=durations, - modification=npi.getReduction( - parameters[new_comp]["duration::npi_param_name"].lower() - ), + modification=npi.getReduction(parameters[new_comp]["duration::npi_param_name"].lower()), ) # npi.getReduction(f"{new_comp}::duration".lower())) durations = np.round(durations).astype(int) # plt.imshow(durations) @@ -557,9 +457,7 @@ def compute_all_multioutcomes( # plt.savefig('Daft'+new_comp + '-' + source) # plt.close() - all_data[parameters[new_comp]["duration_name"]] = np.cumsum( - all_data[new_comp], axis=0 - ) - multishift( + all_data[parameters[new_comp]["duration_name"]] = np.cumsum(all_data[new_comp], axis=0) - multishift( np.cumsum(all_data[new_comp], axis=0), durations, stoch_delay_flag=stoch_delay_flag, @@ -582,9 +480,7 @@ def compute_all_multioutcomes( for cmp in parameters[new_comp]["sum"]: sum_outcome += all_data[cmp] all_data[new_comp] = sum_outcome - df_p = dataframe_from_array( - sum_outcome, s.spatset.nodenames, dates, new_comp - ) + df_p = dataframe_from_array(sum_outcome, s.spatset.nodenames, dates, new_comp) outcomes = pd.merge(outcomes, df_p) return outcomes, hpar @@ -681,9 
+577,7 @@ def multishift(arr, shifts, stoch_delay_flag=True): # for k,case in enumerate(cases): # results[i+k][j] = cases[k] else: - for i in range( - arr.shape[0] - ): # numba nopython does not allow iterating over 2D array + for i in range(arr.shape[0]): # numba nopython does not allow iterating over 2D array for j in range(arr.shape[1]): if i + shifts[i, j] < arr.shape[0]: result[i + shifts[i, j], j] += arr[i, j] diff --git a/gempyor_pkg/src/gempyor/parameters.py b/gempyor_pkg/src/gempyor/parameters.py index becdd8c9b..55dd3ae44 100644 --- a/gempyor_pkg/src/gempyor/parameters.py +++ b/gempyor_pkg/src/gempyor/parameters.py @@ -15,7 +15,13 @@ class Parameters: # Minimal object to be easily picklable for // runs def __init__( - self, parameter_config: confuse.ConfigView, *, ti: datetime.date, tf: datetime.date, nodenames: list, config_version: str = "old", + self, + parameter_config: confuse.ConfigView, + *, + ti: datetime.date, + tf: datetime.date, + nodenames: list, + config_version: str = "old", ): self.pconfig = parameter_config self.pnames = [] @@ -39,46 +45,50 @@ def __init__( self.pdata[pn] = {} self.pdata[pn]["idx"] = idx - # Parameter characterized by it's distribution - if self.pconfig[pn]["value"].exists(): - self.pdata[pn]["dist"] = self.pconfig[pn][ - "value" - ].as_random_distribution() + if self.pconfig[pn]["value"].exists(): + self.pdata[pn]["dist"] = self.pconfig[pn]["value"].as_random_distribution() # Parameter given as a file elif self.pconfig[pn]["timeserie"].exists(): fn_name = self.pconfig[pn]["timeserie"].get() df = utils.read_df(fn_name).set_index("date") df.index = pd.to_datetime(df.index) - if len(df.columns) >= len(nodenames): # one ts per geoid - df = df[nodenames] # make sure the order of geoids is the same as the reference + if len(df.columns) >= len(nodenames): # one ts per geoid + df = df[nodenames] # make sure the order of geoids is the same as the reference # (nodenames from spatial setup) and select the columns elif len(df.columns) == 1: - df = pd.DataFrame(pd.concat([df]*len(nodenames), axis=1).values, - index=df.index, columns=nodenames) + df = pd.DataFrame( + pd.concat([df] * len(nodenames), axis=1).values, index=df.index, columns=nodenames + ) else: - print('loaded col :', sorted(list(df.columns))) - print('geodata col:', sorted(nodenames)) - raise ValueError(f"""ERROR loading file {fn_name} for parameter {pn}: the number of non 'date' - columns are {len(df.columns)}, expected {len(nodenames)} (the number of geoids) or one.""") + print("loaded col :", sorted(list(df.columns))) + print("geodata col:", sorted(nodenames)) + raise ValueError( + f"""ERROR loading file {fn_name} for parameter {pn}: the number of non 'date' + columns are {len(df.columns)}, expected {len(nodenames)} (the number of geoids) or one.""" + ) - df = df[str(ti):str(tf)] - if not (len(df.index) == len(pd.date_range(ti,tf))): - print('config dates:', pd.date_range(ti,tf)) - print('loaded dates:', df.index) - print(pd.date_range(ti,tf) == df.index) - raise ValueError(f"""ERROR loading file {fn_name} for parameter {pn}: + df = df[str(ti) : str(tf)] + if not (len(df.index) == len(pd.date_range(ti, tf))): + print("config dates:", pd.date_range(ti, tf)) + print("loaded dates:", df.index) + print(pd.date_range(ti, tf) == df.index) + raise ValueError( + f"""ERROR loading file {fn_name} for parameter {pn}: the 'date' index of the provided file does not cover the whole config time span from - {ti}->{tf}, where we have dates from {str(df.index[0])} to {str(df.index[-1])}""") + {ti}->{tf}, where 
we have dates from {str(df.index[0])} to {str(df.index[-1])}""" + ) # check the date range, need the lenght to be equal - if not (pd.date_range(ti,tf) == df.index).all(): - print('config dates:', pd.date_range(ti,tf)) - print('loaded dates:', df.index) - print(pd.date_range(ti,tf) == df.index) - raise ValueError(f"""ERROR loading file {fn_name} for parameter {pn}: + if not (pd.date_range(ti, tf) == df.index).all(): + print("config dates:", pd.date_range(ti, tf)) + print("loaded dates:", df.index) + print(pd.date_range(ti, tf) == df.index) + raise ValueError( + f"""ERROR loading file {fn_name} for parameter {pn}: the 'date' index of the provided file does not cover the whole config time span from - {ti}->{tf}""") + {ti}->{tf}""" + ) self.pdata[pn]["ts"] = df if self.pconfig[pn]["intervention_overlap_operation"].exists(): @@ -90,9 +100,7 @@ def __init__( logging.debug( f"No 'intervention_overlap_operation' for parameter {pn}, assuming multiplicative NPIs" ) - self.intervention_overlap_operation[ - self.pdata[pn]["intervention_overlap_operation"] - ].append(pn.lower()) + self.intervention_overlap_operation[self.pdata[pn]["intervention_overlap_operation"]].append(pn.lower()) elif config_version == "old": n_parallel_compartments = 1 @@ -138,14 +146,10 @@ def __init__( for compartment, index in compartments_dict.items(): if "susceptibility_reduction" in compartments_map[compartment]: pn = f"susceptibility_reduction{index}" - p_dists[pn] = compartments_map[compartment][ - "susceptibility_reduction" - ].as_random_distribution() + p_dists[pn] = compartments_map[compartment]["susceptibility_reduction"].as_random_distribution() self.intervention_overlap_operation["prod"].append(pn.lower()) else: - raise ValueError( - f"Susceptibility Reduction not found for comp {compartment}" - ) + raise ValueError(f"Susceptibility Reduction not found for comp {compartment}") if "transmissibility_reduction" in compartments_map[compartment]: pn = f"transmissibility_reduction{index}" p_dists[pn] = compartments_map[compartment][ @@ -153,14 +157,10 @@ def __init__( ].as_random_distribution() self.intervention_overlap_operation["prod"].append(pn.lower()) else: - raise ValueError( - f"Transmissibility Reduction not found for comp {compartment}" - ) + raise ValueError(f"Transmissibility Reduction not found for comp {compartment}") for transition in range(n_parallel_transitions): pn = f"transition_rate{transition}" - p_dists[pn] = transition_map[transition][ - "rate" - ].as_random_distribution() + p_dists[pn] = transition_map[transition]["rate"].as_random_distribution() self.intervention_overlap_operation["sum"].append(pn.lower()) ### Build the new structure @@ -178,9 +178,7 @@ def __init__( logging.debug(f"We have {self.npar} parameter: {self.pnames}") logging.debug(f"Data to sample is: {self.pdata}") logging.debug(f"Index in arrays are: {self.pnames2pindex}") - logging.debug( - f"NPI overlap operation is {self.intervention_overlap_operation} " - ) + logging.debug(f"NPI overlap operation is {self.intervention_overlap_operation} ") def picklable_lamda_alpha(self): """These two functions were lambda in __init__ before, it was more elegant. 
but as the object needs to be pickable, @@ -211,9 +209,7 @@ def parameters_quick_draw(self, n_days: int, nnodes: int) -> ndarray: return param_arr # we don't store it as a member because this object needs to be small to be pickable - def parameters_load( - self, param_df: pd.DataFrame, n_days: int, nnodes: int - ) -> ndarray: + def parameters_load(self, param_df: pd.DataFrame, n_days: int, nnodes: int) -> ndarray: """ drop-in equivalent to param_quick_draw() that take a file as written parameter_write() :param fname: @@ -232,9 +228,7 @@ def parameters_load( elif "ts" in self.pdata[pn]: param_arr[idx] = self.pdata[pn]["ts"].values else: - print( - f"PARAM: parameter {pn} NOT found in loadID file. Drawing from config distribution" - ) + print(f"PARAM: parameter {pn} NOT found in loadID file. Drawing from config distribution") pval = self.pdata[pn]["dist"]() param_arr[idx] = np.full((n_days, nnodes), pval) diff --git a/gempyor_pkg/src/gempyor/results.py b/gempyor_pkg/src/gempyor/results.py index 8a10607cc..ffa2db674 100644 --- a/gempyor_pkg/src/gempyor/results.py +++ b/gempyor_pkg/src/gempyor/results.py @@ -46,9 +46,7 @@ def plot_quick_summary(self, comp="cumI"): ) fig.autofmt_xdate() if not self.s.interactive: - plt.savefig( - f"{self.figdir}{self.s.setup_name}_{comp}_per_node{self.timestamp}.pdf" - ) + plt.savefig(f"{self.figdir}{self.s.setup_name}_{comp}_per_node{self.timestamp}.pdf") q50 = pd.DataFrame( index=pd.date_range(self.ti, self.tf, freq=self.freq), @@ -73,9 +71,7 @@ def plot_quick_summary(self, comp="cumI"): ax.fill_between(q50.index, q05.sum(axis=1), q95.sum(axis=1), alpha=0.3) fig.autofmt_xdate() if not self.s.interactive: - plt.savefig( - f"{self.figdir}{self.s.setup_name}_{comp}_all_nodes{self.timestamp}.pdf" - ) + plt.savefig(f"{self.figdir}{self.s.setup_name}_{comp}_all_nodes{self.timestamp}.pdf") def build_comp_data(self): """Very long""" @@ -196,9 +192,7 @@ def plot_comp(self, comp, nodes): fig.autofmt_xdate() if not self.s.interactive: - plt.savefig( - f"{self.figdir}{self.s.setup_name}_{comp}_selected{self.timestamp}.pdf" - ) + plt.savefig(f"{self.figdir}{self.s.setup_name}_{comp}_selected{self.timestamp}.pdf") return fig, axes @@ -225,9 +219,7 @@ def plot_all_comp(self, nodes): ax.legend() fig.autofmt_xdate() if not self.s.interactive: - plt.savefig( - f"{self.figdir}{self.s.setup_name}_allcomp_selected{self.timestamp}.pdf" - ) + plt.savefig(f"{self.figdir}{self.s.setup_name}_allcomp_selected{self.timestamp}.pdf") return fig, axes def plot_comp_mult(self, comp, nodes): @@ -242,9 +234,7 @@ def plot_comp_mult(self, comp, nodes): for i, nd in enumerate(nodes): ax = axes.flat[i] ax.set_title(self.s.spatset.data["geoid"][nd]) - ax.plot( - self.comp_data[nd][comp].quantile(0.5, axis=1), c=self.colors[i % 4] - ) + ax.plot(self.comp_data[nd][comp].quantile(0.5, axis=1), c=self.colors[i % 4]) ax.fill_between( self.comp_data[nd][comp].index, self.comp_data[nd][comp].quantile(0.05, axis=1), @@ -255,7 +245,5 @@ def plot_comp_mult(self, comp, nodes): fig.autofmt_xdate() if not self.s.interactive: - plt.savefig( - f"{self.figdir}{self.s.setup_name}_{comp}2_selected{self.timestamp}.pdf" - ) + plt.savefig(f"{self.figdir}{self.s.setup_name}_{comp}2_selected{self.timestamp}.pdf") return fig, axes diff --git a/gempyor_pkg/src/gempyor/seeding_ic.py b/gempyor_pkg/src/gempyor/seeding_ic.py index 8e2bbb008..438d7d4e5 100644 --- a/gempyor_pkg/src/gempyor/seeding_ic.py +++ b/gempyor_pkg/src/gempyor/seeding_ic.py @@ -17,13 +17,9 @@ def _DataFrame2NumbaDict(df, amounts, setup) -> nb.typed.Dict: 
if not df["date"].is_monotonic_increasing: - raise ValueError( - "_DataFrame2NumbaDict got an unsorted dataframe, exposing itself to non-sense" - ) + raise ValueError("_DataFrame2NumbaDict got an unsorted dataframe, exposing itself to non-sense") - cmp_grp_names = [ - col for col in setup.compartments.compartments.columns if col != "name" - ] + cmp_grp_names = [col for col in setup.compartments.compartments.columns if col != "name"] seeding_dict: nb.typed.Dict = nb.typed.Dict.empty( key_type=nb.types.unicode_type, value_type=nb.types.int64[:], @@ -47,27 +43,19 @@ def _DataFrame2NumbaDict(df, amounts, setup) -> nb.typed.Dict: nb_seed_perday[(row["date"].date() - setup.ti).days] + 1 ) - source_dict = { - grp_name: row[f"source_{grp_name}"] for grp_name in cmp_grp_names - } - destination_dict = { - grp_name: row[f"destination_{grp_name}"] for grp_name in cmp_grp_names - } - seeding_dict["seeding_sources"][idx] = setup.compartments.get_comp_idx( - source_dict - ) - seeding_dict["seeding_destinations"][idx] = setup.compartments.get_comp_idx( - destination_dict - ) - seeding_dict["seeding_places"][idx] = setup.spatset.nodenames.index( - row["place"] - ) + source_dict = {grp_name: row[f"source_{grp_name}"] for grp_name in cmp_grp_names} + destination_dict = {grp_name: row[f"destination_{grp_name}"] for grp_name in cmp_grp_names} + seeding_dict["seeding_sources"][idx] = setup.compartments.get_comp_idx(source_dict) + seeding_dict["seeding_destinations"][idx] = setup.compartments.get_comp_idx(destination_dict) + seeding_dict["seeding_places"][idx] = setup.spatset.nodenames.index(row["place"]) seeding_amounts[idx] = amounts[idx] else: n_seeding_ignored += 1 - + if n_seeding_ignored > 0: - logging.critical(f"Seeding ignored {n_seeding_ignored} rows because they were before the start of the simulation.") + logging.critical( + f"Seeding ignored {n_seeding_ignored} rows because they were before the start of the simulation." + ) day_start_idx = np.zeros(setup.n_days + 1, dtype=np.int64) day_start_idx[1:] = np.cumsum(nb_seed_perday) @@ -97,31 +85,21 @@ def draw_ic(self, sim_id: int, setup) -> np.ndarray: elif method == "SetInitialConditions": # TODO: this format should allow not complete configurations # - Does not support the new way of doing compartiment indexing - logger.critical( - "Untested method SetInitialConditions !!! Please report this messsage." - ) + logger.critical("Untested method SetInitialConditions !!! Please report this messsage.") ic_df = pd.read_csv( self.initial_conditions_config["states_file"].as_str(), converters={"place": lambda x: str(x)}, ) if ic_df.empty: - raise ValueError( - f"There is no entry for initial time ti in the provided seeding::states_file." 
- ) + raise ValueError(f"There is no entry for initial time ti in the provided seeding::states_file.") y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nnodes)) for pl_idx, pl in enumerate(setup.spatset.nodenames): # if pl in list(ic_df["place"]): states_pl = ic_df[ic_df["place"] == pl] - for comp_idx, comp_name in setup.compartments.compartments[ - "name" - ].iteritems(): - y0[comp_idx, pl_idx] = float( - states_pl[states_pl["comp"] == comp_name]["amount"] - ) + for comp_idx, comp_name in setup.compartments.compartments["name"].iteritems(): + y0[comp_idx, pl_idx] = float(states_pl[states_pl["comp"] == comp_name]["amount"]) elif self.seeding_config["ignore_missing"].get(): - print( - f"WARNING: State load does not exist for node {pl}, assuming fully susceptible population" - ) + print(f"WARNING: State load does not exist for node {pl}, assuming fully susceptible population") y0[0, pl_idx] = setup.popnodes[pl_idx] else: raise ValueError( @@ -129,22 +107,13 @@ def draw_ic(self, sim_id: int, setup) -> np.ndarray: ) elif method == "InitialConditionsFolderDraw": - ic_df = setup.read_simID( - ftype=self.initial_conditions_config["initial_file_type"], sim_id=sim_id - ) - ic_df = ic_df[ - (ic_df["date"] == str(setup.ti)) - & (ic_df["mc_value_type"] == "prevalence") - ] + ic_df = setup.read_simID(ftype=self.initial_conditions_config["initial_file_type"], sim_id=sim_id) + ic_df = ic_df[(ic_df["date"] == str(setup.ti)) & (ic_df["mc_value_type"] == "prevalence")] if ic_df.empty: - raise ValueError( - f"There is no entry for initial time ti in the provided seeding::states_file." - ) + raise ValueError(f"There is no entry for initial time ti in the provided seeding::states_file.") y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nnodes)) - for comp_idx, comp_name in setup.compartments.compartments[ - "name" - ].iteritems(): + for comp_idx, comp_name in setup.compartments.compartments["name"].iteritems(): ic_df_compartment = ic_df[ic_df["mc_name"] == comp_name] for pl_idx, pl in enumerate(setup.spatset.nodenames): if pl in ic_df.columns: @@ -159,9 +128,7 @@ def draw_ic(self, sim_id: int, setup) -> np.ndarray: f"place {pl} does not exist in seeding::states_file. You can set ignore_missing=TRUE to bypass this error" ) else: - raise NotImplementedError( - f"unknown initial conditions method [got: {method}]" - ) + raise NotImplementedError(f"unknown initial conditions method [got: {method}]") return y0 def draw_seeding(self, sim_id: int, setup) -> nb.typed.Dict: @@ -177,9 +144,7 @@ def draw_seeding(self, sim_id: int, setup) -> nb.typed.Dict: ) dupes = seeding[seeding.duplicated(["place", "date"])].index + 1 if not dupes.empty: - raise ValueError( - f"Repeated place-date in rows {dupes.tolist()} of seeding::lambda_file." 
- ) + raise ValueError(f"Repeated place-date in rows {dupes.tolist()} of seeding::lambda_file.") elif method == "FolderDraw": seeding = pd.read_csv( setup.get_input_filename( @@ -229,6 +194,4 @@ def load_ic(self, sim_id: int, setup) -> nb.typed.Dict: # Write seeding used to file def seeding_write(self, seeding, fname, extension): - raise NotImplementedError( - f"It is not yet possible to write the seeding to a file" - ) + raise NotImplementedError(f"It is not yet possible to write the seeding to a file") diff --git a/gempyor_pkg/src/gempyor/seir.py b/gempyor_pkg/src/gempyor/seir.py index 4430e4b79..c38f92f20 100644 --- a/gempyor_pkg/src/gempyor/seir.py +++ b/gempyor_pkg/src/gempyor/seir.py @@ -98,9 +98,7 @@ def steps_SEIR( ) seir_sim = steps_rk4.rk4_integration(**fnct_args) else: - logging.critical( - "Experimental !!! These methods are not ready for production ! " - ) + logging.critical("Experimental !!! These methods are not ready for production ! ") if s.integration_method in [ "scipy.solve_ivp", "scipy.odeint", @@ -112,9 +110,7 @@ def steps_SEIR( f"with method {s.integration_method}, only deterministic " f"integration is possible (got stoch_straj_flag={s.stoch_traj_flag}" ) - seir_sim = steps_experimental.ode_integration( - **fnct_args, integration_method=s.integration_method - ) + seir_sim = steps_experimental.ode_integration(**fnct_args, integration_method=s.integration_method) elif s.integration_method == "rk4.jit1": seir_sim = steps_experimental.rk4_integration1(**fnct_args) elif s.integration_method == "rk4.jit2": @@ -152,18 +148,14 @@ def build_npi_SEIR(s, load_ID, sim_id2load, config, bypass_DF=None, bypass_FN=No global_config=config, geoids=s.spatset.nodenames, loaded_df=loaded_df, - pnames_overlap_operation_sum=s.parameters.intervention_overlap_operation[ - "sum" - ], + pnames_overlap_operation_sum=s.parameters.intervention_overlap_operation["sum"], ) else: npi = NPI.NPIBase.execute( npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames, - pnames_overlap_operation_sum=s.parameters.intervention_overlap_operation[ - "sum" - ], + pnames_overlap_operation_sum=s.parameters.intervention_overlap_operation["sum"], ) return npi @@ -190,14 +182,10 @@ def onerun_SEIR( with Timer("onerun_SEIR.seeding"): if load_ID: initial_conditions = s.seedingAndIC.load_ic(sim_id2load, setup=s) - seeding_data, seeding_amounts = s.seedingAndIC.load_seeding( - sim_id2load, setup=s - ) + seeding_data, seeding_amounts = s.seedingAndIC.load_seeding(sim_id2load, setup=s) else: initial_conditions = s.seedingAndIC.draw_ic(sim_id2write, setup=s) - seeding_data, seeding_amounts = s.seedingAndIC.draw_seeding( - sim_id2write, setup=s - ) + seeding_data, seeding_amounts = s.seedingAndIC.draw_seeding(sim_id2write, setup=s) with Timer("onerun_SEIR.parameters"): # Draw or load parameters @@ -208,18 +196,14 @@ def onerun_SEIR( nnodes=s.nnodes, ) else: - p_draw = s.parameters.parameters_quick_draw( - n_days=s.n_days, nnodes=s.nnodes - ) + p_draw = s.parameters.parameters_quick_draw(n_days=s.n_days, nnodes=s.nnodes) # reduce them parameters = s.parameters.parameters_reduce(p_draw, npi) log_debug_parameters(p_draw, "Parameters without interventions") log_debug_parameters(parameters, "Parameters with interventions") # Parse them - parsed_parameters = s.compartments.parse_parameters( - parameters, s.parameters.pnames, unique_strings - ) + parsed_parameters = s.compartments.parse_parameters(parameters, s.parameters.pnames, unique_strings) log_debug_parameters(parsed_parameters, "Unique Parameters used 
by transitions") with Timer("onerun_SEIR.compute"): @@ -236,9 +220,7 @@ def onerun_SEIR( with Timer("onerun_SEIR.postprocess"): if s.write_csv or s.write_parquet: - out_df = postprocess_and_write( - sim_id2write, s, states, p_draw, npi, seeding_data - ) + out_df = postprocess_and_write(sim_id2write, s, states, p_draw, npi, seeding_data) return out_df @@ -248,9 +230,7 @@ def run_parallel_SEIR(s, config, *, n_jobs=1): if n_jobs == 1: # run single process for debugging/profiling purposes for sim_id in tqdm.tqdm(sim_ids): - onerun_SEIR( - sim_id2write=sim_id, s=s, load_ID=False, sim_id2load=None, config=config - ) + onerun_SEIR(sim_id2write=sim_id, s=s, load_ID=False, sim_id2load=None, config=config) else: tqdm.contrib.concurrent.process_map( onerun_SEIR, @@ -262,9 +242,7 @@ def run_parallel_SEIR(s, config, *, n_jobs=1): max_workers=n_jobs, ) - logging.info( - f""">> {s.nsim} seir simulations completed in {time.monotonic() - start:.1f} seconds""" - ) + logging.info(f""">> {s.nsim} seir simulations completed in {time.monotonic() - start:.1f} seconds""") def states2Df(s, states): @@ -333,9 +311,7 @@ def postprocess_and_write(sim_id, s, states, p_draw, npi, seeding): # NPIs s.write_simID(ftype="snpi", sim_id=sim_id, df=npi.getReductionDF()) # Parameters - s.write_simID( - ftype="spar", sim_id=sim_id, df=s.parameters.getParameterDF(p_draw=p_draw) - ) + s.write_simID(ftype="spar", sim_id=sim_id, df=s.parameters.getParameterDF(p_draw=p_draw)) out_df = states2Df(s, states) s.write_simID(ftype="seir", sim_id=sim_id, df=out_df) diff --git a/gempyor_pkg/src/gempyor/setup.py b/gempyor_pkg/src/gempyor/setup.py index ac4401f6a..c67556dd4 100644 --- a/gempyor_pkg/src/gempyor/setup.py +++ b/gempyor_pkg/src/gempyor/setup.py @@ -59,9 +59,7 @@ def __init__( self.ti = ti ## we start at 00:00 on ti self.tf = tf ## we end on 23:59 on tf if self.tf <= self.ti: - raise ValueError( - "tf (time to finish) is less than or equal to ti (time to start)" - ) + raise ValueError("tf (time to finish) is less than or equal to ti (time to start)") self.npi_scenario = npi_scenario self.npi_config_seir = npi_config_seir self.seeding_config = seeding_config @@ -93,14 +91,10 @@ def __init__( if self.integration_method == "rk4": self.integration_method = "rk4.jit" if self.integration_method not in ["rk4.jit", "legacy"]: - raise ValueError( - f"Unknow integration method {self.integration_method}." - ) + raise ValueError(f"Unknow integration method {self.integration_method}.") else: self.integration_method = "rk4.jit" - logging.info( - f"Integration method not provided, assuming type {self.integration_method}" - ) + logging.info(f"Integration method not provided, assuming type {self.integration_method}") if config_version is None: if "compartments" in self.seir_config.keys(): @@ -108,9 +102,7 @@ def __init__( else: config_version = "old" - logging.debug( - f"Config version not provided, infering type {config_version}" - ) + logging.debug(f"Config version not provided, infering type {config_version}") if config_version != "old" and config_version != "v2": raise ValueError( @@ -120,8 +112,11 @@ def __init__( # Think if we really want to hold this up. 
self.parameters = parameters.Parameters( - parameter_config=self.parameters_config, config_version=config_version, - ti=self.ti, tf=self.tf, nodenames=self.spatset.nodenames + parameter_config=self.parameters_config, + config_version=config_version, + ti=self.ti, + tf=self.tf, + nodenames=self.spatset.nodenames, ) self.seedingAndIC = seeding_ic.SeedingAndIC( seeding_config=self.seeding_config, @@ -132,12 +127,8 @@ def __init__( # 3. Outcomes self.npi_config_outcomes = None if self.outcomes_config: - if self.outcomes_config["interventions"]["settings"][ - self.outcomes_scenario - ].exists(): - self.npi_config_outcomes = self.outcomes_config["interventions"][ - "settings" - ][self.outcomes_scenario] + if self.outcomes_config["interventions"]["settings"][self.outcomes_scenario].exists(): + self.npi_config_outcomes = self.outcomes_config["interventions"]["settings"][self.outcomes_scenario] # 4. Inputs and outputs if in_run_id is None: @@ -163,15 +154,11 @@ def __init__( if outcomes_config: ftypes.extend(["hosp", "hpar", "hnpi"]) for ftype in ftypes: - datadir = file_paths.create_dir_name( - self.out_run_id, self.out_prefix, ftype - ) + datadir = file_paths.create_dir_name(self.out_run_id, self.out_prefix, ftype) os.makedirs(datadir, exist_ok=True) if self.write_parquet and self.write_csv: - print( - "Confused between reading .csv or parquet. Assuming input file is .parquet" - ) + print("Confused between reading .csv or parquet. Assuming input file is .parquet") if self.write_parquet: self.extension = "parquet" elif self.write_csv: @@ -185,9 +172,7 @@ def get_input_filename(self, ftype: str, sim_id: int, extension_override: str = extension_override=extension_override, ) - def get_output_filename( - self, ftype: str, sim_id: int, extension_override: str = "" - ): + def get_output_filename(self, ftype: str, sim_id: int, extension_override: str = ""): return self.get_filename( ftype=ftype, sim_id=sim_id, @@ -195,9 +180,7 @@ def get_output_filename( extension_override=extension_override, ) - def get_filename( - self, ftype: str, sim_id: int, input: bool, extension_override: str = "" - ): + def get_filename(self, ftype: str, sim_id: int, input: bool, extension_override: str = ""): """return a CSP formated filename.""" if extension_override: # empty strings are Falsy @@ -221,9 +204,7 @@ def get_filename( ) return fn - def read_simID( - self, ftype: str, sim_id: int, input: bool = True, extension_override: str = "" - ): + def read_simID(self, ftype: str, sim_id: int, input: bool = True, extension_override: str = ""): return read_df( fname=self.get_filename( ftype=ftype, @@ -255,20 +236,14 @@ def write_simID( class SpatialSetup: - def __init__( - self, *, setup_name, geodata_file, mobility_file, popnodes_key, nodenames_key - ): + def __init__(self, *, setup_name, geodata_file, mobility_file, popnodes_key, nodenames_key): self.setup_name = setup_name - self.data = pd.read_csv( - geodata_file, converters={nodenames_key: lambda x: str(x)} - ) # geoids and populations + self.data = pd.read_csv(geodata_file, converters={nodenames_key: lambda x: str(x)}) # geoids and populations self.nnodes = len(self.data) # K = # of locations # popnodes_key is the name of the column in geodata_file with populations if popnodes_key not in self.data: - raise ValueError( - f"popnodes_key: {popnodes_key} does not correspond to a column in geodata." 
- ) + raise ValueError(f"popnodes_key: {popnodes_key} does not correspond to a column in geodata.") self.popnodes = self.data[popnodes_key].to_numpy() # population if len(np.argwhere(self.popnodes == 0)): raise ValueError( @@ -277,18 +252,14 @@ def __init__( # nodenames_key is the name of the column in geodata_file with geoids if nodenames_key not in self.data: - raise ValueError( - f"nodenames_key: {nodenames_key} does not correspond to a column in geodata." - ) + raise ValueError(f"nodenames_key: {nodenames_key} does not correspond to a column in geodata.") self.nodenames = self.data[nodenames_key].tolist() if len(self.nodenames) != len(set(self.nodenames)): raise ValueError(f"There are duplicate nodenames in geodata.") mobility_file = pathlib.Path(mobility_file) if mobility_file.suffix == ".txt": - print( - "Mobility files as matrices are not recommended. Please switch soon to long form csv files." - ) + print("Mobility files as matrices are not recommended. Please switch soon to long form csv files.") self.mobility = scipy.sparse.csr_matrix( np.loadtxt(mobility_file), dtype=int ) # K x K matrix of people moving @@ -299,9 +270,7 @@ def __init__( ) elif mobility_file.suffix == ".csv": - mobility_data = pd.read_csv( - mobility_file, converters={"ori": str, "dest": str} - ) + mobility_data = pd.read_csv(mobility_file, converters={"ori": str, "dest": str}) nn_dict = {v: k for k, v in enumerate(self.nodenames)} mobility_data["ori_idx"] = mobility_data["ori"].apply(nn_dict.__getitem__) mobility_data["dest_idx"] = mobility_data["dest"].apply(nn_dict.__getitem__) @@ -335,7 +304,9 @@ def __init__( rows, cols, values = scipy.sparse.find(tmp) errmsg = "" for r, c, v in zip(rows, cols, values): - errmsg += f"\n({r}, {c}) = {self.mobility[r, c]} > population of '{self.nodenames[r]}' = {self.popnodes[r]}" + errmsg += ( + f"\n({r}, {c}) = {self.mobility[r, c]} > population of '{self.nodenames[r]}' = {self.popnodes[r]}" + ) raise ValueError( f"The following entries in the mobility data exceed the source node populations in geodata:{errmsg}" ) diff --git a/gempyor_pkg/src/gempyor/simulate_outcome.py b/gempyor_pkg/src/gempyor/simulate_outcome.py index c5c4a35a6..b1294e4f0 100755 --- a/gempyor_pkg/src/gempyor/simulate_outcome.py +++ b/gempyor_pkg/src/gempyor/simulate_outcome.py @@ -244,9 +244,7 @@ def simulate( ) if config["outcomes"]["method"].get() == "delayframe": - outcomes.run_parallel_outcomes( - sim_id2write=index, s=s, nsim=nsim, n_jobs=jobs - ) + outcomes.run_parallel_outcomes(sim_id2write=index, s=s, nsim=nsim, n_jobs=jobs) else: raise ValueError(f"Only method 'delayframe' is supported at the moment.") diff --git a/gempyor_pkg/src/gempyor/simulate_seir.py b/gempyor_pkg/src/gempyor/simulate_seir.py index d494dd9f4..9fab4ba82 100755 --- a/gempyor_pkg/src/gempyor/simulate_seir.py +++ b/gempyor_pkg/src/gempyor/simulate_seir.py @@ -282,12 +282,7 @@ def simulate( in_run_id=in_run_id, in_prefix=config["name"].get() + "/", out_run_id=out_run_id, - out_prefix=config["name"].get() - + "/" - + str(scenario) - + "/" - + out_run_id - + "/", + out_prefix=config["name"].get() + "/" + str(scenario) + "/" + out_run_id + "/", stoch_traj_flag=stoch_traj_flag, ) diff --git a/gempyor_pkg/src/gempyor/steps_rk4.py b/gempyor_pkg/src/gempyor/steps_rk4.py index 69a0cae53..354684afb 100644 --- a/gempyor_pkg/src/gempyor/steps_rk4.py +++ b/gempyor_pkg/src/gempyor/steps_rk4.py @@ -54,11 +54,7 @@ def rk4_integration( percent_day_away = 0.5 for spatial_node in range(nspatial_nodes): percent_who_move[spatial_node] = min( - 
mobility_data[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] - ].sum() + mobility_data[mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1]].sum() / population[spatial_node], 1, ) @@ -66,14 +62,10 @@ def rk4_integration( @jit(nopython=True) def rhs(t, x, today): states_current = np.reshape(x, (2, ncompartments, nspatial_nodes))[0] - st_next = ( - states_current.copy() - ) # this is used to make sure stochastic integration never goes below zero - transition_amounts = np.zeros( - (ntransitions, nspatial_nodes) - ) # keep track of the transitions + st_next = states_current.copy() # this is used to make sure stochastic integration never goes below zero + transition_amounts = np.zeros((ntransitions, nspatial_nodes)) # keep track of the transitions - if ((x < 0).any()): + if (x < 0).any(): print("Integration error: rhs got a negative x (pos, time)", np.where(x < 0), t) for transition_index in range(ntransitions): @@ -89,83 +81,59 @@ def rhs(t, x, today): proportion_info[proportion_sum_starts_col][proportion_index], proportion_info[proportion_sum_stops_col][proportion_index], ): - relevant_number_in_comp += states_current[ - transition_sum_compartments[proportion_sum_index] - ] + relevant_number_in_comp += states_current[transition_sum_compartments[proportion_sum_index]] # exponents should not be a proportion, since we don't sum them over sum compartments - relevant_exponent = parameters[ - proportion_info[proportion_exponent_col][proportion_index] - ][today] - if ( - first_proportion - ): # TODO: ask why there is nothing with n_spatial node here. + relevant_exponent = parameters[proportion_info[proportion_exponent_col][proportion_index]][today] + if first_proportion: # TODO: ask why there is nothing with n_spatial node here. 
only_one_proportion = ( - transitions[transition_proportion_start_col][transition_index] - + 1 + transitions[transition_proportion_start_col][transition_index] + 1 ) == transitions[transition_proportion_stop_col][transition_index] first_proportion = False source_number = relevant_number_in_comp if source_number.max() > 0: total_rate[source_number > 0] *= ( - source_number[source_number > 0] - ** relevant_exponent[source_number > 0] + source_number[source_number > 0] ** relevant_exponent[source_number > 0] / source_number[source_number > 0] ) if only_one_proportion: - total_rate *= parameters[ - transitions[transition_rate_col][transition_index] - ][today] + total_rate *= parameters[transitions[transition_rate_col][transition_index]][today] else: for spatial_node in range(nspatial_nodes): - proportion_keep_compartment = ( - 1 - percent_day_away * percent_who_move[spatial_node] - ) + proportion_keep_compartment = 1 - percent_day_away * percent_who_move[spatial_node] proportion_change_compartment = ( percent_day_away * mobility_data[ - mobility_data_indices[ - spatial_node - ] : mobility_data_indices[spatial_node + 1] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] / population[spatial_node] ) rate_keep_compartment = ( proportion_keep_compartment - * relevant_number_in_comp[spatial_node] - ** relevant_exponent[spatial_node] + * relevant_number_in_comp[spatial_node] ** relevant_exponent[spatial_node] / population[spatial_node] - * parameters[ - transitions[transition_rate_col][transition_index] - ][today][spatial_node] + * parameters[transitions[transition_rate_col][transition_index]][today][spatial_node] ) visiting_compartment = mobility_row_indices[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] rate_change_compartment = proportion_change_compartment rate_change_compartment *= ( - relevant_number_in_comp[visiting_compartment] - ** relevant_exponent[visiting_compartment] + relevant_number_in_comp[visiting_compartment] ** relevant_exponent[visiting_compartment] ) rate_change_compartment /= population[visiting_compartment] - rate_change_compartment *= parameters[ - transitions[transition_rate_col][transition_index] - ][today][visiting_compartment] - total_rate[spatial_node] *= ( - rate_keep_compartment + rate_change_compartment.sum() - ) + rate_change_compartment *= parameters[transitions[transition_rate_col][transition_index]][ + today + ][visiting_compartment] + total_rate[spatial_node] *= rate_keep_compartment + rate_change_compartment.sum() if method == "rk4": number_move = source_number * total_rate # * compound_adjusted_rate elif method == "legacy": compound_adjusted_rate = 1.0 - np.exp(-dt * total_rate) if stochastic_p: - number_move = ( - source_number * compound_adjusted_rate - ) ## to initialize typ + number_move = source_number * compound_adjusted_rate ## to initialize typ for spatial_node in range(nspatial_nodes): number_move[spatial_node] = np.random.binomial( source_number[spatial_node], @@ -183,49 +151,42 @@ def rhs(t, x, today): @jit(nopython=True) def update_states(states, delta_t, transition_amounts): - states_diff = np.zeros( - (2, ncompartments, nspatial_nodes) - ) # first dim: 0 -> states_diff, 1: states_cum + states_diff = np.zeros((2, ncompartments, nspatial_nodes)) # first dim: 0 -> states_diff, 1: states_cum st_next = states.copy() st_next = np.reshape(st_next, (2, ncompartments, nspatial_nodes)) - if method=="rk4": + 
if method == "rk4": # we move by delta_t * transitions, in case of rk4 - # when we use legacy, the compound_adjusted_rate already + # when we use legacy, the compound_adjusted_rate already # includes the time step - transition_amounts = ( - transition_amounts.copy() * delta_t - ) + transition_amounts = transition_amounts.copy() * delta_t for transition_index in range(ntransitions): for spatial_node in range(nspatial_nodes): - if ((transition_amounts[transition_index][spatial_node] < 0)): - print("Integration error: transition amounts negative (trans_idx, node)", transition_index, spatial_node) + if transition_amounts[transition_index][spatial_node] < 0: + print( + "Integration error: transition amounts negative (trans_idx, node)", + transition_index, + spatial_node, + ) if ( transition_amounts[transition_index][spatial_node] - >= st_next[0][transitions[transition_source_col][transition_index]][ - spatial_node - ] - float_tolerance + >= st_next[0][transitions[transition_source_col][transition_index]][spatial_node] - float_tolerance ): - transition_amounts[transition_index][spatial_node] = max(st_next[0][ - transitions[transition_source_col][transition_index] - ][spatial_node] - float_tolerance, - 0) - st_next[0][ - transitions[transition_source_col][transition_index] - ] -= transition_amounts[transition_index] - st_next[0][ - transitions[transition_destination_col][transition_index] - ] += transition_amounts[transition_index] + transition_amounts[transition_index][spatial_node] = max( + st_next[0][transitions[transition_source_col][transition_index]][spatial_node] + - float_tolerance, + 0, + ) + st_next[0][transitions[transition_source_col][transition_index]] -= transition_amounts[transition_index] + st_next[0][transitions[transition_destination_col][transition_index]] += transition_amounts[ + transition_index + ] - states_diff[ - 0, transitions[transition_source_col][transition_index] - ] -= transition_amounts[transition_index] - states_diff[ - 0, transitions[transition_destination_col][transition_index] - ] += transition_amounts[transition_index] - states_diff[ - 1, transitions[transition_destination_col][transition_index], : - ] += transition_amounts[ + states_diff[0, transitions[transition_source_col][transition_index]] -= transition_amounts[transition_index] + states_diff[0, transitions[transition_destination_col][transition_index]] += transition_amounts[ + transition_index + ] + states_diff[1, transitions[transition_destination_col][transition_index], :] += transition_amounts[ transition_index ] # Cumumlative @@ -252,31 +213,21 @@ def rk4_integrate(t, x, today): states[today, :, :] = states_next for seeding_instance_idx in range( seeding_data["day_start_idx"][today], - seeding_data["day_start_idx"][ - min( - today + int(np.ceil(dt)), len(seeding_data["day_start_idx"]) - 1 - ) - ], + seeding_data["day_start_idx"][min(today + int(np.ceil(dt)), len(seeding_data["day_start_idx"]) - 1)], ): this_seeding_amounts = seeding_amounts[seeding_instance_idx] seeding_places = seeding_data["seeding_places"][seeding_instance_idx] seeding_sources = seeding_data["seeding_sources"][seeding_instance_idx] - seeding_destinations = seeding_data["seeding_destinations"][ - seeding_instance_idx - ] + seeding_destinations = seeding_data["seeding_destinations"][seeding_instance_idx] # this_seeding_amounts = this_seeding_amounts < states_next[seeding_sources] ? 
this_seeding_amounts : states_next[seeding_instance_idx] states_next[seeding_sources][seeding_places] -= this_seeding_amounts - states_next[seeding_sources][seeding_places] = states_next[ - seeding_sources - ][seeding_places] * (states_next[seeding_sources][seeding_places] > 0) - states_next[seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_next[seeding_sources][seeding_places] = states_next[seeding_sources][seeding_places] * ( + states_next[seeding_sources][seeding_places] > 0 + ) + states_next[seeding_destinations][seeding_places] += this_seeding_amounts # ADD TO cumulative, this is debatable, - states_daily_incid[today][seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_daily_incid[today][seeding_destinations][seeding_places] += this_seeding_amounts x_ = np.zeros((2, ncompartments, nspatial_nodes)) x_[0] = states_next @@ -309,19 +260,13 @@ def rk4_integrate(t, x, today): error = False ## Perform some checks: if np.isnan(states_daily_incid).any() or np.isnan(states).any(): - logging.critical( - "Integration error: NaN detected in epidemic integration result. Failing..." - ) + logging.critical("Integration error: NaN detected in epidemic integration result. Failing...") error = True if not (np.isfinite(states_daily_incid).all() and np.isfinite(states).all()): - logging.critical( - "Integration error: Inf detected in epidemic integration result. Failing..." - ) + logging.critical("Integration error: Inf detected in epidemic integration result. Failing...") error = True if (states_daily_incid < 0).any() or (states < 0).any(): - logging.critical( - "Integration error: negative values detected in epidemic integration result. Failing..." - ) + logging.critical("Integration error: negative values detected in epidemic integration result. 
Failing...") # todo: this, but smart so it doesn't fail if empty array # print( # f"STATES: NNZ:{states[states < 0].size}/{states.size}, max:{np.max(states[states < 0])}, min:{np.min(states[states < 0])}, mean:{np.mean(states[states < 0])} median:{np.median(states[states < 0])}" diff --git a/gempyor_pkg/src/gempyor/steps_source.py b/gempyor_pkg/src/gempyor/steps_source.py index 1d919ea08..9e52cb830 100644 --- a/gempyor_pkg/src/gempyor/steps_source.py +++ b/gempyor_pkg/src/gempyor/steps_source.py @@ -32,8 +32,7 @@ ## Dimensions "int32," "int32," "int32," ## ncompartments ## nspatial_nodes ## Number of days ## Parameters - "float64[:, :, :]," ## Parameters [ nparameters x ndays x nspatial_nodes] - "float64," ## dt + "float64[:, :, :]," "float64," ## Parameters [ nparameters x ndays x nspatial_nodes] ## dt ## Transitions "int64[:, :]," ## transitions [ [source, destination, proportion_start, proportion_stop, rate] x ntransitions ] "int64[:, :]," ## proportions_info [ [sum_starts, sum_stops, exponent] x ntransition_proportions ] @@ -85,11 +84,7 @@ def steps_SEIR_nb( percent_day_away = 0.5 for spatial_node in range(nspatial_nodes): percent_who_move[spatial_node] = min( - mobility_data[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] - ].sum() + mobility_data[mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1]].sum() / population[spatial_node], 1, ) @@ -116,24 +111,18 @@ def steps_SEIR_nb( this_seeding_amounts = seeding_amounts[seeding_instance_idx] seeding_places = seeding_data["seeding_places"][seeding_instance_idx] seeding_sources = seeding_data["seeding_sources"][seeding_instance_idx] - seeding_destinations = seeding_data["seeding_destinations"][ - seeding_instance_idx - ] + seeding_destinations = seeding_data["seeding_destinations"][seeding_instance_idx] # this_seeding_amounts = this_seeding_amounts < states_next[seeding_sources] ? 
this_seeding_amounts : states_next[seeding_instance_idx] states_next[seeding_sources][seeding_places] -= this_seeding_amounts - states_next[seeding_sources][seeding_places] = states_next[ - seeding_sources - ][seeding_places] * (states_next[seeding_sources][seeding_places] > 0) - states_next[seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_next[seeding_sources][seeding_places] = states_next[seeding_sources][seeding_places] * ( + states_next[seeding_sources][seeding_places] > 0 + ) + states_next[seeding_destinations][seeding_places] += this_seeding_amounts total_seeded += this_seeding_amounts times_seeded += 1 # ADD TO cumulative, this is debatable, - states_daily_incid[today][seeding_destinations][ - seeding_places - ] += this_seeding_amounts + states_daily_incid[today][seeding_destinations][seeding_places] += this_seeding_amounts total_infected = 0 for transition_index in range(ntransitions): @@ -149,72 +138,52 @@ def steps_SEIR_nb( proportion_info[proportion_sum_starts_col][proportion_index], proportion_info[proportion_sum_stops_col][proportion_index], ): - relevant_number_in_comp += states_current[ - transition_sum_compartments[proportion_sum_index] - ] + relevant_number_in_comp += states_current[transition_sum_compartments[proportion_sum_index]] # exponents should not be a proportion, since we don't sum them over sum compartments - relevant_exponent = parameters[ - proportion_info[proportion_exponent_col][proportion_index] - ][today] + relevant_exponent = parameters[proportion_info[proportion_exponent_col][proportion_index]][today] if first_proportion: only_one_proportion = ( - transitions[transition_proportion_start_col][transition_index] - + 1 + transitions[transition_proportion_start_col][transition_index] + 1 ) == transitions[transition_proportion_stop_col][transition_index] first_proportion = False source_number = relevant_number_in_comp if source_number.max() > 0: total_rate[source_number > 0] *= ( - source_number[source_number > 0] - ** relevant_exponent[source_number > 0] + source_number[source_number > 0] ** relevant_exponent[source_number > 0] / source_number[source_number > 0] ) if only_one_proportion: - total_rate *= parameters[ - transitions[transition_rate_col][transition_index] - ][today] + total_rate *= parameters[transitions[transition_rate_col][transition_index]][today] else: for spatial_node in range(nspatial_nodes): - proportion_keep_compartment = ( - 1 - percent_day_away * percent_who_move[spatial_node] - ) + proportion_keep_compartment = 1 - percent_day_away * percent_who_move[spatial_node] proportion_change_compartment = ( percent_day_away * mobility_data[ - mobility_data_indices[ - spatial_node - ] : mobility_data_indices[spatial_node + 1] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] / population[spatial_node] ) rate_keep_compartment = ( proportion_keep_compartment - * relevant_number_in_comp[spatial_node] - ** relevant_exponent[spatial_node] + * relevant_number_in_comp[spatial_node] ** relevant_exponent[spatial_node] / population[spatial_node] - * parameters[ - transitions[transition_rate_col][transition_index] - ][today][spatial_node] + * parameters[transitions[transition_rate_col][transition_index]][today][spatial_node] ) visiting_compartment = mobility_row_indices[ - mobility_data_indices[spatial_node] : mobility_data_indices[ - spatial_node + 1 - ] + mobility_data_indices[spatial_node] : mobility_data_indices[spatial_node + 1] ] rate_change_compartment = proportion_change_compartment 
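The mobility slices that black collapses onto single lines here follow the CSR layout of a scipy.sparse matrix: a flat data array, a matching array of destination indices, and an indptr array giving one [start, stop) pair per source node. A self-contained sketch of that layout with a toy 3-node matrix (the values are made up; the variable names mirror the ones in the hunk):

import numpy as np
import scipy.sparse

# Toy mobility matrix: entry (ori, dest) = people commuting from node ori to node dest.
mobility = scipy.sparse.csr_matrix(
    np.array([[0, 50, 10], [5, 0, 0], [20, 30, 0]], dtype=np.float64)
)
population = np.array([1000.0, 200.0, 500.0])

mobility_data = mobility.data            # flat nonzero flows
mobility_row_indices = mobility.indices  # destination node of each flow
mobility_data_indices = mobility.indptr  # slice boundaries, one pair per source node

for spatial_node in range(3):
    sl = slice(mobility_data_indices[spatial_node], mobility_data_indices[spatial_node + 1])
    outgoing = mobility_data[sl]                     # flows leaving this node
    visiting_compartment = mobility_row_indices[sl]  # nodes being visited
    percent_who_move = min(outgoing.sum() / population[spatial_node], 1)
    print(spatial_node, visiting_compartment, percent_who_move)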
rate_change_compartment *= ( - relevant_number_in_comp[visiting_compartment] - ** relevant_exponent[visiting_compartment] + relevant_number_in_comp[visiting_compartment] ** relevant_exponent[visiting_compartment] ) rate_change_compartment /= population[visiting_compartment] - rate_change_compartment *= parameters[ - transitions[transition_rate_col][transition_index] - ][today][visiting_compartment] - total_rate[spatial_node] *= ( - rate_keep_compartment + rate_change_compartment.sum() - ) + rate_change_compartment *= parameters[transitions[transition_rate_col][transition_index]][ + today + ][visiting_compartment] + total_rate[spatial_node] *= rate_keep_compartment + rate_change_compartment.sum() compound_adjusted_rate = 1.0 - np.exp(-dt * total_rate) @@ -252,22 +221,14 @@ def steps_SEIR_nb( for spatial_node in range(nspatial_nodes): if ( number_move[spatial_node] - > states_next[transitions[transition_source_col][transition_index]][ + > states_next[transitions[transition_source_col][transition_index]][spatial_node] + ): + number_move[spatial_node] = states_next[transitions[transition_source_col][transition_index]][ spatial_node ] - ): - number_move[spatial_node] = states_next[ - transitions[transition_source_col][transition_index] - ][spatial_node] - states_next[ - transitions[transition_source_col][transition_index] - ] -= number_move - states_next[ - transitions[transition_destination_col][transition_index] - ] += number_move - states_daily_incid[ - today, transitions[transition_destination_col][transition_index], : - ] += number_move + states_next[transitions[transition_source_col][transition_index]] -= number_move + states_next[transitions[transition_destination_col][transition_index]] += number_move + states_daily_incid[today, transitions[transition_destination_col][transition_index], :] += number_move states_current = states_next.copy() diff --git a/gempyor_pkg/src/gempyor/utils.py b/gempyor_pkg/src/gempyor/utils.py index dac2e09bb..701273b20 100644 --- a/gempyor_pkg/src/gempyor/utils.py +++ b/gempyor_pkg/src/gempyor/utils.py @@ -26,9 +26,7 @@ def write_df(fname: str, df: pd.DataFrame, extension: str = ""): df = pa.Table.from_pandas(df, preserve_index=False) pa.parquet.write_table(df, fname) else: - raise NotImplementedError( - f"Invalid extension {extension}. Must be 'csv' or 'parquet'" - ) + raise NotImplementedError(f"Invalid extension {extension}. Must be 'csv' or 'parquet'") def read_df(fname: str, extension: str = "") -> pd.DataFrame: @@ -42,9 +40,7 @@ def read_df(fname: str, extension: str = "") -> pd.DataFrame: elif extension == "parquet": df = pa.parquet.read_table(fname).to_pandas() else: - raise NotImplementedError( - f"Invalid extension {extension}. Must be 'csv' or 'parquet'" - ) + raise NotImplementedError(f"Invalid extension {extension}. Must be 'csv' or 'parquet'") return df @@ -68,9 +64,7 @@ def wrapper(*args, **kwargs): from functools import wraps -def profile( - output_file=None, sort_by="cumulative", lines_to_print=None, strip_dirs=False -): +def profile(output_file=None, sort_by="cumulative", lines_to_print=None, strip_dirs=False): """A time profiler decorator. 
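The profile() signature reflowed just above is the familiar cProfile-plus-pstats decorator pattern. The following is a generic stand-in with the same signature, written only to illustrate how such a decorator is typically wired up; it is not the gempyor.utils implementation and its details may differ:

import cProfile
import io
import pstats
from functools import wraps


def profile_sketch(output_file=None, sort_by="cumulative", lines_to_print=None, strip_dirs=False):
    """Illustrative stand-in with the same signature as gempyor.utils.profile."""

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            pr = cProfile.Profile()
            result = pr.runcall(func, *args, **kwargs)
            if output_file:
                pr.dump_stats(output_file)  # raw stats, viewable later with pstats
            buf = io.StringIO()
            stats = pstats.Stats(pr, stream=buf)
            if strip_dirs:
                stats.strip_dirs()
            stats.sort_stats(sort_by)
            if lines_to_print is None:
                stats.print_stats()
            else:
                stats.print_stats(lines_to_print)
            print(buf.getvalue())
            return result

        return wrapper

    return decorator


@profile_sketch(sort_by="cumulative", lines_to_print=10)
def slow_step(n):
    return sum(i * i for i in range(n))


slow_step(200_000)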
Inspired by and modified the profile decorator of Giampaolo Rodola: http://code.activestate.com/recipes/577817-profile-decorator/ diff --git a/gempyor_pkg/tests/npi/test_npis.py b/gempyor_pkg/tests/npi/test_npis.py index 58877276a..57fcbd580 100644 --- a/gempyor_pkg/tests/npi/test_npis.py +++ b/gempyor_pkg/tests/npi/test_npis.py @@ -49,25 +49,17 @@ def test_full_npis_read_write(): # sim_id2write=1, s=inference_simulator.s, load_ID=False, sim_id2load=1 # ) - npi_outcomes = outcomes.build_npi_Outcomes( - inference_simulator.s, load_ID=False, sim_id2load=None, config=config - ) + npi_outcomes = outcomes.build_npi_Outcomes(inference_simulator.s, load_ID=False, sim_id2load=None, config=config) # npi_seir = seir.build_npi_SEIR( # inference_simulator.s, load_ID=False, sim_id2load=None, config=config # ) - inference_simulator.s.write_simID( - ftype="hnpi", sim_id=1, df=npi_outcomes.getReductionDF() - ) + inference_simulator.s.write_simID(ftype="hnpi", sim_id=1, df=npi_outcomes.getReductionDF()) - hnpi_read = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet" - ).to_pandas() + hnpi_read = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet").to_pandas() hnpi_read["reduction"] = np.random.random(len(hnpi_read)) * 2 - 1 out_hnpi = pa.Table.from_pandas(hnpi_read, preserve_index=False) - pa.parquet.write_table( - out_hnpi, file_paths.create_file_name(105, "", 1, "hnpi", "parquet") - ) + pa.parquet.write_table(out_hnpi, file_paths.create_file_name(105, "", 1, "hnpi", "parquet")) import random random.seed(10) @@ -88,19 +80,11 @@ def test_full_npis_read_write(): # sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1 # ) - npi_outcomes = outcomes.build_npi_Outcomes( - inference_simulator.s, load_ID=True, sim_id2load=1, config=config - ) - inference_simulator.s.write_simID( - ftype="hnpi", sim_id=1, df=npi_outcomes.getReductionDF() - ) + npi_outcomes = outcomes.build_npi_Outcomes(inference_simulator.s, load_ID=True, sim_id2load=1, config=config) + inference_simulator.s.write_simID(ftype="hnpi", sim_id=1, df=npi_outcomes.getReductionDF()) - hnpi_read = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet" - ).to_pandas() - hnpi_wrote = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet" - ).to_pandas() + hnpi_read = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet").to_pandas() + hnpi_wrote = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet").to_pandas() assert (hnpi_read == hnpi_wrote).all().all() # runs with the new, random NPI @@ -119,17 +103,9 @@ def test_full_npis_read_write(): # sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1 # ) - npi_outcomes = outcomes.build_npi_Outcomes( - inference_simulator.s, load_ID=True, sim_id2load=1, config=config - ) - inference_simulator.s.write_simID( - ftype="hnpi", sim_id=1, df=npi_outcomes.getReductionDF() - ) + npi_outcomes = outcomes.build_npi_Outcomes(inference_simulator.s, load_ID=True, sim_id2load=1, config=config) + inference_simulator.s.write_simID(ftype="hnpi", sim_id=1, df=npi_outcomes.getReductionDF()) - hnpi_read = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet" - ).to_pandas() - hnpi_wrote = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.107.hnpi.parquet" - ).to_pandas() + hnpi_read = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet").to_pandas() 
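These hnpi comparisons, like the hpar and hosp ones further down, all rely on the same parquet round-trip idiom: write a frame without its index via pyarrow, read it back, and assert cell-wise equality. A self-contained sketch of that idiom (the file name and columns are illustrative):

import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

df = pd.DataFrame({"geoid": ["10001", "20002"], "reduction": np.random.random(2) * 2 - 1})

# Write without the pandas index, as the tests do with preserve_index=False.
pa.parquet.write_table(pa.Table.from_pandas(df, preserve_index=False), "example.hnpi.parquet")

# Read it back and compare cell by cell; float64 values survive the round trip exactly.
df_back = pq.read_table("example.hnpi.parquet").to_pandas()
assert (df == df_back).all().all()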
+ hnpi_wrote = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.107.hnpi.parquet").to_pandas() assert (hnpi_read == hnpi_wrote).all().all() diff --git a/gempyor_pkg/tests/outcomes/make_seir_test_file.py b/gempyor_pkg/tests/outcomes/make_seir_test_file.py index a66239d32..8551774d2 100644 --- a/gempyor_pkg/tests/outcomes/make_seir_test_file.py +++ b/gempyor_pkg/tests/outcomes/make_seir_test_file.py @@ -36,9 +36,7 @@ prefix = "" sim_id = 1 -a = pd.read_parquet( - file_paths.create_file_name(run_id, prefix, sim_id, "seir", "parquet") -) +a = pd.read_parquet(file_paths.create_file_name(run_id, prefix, sim_id, "seir", "parquet")) print(a) # created by running SEIR test_seir.py (comment line 530 to remove file tree) first b = pd.read_parquet("../../SEIR/test/model_output/seir/000000101.test.seir.parquet") @@ -56,9 +54,7 @@ diffI = np.arange(5) * 2 date_data = datetime.date(2020, 4, 15) for i in range(5): - b.loc[ - (b["mc_value_type"] == "incidence") & (b["date"] == str(date_data)), geoid[i] - ] = diffI[i] + b.loc[(b["mc_value_type"] == "incidence") & (b["date"] == str(date_data)), geoid[i]] = diffI[i] pa_df = pa.Table.from_pandas(b, preserve_index=False) pa.parquet.write_table(pa_df, "new_test_no_vacc.parquet") diff --git a/gempyor_pkg/tests/outcomes/test_outcomes.py b/gempyor_pkg/tests/outcomes/test_outcomes.py index a25164712..94c354f20 100644 --- a/gempyor_pkg/tests/outcomes/test_outcomes.py +++ b/gempyor_pkg/tests/outcomes/test_outcomes.py @@ -41,146 +41,83 @@ def test_outcomes_scenario(): stoch_traj_flag=False, ) - outcomes.onerun_delayframe_outcomes( - sim_id2write=1, s=inference_simulator.s, load_ID=False - ) + outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s, load_ID=False) - hosp = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.1.hosp.parquet" - ).to_pandas() + hosp = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.1.hosp.parquet").to_pandas() hosp.set_index("time", drop=True, inplace=True) for i, place in enumerate(geoid): for dt in hosp.index: if dt == date_data: assert hosp[hosp["geoid"] == place]["incidI"][dt] == diffI[i] - assert ( - hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] - == diffI[i] * 0.1 - ) - assert ( - hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] - == diffI[i] * 0.01 - ) - assert ( - hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] - == diffI[i] * 0.1 * 0.4 - ) + assert hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] == diffI[i] * 0.1 + assert hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] == diffI[i] * 0.01 + assert hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] == diffI[i] * 0.1 * 0.4 for j in range(7): - assert ( - hosp[hosp["geoid"] == place]["hosp_curr"][ - dt + datetime.timedelta(7 + j) - ] - == diffI[i] * 0.1 - ) - assert ( - hosp[hosp["geoid"] == place]["hosp_curr"][ - dt + datetime.timedelta(7 + 8) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place]["hosp_curr"][dt + datetime.timedelta(7 + j)] == diffI[i] * 0.1 + assert hosp[hosp["geoid"] == place]["hosp_curr"][dt + datetime.timedelta(7 + 8)] == 0 elif dt < date_data: - assert ( - hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] - == 0 - ) + assert hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] == 0 assert hosp[hosp["geoid"] == place]["incidI"][dt] == 0 - assert ( - hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] - == 0 - ) - assert ( - 
hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place]["hosp_curr"][ - dt + datetime.timedelta(7) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] == 0 + assert hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] == 0 + assert hosp[hosp["geoid"] == place]["hosp_curr"][dt + datetime.timedelta(7)] == 0 elif dt > (date_data + datetime.timedelta(7)): assert hosp[hosp["geoid"] == place]["incidH"][dt] == 0 - assert ( - hosp[hosp["geoid"] == place]["incidI"][dt - datetime.timedelta(7)] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place]["incidD"][dt - datetime.timedelta(4)] - == 0 - ) + assert hosp[hosp["geoid"] == place]["incidI"][dt - datetime.timedelta(7)] == 0 + assert hosp[hosp["geoid"] == place]["incidD"][dt - datetime.timedelta(4)] == 0 assert hosp[hosp["geoid"] == place]["incidICU"][dt] == 0 - hpar = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.1.hpar.parquet" - ).to_pandas() + hpar = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.1.hpar.parquet").to_pandas() for i, place in enumerate(geoid): assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidH") - & (hpar["quantity"] == "probability") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidH") & (hpar["quantity"] == "probability")][ + "value" + ] ) == 0.1 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidH") - & (hpar["quantity"] == "delay") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidH") & (hpar["quantity"] == "delay")]["value"] ) == 7 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidH") - & (hpar["quantity"] == "duration") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidH") & (hpar["quantity"] == "duration")][ + "value" + ] ) == 7 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidD") - & (hpar["quantity"] == "probability") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidD") & (hpar["quantity"] == "probability")][ + "value" + ] ) == 0.01 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidD") - & (hpar["quantity"] == "delay") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidD") & (hpar["quantity"] == "delay")]["value"] ) == 2 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidICU") - & (hpar["quantity"] == "probability") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidICU") & (hpar["quantity"] == "probability")][ + "value" + ] ) == 0.4 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidICU") - & (hpar["quantity"] == "delay") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidICU") & (hpar["quantity"] == "delay")][ + "value" + ] ) == 0 ) @@ -197,23 +134,15 @@ def test_outcomes_scenario_with_load(): stoch_traj_flag=False, ) - outcomes.onerun_delayframe_outcomes( - sim_id2write=1, s=inference_simulator.s, load_ID=False - ) + outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s, load_ID=False) - hpar_config = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.1.hpar.parquet" - ).to_pandas() - hpar_rel = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.2.hpar.parquet" - ).to_pandas() + hpar_config = 
pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.1.hpar.parquet").to_pandas() + hpar_rel = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.2.hpar.parquet").to_pandas() for out in ["incidH", "incidD", "incidICU"]: for i, place in enumerate(geoid): a = hpar_rel[(hpar_rel["outcome"] == out) & (hpar_rel["geoid"] == place)] - b = hpar_config[ - (hpar_rel["outcome"] == out) & (hpar_config["geoid"] == place) - ] + b = hpar_config[(hpar_rel["outcome"] == out) & (hpar_config["geoid"] == place)] assert len(a) == len(b) for j in range(len(a)): if b.iloc[j]["quantity"] in ["delay", "duration"]: @@ -241,30 +170,16 @@ def test_outcomes_read_write_hpar(): stoch_traj_flag=False, out_run_id=3, ) - outcomes.onerun_delayframe_outcomes( - sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1 - ) + outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1) - hpar_read = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.2.hpar.parquet" - ).to_pandas() - hpar_wrote = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.3.hpar.parquet" - ).to_pandas() + hpar_read = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.2.hpar.parquet").to_pandas() + hpar_wrote = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.3.hpar.parquet").to_pandas() assert (hpar_read == hpar_wrote).all().all() - hnpi_read = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.2.hnpi.parquet" - ).to_pandas() - hnpi_wrote = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.3.hnpi.parquet" - ).to_pandas() + hnpi_read = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.2.hnpi.parquet").to_pandas() + hnpi_wrote = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.3.hnpi.parquet").to_pandas() assert (hnpi_read == hnpi_wrote).all().all() - hosp_read = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.2.hosp.parquet" - ).to_pandas() - hosp_wrote = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.3.hosp.parquet" - ).to_pandas() + hosp_read = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.2.hosp.parquet").to_pandas() + hosp_wrote = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.3.hosp.parquet").to_pandas() assert (hosp_read == hosp_wrote).all().all() @@ -283,141 +198,68 @@ def test_outcomes_scenario_subclasses(): outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s) - hosp = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.10.hosp.parquet" - ).to_pandas() + hosp = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.10.hosp.parquet").to_pandas() hosp.set_index("time", drop=True, inplace=True) for i, place in enumerate(geoid): for dt in hosp.index: if dt == date_data: assert hosp[hosp["geoid"] == place]["incidI"][dt] == diffI[i] - assert hosp[hosp["geoid"] == place]["incidH"][ - dt + datetime.timedelta(7) - ] == diffI[i] * 0.1 * len(subclasses) - assert hosp[hosp["geoid"] == place]["incidD"][ - dt + datetime.timedelta(2) - ] == diffI[i] * 0.01 * len(subclasses) - assert hosp[hosp["geoid"] == place]["incidICU"][ - dt + datetime.timedelta(7) - ] == diffI[i] * 0.1 * 0.4 * len(subclasses) - for j in range(7): - assert hosp[hosp["geoid"] == place]["hosp_curr"][ - dt + datetime.timedelta(7 + j) - ] == diffI[i] * 0.1 * len(subclasses) - assert ( - hosp[hosp["geoid"] == place]["hosp_curr"][ - dt + datetime.timedelta(7 + 8) - ] - == 0 + 
assert hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] == diffI[i] * 0.1 * len( + subclasses + ) + assert hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] == diffI[i] * 0.01 * len( + subclasses ) + assert hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] == diffI[ + i + ] * 0.1 * 0.4 * len(subclasses) + for j in range(7): + assert hosp[hosp["geoid"] == place]["hosp_curr"][dt + datetime.timedelta(7 + j)] == diffI[ + i + ] * 0.1 * len(subclasses) + assert hosp[hosp["geoid"] == place]["hosp_curr"][dt + datetime.timedelta(7 + 8)] == 0 elif dt < date_data: - assert ( - hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] - == 0 - ) + assert hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] == 0 assert hosp[hosp["geoid"] == place]["incidI"][dt] == 0 - assert ( - hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place]["hosp_curr"][ - dt + datetime.timedelta(7) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] == 0 + assert hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] == 0 + assert hosp[hosp["geoid"] == place]["hosp_curr"][dt + datetime.timedelta(7)] == 0 elif dt > (date_data + datetime.timedelta(7)): assert hosp[hosp["geoid"] == place]["incidH"][dt] == 0 - assert ( - hosp[hosp["geoid"] == place]["incidI"][dt - datetime.timedelta(7)] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place]["incidD"][dt - datetime.timedelta(4)] - == 0 - ) + assert hosp[hosp["geoid"] == place]["incidI"][dt - datetime.timedelta(7)] == 0 + assert hosp[hosp["geoid"] == place]["incidD"][dt - datetime.timedelta(4)] == 0 assert hosp[hosp["geoid"] == place]["incidICU"][dt] == 0 for cl in subclasses: for i, place in enumerate(geoid): for dt in hosp.index: if dt == date_data: + assert hosp[hosp["geoid"] == place][f"incidH{cl}"][dt + datetime.timedelta(7)] == diffI[i] * 0.1 + assert hosp[hosp["geoid"] == place][f"incidD{cl}"][dt + datetime.timedelta(2)] == diffI[i] * 0.01 assert ( - hosp[hosp["geoid"] == place][f"incidH{cl}"][ - dt + datetime.timedelta(7) - ] - == diffI[i] * 0.1 - ) - assert ( - hosp[hosp["geoid"] == place][f"incidD{cl}"][ - dt + datetime.timedelta(2) - ] - == diffI[i] * 0.01 - ) - assert ( - hosp[hosp["geoid"] == place][f"incidICU{cl}"][ - dt + datetime.timedelta(7) - ] + hosp[hosp["geoid"] == place][f"incidICU{cl}"][dt + datetime.timedelta(7)] == diffI[i] * 0.1 * 0.4 ) for j in range(7): assert ( - hosp[hosp["geoid"] == place][f"hosp_curr{cl}"][ - dt + datetime.timedelta(7 + j) - ] + hosp[hosp["geoid"] == place][f"hosp_curr{cl}"][dt + datetime.timedelta(7 + j)] == diffI[i] * 0.1 ) - assert ( - hosp[hosp["geoid"] == place][f"hosp_curr{cl}"][ - dt + datetime.timedelta(7 + 8) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place][f"hosp_curr{cl}"][dt + datetime.timedelta(7 + 8)] == 0 elif dt < date_data: - assert ( - hosp[hosp["geoid"] == place][f"incidH{cl}"][ - dt + datetime.timedelta(7) - ] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place][f"incidD{cl}"][ - dt + datetime.timedelta(2) - ] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place][f"incidICU{cl}"][ - dt + datetime.timedelta(7) - ] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place][f"hosp_curr{cl}"][ - dt + datetime.timedelta(7) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place][f"incidH{cl}"][dt + datetime.timedelta(7)] == 0 + assert 
hosp[hosp["geoid"] == place][f"incidD{cl}"][dt + datetime.timedelta(2)] == 0 + assert hosp[hosp["geoid"] == place][f"incidICU{cl}"][dt + datetime.timedelta(7)] == 0 + assert hosp[hosp["geoid"] == place][f"hosp_curr{cl}"][dt + datetime.timedelta(7)] == 0 elif dt > (date_data + datetime.timedelta(7)): assert hosp[hosp["geoid"] == place][f"incidH{cl}"][dt] == 0 - assert ( - hosp[hosp["geoid"] == place][f"incidD{cl}"][ - dt - datetime.timedelta(4) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place][f"incidD{cl}"][dt - datetime.timedelta(4)] == 0 assert hosp[hosp["geoid"] == place][f"incidICU{cl}"][dt] == 0 - hpar = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.10.hpar.parquet" - ).to_pandas() + hpar = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.10.hpar.parquet").to_pandas() for cl in subclasses: for i, place in enumerate(geoid): assert ( @@ -432,20 +274,16 @@ def test_outcomes_scenario_subclasses(): ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == f"incidH{cl}") - & (hpar["quantity"] == "delay") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == f"incidH{cl}") & (hpar["quantity"] == "delay")][ + "value" + ] ) == 7 ) assert ( float( hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == f"incidH{cl}") - & (hpar["quantity"] == "duration") + (hpar["geoid"] == place) & (hpar["outcome"] == f"incidH{cl}") & (hpar["quantity"] == "duration") ]["value"] ) == 7 @@ -462,11 +300,9 @@ def test_outcomes_scenario_subclasses(): ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == f"incidD{cl}") - & (hpar["quantity"] == "delay") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == f"incidD{cl}") & (hpar["quantity"] == "delay")][ + "value" + ] ) == 2 ) @@ -483,9 +319,7 @@ def test_outcomes_scenario_subclasses(): assert ( float( hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == f"incidICU{cl}") - & (hpar["quantity"] == "delay") + (hpar["geoid"] == place) & (hpar["outcome"] == f"incidICU{cl}") & (hpar["quantity"] == "delay") ]["value"] ) == 0 @@ -509,21 +343,13 @@ def test_outcomes_scenario_with_load_subclasses(): outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s) - hpar_config = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.10.hpar.parquet" - ).to_pandas() - hpar_rel = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.11.hpar.parquet" - ).to_pandas() + hpar_config = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.10.hpar.parquet").to_pandas() + hpar_rel = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.11.hpar.parquet").to_pandas() for cl in subclasses: for out in [f"incidH{cl}", f"incidD{cl}", f"incidICU{cl}"]: for i, place in enumerate(geoid): - a = hpar_rel[ - (hpar_rel["outcome"] == out) & (hpar_rel["geoid"] == place) - ] - b = hpar_config[ - (hpar_rel["outcome"] == out) & (hpar_config["geoid"] == place) - ] + a = hpar_rel[(hpar_rel["outcome"] == out) & (hpar_rel["geoid"] == place)] + b = hpar_config[(hpar_rel["outcome"] == out) & (hpar_config["geoid"] == place)] assert len(a) == len(b) for j in range(len(a)): if b.iloc[j]["quantity"] in ["delay", "duration"]: @@ -539,13 +365,9 @@ def test_outcomes_scenario_with_load_subclasses(): elif b.iloc[j]["outcome"] == f"incidICU{cl}": assert a.iloc[j]["value"] == b.iloc[j]["value"] * 0.4 elif b.iloc[j]["outcome"] == f"incidH{cl}": - assert a.iloc[j]["value"] == b.iloc[j]["value"] * ( - diffI[i] * 0.1 + add - ) + assert a.iloc[j]["value"] == 
b.iloc[j]["value"] * (diffI[i] * 0.1 + add) - hosp_rel = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.11.hosp.parquet" - ).to_pandas() + hosp_rel = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.11.hosp.parquet").to_pandas() assert (hosp_rel["incidH"] == hosp_rel["incidH_A"] + hosp_rel["incidH_B"]).all() @@ -574,24 +396,14 @@ def test_outcomes_read_write_hpar_subclasses(): out_run_id=13, ) - outcomes.onerun_delayframe_outcomes( - sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1 - ) + outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1) - hpar_read = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.12.hpar.parquet" - ).to_pandas() - hpar_wrote = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.13.hpar.parquet" - ).to_pandas() + hpar_read = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.12.hpar.parquet").to_pandas() + hpar_wrote = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.13.hpar.parquet").to_pandas() assert (hpar_read == hpar_wrote).all().all() - hosp_read = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.12.hosp.parquet" - ).to_pandas() - hosp_wrote = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.13.hosp.parquet" - ).to_pandas() + hosp_read = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.12.hosp.parquet").to_pandas() + hosp_wrote = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.13.hosp.parquet").to_pandas() assert (hosp_read == hosp_wrote).all().all() @@ -612,9 +424,7 @@ def test_multishift_notstochdelays(): [36, 29], ] ) - shifts = np.array( - [[1, 0], [2, 1], [1, 0], [2, 2], [1, 2], [0, 1], [1, 1], [1, 2], [1, 2], [1, 0]] - ) + shifts = np.array([[1, 0], [2, 1], [1, 0], [2, 2], [1, 2], [0, 1], [1, 1], [1, 2], [1, 2], [1, 0]]) expected = np.array( [ [0, 39], @@ -629,9 +439,7 @@ def test_multishift_notstochdelays(): [12, 32], ] ) - assert ( - outcomes.multishift(array, shifts, stoch_delay_flag=False) == expected - ).all() + assert (outcomes.multishift(array, shifts, stoch_delay_flag=False) == expected).all() def test_outcomes_npi(): @@ -648,144 +456,83 @@ def test_outcomes_npi(): ) outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s) - hosp = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.105.hosp.parquet" - ).to_pandas() + hosp = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.105.hosp.parquet").to_pandas() hosp.set_index("time", drop=True, inplace=True) # same as config.yaml (doubled, then NPI halve it) for i, place in enumerate(geoid): for dt in hosp.index: if dt == date_data: assert hosp[hosp["geoid"] == place]["incidI"][dt] == diffI[i] - assert ( - hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] - == diffI[i] * 0.1 - ) - assert ( - hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] - == diffI[i] * 0.01 - ) - assert ( - hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] - == diffI[i] * 0.1 * 0.4 - ) + assert hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] == diffI[i] * 0.1 + assert hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] == diffI[i] * 0.01 + assert hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] == diffI[i] * 0.1 * 0.4 for j in range(7): - assert ( - hosp[hosp["geoid"] == place]["hosp_curr"][ - dt + datetime.timedelta(7 + j) - ] - == diffI[i] * 0.1 - ) 
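The hosp assertions in this block, like those in the earlier scenario tests, encode the delayframe relationship the outcomes module is expected to satisfy: an outcome column equals the incident infections of `delay` days earlier scaled by `probability`. A small NumPy sketch of that relationship, using the probability of 0.1 and delay of 7 days that appear in the assertions (the incidence series itself is made up):

import numpy as np

incidI = np.zeros(30)
incidI[10] = 4.0                 # a single day with 4 incident infections
probability, delay = 0.1, 7      # incidH parameters used by the assertions above

incidH = np.zeros_like(incidI)
incidH[delay:] = probability * incidI[:-delay]  # hospitalisations appear `delay` days later

assert incidH[10 + delay] == 4.0 * probability
assert incidH[: 10 + delay].sum() == 0          # nothing before the shifted date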
- assert ( - hosp[hosp["geoid"] == place]["hosp_curr"][ - dt + datetime.timedelta(7 + 8) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place]["hosp_curr"][dt + datetime.timedelta(7 + j)] == diffI[i] * 0.1 + assert hosp[hosp["geoid"] == place]["hosp_curr"][dt + datetime.timedelta(7 + 8)] == 0 elif dt < date_data: - assert ( - hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] - == 0 - ) + assert hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] == 0 assert hosp[hosp["geoid"] == place]["incidI"][dt] == 0 - assert ( - hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place]["hosp_curr"][ - dt + datetime.timedelta(7) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] == 0 + assert hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] == 0 + assert hosp[hosp["geoid"] == place]["hosp_curr"][dt + datetime.timedelta(7)] == 0 elif dt > (date_data + datetime.timedelta(7)): assert hosp[hosp["geoid"] == place]["incidH"][dt] == 0 - assert ( - hosp[hosp["geoid"] == place]["incidI"][dt - datetime.timedelta(7)] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place]["incidD"][dt - datetime.timedelta(4)] - == 0 - ) + assert hosp[hosp["geoid"] == place]["incidI"][dt - datetime.timedelta(7)] == 0 + assert hosp[hosp["geoid"] == place]["incidD"][dt - datetime.timedelta(4)] == 0 assert hosp[hosp["geoid"] == place]["incidICU"][dt] == 0 - hpar = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.105.hpar.parquet" - ).to_pandas() + hpar = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.105.hpar.parquet").to_pandas() # Doubled everything from previous config.yaml for i, place in enumerate(geoid): assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidH") - & (hpar["quantity"] == "probability") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidH") & (hpar["quantity"] == "probability")][ + "value" + ] ) == 0.1 * 2 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidH") - & (hpar["quantity"] == "delay") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidH") & (hpar["quantity"] == "delay")]["value"] ) == 7 * 2 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidH") - & (hpar["quantity"] == "duration") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidH") & (hpar["quantity"] == "duration")][ + "value" + ] ) == 7 * 2 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidD") - & (hpar["quantity"] == "probability") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidD") & (hpar["quantity"] == "probability")][ + "value" + ] ) == 0.01 * 2 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidD") - & (hpar["quantity"] == "delay") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidD") & (hpar["quantity"] == "delay")]["value"] ) == 2 * 2 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidICU") - & (hpar["quantity"] == "probability") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidICU") & (hpar["quantity"] == "probability")][ + "value" + ] ) == 0.4 * 2 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidICU") - & 
(hpar["quantity"] == "delay") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidICU") & (hpar["quantity"] == "delay")][ + "value" + ] ) == 0 * 2 ) @@ -804,31 +551,17 @@ def test_outcomes_read_write_hnpi(): out_run_id=106, ) - outcomes.onerun_delayframe_outcomes( - sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1 - ) + outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1) - hpar_read = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.105.hpar.parquet" - ).to_pandas() - hpar_wrote = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.106.hpar.parquet" - ).to_pandas() + hpar_read = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.105.hpar.parquet").to_pandas() + hpar_wrote = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.106.hpar.parquet").to_pandas() assert (hpar_read == hpar_wrote).all().all() - hnpi_read = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet" - ).to_pandas() - hnpi_wrote = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet" - ).to_pandas() + hnpi_read = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet").to_pandas() + hnpi_wrote = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet").to_pandas() assert (hnpi_read == hnpi_wrote).all().all() - hosp_read = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.105.hosp.parquet" - ).to_pandas() - hosp_wrote = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.106.hosp.parquet" - ).to_pandas() + hosp_read = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.105.hosp.parquet").to_pandas() + hosp_wrote = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.106.hosp.parquet").to_pandas() assert (hosp_read == hosp_wrote).all().all() @@ -845,27 +578,17 @@ def test_outcomes_read_write_hnpi2(): out_run_id=106, ) - hnpi_read = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet" - ).to_pandas() + hnpi_read = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet").to_pandas() hnpi_read["reduction"] = np.random.random(len(hnpi_read)) * 2 - 1 out_hnpi = pa.Table.from_pandas(hnpi_read, preserve_index=False) - pa.parquet.write_table( - out_hnpi, file_paths.create_file_name(105, "", 1, "hnpi", "parquet") - ) + pa.parquet.write_table(out_hnpi, file_paths.create_file_name(105, "", 1, "hnpi", "parquet")) import random random.seed(10) - outcomes.onerun_delayframe_outcomes( - sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1 - ) + outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1) - hnpi_read = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet" - ).to_pandas() - hnpi_wrote = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet" - ).to_pandas() + hnpi_read = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet").to_pandas() + hnpi_wrote = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet").to_pandas() assert (hnpi_read == hnpi_wrote).all().all() # runs with the new, random NPI @@ -878,30 +601,16 @@ def test_outcomes_read_write_hnpi2(): stoch_traj_flag=False, out_run_id=107, ) - outcomes.onerun_delayframe_outcomes( - sim_id2write=1, s=inference_simulator.s, 
load_ID=True, sim_id2load=1 - ) + outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1) - hpar_read = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.106.hpar.parquet" - ).to_pandas() - hpar_wrote = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.107.hpar.parquet" - ).to_pandas() + hpar_read = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.106.hpar.parquet").to_pandas() + hpar_wrote = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.107.hpar.parquet").to_pandas() assert (hpar_read == hpar_wrote).all().all() - hnpi_read = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet" - ).to_pandas() - hnpi_wrote = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.107.hnpi.parquet" - ).to_pandas() + hnpi_read = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet").to_pandas() + hnpi_wrote = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.107.hnpi.parquet").to_pandas() assert (hnpi_read == hnpi_wrote).all().all() - hosp_read = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.106.hosp.parquet" - ).to_pandas() - hosp_wrote = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.107.hosp.parquet" - ).to_pandas() + hosp_read = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.106.hosp.parquet").to_pandas() + hosp_wrote = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.107.hosp.parquet").to_pandas() assert (hosp_read == hosp_wrote).all().all() @@ -917,148 +626,85 @@ def test_outcomes_npi_custom_pname(): stoch_traj_flag=False, out_run_id=105, ) - outcomes.onerun_delayframe_outcomes( - sim_id2write=1, s=inference_simulator.s, load_ID=False, sim_id2load=1 - ) + outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s, load_ID=False, sim_id2load=1) - hosp = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.105.hosp.parquet" - ).to_pandas() + hosp = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.105.hosp.parquet").to_pandas() hosp.set_index("time", drop=True, inplace=True) # same as config.yaml (doubled, then NPI halve it) for i, place in enumerate(geoid): for dt in hosp.index: if dt == date_data: assert hosp[hosp["geoid"] == place]["incidI"][dt] == diffI[i] - assert ( - hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] - == diffI[i] * 0.1 - ) - assert ( - hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] - == diffI[i] * 0.01 - ) - assert ( - hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] - == diffI[i] * 0.1 * 0.4 - ) + assert hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] == diffI[i] * 0.1 + assert hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] == diffI[i] * 0.01 + assert hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] == diffI[i] * 0.1 * 0.4 for j in range(7): - assert ( - hosp[hosp["geoid"] == place]["hosp_curr"][ - dt + datetime.timedelta(7 + j) - ] - == diffI[i] * 0.1 - ) - assert ( - hosp[hosp["geoid"] == place]["hosp_curr"][ - dt + datetime.timedelta(7 + 8) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place]["hosp_curr"][dt + datetime.timedelta(7 + j)] == diffI[i] * 0.1 + assert hosp[hosp["geoid"] == place]["hosp_curr"][dt + datetime.timedelta(7 + 8)] == 0 elif dt < date_data: - assert ( - hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] - == 0 - ) 
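The hpar checks surrounding this hunk all use the same chained boolean-mask lookup that black now keeps on one 120-character line: filter the frame by geoid, outcome, and quantity, then coerce the single remaining value to float. A toy pandas illustration of that lookup, with invented data echoing the doubled 0.1 * 2 and 7 * 2 values checked above:

import pandas as pd

hpar = pd.DataFrame(
    {
        "geoid": ["10001", "10001", "20002"],
        "outcome": ["incidH", "incidH", "incidH"],
        "quantity": ["probability", "delay", "probability"],
        "value": [0.2, 14.0, 0.2],
    }
)

place = "10001"
row = hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidH") & (hpar["quantity"] == "probability")]
# The tests call float() on the one-row selection directly; .iloc[0] makes the same intent explicit.
assert float(row["value"].iloc[0]) == 0.2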
+ assert hosp[hosp["geoid"] == place]["incidH"][dt + datetime.timedelta(7)] == 0 assert hosp[hosp["geoid"] == place]["incidI"][dt] == 0 - assert ( - hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place]["hosp_curr"][ - dt + datetime.timedelta(7) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place]["incidD"][dt + datetime.timedelta(2)] == 0 + assert hosp[hosp["geoid"] == place]["incidICU"][dt + datetime.timedelta(7)] == 0 + assert hosp[hosp["geoid"] == place]["hosp_curr"][dt + datetime.timedelta(7)] == 0 elif dt > (date_data + datetime.timedelta(7)): assert hosp[hosp["geoid"] == place]["incidH"][dt] == 0 - assert ( - hosp[hosp["geoid"] == place]["incidI"][dt - datetime.timedelta(7)] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place]["incidD"][dt - datetime.timedelta(4)] - == 0 - ) + assert hosp[hosp["geoid"] == place]["incidI"][dt - datetime.timedelta(7)] == 0 + assert hosp[hosp["geoid"] == place]["incidD"][dt - datetime.timedelta(4)] == 0 assert hosp[hosp["geoid"] == place]["incidICU"][dt] == 0 - hpar = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.105.hpar.parquet" - ).to_pandas() + hpar = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.105.hpar.parquet").to_pandas() # Doubled everything from previous config.yaml for i, place in enumerate(geoid): assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidH") - & (hpar["quantity"] == "probability") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidH") & (hpar["quantity"] == "probability")][ + "value" + ] ) == 0.1 * 2 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidH") - & (hpar["quantity"] == "delay") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidH") & (hpar["quantity"] == "delay")]["value"] ) == 7 * 2 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidH") - & (hpar["quantity"] == "duration") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidH") & (hpar["quantity"] == "duration")][ + "value" + ] ) == 7 * 2 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidD") - & (hpar["quantity"] == "probability") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidD") & (hpar["quantity"] == "probability")][ + "value" + ] ) == 0.01 * 2 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidD") - & (hpar["quantity"] == "delay") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidD") & (hpar["quantity"] == "delay")]["value"] ) == 2 * 2 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidICU") - & (hpar["quantity"] == "probability") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidICU") & (hpar["quantity"] == "probability")][ + "value" + ] ) == 0.4 * 2 ) assert ( float( - hpar[ - (hpar["geoid"] == place) - & (hpar["outcome"] == "incidICU") - & (hpar["quantity"] == "delay") - ]["value"] + hpar[(hpar["geoid"] == place) & (hpar["outcome"] == "incidICU") & (hpar["quantity"] == "delay")][ + "value" + ] ) == 0 * 2 ) @@ -1077,30 +723,16 @@ def test_outcomes_read_write_hnpi_custom_pname(): out_run_id=106, ) - outcomes.onerun_delayframe_outcomes( - sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1 - ) + 
outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1) - hpar_read = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.105.hpar.parquet" - ).to_pandas() - hpar_wrote = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.106.hpar.parquet" - ).to_pandas() + hpar_read = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.105.hpar.parquet").to_pandas() + hpar_wrote = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.106.hpar.parquet").to_pandas() assert (hpar_read == hpar_wrote).all().all() - hnpi_read = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet" - ).to_pandas() - hnpi_wrote = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet" - ).to_pandas() + hnpi_read = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet").to_pandas() + hnpi_wrote = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet").to_pandas() assert (hnpi_read == hnpi_wrote).all().all() - hosp_read = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.105.hosp.parquet" - ).to_pandas() - hosp_wrote = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.106.hosp.parquet" - ).to_pandas() + hosp_read = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.105.hosp.parquet").to_pandas() + hosp_wrote = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.106.hosp.parquet").to_pandas() assert (hosp_read == hosp_wrote).all().all() @@ -1109,14 +741,10 @@ def test_outcomes_read_write_hnpi2_custom_pname(): prefix = "" - hnpi_read = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet" - ).to_pandas() + hnpi_read = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet").to_pandas() hnpi_read["reduction"] = np.random.random(len(hnpi_read)) * 2 - 1 out_hnpi = pa.Table.from_pandas(hnpi_read, preserve_index=False) - pa.parquet.write_table( - out_hnpi, file_paths.create_file_name(105, prefix, 1, "hnpi", "parquet") - ) + pa.parquet.write_table(out_hnpi, file_paths.create_file_name(105, prefix, 1, "hnpi", "parquet")) import random random.seed(10) @@ -1131,16 +759,10 @@ def test_outcomes_read_write_hnpi2_custom_pname(): out_run_id=106, ) - outcomes.onerun_delayframe_outcomes( - sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1 - ) + outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1) - hnpi_read = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet" - ).to_pandas() - hnpi_wrote = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet" - ).to_pandas() + hnpi_read = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.105.hnpi.parquet").to_pandas() + hnpi_wrote = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet").to_pandas() assert (hnpi_read == hnpi_wrote).all().all() # runs with the new, random NPI @@ -1154,30 +776,16 @@ def test_outcomes_read_write_hnpi2_custom_pname(): out_run_id=107, ) - outcomes.onerun_delayframe_outcomes( - sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1 - ) + outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1) - hpar_read = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.106.hpar.parquet" - ).to_pandas() - hpar_wrote = 
pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.107.hpar.parquet" - ).to_pandas() + hpar_read = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.106.hpar.parquet").to_pandas() + hpar_wrote = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.107.hpar.parquet").to_pandas() assert (hpar_read == hpar_wrote).all().all() - hnpi_read = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet" - ).to_pandas() - hnpi_wrote = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.107.hnpi.parquet" - ).to_pandas() + hnpi_read = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.106.hnpi.parquet").to_pandas() + hnpi_wrote = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.107.hnpi.parquet").to_pandas() assert (hnpi_read == hnpi_wrote).all().all() - hosp_read = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.106.hosp.parquet" - ).to_pandas() - hosp_wrote = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.107.hosp.parquet" - ).to_pandas() + hosp_read = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.106.hosp.parquet").to_pandas() + hosp_wrote = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.107.hosp.parquet").to_pandas() assert (hosp_read == hosp_wrote).all().all() @@ -1196,25 +804,17 @@ def test_outcomes_pcomp(): ) p_compmult = [1, 3] - seir = pq.read_table( - f"{config_path_prefix}model_output/seir/000000001.105.seir.parquet" - ).to_pandas() + seir = pq.read_table(f"{config_path_prefix}model_output/seir/000000001.105.seir.parquet").to_pandas() seir2 = seir.copy() seir2["mc_vaccination_stage"] = "first_dose" for pl in geoid: seir2[pl] = seir2[pl] * p_compmult[1] new_seir = pd.concat([seir, seir2]) out_df = pa.Table.from_pandas(new_seir, preserve_index=False) - pa.parquet.write_table( - out_df, file_paths.create_file_name(110, prefix, 1, "seir", "parquet") - ) - outcomes.onerun_delayframe_outcomes( - sim_id2write=1, s=inference_simulator.s, load_ID=False - ) + pa.parquet.write_table(out_df, file_paths.create_file_name(110, prefix, 1, "seir", "parquet")) + outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s, load_ID=False) - hosp_f = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.111.hosp.parquet" - ).to_pandas() + hosp_f = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.111.hosp.parquet").to_pandas() hosp_f.set_index("time", drop=True, inplace=True) # same as config.yaml (doubled, then NPI halve it) for k, p_comp in enumerate(["0dose", "1dose"]): @@ -1222,90 +822,42 @@ def test_outcomes_pcomp(): for i, place in enumerate(geoid): for dt in hosp.index: if dt == date_data: + assert hosp[hosp["geoid"] == place][f"incidI_{p_comp}"][dt] == diffI[i] * p_compmult[k] assert ( - hosp[hosp["geoid"] == place][f"incidI_{p_comp}"][dt] - == diffI[i] * p_compmult[k] - ) - assert ( - hosp[hosp["geoid"] == place][f"incidH_{p_comp}"][ - dt + datetime.timedelta(7) - ] + hosp[hosp["geoid"] == place][f"incidH_{p_comp}"][dt + datetime.timedelta(7)] - diffI[i] * 0.1 * p_compmult[k] < 1e-8 ) assert ( - hosp[hosp["geoid"] == place][f"incidD_{p_comp}"][ - dt + datetime.timedelta(2) - ] + hosp[hosp["geoid"] == place][f"incidD_{p_comp}"][dt + datetime.timedelta(2)] - diffI[i] * 0.01 * p_compmult[k] < 1e-8 ) assert ( - hosp[hosp["geoid"] == place][f"incidICU_{p_comp}"][ - dt + datetime.timedelta(7) - ] + hosp[hosp["geoid"] == place][f"incidICU_{p_comp}"][dt + datetime.timedelta(7)] - diffI[i] 
* 0.1 * 0.4 * p_compmult[k] < 1e-8 ) for j in range(7): assert ( - hosp[hosp["geoid"] == place][f"incidH_{p_comp}_curr"][ - dt + datetime.timedelta(7 + j) - ] + hosp[hosp["geoid"] == place][f"incidH_{p_comp}_curr"][dt + datetime.timedelta(7 + j)] - diffI[i] * 0.1 * p_compmult[k] < 1e-8 ) - assert ( - hosp[hosp["geoid"] == place][f"incidH_{p_comp}_curr"][ - dt + datetime.timedelta(7 + 8) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place][f"incidH_{p_comp}_curr"][dt + datetime.timedelta(7 + 8)] == 0 elif dt < date_data: - assert ( - hosp[hosp["geoid"] == place][f"incidH_{p_comp}"][ - dt + datetime.timedelta(7) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place][f"incidH_{p_comp}"][dt + datetime.timedelta(7)] == 0 assert hosp[hosp["geoid"] == place][f"incidI_{p_comp}"][dt] == 0 - assert ( - hosp[hosp["geoid"] == place][f"incidD_{p_comp}"][ - dt + datetime.timedelta(2) - ] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place][f"incidICU_{p_comp}"][ - dt + datetime.timedelta(7) - ] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place][f"incidH_{p_comp}_curr"][ - dt + datetime.timedelta(7) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place][f"incidD_{p_comp}"][dt + datetime.timedelta(2)] == 0 + assert hosp[hosp["geoid"] == place][f"incidICU_{p_comp}"][dt + datetime.timedelta(7)] == 0 + assert hosp[hosp["geoid"] == place][f"incidH_{p_comp}_curr"][dt + datetime.timedelta(7)] == 0 elif dt > (date_data + datetime.timedelta(7)): assert hosp[hosp["geoid"] == place][f"incidH_{p_comp}"][dt] == 0 - assert ( - hosp[hosp["geoid"] == place][f"incidI_{p_comp}"][ - dt - datetime.timedelta(7) - ] - == 0 - ) - assert ( - hosp[hosp["geoid"] == place][f"incidD_{p_comp}"][ - dt - datetime.timedelta(4) - ] - == 0 - ) + assert hosp[hosp["geoid"] == place][f"incidI_{p_comp}"][dt - datetime.timedelta(7)] == 0 + assert hosp[hosp["geoid"] == place][f"incidD_{p_comp}"][dt - datetime.timedelta(4)] == 0 assert hosp[hosp["geoid"] == place][f"incidICU_{p_comp}"][dt] == 0 - hpar_f = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.111.hpar.parquet" - ).to_pandas() + hpar_f = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.111.hpar.parquet").to_pandas() # Doubled everything from previous config.yaml # for k, p_comp in enumerate(["unvaccinated", "first_dose"]): for k, p_comp in enumerate(["0dose", "1dose"]): @@ -1396,30 +948,16 @@ def test_outcomes_pcomp_read_write(): out_run_id=112, ) - outcomes.onerun_delayframe_outcomes( - sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1 - ) + outcomes.onerun_delayframe_outcomes(sim_id2write=1, s=inference_simulator.s, load_ID=True, sim_id2load=1) - hpar_read = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.111.hpar.parquet" - ).to_pandas() - hpar_wrote = pq.read_table( - f"{config_path_prefix}model_output/hpar/000000001.112.hpar.parquet" - ).to_pandas() + hpar_read = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.111.hpar.parquet").to_pandas() + hpar_wrote = pq.read_table(f"{config_path_prefix}model_output/hpar/000000001.112.hpar.parquet").to_pandas() assert (hpar_read == hpar_wrote).all().all() - hnpi_read = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.111.hnpi.parquet" - ).to_pandas() - hnpi_wrote = pq.read_table( - f"{config_path_prefix}model_output/hnpi/000000001.112.hnpi.parquet" - ).to_pandas() + hnpi_read = pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.111.hnpi.parquet").to_pandas() + hnpi_wrote = 
pq.read_table(f"{config_path_prefix}model_output/hnpi/000000001.112.hnpi.parquet").to_pandas() assert (hnpi_read == hnpi_wrote).all().all() - hosp_read = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.111.hosp.parquet" - ).to_pandas() - hosp_wrote = pq.read_table( - f"{config_path_prefix}model_output/hosp/000000001.112.hosp.parquet" - ).to_pandas() + hosp_read = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.111.hosp.parquet").to_pandas() + hosp_wrote = pq.read_table(f"{config_path_prefix}model_output/hosp/000000001.112.hosp.parquet").to_pandas() assert (hosp_read == hosp_wrote).all().all() diff --git a/gempyor_pkg/tests/seir/dev_new_test.py b/gempyor_pkg/tests/seir/dev_new_test.py index de6af8a06..d33503f38 100644 --- a/gempyor_pkg/tests/seir/dev_new_test.py +++ b/gempyor_pkg/tests/seir/dev_new_test.py @@ -19,7 +19,7 @@ os.chdir(os.path.dirname(__file__)) -#def test_parameters_from_timeserie_file(): +# def test_parameters_from_timeserie_file(): if True: config.clear() config.read(user=False) @@ -33,32 +33,27 @@ stoch_traj_flag=False, ) - #p = parameters.Parameters( + # p = parameters.Parameters( # parameter_config=config["seir"]["parameters"], config_version="v2") p = inference_simulator.s.parameters - p_draw = p.parameters_quick_draw(n_days=inference_simulator.s.n_days, - nnodes=inference_simulator.s.nnodes) + p_draw = p.parameters_quick_draw(n_days=inference_simulator.s.n_days, nnodes=inference_simulator.s.nnodes) p_df = p.getParameterDF(p_draw)["parameter"] for pn in p.pnames: - if pn == 'R0s': - assert (pn not in p_df) + if pn == "R0s": + assert pn not in p_df else: - assert(pn in p_df) + assert pn in p_df initial_df = read_df("data/r0s_ts.csv").set_index("date") - assert((p_draw[p.pnames2pindex['R0s']] == initial_df.values).all()) + assert (p_draw[p.pnames2pindex["R0s"]] == initial_df.values).all() ### test what happen when the order of geoids is not respected (expected: reput them in order) - ### test what happens with incomplete data (expected: fail) ### test what happens when loading from file - #write_df(fname="test_pwrite.parquet", df=p.getParameterDF(p_draw=p_draw)) - - - + # write_df(fname="test_pwrite.parquet", df=p.getParameterDF(p_draw=p_draw)) diff --git a/gempyor_pkg/tests/seir/interface.ipynb b/gempyor_pkg/tests/seir/interface.ipynb index 96447164b..f2ae64590 100644 --- a/gempyor_pkg/tests/seir/interface.ipynb +++ b/gempyor_pkg/tests/seir/interface.ipynb @@ -229,9 +229,7 @@ "sim_id2write = 0\n", "\n", "\n", - "gempyor_simulator.outcomes_parameters = outcomes.read_parameters_from_config(\n", - " gempyor_simulator.s\n", - ")\n", + "gempyor_simulator.outcomes_parameters = outcomes.read_parameters_from_config(gempyor_simulator.s)\n", "\n", "npi_outcomes = None\n", "\n", @@ -243,9 +241,7 @@ " gempyor_simulator.proportion_info,\n", " ) = gempyor_simulator.s.compartments.get_transition_array()\n", " gempyor_simulator.already_built = True\n", - "npi_seir = seir.build_npi_SEIR(\n", - " s=gempyor_simulator.s, load_ID=load_ID, sim_id2load=sim_id2load, config=config\n", - ")\n", + "npi_seir = seir.build_npi_SEIR(s=gempyor_simulator.s, load_ID=load_ID, sim_id2load=sim_id2load, config=config)\n", "if gempyor_simulator.s.npi_config_outcomes:\n", " npi_outcomes = outcomes.build_npi_Outcomes(\n", " s=gempyor_simulator.s,\n", @@ -257,16 +253,12 @@ "### Run every time:\n", "with Timer(\"onerun_SEIR.seeding\"):\n", " if load_ID:\n", - " initial_conditions = gempyor_simulator.s.seedingAndIC.load_ic(\n", - " sim_id2load, setup=gempyor_simulator.s\n", - " 
)\n", + " initial_conditions = gempyor_simulator.s.seedingAndIC.load_ic(sim_id2load, setup=gempyor_simulator.s)\n", " seeding_data, seeding_amounts = gempyor_simulator.s.seedingAndIC.load_seeding(\n", " sim_id2load, setup=gempyor_simulator.s\n", " )\n", " else:\n", - " initial_conditions = gempyor_simulator.s.seedingAndIC.draw_ic(\n", - " sim_id2write, setup=gempyor_simulator.s\n", - " )\n", + " initial_conditions = gempyor_simulator.s.seedingAndIC.draw_ic(sim_id2write, setup=gempyor_simulator.s)\n", " seeding_data, seeding_amounts = gempyor_simulator.s.seedingAndIC.draw_seeding(\n", " sim_id2write, setup=gempyor_simulator.s\n", " )\n", diff --git a/gempyor_pkg/tests/seir/test_compartments.py b/gempyor_pkg/tests/seir/test_compartments.py index dccef2bde..1db824981 100644 --- a/gempyor_pkg/tests/seir/test_compartments.py +++ b/gempyor_pkg/tests/seir/test_compartments.py @@ -46,9 +46,7 @@ def test_check_transitions_parquet_writing_and_loading(): lhs = compartments.Compartments(seir_config=config["seir"]) temp_compartments_file = f"{DATA_DIR}/parsed_compartment_compartments.test.parquet" temp_transitions_file = f"{DATA_DIR}/parsed_compartment_transitions.test.parquet" - lhs.toFile( - compartments_file=temp_compartments_file, transitions_file=temp_transitions_file - ) + lhs.toFile(compartments_file=temp_compartments_file, transitions_file=temp_transitions_file) rhs = compartments.Compartments( seir_config=config["seir"], compartments_file=temp_compartments_file, diff --git a/gempyor_pkg/tests/seir/test_new_seir.py b/gempyor_pkg/tests/seir/test_new_seir.py index 3836802e7..ee01cd0e9 100644 --- a/gempyor_pkg/tests/seir/test_new_seir.py +++ b/gempyor_pkg/tests/seir/test_new_seir.py @@ -47,9 +47,7 @@ def test_constant_population(): initial_conditions = s.seedingAndIC.draw_ic(sim_id=0, setup=s) seeding_data, seeding_amounts = s.seedingAndIC.load_seeding(sim_id=100, setup=s) - npi = NPI.NPIBase.execute( - npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames - ) + npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames) parameters = s.parameters.parameters_quick_draw(n_days=s.n_days, nnodes=s.nnodes) parameter_names = [x for x in s.parameters.pnames] @@ -61,9 +59,7 @@ def test_constant_population(): proportion_array, proportion_info, ) = s.compartments.get_transition_array() - parsed_parameters = s.compartments.parse_parameters( - parameters, s.parameters.pnames, unique_strings - ) + parsed_parameters = s.compartments.parse_parameters(parameters, s.parameters.pnames, unique_strings) print("RUN_FUN_END") print(proportion_array) diff --git a/gempyor_pkg/tests/seir/test_parameters.py b/gempyor_pkg/tests/seir/test_parameters.py index 43a90a31f..84dd14e6d 100644 --- a/gempyor_pkg/tests/seir/test_parameters.py +++ b/gempyor_pkg/tests/seir/test_parameters.py @@ -24,12 +24,12 @@ def test_parameters_from_config_plus_read_write(): config.set_file(f"{DATA_DIR}/config_compartmental_model_format.yml") # Would be better to build a setup ss = setup.SpatialSetup( - setup_name="test_seir", - geodata_file=f"{DATA_DIR}/geodata.csv", - mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", - nodenames_key="geoid", -) + setup_name="test_seir", + geodata_file=f"{DATA_DIR}/geodata.csv", + mobility_file=f"{DATA_DIR}/mobility.txt", + popnodes_key="population", + nodenames_key="geoid", + ) index = 1 run_id = "test_parameter" @@ -39,7 +39,7 @@ def test_parameters_from_config_plus_read_write(): spatial_setup=ss, nsim=1, 
npi_scenario="None", - config_version='v2', + config_version="v2", npi_config_seir=config["interventions"]["settings"]["None"], parameters_config=config["seir"]["parameters"], seeding_config=config["seeding"], @@ -56,13 +56,21 @@ def test_parameters_from_config_plus_read_write(): ) lhs = parameters.Parameters( - parameter_config=config["seir"]["parameters"], ti = s.ti, tf=s.tf, nodenames=s.spatset.nodenames, config_version="v2" + parameter_config=config["seir"]["parameters"], + ti=s.ti, + tf=s.tf, + nodenames=s.spatset.nodenames, + config_version="v2", ) n_days = 10 nnodes = 5 p = parameters.Parameters( - parameter_config=config["seir"]["parameters"], ti = s.ti, tf=s.tf, nodenames=s.spatset.nodenames, config_version="v2" + parameter_config=config["seir"]["parameters"], + ti=s.ti, + tf=s.tf, + nodenames=s.spatset.nodenames, + config_version="v2", ) p_draw = p.parameters_quick_draw(n_days=10, nnodes=5) # test shape @@ -71,11 +79,13 @@ def test_parameters_from_config_plus_read_write(): write_df(fname="test_pwrite.parquet", df=p.getParameterDF(p_draw=p_draw)) rhs = parameters.Parameters( - parameter_config=config["seir"]["parameters"], ti = s.ti, tf=s.tf, nodenames=s.spatset.nodenames, config_version="v2" - ) - p_load = rhs.parameters_load( - param_df=read_df("test_pwrite.parquet"), n_days=n_days, nnodes=nnodes + parameter_config=config["seir"]["parameters"], + ti=s.ti, + tf=s.tf, + nodenames=s.spatset.nodenames, + config_version="v2", ) + p_load = rhs.parameters_load(param_df=read_df("test_pwrite.parquet"), n_days=n_days, nnodes=nnodes) assert (p_draw == p_load).all() @@ -86,12 +96,12 @@ def test_parameters_quick_draw_old(): config.set_file(f"{DATA_DIR}/parameters_only.yml") ss = setup.SpatialSetup( - setup_name="test_seir", - geodata_file=f"{DATA_DIR}/geodata.csv", - mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", - nodenames_key="geoid", -) + setup_name="test_seir", + geodata_file=f"{DATA_DIR}/geodata.csv", + mobility_file=f"{DATA_DIR}/mobility.txt", + popnodes_key="population", + nodenames_key="geoid", + ) index = 1 run_id = "test_parameter" prefix = "" @@ -105,7 +115,7 @@ def test_parameters_quick_draw_old(): seeding_config=config["seeding"], ti=config["start_date"].as_date(), tf=config["end_date"].as_date(), - config_version='old', + config_version="old", interactive=True, write_csv=False, first_sim_index=index, @@ -116,16 +126,20 @@ def test_parameters_quick_draw_old(): dt=0.25, ) - params = parameters.Parameters(parameter_config=config["seir"]["parameters"], ti = s.ti, tf=s.tf, nodenames=s.spatset.nodenames, config_version="old" ) + params = parameters.Parameters( + parameter_config=config["seir"]["parameters"], + ti=s.ti, + tf=s.tf, + nodenames=s.spatset.nodenames, + config_version="old", + ) ### Check that the object is well constructed: print(params.pnames) assert params.pnames == ["alpha", "sigma", "gamma", "R0"] assert params.npar == 4 assert params.intervention_overlap_operation["sum"] == [] - assert params.intervention_overlap_operation["prod"] == [ - pn.lower() for pn in params.pnames - ] + assert params.intervention_overlap_operation["prod"] == [pn.lower() for pn in params.pnames] p_array = params.parameters_quick_draw(n_days=s.n_days, nnodes=s.nnodes) print(p_array.shape) @@ -150,17 +164,18 @@ def test_parameters_quick_draw_old(): assert gamma.shape == (s.n_days, s.nnodes) assert len(np.unique(gamma)) == 1 + def test_parameters_from_timeserie_file(): config.clear() config.read(user=False) 
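    # Same round-trip pattern as test_parameters_from_config_plus_read_write above:
    # build a Setup and Parameters from this config, quick-draw a parameter array,
    # write it with write_df to test_pwrite.parquet, reload it with parameters_load,
    # and assert the reloaded draw matches the original exactly.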
config.set_file(f"{DATA_DIR}/config_compartmental_model_format.yml") ss = setup.SpatialSetup( - setup_name="test_seir", - geodata_file=f"{DATA_DIR}/geodata.csv", - mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", - nodenames_key="geoid", -) + setup_name="test_seir", + geodata_file=f"{DATA_DIR}/geodata.csv", + mobility_file=f"{DATA_DIR}/mobility.txt", + popnodes_key="population", + nodenames_key="geoid", + ) index = 1 run_id = "test_parameter" prefix = "" @@ -169,7 +184,7 @@ def test_parameters_from_timeserie_file(): spatial_setup=ss, nsim=1, npi_scenario="None", - config_version='v2', + config_version="v2", npi_config_seir=config["interventions"]["settings"]["None"], parameters_config=config["seir"]["parameters"], seeding_config=config["seeding"], @@ -186,13 +201,21 @@ def test_parameters_from_timeserie_file(): ) lhs = parameters.Parameters( - parameter_config=config["seir"]["parameters"], ti = s.ti, tf=s.tf, nodenames=s.spatset.nodenames,config_version="v2" + parameter_config=config["seir"]["parameters"], + ti=s.ti, + tf=s.tf, + nodenames=s.spatset.nodenames, + config_version="v2", ) n_days = 10 nnodes = 5 p = parameters.Parameters( - parameter_config=config["seir"]["parameters"],ti = s.ti, tf=s.tf, nodenames=s.spatset.nodenames, config_version="v2" + parameter_config=config["seir"]["parameters"], + ti=s.ti, + tf=s.tf, + nodenames=s.spatset.nodenames, + config_version="v2", ) p_draw = p.parameters_quick_draw(n_days=10, nnodes=5) # test shape @@ -201,10 +224,12 @@ def test_parameters_from_timeserie_file(): write_df(fname="test_pwrite.parquet", df=p.getParameterDF(p_draw=p_draw)) rhs = parameters.Parameters( - parameter_config=config["seir"]["parameters"],ti = s.ti, tf=s.tf, nodenames=s.spatset.nodenames, config_version="v2" - ) - p_load = rhs.parameters_load( - param_df=read_df("test_pwrite.parquet"), n_days=n_days, nnodes=nnodes + parameter_config=config["seir"]["parameters"], + ti=s.ti, + tf=s.tf, + nodenames=s.spatset.nodenames, + config_version="v2", ) + p_load = rhs.parameters_load(param_df=read_df("test_pwrite.parquet"), n_days=n_days, nnodes=nnodes) assert (p_draw == p_load).all() diff --git a/gempyor_pkg/tests/seir/test_seir.py b/gempyor_pkg/tests/seir/test_seir.py index a08d92ae4..fe2600a22 100644 --- a/gempyor_pkg/tests/seir/test_seir.py +++ b/gempyor_pkg/tests/seir/test_seir.py @@ -108,9 +108,7 @@ def test_constant_population_legacy_integration(): seeding_data, seeding_amounts = s.seedingAndIC.load_seeding(sim_id=100, setup=s) initial_conditions = s.seedingAndIC.draw_ic(sim_id=100, setup=s) - npi = NPI.NPIBase.execute( - npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames - ) + npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames) params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) params = s.parameters.parameters_reduce(params, npi) @@ -121,9 +119,7 @@ def test_constant_population_legacy_integration(): proportion_array, proportion_info, ) = s.compartments.get_transition_array() - parsed_parameters = s.compartments.parse_parameters( - params, s.parameters.pnames, unique_strings - ) + parsed_parameters = s.compartments.parse_parameters(params, s.parameters.pnames, unique_strings) states = seir.steps_SEIR( s, @@ -142,11 +138,7 @@ def test_constant_population_legacy_integration(): totalpop = 0 for i in range(s.nnodes): totalpop += states[0].sum(axis=1)[it, i] - assert ( - states[0].sum(axis=1)[it, i] - 1e-3 - < origpop[i] - < states[0].sum(axis=1)[it, i] + 1e-3 - ) + 
assert states[0].sum(axis=1)[it, i] - 1e-3 < origpop[i] < states[0].sum(axis=1)[it, i] + 1e-3 assert completepop - 1e-3 < totalpop < completepop + 1e-3 @@ -191,9 +183,7 @@ def test_steps_SEIR_nb_simple_spread_with_txt_matrices(): seeding_data, seeding_amounts = s.seedingAndIC.load_seeding(sim_id=100, setup=s) initial_conditions = s.seedingAndIC.draw_ic(sim_id=100, setup=s) - npi = NPI.NPIBase.execute( - npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames - ) + npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames) params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) params = s.parameters.parameters_reduce(params, npi) @@ -204,9 +194,7 @@ def test_steps_SEIR_nb_simple_spread_with_txt_matrices(): proportion_array, proportion_info, ) = s.compartments.get_transition_array() - parsed_parameters = s.compartments.parse_parameters( - params, s.parameters.pnames, unique_strings - ) + parsed_parameters = s.compartments.parse_parameters(params, s.parameters.pnames, unique_strings) for i in range(5): states = seir.steps_SEIR( @@ -220,20 +208,8 @@ def test_steps_SEIR_nb_simple_spread_with_txt_matrices(): seeding_amounts, ) df = seir.states2Df(s, states) - assert ( - df[ - (df["mc_value_type"] == "prevalence") - & (df["mc_infection_stage"] == "R") - ].loc[str(s.tf), "10001"] - > 1 - ) - assert ( - df[ - (df["mc_value_type"] == "prevalence") - & (df["mc_infection_stage"] == "R") - ].loc[str(s.tf), "20002"] - > 1 - ) + assert df[(df["mc_value_type"] == "prevalence") & (df["mc_infection_stage"] == "R")].loc[str(s.tf), "10001"] > 1 + assert df[(df["mc_value_type"] == "prevalence") & (df["mc_infection_stage"] == "R")].loc[str(s.tf), "20002"] > 1 states = seir.steps_SEIR( s, @@ -246,27 +222,9 @@ def test_steps_SEIR_nb_simple_spread_with_txt_matrices(): seeding_amounts, ) df = seir.states2Df(s, states) - assert ( - df[ - (df["mc_value_type"] == "prevalence") - & (df["mc_infection_stage"] == "R") - ].loc[str(s.tf), "20002"] - > 1 - ) - assert ( - df[ - (df["mc_value_type"] == "incidence") - & (df["mc_infection_stage"] == "I1") - ].max()["20002"] - > 0 - ) - assert ( - df[ - (df["mc_value_type"] == "incidence") - & (df["mc_infection_stage"] == "I1") - ].max()["10001"] - > 0 - ) + assert df[(df["mc_value_type"] == "prevalence") & (df["mc_infection_stage"] == "R")].loc[str(s.tf), "20002"] > 1 + assert df[(df["mc_value_type"] == "incidence") & (df["mc_infection_stage"] == "I1")].max()["20002"] > 0 + assert df[(df["mc_value_type"] == "incidence") & (df["mc_infection_stage"] == "I1")].max()["10001"] > 0 def test_steps_SEIR_nb_simple_spread_with_csv_matrices(): @@ -311,9 +269,7 @@ def test_steps_SEIR_nb_simple_spread_with_csv_matrices(): seeding_data, seeding_amounts = s.seedingAndIC.load_seeding(sim_id=100, setup=s) initial_conditions = s.seedingAndIC.draw_ic(sim_id=100, setup=s) - npi = NPI.NPIBase.execute( - npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames - ) + npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames) params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) params = s.parameters.parameters_reduce(params, npi) @@ -324,9 +280,7 @@ def test_steps_SEIR_nb_simple_spread_with_csv_matrices(): proportion_array, proportion_info, ) = s.compartments.get_transition_array() - parsed_parameters = s.compartments.parse_parameters( - params, s.parameters.pnames, unique_strings - ) + parsed_parameters = s.compartments.parse_parameters(params, 
s.parameters.pnames, unique_strings) for i in range(5): states = seir.steps_SEIR( @@ -341,20 +295,8 @@ def test_steps_SEIR_nb_simple_spread_with_csv_matrices(): ) df = seir.states2Df(s, states) - assert ( - df[ - (df["mc_value_type"] == "incidence") - & (df["mc_infection_stage"] == "I1") - ].max()["20002"] - > 0 - ) - assert ( - df[ - (df["mc_value_type"] == "incidence") - & (df["mc_infection_stage"] == "I1") - ].max()["10001"] - > 0 - ) + assert df[(df["mc_value_type"] == "incidence") & (df["mc_infection_stage"] == "I1")].max()["20002"] > 0 + assert df[(df["mc_value_type"] == "incidence") & (df["mc_infection_stage"] == "I1")].max()["10001"] > 0 def test_steps_SEIR_no_spread(): @@ -398,9 +340,7 @@ def test_steps_SEIR_no_spread(): s.mobility.data = s.mobility.data * 0 - npi = NPI.NPIBase.execute( - npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames - ) + npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames) params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) params = s.parameters.parameters_reduce(params, npi) @@ -411,9 +351,7 @@ def test_steps_SEIR_no_spread(): proportion_array, proportion_info, ) = s.compartments.get_transition_array() - parsed_parameters = s.compartments.parse_parameters( - params, s.parameters.pnames, unique_strings - ) + parsed_parameters = s.compartments.parse_parameters(params, s.parameters.pnames, unique_strings) for i in range(10): states = seir.steps_SEIR( @@ -428,11 +366,7 @@ def test_steps_SEIR_no_spread(): ) df = seir.states2Df(s, states) assert ( - df[ - (df["mc_value_type"] == "prevalence") - & (df["mc_infection_stage"] == "R") - ].loc[str(s.tf), "20002"] - == 0.0 + df[(df["mc_value_type"] == "prevalence") & (df["mc_infection_stage"] == "R")].loc[str(s.tf), "20002"] == 0.0 ) states = seir.steps_SEIR( @@ -447,11 +381,7 @@ def test_steps_SEIR_no_spread(): ) df = seir.states2Df(s, states) assert ( - df[ - (df["mc_value_type"] == "prevalence") - & (df["mc_infection_stage"] == "R") - ].loc[str(s.tf), "20002"] - == 0.0 + df[(df["mc_value_type"] == "prevalence") & (df["mc_infection_stage"] == "R")].loc[str(s.tf), "20002"] == 0.0 ) @@ -554,9 +484,7 @@ def test_continuation_resume(): seir.onerun_SEIR(sim_id2write=sim_id2write, s=s, config=config) states_new = pq.read_table( - file_paths.create_file_name( - s.in_run_id, s.in_prefix, sim_id2write, "seir", "parquet" - ), + file_paths.create_file_name(s.in_run_id, s.in_prefix, sim_id2write, "seir", "parquet"), ).to_pandas() states_new = states_new[states_new["date"] == "2020-03-15"].reset_index(drop=True) assert ( @@ -568,13 +496,9 @@ def test_continuation_resume(): .all() ) - seir.onerun_SEIR( - sim_id2write=sim_id2write + 1, s=s, sim_id2load=sim_id2write, load_ID=True, config=config - ) + seir.onerun_SEIR(sim_id2write=sim_id2write + 1, s=s, sim_id2load=sim_id2write, load_ID=True, config=config) states_new = pq.read_table( - file_paths.create_file_name( - s.in_run_id, s.in_prefix, sim_id2write + 1, "seir", "parquet" - ), + file_paths.create_file_name(s.in_run_id, s.in_prefix, sim_id2write + 1, "seir", "parquet"), ).to_pandas() states_new = states_new[states_new["date"] == "2020-03-15"].reset_index(drop=True) for path in ["model_output/seir", "model_output/snpi", "model_output/spar"]: @@ -627,9 +551,7 @@ def test_inference_resume(): ) seir.onerun_SEIR(sim_id2write=int(sim_id2write), s=s, config=config) npis_old = pq.read_table( - file_paths.create_file_name( - s.in_run_id, s.in_prefix, sim_id2write, "snpi", "parquet" - ) + 
file_paths.create_file_name(s.in_run_id, s.in_prefix, sim_id2write, "snpi", "parquet") ).to_pandas() config.clear() @@ -675,13 +597,9 @@ def test_inference_resume(): out_prefix=prefix, ) - seir.onerun_SEIR( - sim_id2write=sim_id2write + 1, s=s, sim_id2load=sim_id2write, load_ID=True, config=config - ) + seir.onerun_SEIR(sim_id2write=sim_id2write + 1, s=s, sim_id2load=sim_id2write, load_ID=True, config=config) npis_new = pq.read_table( - file_paths.create_file_name( - s.in_run_id, s.in_prefix, sim_id2write + 1, "snpi", "parquet" - ) + file_paths.create_file_name(s.in_run_id, s.in_prefix, sim_id2write + 1, "snpi", "parquet") ).to_pandas() assert npis_old["npi_name"].isin(["None", "Wuhan", "KansasCity"]).all() @@ -737,9 +655,7 @@ def test_parallel_compartments_with_vacc(): seeding_data, seeding_amounts = s.seedingAndIC.load_seeding(sim_id=100, setup=s) initial_conditions = s.seedingAndIC.draw_ic(sim_id=100, setup=s) - npi = NPI.NPIBase.execute( - npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames - ) + npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames) params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) params = s.parameters.parameters_reduce(params, npi) @@ -750,9 +666,7 @@ def test_parallel_compartments_with_vacc(): proportion_array, proportion_info, ) = s.compartments.get_transition_array() - parsed_parameters = s.compartments.parse_parameters( - params, s.parameters.pnames, unique_strings - ) + parsed_parameters = s.compartments.parse_parameters(params, s.parameters.pnames, unique_strings) for i in range(5): states = seir.steps_SEIR( @@ -835,9 +749,7 @@ def test_parallel_compartments_no_vacc(): seeding_data, seeding_amounts = s.seedingAndIC.load_seeding(sim_id=100, setup=s) initial_conditions = s.seedingAndIC.draw_ic(sim_id=100, setup=s) - npi = NPI.NPIBase.execute( - npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames - ) + npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, geoids=s.spatset.nodenames) params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) params = s.parameters.parameters_reduce(params, npi) @@ -848,9 +760,7 @@ def test_parallel_compartments_no_vacc(): proportion_array, proportion_info, ) = s.compartments.get_transition_array() - parsed_parameters = s.compartments.parse_parameters( - params, s.parameters.pnames, unique_strings - ) + parsed_parameters = s.compartments.parse_parameters(params, s.parameters.pnames, unique_strings) for i in range(5): s.npi_config_seir = config["interventions"]["settings"]["Scenario_vacc"] diff --git a/scripts/clean_s3.py b/scripts/clean_s3.py index 13b496e9f..707623fe8 100644 --- a/scripts/clean_s3.py +++ b/scripts/clean_s3.py @@ -9,9 +9,7 @@ s3 = boto3.client("s3") paginator = s3.get_paginator("list_objects_v2") -pages = paginator.paginate( - Bucket=bucket, Prefix="", Delimiter="/" -) # needs paginator cause more than 1000 files +pages = paginator.paginate(Bucket=bucket, Prefix="", Delimiter="/") # needs paginator cause more than 1000 files to_prun = [] # folders: diff --git a/scripts/copy_for_continuation.py b/scripts/copy_for_continuation.py index 92e34eb42..9d83cfd94 100644 --- a/scripts/copy_for_continuation.py +++ b/scripts/copy_for_continuation.py @@ -77,14 +77,10 @@ def detect_old_run_id(fp): fn = files[0] old_run_id = detect_old_run_id(fn) new_name = ( - fn.replace("seir", "cont") - .replace(f"{input_folder}/model_output", "model_output") - .replace(old_run_id, run_id) + 
fn.replace("seir", "cont").replace(f"{input_folder}/model_output", "model_output").replace(old_run_id, run_id) ) - print( - f"detected old_run_id: {old_run_id} which will be replaced by user provided run_id: {run_id}" - ) + print(f"detected old_run_id: {old_run_id} which will be replaced by user provided run_id: {run_id}") empty_str = "°" * len(input_folder) print(f"file: \n OLD NAME: {fn}\n NEW NAME: {empty_str}{new_name}") for fn in tqdm.tqdm(files): diff --git a/scripts/csv_to_csr.py b/scripts/csv_to_csr.py index 9e5e1a19e..2fc6ebf8c 100755 --- a/scripts/csv_to_csr.py +++ b/scripts/csv_to_csr.py @@ -26,25 +26,17 @@ def convert(config_file): geodata_file = os.path.join(spatial_base_path, spatial_config["geodata"].get()) mobility_file = os.path.join(spatial_base_path, spatial_config["mobility"].get()) - mobility_npz = os.path.join( - spatial_base_path, spatial_config["mobility"].get().replace(".csv", ".npz") - ) + mobility_npz = os.path.join(spatial_base_path, spatial_config["mobility"].get().replace(".csv", ".npz")) nodenames_key = spatial_config["nodenames"].get() - data = pd.read_csv( - geodata_file, converters={nodenames_key: lambda x: str(x)} - ) # geoids and populations + data = pd.read_csv(geodata_file, converters={nodenames_key: lambda x: str(x)}) # geoids and populations nnodes = len(data) nodenames = data[nodenames_key].tolist() - mobility_data = pd.read_csv( - mobility_file, converters={"ori": lambda x: str(x), "dest": lambda x: str(x)} - ) + mobility_data = pd.read_csv(mobility_file, converters={"ori": lambda x: str(x), "dest": lambda x: str(x)}) mobility = scipy.sparse.lil_matrix((nnodes, nnodes)) for index, row in mobility_data.iterrows(): - mobility[nodenames.index(row["ori"]), nodenames.index(row["dest"])] = row[ - "amount" - ] + mobility[nodenames.index(row["ori"]), nodenames.index(row["dest"])] = row["amount"] if nodenames.index(row["ori"]) == nodenames.index(row["dest"]): raise ValueError( f"Mobility fluxes with same origin and destination: '{row['ori']}' to {row['dest']} in long form matrix. 
This is not supported" diff --git a/scripts/fast_report.py b/scripts/fast_report.py index 0b77b5a8d..fa18e15f4 100644 --- a/scripts/fast_report.py +++ b/scripts/fast_report.py @@ -95,9 +95,7 @@ def generate_pdf(max_files, filename, config_file): varplot = ["incidI", "hosp_curr", "icu_curr", "incidH", "incidICU", "incidD"] - fig, axes = plt.subplots( - len(varplot), len(all_hosp_sim), figsize=(23, 20), sharex=True - ) + fig, axes = plt.subplots(len(varplot), len(all_hosp_sim), figsize=(23, 20), sharex=True) for vi, var in enumerate(varplot): for scn, key in enumerate(list(all_hosp_sim.keys())): if len(all_hosp_sim) > 1: diff --git a/scripts/merge_past_dynamics.py b/scripts/merge_past_dynamics.py index c09cf4884..4b3bb4c25 100644 --- a/scripts/merge_past_dynamics.py +++ b/scripts/merge_past_dynamics.py @@ -46,15 +46,11 @@ only_in_sim = list(set(sim.columns) - set(past_dynamics.columns)) only_in_pastdyn = list(set(past_dynamics.columns) - set(sim.columns)) c.drop(only_in_pastdyn, inplace=True, axis=1) - c.loc[ - (c["time"] <= max(past_dynamics["time"])) & (c["comp"] != "S"), only_in_sim - ] = 0 + c.loc[(c["time"] <= max(past_dynamics["time"])) & (c["comp"] != "S"), only_in_sim] = 0 pop_ois = [] for nd in only_in_sim: pop_ois.append(float(geodata[geodata["geoid"] == nd].pop2010)) - c.loc[ - (c["time"] <= max(past_dynamics["time"])) & (c["comp"] == "S"), only_in_sim - ] = pop_ois + c.loc[(c["time"] <= max(past_dynamics["time"])) & (c["comp"] == "S"), only_in_sim] = pop_ois pa_df = pa.Table.from_pandas(c.round(), preserve_index=False) pa.parquet.write_table(pa_df, filename) print("DONE") diff --git a/scripts/quantile_summarize_geoid_level.py b/scripts/quantile_summarize_geoid_level.py index 6ae16a04d..5946e21ab 100644 --- a/scripts/quantile_summarize_geoid_level.py +++ b/scripts/quantile_summarize_geoid_level.py @@ -16,9 +16,7 @@ import numpy as np from pyspark.sql import functions as F, SparkSession, SQLContext, Window -PROBS = np.concatenate( - [[0.01, 0.025], np.arange(start=0.05, stop=0.95, step=0.05), [0.975, 0.99]] -) +PROBS = np.concatenate([[0.01, 0.025], np.arange(start=0.05, stop=0.95, step=0.05), [0.975, 0.99]]) METRICS = ["hosp_curr", "cum_death", "death", "infections", "cum_infections", "hosp"] spark = SparkSession.builder.appName("quantile report").getOrCreate() @@ -71,15 +69,9 @@ def process(config_file, scenarios, output, start_date, end_date, name_filter): config = confuse.Configuration("COVIDScenarioPipeline") config.set_file(config_file) - input_paths = ( - f"{config['spatial_setup']['setup_name'].get()}_{scenario}" - for scenario in scenarios - ) + input_paths = (f"{config['spatial_setup']['setup_name'].get()}_{scenario}" for scenario in scenarios) paths = itertools.chain( - *( - pathlib.Path("hospitalization/model_output").glob(p + "/**/*.parquet") - for p in input_paths - ) + *(pathlib.Path("hospitalization/model_output").glob(p + "/**/*.parquet") for p in input_paths) ) paths = (str(p) for p in paths if p.is_file()) paths = filter(lambda p: re.search(name_filter, p), paths) @@ -96,9 +88,7 @@ def process(config_file, scenarios, output, start_date, end_date, name_filter): df = df.filter((df.time > start_date.date()) & (df.time <= end_date.date())) df = df.withColumn( "cum_infections", - F.sum(df.infections).over( - Window.partitionBy(df.geoid).orderBy(df.time, df.uid) - ), + F.sum(df.infections).over(Window.partitionBy(df.geoid).orderBy(df.time, df.uid)), ) df = df.withColumn( "cum_death", @@ -111,10 +101,7 @@ def process(config_file, scenarios, output, start_date, end_date, 
name_filter): (metric, prob, f"{metric}__{str(round(prob, 3)).replace('.', '_')}") for metric, prob in itertools.product(METRICS, PROBS) ] - agg_sql = ", ".join( - f"percentile_approx({metric}, {prob}, 100) AS {name}" - for metric, prob, name in metric_probs - ) + agg_sql = ", ".join(f"percentile_approx({metric}, {prob}, 100) AS {name}" for metric, prob, name in metric_probs) rollup_df = sqlContext.sql( f"""\ SELECT geoid, time, {agg_sql} FROM df diff --git a/slurm_batch/inference_job.py b/slurm_batch/inference_job.py index 93e1e7f3b..704539fba 100644 --- a/slurm_batch/inference_job.py +++ b/slurm_batch/inference_job.py @@ -189,9 +189,7 @@ def launch_batch( if "filtering" in config: config["filtering"]["simulations_per_slot"] = sims_per_job if not os.path.exists(config["filtering"]["data_path"]): - print( - f"ERROR: filtering.data_path path {config['filtering']['data_path']} does not exist!" - ) + print(f"ERROR: filtering.data_path path {config['filtering']['data_path']} does not exist!") return 1 else: print(f"WARNING: no filtering section found in {config_file}!") @@ -230,9 +228,7 @@ def autodetect_params(config, *, num_jobs=None, sims_per_job=None, num_blocks=No return (num_jobs, sims_per_job, num_blocks) if "filtering" not in config or "simulations_per_slot" not in config["filtering"]: - raise click.UsageError( - "filtering::simulations_per_slot undefined in config, can't autodetect parameters" - ) + raise click.UsageError("filtering::simulations_per_slot undefined in config, can't autodetect parameters") sims_per_slot = int(config["filtering"]["simulations_per_slot"]) if num_jobs is None: @@ -242,17 +238,10 @@ def autodetect_params(config, *, num_jobs=None, sims_per_job=None, num_blocks=No if sims_per_job is None: if num_blocks is not None: sims_per_job = int(math.ceil(sims_per_slot / num_blocks)) - print( - f"Setting number of blocks to {num_blocks} [via num_blocks (-k) argument]" - ) - print( - f"Setting sims per job to {sims_per_job} [via {sims_per_slot} simulations_per_slot in config]" - ) + print(f"Setting number of blocks to {num_blocks} [via num_blocks (-k) argument]") + print(f"Setting sims per job to {sims_per_job} [via {sims_per_slot} simulations_per_slot in config]") else: - geoid_fname = ( - pathlib.Path(config["spatial_setup"]["base_path"]) - / config["spatial_setup"]["geodata"] - ) + geoid_fname = pathlib.Path(config["spatial_setup"]["base_path"]) / config["spatial_setup"]["geodata"] with open(geoid_fname) as geoid_fp: num_geoids = sum(1 for line in geoid_fp) @@ -274,9 +263,7 @@ def autodetect_params(config, *, num_jobs=None, sims_per_job=None, num_blocks=No if num_blocks is None: num_blocks = int(math.ceil(sims_per_slot / sims_per_job)) - print( - f"Setting number of blocks to {num_blocks} [via {sims_per_slot} simulations_per_slot in config]" - ) + print(f"Setting number of blocks to {num_blocks} [via {sims_per_slot} simulations_per_slot in config]") return (num_jobs, sims_per_job, num_blocks) @@ -320,9 +307,7 @@ def launch(self, job_name, config_file, scenarios, p_death_names): manifest["cmd"] = " ".join(sys.argv[:]) manifest["job_name"] = job_name manifest["data_sha"] = subprocess.getoutput("git rev-parse HEAD") - manifest["csp_sha"] = subprocess.getoutput( - "cd COVIDScenarioPipeline; git rev-parse HEAD" - ) + manifest["csp_sha"] = subprocess.getoutput("cd COVIDScenarioPipeline; git rev-parse HEAD") # Prepare to tar up the current directory, excluding any dvc outputs, so it # can be shipped to S3 @@ -333,29 +318,16 @@ def launch(self, job_name, config_file, 
scenarios, p_death_names): for p in os.listdir("."): if p == "COVIDScenarioPipeline": for q in os.listdir("COVIDScenarioPipeline"): - if not ( - q == "packrat" - or q == "sample_data" - or q == "build" - or q.startswith(".") - ): + if not (q == "packrat" or q == "sample_data" or q == "build" or q.startswith(".")): tar.add(os.path.join("COVIDScenarioPipeline", q)) elif q == "sample_data": for r in os.listdir("COVIDScenarioPipeline/sample_data"): if r != "united-states-commutes": - tar.add( - os.path.join( - "COVIDScenarioPipeline", "sample_data", r - ) - ) - elif not ( - p.startswith(".") or p.endswith("tar.gz") or p in self.outputs - ): + tar.add(os.path.join("COVIDScenarioPipeline", "sample_data", r)) + elif not (p.startswith(".") or p.endswith("tar.gz") or p in self.outputs): tar.add( p, - filter=lambda x: None - if os.path.basename(x.name).startswith(".") - else x, + filter=lambda x: None if os.path.basename(x.name).startswith(".") else x, ) tar.close() @@ -394,15 +366,11 @@ def launch(self, job_name, config_file, scenarios, p_death_names): cur_env_vars = base_env_vars.copy() cur_env_vars.append({"name": "COVID_SCENARIOS", "value": s}) cur_env_vars.append({"name": "COVID_DEATHRATES", "value": d}) - cur_env_vars.append( - {"name": "COVID_PREFIX", "value": f"{config['name']}/{s}/{d}"} - ) + cur_env_vars.append({"name": "COVID_PREFIX", "value": f"{config['name']}/{s}/{d}"}) cur_env_vars.append({"name": "COVID_BLOCK_INDEX", "value": "1"}) cur_env_vars.append({"name": "COVID_RUN_INDEX", "value": f"{self.run_id}"}) if not (self.restart_from_s3_bucket is None): - cur_env_vars.append( - {"name": "S3_LAST_JOB_OUTPUT", "value": self.restart_from_s3_bucket} - ) + cur_env_vars.append({"name": "S3_LAST_JOB_OUTPUT", "value": self.restart_from_s3_bucket}) cur_env_vars.append( { "name": "COVID_OLD_RUN_INDEX", @@ -438,9 +406,7 @@ def launch(self, job_name, config_file, scenarios, p_death_names): run_id_restart = self.run_id print(f"Launching {cur_job_name}...") if not (self.restart_from_s3_bucket is None): - print( - f"Resuming from run id is {self.restart_from_run_id} located in {self.restart_from_s3_bucket}" - ) + print(f"Resuming from run id is {self.restart_from_run_id} located in {self.restart_from_s3_bucket}") print(f"Final output will be: {results_path}/model_output/") print(f"Run id is {self.run_id}") diff --git a/test/run_tests.py b/test/run_tests.py index 7f327a31a..69220a3a0 100644 --- a/test/run_tests.py +++ b/test/run_tests.py @@ -38,9 +38,7 @@ def _success(test_dir): sys.executable, ] complete = subprocess.run(cmd) - assert ( - complete.returncode == 0 - ), f"make_makefile.R failed with code {complete.returncode}" + assert complete.returncode == 0, f"make_makefile.R failed with code {complete.returncode}" assert_file("Makefile") @@ -79,9 +77,7 @@ def _success_build_US_setup(test_dir): "FALSE", ] complete = subprocess.run(cmd) - assert ( - complete.returncode == 0 - ), f"build_US_setup.R failed with code {complete.returncode}" + assert complete.returncode == 0, f"build_US_setup.R failed with code {complete.returncode}" assert_file("data/mobility.csv") assert_file("data/geodata.csv") @@ -105,9 +101,7 @@ def _success_build_nonUS_setup(test_dir): "mobility_data.csv", ] complete = subprocess.run(cmd) - assert ( - complete.returncode == 0 - ), f"build_nonUS_setup.R failed with code {complete.returncode}" + assert complete.returncode == 0, f"build_nonUS_setup.R failed with code {complete.returncode}" assert_file("data/mobility.csv") assert_file("data/geodata.csv") @@ -134,9 +128,7 @@ def 
_success_create_seeding_nonUS(test_dir): # Make Makefile cmd = ["Rscript", "../../R/scripts/create_seeding.R", "-c", "config.yml"] complete = subprocess.run(cmd) - assert ( - complete.returncode == 0 - ), f"create_seeding.R failed for non-US setup with code {complete.returncode}" + assert complete.returncode == 0, f"create_seeding.R failed for non-US setup with code {complete.returncode}" assert_file("data/seeding.csv") @@ -180,7 +172,7 @@ def test_multitime(): # def test_report(): # _success("test_report") -# +# # assert_file("data/geodata.csv") # assert_file("data/mobility.csv") # assert_dir("data/shp") @@ -222,9 +214,7 @@ def test_inference(): ] complete = subprocess.run(cmd) - assert ( - complete.returncode == 0 - ), f"build_US_setup.R failed with code {complete.returncode}" + assert complete.returncode == 0, f"build_US_setup.R failed with code {complete.returncode}" assert_file("data/geodata.csv") assert_file("data/mobility.csv") @@ -249,9 +239,7 @@ def test_inference(): complete = subprocess.run(cmd) - assert ( - complete.returncode == 0 - ), f"full_filter.R failed with code {complete.returncode}" + assert complete.returncode == 0, f"full_filter.R failed with code {complete.returncode}" assert_file("data/test1/seeding.csv") assert_file("data/us_data.csv") @@ -280,9 +268,7 @@ def test_inference_multiblock(): ] complete = subprocess.run(cmd) - assert ( - complete.returncode == 0 - ), f"build_US_setup.R failed with code {complete.returncode}" + assert complete.returncode == 0, f"build_US_setup.R failed with code {complete.returncode}" assert_file("data/geodata.csv") assert_file("data/mobility.csv") @@ -306,9 +292,7 @@ def test_inference_multiblock(): ] complete = subprocess.run(cmd_1) - assert ( - complete.returncode == 0 - ), f"full_filter.R block 1 failed with code {complete.returncode}" + assert complete.returncode == 0, f"full_filter.R block 1 failed with code {complete.returncode}" cmd_2 = [ "Rscript", @@ -329,20 +313,12 @@ def test_inference_multiblock(): complete = subprocess.run(cmd_2) - assert ( - complete.returncode == 0 - ), f"full_filter.R block 2 failed with code {complete.returncode}" + assert complete.returncode == 0, f"full_filter.R block 2 failed with code {complete.returncode}" final_prefix = "test_inference/Scenario1/low/test_inference/global/final/" - intermediate_prefix = ( - "test_inference/Scenario1/low/test_inference/global/intermediate/000000001." - ) - final_filename = file_paths.create_file_name( - "test_inference", final_prefix, 1, "llik", "parquet" - ) - intermediate_filename = file_paths.create_file_name( - "test_inference", intermediate_prefix, 2, "llik", "parquet" - ) + intermediate_prefix = "test_inference/Scenario1/low/test_inference/global/intermediate/000000001." 
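    # file_paths.create_file_name builds an output file name from the run id, prefix,
    # simulation index, file type, and extension; the two calls below locate the final
    # and intermediate llik parquet outputs that the test then reads and hashes.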
+ final_filename = file_paths.create_file_name("test_inference", final_prefix, 1, "llik", "parquet") + intermediate_filename = file_paths.create_file_name("test_inference", intermediate_prefix, 2, "llik", "parquet") final_hash = "" with open(final_filename, "rb") as f: @@ -402,9 +378,7 @@ def test_compartmental_specification(): ] complete = subprocess.run(cmd) - assert ( - complete.returncode == 0 - ), f"build_US_setup.R failed with code {complete.returncode}" + assert complete.returncode == 0, f"build_US_setup.R failed with code {complete.returncode}" assert_file("data/geodata.csv") assert_file("data/mobility.csv") @@ -429,9 +403,7 @@ def test_compartmental_specification(): complete = subprocess.run(cmd) - assert ( - complete.returncode == 0 - ), f"full_filter.R failed with code {complete.returncode}" + assert complete.returncode == 0, f"full_filter.R failed with code {complete.returncode}" assert_file("data/test1/seeding.csv") assert_file("data/us_data.csv")
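
For reference, the read/write checks repeated throughout gempyor_pkg/tests/outcomes/test_outcomes.py and gempyor_pkg/tests/seir/test_seir.py follow one parquet round-trip pattern, shown here as a minimal sketch (the helper name assert_parquet_roundtrip is illustrative and not defined in the codebase; the example paths are taken from the tests above, minus their config_path_prefix):

import pyarrow.parquet as pq

def assert_parquet_roundtrip(path_read, path_wrote):
    # Illustrative helper, not part of gempyor: load both model outputs as pandas
    # DataFrames and require exact element-wise equality, as the tests do after each
    # onerun_delayframe_outcomes(..., load_ID=True) / onerun_SEIR(..., load_ID=True) call.
    df_read = pq.read_table(path_read).to_pandas()
    df_wrote = pq.read_table(path_wrote).to_pandas()
    assert (df_read == df_wrote).all().all()

assert_parquet_roundtrip(
    "model_output/hpar/000000001.105.hpar.parquet",
    "model_output/hpar/000000001.106.hpar.parquet",
)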