diff --git a/docs/prms/domino.rst b/docs/prms/domino.rst index a8dac732c..f562cf449 100644 --- a/docs/prms/domino.rst +++ b/docs/prms/domino.rst @@ -8,3 +8,9 @@ DOMINO has two optional parameters: * slice_threshold: the p-value threshold for considering a slice as relevant * module_threshold: the p-value threshold for considering a putative module as final module + +Wrapper Details +=============== + +If the input dataframe is empty or too 'small' (where no modules are found), +SPRAS will instead emit an empty output file. diff --git a/spras/containers.py b/spras/containers.py index 62782fb16..4706d7257 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -2,6 +2,7 @@ import platform import re import subprocess +import textwrap from pathlib import Path, PurePath, PurePosixPath from typing import Iterator, List, Optional, Tuple, Union @@ -122,6 +123,49 @@ def prepare_dsub_cmd(flags: dict[str, str | list[str]]): print(f"dsub command: {dsub_command}") return dsub_command +class ContainerError(RuntimeError): + """Raises when anything goes wrong inside a container""" + + error_code: int + stdout: Optional[str] + stderr: Optional[str] + + def __init__(self, message: str, error_code: int, stdout: Optional[str], stderr: Optional[str], *args): + """ + Constructs a new ContainerError. + + @param message: The message to display to the user. This should usually refer to the indent call to differentiate between + general logging done by Snakemake/logging calls. + @param error_code: Also known as exit status; this should generally be non-zero for ContainerErrors. + @param stdout: The standard output stream. If the origin of the stream is unknown, leave it in stdout. + @param stderr: The standard error stream. + """ + + # https://stackoverflow.com/a/26938914/7589775 + self.message = message + + self.error_code = error_code + self.stdout = stdout + self.stderr = stderr + + super(ContainerError, self).__init__(message, error_code, stdout, stderr, *args) + + def streams_contain(self, needle: str): + """ + Checks (with case sensitivity) + if any of the stdout/err streams have the provided needle. + """ + stdout = self.stdout if self.stdout else '' + stderr = self.stderr if self.stderr else '' + + return (needle in stdout) or (needle in stderr) + + # Due to + # https://github.com/snakemake/snakemake/blob/d4890b4da691506b6a258f7534ac41fdb7ef5ab4/src/snakemake/exceptions.py#L18 + # this overrides the tostr implementation to have nicer container errors + def __str__(self): + return self.message + def env_to_items(environment: dict[str, str]) -> Iterator[str]: """ Turns an environment variable dictionary to KEY=VALUE pairs. @@ -183,22 +227,27 @@ def run_container_and_log(name: str, framework: str, container_suffix: str, comm if 'message' in out: # This is the format of a singularity message. # See https://singularityhub.github.io/singularity-cli/api/source/spython.main.html?highlight=execute#spython.main.execute.execute. - if 'return_code' in out and not out['return_code'] == 0: - print(f"(Program exited with non-zero exit code '{out['return_code']}')") + exit_status = int(out['return_code']) if 'return_code' in out else 0 out = ''.join(out['message']) + if exit_status != 0: + message = f'An unexpected non-zero exit status ({exit_status}) occurred while running this singularity container:\n' + indent(out) + raise ContainerError(message, exit_status, out, None) else: - print("Note: This is an unknown message format - if you want this pretty printed, please file an issue at https://github.com/Reed-CompBio/spras/issues/new.") + print("Note: The following output is an unknown message format which should be properly handled.") + print("Please file an issue at https://github.com/Reed-CompBio/spras/issues/new with this output.") out = str(out) elif not isinstance(out, str): out = str(out, "utf-8") print(indent(out)) except docker.errors.ContainerError as err: - print(f"(Command formatted as list: `{err.command}`)") - print(f"An unexpected non-zero exit status ({err.exit_status}) inside the docker image {err.image} occurred:") - err = str(err.stderr if err.stderr is not None else "", "utf-8") - print(indent(err)) - except Exception as err: - raise err + stdout = str(err.container.logs(stdout=True, stderr=False), 'utf-8') + stderr = str(err.container.logs(stdout=False, stderr=True), 'utf-8') + + message = textwrap.dedent(f'''\ + (Command formatted as list: `{err.command}`) + An unexpected non-zero exit status ({err.exit_status}) inside the docker image {err.image} occurred:\n''') + indent(stderr) + # We retrieved all of the information from docker.errors.ContainerError, so here, we ignore the original error. + raise ContainerError(message, err.exit_status, stdout, stderr) from None # TODO any issue with creating a new client each time inside this function? def run_container_docker(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, environment: Optional[dict[str, str]] = None, network_disabled=False): diff --git a/spras/domino.py b/spras/domino.py index dabf4fc6b..28a391434 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -3,7 +3,7 @@ import pandas as pd -from spras.containers import prepare_volume, run_container_and_log +from spras.containers import ContainerError, prepare_volume, run_container_and_log from spras.interactome import ( add_constant, reinsert_direction_col_undirected, @@ -112,13 +112,21 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, '--output_file', mapped_slices_file] container_suffix = "domino" - run_container_and_log('slicer', - container_framework, - container_suffix, - slicer_command, - volumes, - work_dir, - out_dir) + try: + run_container_and_log('slicer', + container_framework, + container_suffix, + slicer_command, + volumes, + work_dir, + out_dir) + except ContainerError as err: + # Occurs when DOMINO gets passed some empty dataframe from network_file. + # This counts as an empty input, so we return an empty output. + if err.streams_contain("pandas.errors.EmptyDataError: No columns to parse from file"): + pass + else: + raise err # Make the Python command to run within the container domino_command = ['domino', @@ -137,14 +145,26 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, if module_threshold is not None: domino_command.extend(['--module_threshold', str(module_threshold)]) - run_container_and_log( - 'DOMINO', - container_framework, - container_suffix, - domino_command, - volumes, - work_dir, - out_dir) + try: + run_container_and_log('DOMINO', + container_framework, + container_suffix, + domino_command, + volumes, + work_dir, + out_dir) + except ContainerError as err: + # Occurs when DOMINO gets passed some empty dataframe from network_file. + # This counts as an empty input, so we return an empty output. + if err.streams_contain("pandas.errors.EmptyDataError: No columns to parse from file"): + pass + # Here, DOMINO + # still outputs to our output folder with a still viable HTML output. + # https://github.com/Reed-CompBio/spras/pull/103#issuecomment-1681526958 + elif err.streams_contain("ValueError: cannot apply union_all to an empty list"): + pass + else: + raise err # DOMINO creates a new folder in out_dir to output its modules HTML files into called active_genes # The filename is determined by the input active_genes and cannot be configured @@ -160,7 +180,7 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, # Clean up DOMINO intermediate and pickle files slices_file.unlink(missing_ok=True) Path(out_dir, 'network.slices.pkl').unlink(missing_ok=True) - Path(network + '.pkl').unlink(missing_ok=True) + Path(str(network) + '.pkl').unlink(missing_ok=True) @staticmethod def parse_output(raw_pathway_file, standardized_pathway_file, params): diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 8b97fa2d1..8e6981cb0 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -2,7 +2,7 @@ import pandas as pd -from spras.containers import prepare_volume, run_container_and_log +from spras.containers import ContainerError, prepare_volume, run_container_and_log from spras.dataset import Dataset from spras.interactome import reinsert_direction_col_undirected from spras.prm import PRM diff --git a/test/DOMINO/input/empty/domino-active-genes.txt b/test/DOMINO/input/empty/domino-active-genes.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/DOMINO/input/empty/domino-network.txt b/test/DOMINO/input/empty/domino-network.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/DOMINO/input/domino-active-genes.txt b/test/DOMINO/input/simple/domino-active-genes.txt similarity index 100% rename from test/DOMINO/input/domino-active-genes.txt rename to test/DOMINO/input/simple/domino-active-genes.txt diff --git a/test/DOMINO/input/domino-network.txt b/test/DOMINO/input/simple/domino-network.txt similarity index 100% rename from test/DOMINO/input/domino-network.txt rename to test/DOMINO/input/simple/domino-network.txt diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py index 05bd3ae70..849ab1aef 100644 --- a/test/DOMINO/test_domino.py +++ b/test/DOMINO/test_domino.py @@ -8,9 +8,9 @@ config.init_from_file("config/config.yaml") -TEST_DIR = 'test/DOMINO/' -OUT_FILE_DEFAULT = TEST_DIR+'output/domino-output.txt' -OUT_FILE_OPTIONAL = TEST_DIR+'output/domino-output-thresholds.txt' +TEST_DIR = Path('test', 'DOMINO') +OUT_FILE_DEFAULT = TEST_DIR / 'output' / 'domino-output.txt' +OUT_FILE_OPTIONAL = TEST_DIR / 'output' / 'domino-output-thresholds.txt' class TestDOMINO: @@ -25,34 +25,32 @@ class TestDOMINO: def test_domino_required(self): # Only include required arguments - out_path = Path(OUT_FILE_DEFAULT) - out_path.unlink(missing_ok=True) + OUT_FILE_DEFAULT.unlink(missing_ok=True) DOMINO.run( - network=TEST_DIR+'input/domino-network.txt', - active_genes=TEST_DIR+'input/domino-active-genes.txt', + network=TEST_DIR / 'input' / 'simple' / 'domino-network.txt', + active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt', output_file=OUT_FILE_DEFAULT) # output_file should be empty - assert out_path.exists() + assert OUT_FILE_DEFAULT.exists() def test_domino_optional(self): # Include optional arguments - out_path = Path(OUT_FILE_OPTIONAL) - out_path.unlink(missing_ok=True) + OUT_FILE_OPTIONAL.unlink(missing_ok=True) DOMINO.run( - network=TEST_DIR+'input/domino-network.txt', - active_genes=TEST_DIR+'input/domino-active-genes.txt', + network=TEST_DIR / 'input' / 'simple' / 'domino-network.txt', + active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt', output_file=OUT_FILE_OPTIONAL, slice_threshold=0.4, module_threshold=0.06) # output_file should be empty - assert out_path.exists() + assert OUT_FILE_OPTIONAL.exists() def test_domino_missing_active_genes(self): # Test the expected error is raised when active_genes argument is missing with pytest.raises(ValueError): # No active_genes DOMINO.run( - network=TEST_DIR+'input/domino-network.txt', + network=TEST_DIR / 'input' / 'simple' / 'domino-network.txt', output_file=OUT_FILE_DEFAULT) def test_domino_missing_network(self): @@ -60,22 +58,32 @@ def test_domino_missing_network(self): with pytest.raises(ValueError): # No network DOMINO.run( - active_genes=TEST_DIR+'input/domino-active-genes.txt', + active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt', output_file=OUT_FILE_DEFAULT) + def test_domino_empty(self): + # Test over empty files + # https://github.com/Reed-CompBio/spras/pull/103#issuecomment-1681526958 + OUT_FILE_DEFAULT.unlink(missing_ok=True) + DOMINO.run( + network=TEST_DIR / 'input' / 'empty' / 'domino-network.txt', + active_genes=TEST_DIR / 'input' / 'empty' / 'domino-active-genes.txt', + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists() + + # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') def test_domino_singularity(self): - out_path = Path(OUT_FILE_DEFAULT) - out_path.unlink(missing_ok=True) + OUT_FILE_DEFAULT.unlink(missing_ok=True) # Only include required arguments and run with Singularity DOMINO.run( - network=TEST_DIR+'input/domino-network.txt', - active_genes=TEST_DIR+'input/domino-active-genes.txt', + network=TEST_DIR / 'input' / 'simple' / 'domino-network.txt', + active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt', output_file=OUT_FILE_DEFAULT, container_framework="singularity") - assert out_path.exists() + assert OUT_FILE_DEFAULT.exists() def test_pre_id_transform(self): """ diff --git a/test/OmicsIntegrator2/input/oi2-edges.txt b/test/OmicsIntegrator2/input/simple/oi2-edges.txt similarity index 100% rename from test/OmicsIntegrator2/input/oi2-edges.txt rename to test/OmicsIntegrator2/input/simple/oi2-edges.txt diff --git a/test/OmicsIntegrator2/input/oi2-prizes.txt b/test/OmicsIntegrator2/input/simple/oi2-prizes.txt similarity index 100% rename from test/OmicsIntegrator2/input/oi2-prizes.txt rename to test/OmicsIntegrator2/input/simple/oi2-prizes.txt diff --git a/test/OmicsIntegrator2/test_oi2.py b/test/OmicsIntegrator2/test_oi2.py index aa74cd94e..85a408689 100644 --- a/test/OmicsIntegrator2/test_oi2.py +++ b/test/OmicsIntegrator2/test_oi2.py @@ -9,10 +9,10 @@ config.init_from_file("config/config.yaml") TEST_DIR = Path('test', 'OmicsIntegrator2') -EDGE_FILE = TEST_DIR / 'input' / 'oi2-edges.txt' -PRIZE_FILE = TEST_DIR / 'input' / 'oi2-prizes.txt' OUT_FILE = TEST_DIR / 'output' / 'test.tsv' +EDGE_FILE = TEST_DIR / 'input' / 'simple' / 'oi2-edges.txt' +PRIZE_FILE = TEST_DIR / 'input' / 'simple' / 'oi2-prizes.txt' class TestOmicsIntegrator2: """