Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/prms/domino.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,9 @@ DOMINO has two optional parameters:

* slice_threshold: the p-value threshold for considering a slice as relevant
* module_threshold: the p-value threshold for considering a putative module as final module

Wrapper Details
===============

If the input dataframe is empty or too 'small' (where no modules are found),
SPRAS will instead emit an empty output file.
67 changes: 58 additions & 9 deletions spras/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import platform
import re
import subprocess
import textwrap
from pathlib import Path, PurePath, PurePosixPath
from typing import Iterator, List, Optional, Tuple, Union

Expand Down Expand Up @@ -122,6 +123,49 @@ def prepare_dsub_cmd(flags: dict[str, str | list[str]]):
print(f"dsub command: {dsub_command}")
return dsub_command

class ContainerError(RuntimeError):
"""Raises when anything goes wrong inside a container"""

error_code: int
stdout: Optional[str]
stderr: Optional[str]

def __init__(self, message: str, error_code: int, stdout: Optional[str], stderr: Optional[str], *args):
"""
Constructs a new ContainerError.

@param message: The message to display to the user. This should usually refer to the indent call to differentiate between
general logging done by Snakemake/logging calls.
@param error_code: Also known as exit status; this should generally be non-zero for ContainerErrors.
@param stdout: The standard output stream. If the origin of the stream is unknown, leave it in stdout.
@param stderr: The standard error stream.
"""

# https://stackoverflow.com/a/26938914/7589775
self.message = message

self.error_code = error_code
self.stdout = stdout
self.stderr = stderr

super(ContainerError, self).__init__(message, error_code, stdout, stderr, *args)

def streams_contain(self, needle: str):
"""
Checks (with case sensitivity)
if any of the stdout/err streams have the provided needle.
"""
stdout = self.stdout if self.stdout else ''
stderr = self.stderr if self.stderr else ''

return (needle in stdout) or (needle in stderr)

# Due to
# https://github.com/snakemake/snakemake/blob/d4890b4da691506b6a258f7534ac41fdb7ef5ab4/src/snakemake/exceptions.py#L18
# this overrides the tostr implementation to have nicer container errors
def __str__(self):
return self.message

def env_to_items(environment: dict[str, str]) -> Iterator[str]:
"""
Turns an environment variable dictionary to KEY=VALUE pairs.
Expand Down Expand Up @@ -183,22 +227,27 @@ def run_container_and_log(name: str, framework: str, container_suffix: str, comm
if 'message' in out:
# This is the format of a singularity message.
# See https://singularityhub.github.io/singularity-cli/api/source/spython.main.html?highlight=execute#spython.main.execute.execute.
if 'return_code' in out and not out['return_code'] == 0:
print(f"(Program exited with non-zero exit code '{out['return_code']}')")
exit_status = int(out['return_code']) if 'return_code' in out else 0
out = ''.join(out['message'])
if exit_status != 0:
message = f'An unexpected non-zero exit status ({exit_status}) occurred while running this singularity container:\n' + indent(out)
raise ContainerError(message, exit_status, out, None)
else:
print("Note: This is an unknown message format - if you want this pretty printed, please file an issue at https://github.com/Reed-CompBio/spras/issues/new.")
print("Note: The following output is an unknown message format which should be properly handled.")
print("Please file an issue at https://github.com/Reed-CompBio/spras/issues/new with this output.")
out = str(out)
elif not isinstance(out, str):
out = str(out, "utf-8")
print(indent(out))
except docker.errors.ContainerError as err:
print(f"(Command formatted as list: `{err.command}`)")
print(f"An unexpected non-zero exit status ({err.exit_status}) inside the docker image {err.image} occurred:")
err = str(err.stderr if err.stderr is not None else "", "utf-8")
print(indent(err))
except Exception as err:
raise err
stdout = str(err.container.logs(stdout=True, stderr=False), 'utf-8')
stderr = str(err.container.logs(stdout=False, stderr=True), 'utf-8')

message = textwrap.dedent(f'''\
(Command formatted as list: `{err.command}`)
An unexpected non-zero exit status ({err.exit_status}) inside the docker image {err.image} occurred:\n''') + indent(stderr)
# We retrieved all of the information from docker.errors.ContainerError, so here, we ignore the original error.
raise ContainerError(message, err.exit_status, stdout, stderr) from None

# TODO any issue with creating a new client each time inside this function?
def run_container_docker(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, environment: Optional[dict[str, str]] = None, network_disabled=False):
Expand Down
54 changes: 37 additions & 17 deletions spras/domino.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pandas as pd

from spras.containers import prepare_volume, run_container_and_log
from spras.containers import ContainerError, prepare_volume, run_container_and_log
from spras.interactome import (
add_constant,
reinsert_direction_col_undirected,
Expand Down Expand Up @@ -112,13 +112,21 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None,
'--output_file', mapped_slices_file]

container_suffix = "domino"
run_container_and_log('slicer',
container_framework,
container_suffix,
slicer_command,
volumes,
work_dir,
out_dir)
try:
run_container_and_log('slicer',
container_framework,
container_suffix,
slicer_command,
volumes,
work_dir,
out_dir)
except ContainerError as err:
# Occurs when DOMINO gets passed some empty dataframe from network_file.
# This counts as an empty input, so we return an empty output.
if err.streams_contain("pandas.errors.EmptyDataError: No columns to parse from file"):
pass
else:
raise err

# Make the Python command to run within the container
domino_command = ['domino',
Expand All @@ -137,14 +145,26 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None,
if module_threshold is not None:
domino_command.extend(['--module_threshold', str(module_threshold)])

run_container_and_log(
'DOMINO',
container_framework,
container_suffix,
domino_command,
volumes,
work_dir,
out_dir)
try:
run_container_and_log('DOMINO',
container_framework,
container_suffix,
domino_command,
volumes,
work_dir,
out_dir)
except ContainerError as err:
# Occurs when DOMINO gets passed some empty dataframe from network_file.
# This counts as an empty input, so we return an empty output.
if err.streams_contain("pandas.errors.EmptyDataError: No columns to parse from file"):
pass
# Here, DOMINO
# still outputs to our output folder with a still viable HTML output.
# https://github.com/Reed-CompBio/spras/pull/103#issuecomment-1681526958
elif err.streams_contain("ValueError: cannot apply union_all to an empty list"):
pass
else:
raise err

# DOMINO creates a new folder in out_dir to output its modules HTML files into called active_genes
# The filename is determined by the input active_genes and cannot be configured
Expand All @@ -160,7 +180,7 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None,
# Clean up DOMINO intermediate and pickle files
slices_file.unlink(missing_ok=True)
Path(out_dir, 'network.slices.pkl').unlink(missing_ok=True)
Path(network + '.pkl').unlink(missing_ok=True)
Path(str(network) + '.pkl').unlink(missing_ok=True)

@staticmethod
def parse_output(raw_pathway_file, standardized_pathway_file, params):
Expand Down
2 changes: 1 addition & 1 deletion spras/omicsintegrator2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pandas as pd

from spras.containers import prepare_volume, run_container_and_log
from spras.containers import ContainerError, prepare_volume, run_container_and_log
from spras.dataset import Dataset
from spras.interactome import reinsert_direction_col_undirected
from spras.prm import PRM
Expand Down
Empty file.
Empty file.
48 changes: 28 additions & 20 deletions test/DOMINO/test_domino.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@

config.init_from_file("config/config.yaml")

TEST_DIR = 'test/DOMINO/'
OUT_FILE_DEFAULT = TEST_DIR+'output/domino-output.txt'
OUT_FILE_OPTIONAL = TEST_DIR+'output/domino-output-thresholds.txt'
TEST_DIR = Path('test', 'DOMINO')
OUT_FILE_DEFAULT = TEST_DIR / 'output' / 'domino-output.txt'
OUT_FILE_OPTIONAL = TEST_DIR / 'output' / 'domino-output-thresholds.txt'


class TestDOMINO:
Expand All @@ -25,57 +25,65 @@ class TestDOMINO:

def test_domino_required(self):
# Only include required arguments
out_path = Path(OUT_FILE_DEFAULT)
out_path.unlink(missing_ok=True)
OUT_FILE_DEFAULT.unlink(missing_ok=True)
DOMINO.run(
network=TEST_DIR+'input/domino-network.txt',
active_genes=TEST_DIR+'input/domino-active-genes.txt',
network=TEST_DIR / 'input' / 'simple' / 'domino-network.txt',
active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt',
output_file=OUT_FILE_DEFAULT)
# output_file should be empty
assert out_path.exists()
assert OUT_FILE_DEFAULT.exists()

def test_domino_optional(self):
# Include optional arguments
out_path = Path(OUT_FILE_OPTIONAL)
out_path.unlink(missing_ok=True)
OUT_FILE_OPTIONAL.unlink(missing_ok=True)
DOMINO.run(
network=TEST_DIR+'input/domino-network.txt',
active_genes=TEST_DIR+'input/domino-active-genes.txt',
network=TEST_DIR / 'input' / 'simple' / 'domino-network.txt',
active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt',
output_file=OUT_FILE_OPTIONAL,
slice_threshold=0.4,
module_threshold=0.06)
# output_file should be empty
assert out_path.exists()
assert OUT_FILE_OPTIONAL.exists()

def test_domino_missing_active_genes(self):
# Test the expected error is raised when active_genes argument is missing
with pytest.raises(ValueError):
# No active_genes
DOMINO.run(
network=TEST_DIR+'input/domino-network.txt',
network=TEST_DIR / 'input' / 'simple' / 'domino-network.txt',
output_file=OUT_FILE_DEFAULT)

def test_domino_missing_network(self):
# Test the expected error is raised when network argument is missing
with pytest.raises(ValueError):
# No network
DOMINO.run(
active_genes=TEST_DIR+'input/domino-active-genes.txt',
active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt',
output_file=OUT_FILE_DEFAULT)

def test_domino_empty(self):
# Test over empty files
# https://github.com/Reed-CompBio/spras/pull/103#issuecomment-1681526958
OUT_FILE_DEFAULT.unlink(missing_ok=True)
DOMINO.run(
network=TEST_DIR / 'input' / 'empty' / 'domino-network.txt',
active_genes=TEST_DIR / 'input' / 'empty' / 'domino-active-genes.txt',
output_file=OUT_FILE_DEFAULT)
assert OUT_FILE_DEFAULT.exists()


# Only run Singularity test if the binary is available on the system
# spython is only available on Unix, but do not explicitly skip non-Unix platforms
@pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system')
def test_domino_singularity(self):
out_path = Path(OUT_FILE_DEFAULT)
out_path.unlink(missing_ok=True)
OUT_FILE_DEFAULT.unlink(missing_ok=True)
# Only include required arguments and run with Singularity
DOMINO.run(
network=TEST_DIR+'input/domino-network.txt',
active_genes=TEST_DIR+'input/domino-active-genes.txt',
network=TEST_DIR / 'input' / 'simple' / 'domino-network.txt',
active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt',
output_file=OUT_FILE_DEFAULT,
container_framework="singularity")
assert out_path.exists()
assert OUT_FILE_DEFAULT.exists()

def test_pre_id_transform(self):
"""
Expand Down
4 changes: 2 additions & 2 deletions test/OmicsIntegrator2/test_oi2.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
config.init_from_file("config/config.yaml")

TEST_DIR = Path('test', 'OmicsIntegrator2')
EDGE_FILE = TEST_DIR / 'input' / 'oi2-edges.txt'
PRIZE_FILE = TEST_DIR / 'input' / 'oi2-prizes.txt'
OUT_FILE = TEST_DIR / 'output' / 'test.tsv'

EDGE_FILE = TEST_DIR / 'input' / 'simple' / 'oi2-edges.txt'
PRIZE_FILE = TEST_DIR / 'input' / 'simple' / 'oi2-prizes.txt'

class TestOmicsIntegrator2:
"""
Expand Down
Loading