Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ from spras.dataset import Dataset
from spras.evaluation import Evaluation
from spras.analysis import ml, summary, cytoscape
import spras.config.config as _config
from spras.util import extend_filename

# Snakemake updated the behavior in the 6.5.0 release https://github.com/snakemake/snakemake/pull/1037
# and using the wrong separator prevents Snakemake from matching filenames to the rules that can produce them
Expand Down Expand Up @@ -209,7 +210,9 @@ checkpoint prepare_input:
# Use the algorithm's generate_inputs function to load the merged dataset, extract the relevant columns,
# and write the output files specified by required_inputs
# The filename_map provides the output file path for each required input file type
filename_map = {input_type: SEP.join([out_dir, 'prepared', f'{wildcards.dataset}-{wildcards.algorithm}-inputs', f'{input_type}.txt']) for input_type in runner.get_required_inputs(wildcards.algorithm)}
filename_map = {input_type: SEP.join(
[out_dir, 'prepared', f'{wildcards.dataset}-{wildcards.algorithm}-inputs', extend_filename(input_type)]
) for input_type in runner.get_required_inputs(wildcards.algorithm)}
runner.prepare_inputs(wildcards.algorithm, input.dataset_file, filename_map)

# Collect the prepared input files from the specified directory
Expand All @@ -227,7 +230,7 @@ def collect_prepared_input(wildcards):
prepared_dir = SEP.join([out_dir, 'prepared', f'{wildcards.dataset}-{wildcards.algorithm}-inputs'])

# Construct the list of expected prepared input files for the reconstruction algorithm
prepared_inputs = expand(f'{prepared_dir}{SEP}{{type}}.txt',type=runner.get_required_inputs(algorithm=wildcards.algorithm))
prepared_inputs = expand(f'{prepared_dir}{SEP}{{type}}',type=map(extend_filename, runner.get_required_inputs(algorithm=wildcards.algorithm)))
# If the directory is missing, do nothing because the missing output triggers running prepare_input
if os.path.isdir(prepared_dir):
# First, check if .snakemake_timestamp, the last written file in a directory rule,
Expand Down
2 changes: 1 addition & 1 deletion _typos.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[type.txt]
# Ignore data files
extend-glob = ["*.txt"]
extend-glob = ["*.txt", "*.sif"]
check-file = false
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
From 49b7580db0700980b8e8c8ce3777165ab56a31c2 Mon Sep 17 00:00:00 2001
From: "Tristan F.-R." <[email protected]>
Date: Tue, 27 May 2025 13:56:32 -0700
Subject: [PATCH 1/2] fix: split runner domino and runner slice

before this, it was only possible to install DOMINO through pypi, because the setup.py script has a broken dependency list.
---
src/runner.py | 1 -
src/runner_slice.py | 71 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 71 insertions(+), 1 deletion(-)
create mode 100644 src/runner_slice.py

diff --git a/src/runner.py b/src/runner.py
index a8e5ce5..7a504ab 100755
--- a/src/runner.py
+++ b/src/runner.py
@@ -68,5 +68,4 @@ def main_slicer():


if __name__=="__main__":
- main_slicer()
main_domino()
diff --git a/src/runner_slice.py b/src/runner_slice.py
new file mode 100644
index 0000000..a00130b
--- /dev/null
+++ b/src/runner_slice.py
@@ -0,0 +1,71 @@
+import argparse
+import os
+from src.core.domino import main as domino_main
+from src.core.preprocess_slices import create_slices
+from src.utils.visualize_modules import visualize_modules
+import src.constants as constants
+def main_domino():
+
+ parser = argparse.ArgumentParser(description='DOMINO: An active module identification algorithm with reduce rate of false.\n NOTE YOU SHOULD RUN THE SLICES SCRIPT FIRST! (more info, type slicer -h) \n Example input files are available @ https://github.com/Shamir-Lab/DOMINO/tree/master/examples')
+ parser.add_argument('-a', '--active_genes_files', dest='active_genes_files', help='Comma delimited list of absolute paths to files, each containing a list of active genes, separated by a new line char (\\n). e.g. /path/to/active_genes_files_1,/path/to/active_genes_files_2.', default="examples/tnfa_active_genes_file.txt")
+ parser.add_argument('-n', '--network_file', dest='network_file', help='A path to network file (sif format). e.g. /path/to/network_file.sif', default="examples/huri.sif")
+ parser.add_argument('-s', '--slices_file', dest='slices_file', help='A path to slices file (i.e. the output of "slicer" script). e.g., /path/to/slices_file.txt', default="examples/huri_slices.txt")
+ parser.add_argument('-o', '--output_folder', dest='output_folder', help='A folder where output files will be written e.g., /path/to/output', default="examples/output")
+ parser.add_argument('-c', '--use_cache', dest='use_cache', help='Use auto-generated cache network files (*.pkl) from previous executions with the same network. NOTE: (1) THIS IS NOT THE SLICES FILE! (2) If the content of the file has changed, you should set this option to "false"', default="true")
+ parser.add_argument('-p', '--parallelization', dest='parallelization', help='The number of threads allocated to the run (usually single thread is enough)', default="1")
+ parser.add_argument('-v', '--visualization', dest='visualization', help='Indicates whether a visualization of the modules ought to be generated', default="true")
+ parser.add_argument('-sth', '--slice_threshold', dest='slice_threshold', default="0.3", help='The threshold for considering a slice as relevant')
+ parser.add_argument('-mth', '--module_threshold', dest='module_threshold', default="0.05", help='The threshold for considering a putative module as final module')
+
+
+ args = parser.parse_args()
+ active_genes_files = args.active_genes_files.split(",")
+ output_folder = args.output_folder
+ network_file = args.network_file
+ slices_file = args.slices_file
+ slice_threshold = float(args.slice_threshold)
+ module_threshold = float(args.module_threshold)
+ use_cache = args.use_cache=="true"
+ parallelization = int(args.parallelization)
+ visualization = args.visualization=="true"
+
+ constants.N_OF_THREADS=parallelization
+ constants.USE_CACHE=use_cache
+
+ for cur_ag in active_genes_files:
+ G_final_modules=domino_main(active_genes_file=cur_ag, network_file=network_file, slices_file=slices_file, slice_threshold=slice_threshold, module_threshold=module_threshold)
+ activity_name=os.path.splitext(os.path.split(cur_ag)[-1])[0]
+ report_folder=os.path.join(output_folder,activity_name)
+ try:
+ os.makedirs(report_folder)
+ except:
+ pass
+
+ out_file=os.path.join(report_folder, "modules.out")
+ if len(G_final_modules) !=0:
+ open(out_file, 'w+').write("\n".join(['[%s]' % ', '.join(list(m.nodes)) for m in G_final_modules])+"\n")
+ else:
+ open(out_file, 'w+').write("")
+
+ print(f'{len(G_final_modules)} final modules are reported at {out_file}')
+ print(visualization)
+ if visualization:
+ visualize_modules(os.path.splitext(cur_ag.split('/')[-1])[0], G_final_modules, None, network_file, report_folder)
+
+def main_slicer():
+
+ parser = argparse.ArgumentParser(description='Slicer for DOMINO (step #0): A preprocessing step for the network')
+ parser.add_argument('-n', '--network_file', dest='network_file', help='A path to network file (sif format). e.g. /path/to/network_file.sif', default="examples/huri.sif")
+ parser.add_argument('-o', '--output_file', dest='output_file', default="examples/huri.sif", help='A path to the output slices file. e.g., /path/to/output/slices_file.txt')
+
+
+ args = parser.parse_args()
+ network_file = args.network_file
+ output_file = args.output_file
+ create_slices(network_file, output_file)
+
+
+
+
+if __name__=="__main__":
+ main_slicer()
--
2.47.0

98 changes: 98 additions & 0 deletions docker-wrappers/DOMINO/0002-fix-update-imports.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
From 9781c1e6c6b884f3666f3ade47d584dd7a2b50fe Mon Sep 17 00:00:00 2001
From: "Tristan F.-R." <[email protected]>
Date: Tue, 27 May 2025 14:40:05 -0700
Subject: [PATCH 2/2] fix: update imports

since we are no longer using venv, we need to update all of our imports.
---
src/core/domino.py | 8 ++++----
src/runner.py | 8 ++++----
src/runner_slice.py | 8 ++++----
src/utils/ensembl2gene_symbol.py | 2 +-
src/utils/visualize_modules.py | 4 ++--
5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/core/domino.py b/src/core/domino.py
index 89c3e87..d4e1663 100644
--- a/src/core/domino.py
+++ b/src/core/domino.py
@@ -18,10 +18,10 @@ from networkx.algorithms.community.centrality import girvan_newman
from networkx.algorithms.components import connected_components

from functools import reduce
-from src.utils.graph_influence_linear_th import linear_threshold
-from src.core.preprocess_slices import read_preprocessed_slices
-from src.core.network_builder import build_network
-import src.constants as constants
+from utils.graph_influence_linear_th import linear_threshold
+from core.preprocess_slices import read_preprocessed_slices
+from core.network_builder import build_network
+import constants as constants

G_modularity = None

diff --git a/src/runner.py b/src/runner.py
index 7a504ab..955e465 100755
--- a/src/runner.py
+++ b/src/runner.py
@@ -1,9 +1,9 @@
import argparse
import os
-from src.core.domino import main as domino_main
-from src.core.preprocess_slices import create_slices
-from src.utils.visualize_modules import visualize_modules
-import src.constants as constants
+from core.domino import main as domino_main
+from core.preprocess_slices import create_slices
+from utils.visualize_modules import visualize_modules
+import constants as constants
def main_domino():

parser = argparse.ArgumentParser(description='DOMINO: An active module identification algorithm with reduce rate of false.\n NOTE YOU SHOULD RUN THE SLICES SCRIPT FIRST! (more info, type slicer -h) \n Example input files are available @ https://github.com/Shamir-Lab/DOMINO/tree/master/examples')
diff --git a/src/runner_slice.py b/src/runner_slice.py
index a00130b..3ea8e04 100644
--- a/src/runner_slice.py
+++ b/src/runner_slice.py
@@ -1,9 +1,9 @@
import argparse
import os
-from src.core.domino import main as domino_main
-from src.core.preprocess_slices import create_slices
-from src.utils.visualize_modules import visualize_modules
-import src.constants as constants
+from core.domino import main as domino_main
+from core.preprocess_slices import create_slices
+from utils.visualize_modules import visualize_modules
+import constants as constants
def main_domino():

parser = argparse.ArgumentParser(description='DOMINO: An active module identification algorithm with reduce rate of false.\n NOTE YOU SHOULD RUN THE SLICES SCRIPT FIRST! (more info, type slicer -h) \n Example input files are available @ https://github.com/Shamir-Lab/DOMINO/tree/master/examples')
diff --git a/src/utils/ensembl2gene_symbol.py b/src/utils/ensembl2gene_symbol.py
index 72d395a..62ae9d2 100755
--- a/src/utils/ensembl2gene_symbol.py
+++ b/src/utils/ensembl2gene_symbol.py
@@ -1,4 +1,4 @@
-import src.constants as constants
+import constants as constants
import os
g2e_dict = None
e2g_dict = None
diff --git a/src/utils/visualize_modules.py b/src/utils/visualize_modules.py
index aecc29f..c27587b 100755
--- a/src/utils/visualize_modules.py
+++ b/src/utils/visualize_modules.py
@@ -9,8 +9,8 @@ import json
import pandas as pd

-from src import constants
-from src.utils.scripts import format_script
-from src.utils.ensembl2gene_symbol import e2g_convertor
+import constants
+from utils.scripts import format_script
+from utils.ensembl2gene_symbol import e2g_convertor
import zipfile

import multiprocessing
--
2.47.0

20 changes: 13 additions & 7 deletions docker-wrappers/DOMINO/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
# DOMINO wrapper
# https://github.com/Shamir-Lab/DOMINO
FROM python:3.7
FROM python:3.8.20-bullseye

RUN pip install domino-python==0.1.1
COPY requirements.txt .

# DOMINO requires data files in hard-coded locations
RUN cd /usr/local/lib/python3.7/site-packages/src/data && \
wget https://raw.githubusercontent.com/Shamir-Lab/DOMINO/master/src/data/ensg2gene_symbol.txt && \
wget https://raw.githubusercontent.com/Shamir-Lab/DOMINO/master/src/data/ensmusg2gene_symbol.txt && \
wget https://raw.githubusercontent.com/Shamir-Lab/DOMINO/master/src/data/graph.html.format
RUN pip install -r requirements.txt

COPY *.patch .

RUN git clone https://github.com/Shamir-Lab/DOMINO/ && \
cd /DOMINO && \
git reset --hard 85dad1515717b425b17f58f92b13a063ccccb85d && \
git config user.email "[email protected]" && \
git config user.name "Non-existent User" && \
# https://stackoverflow.com/a/4832785/7589775
git apply --ignore-space-change --ignore-whitespace --verbose /*.patch
7 changes: 7 additions & 0 deletions docker-wrappers/DOMINO/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
networkx==2.4
numpy==1.22.0
scipy==1.10.0
pandas==1.5.1
pcst-fast==1.0.7
statsmodels==0.11.0
python-louvain==0.14
18 changes: 9 additions & 9 deletions spras/domino.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class DominoParams(BaseModel):
- it can include repeated and bidirectional edges
"""
class DOMINO(PRM[DominoParams]):
required_inputs = ['network', 'active_genes']
required_inputs = ['network.sif', 'active_genes']
dois = ["10.15252/msb.20209593"]

@staticmethod
Expand Down Expand Up @@ -79,7 +79,7 @@ def generate_inputs(data, filename_map):
edges_df['Interactor1'] = edges_df['Interactor1'].apply(pre_domino_id_transform)
edges_df['Interactor2'] = edges_df['Interactor2'].apply(pre_domino_id_transform)

edges_df.to_csv(filename_map['network'], sep='\t', index=False, columns=['Interactor1', 'ppi', 'Interactor2'],
edges_df.to_csv(filename_map['network.sif'], sep='\t', index=False, columns=['Interactor1', 'ppi', 'Interactor2'],
header=['ID_interactor_A', 'ppi', 'ID_interactor_B'])

@staticmethod
Expand All @@ -93,7 +93,7 @@ def run(inputs, output_file, args=None, container_settings=None):
# Each volume is a tuple (source, destination)
volumes = list()

bind_path, network_file = prepare_volume(inputs["network"], work_dir, container_settings)
bind_path, network_file = prepare_volume(inputs["network.sif"], work_dir, container_settings)
volumes.append(bind_path)

bind_path, node_file = prepare_volume(inputs["active_genes"], work_dir, container_settings)
Expand All @@ -109,11 +109,11 @@ def run(inputs, output_file, args=None, container_settings=None):
volumes.append(bind_path)

# Make the Python command to run within the container
slicer_command = ['slicer',
slicer_command = ['python', '/DOMINO/src/runner_slice.py',
'--network_file', network_file,
'--output_file', mapped_slices_file]

container_suffix = "domino"
container_suffix = "domino:latest"
try:
run_container_and_log('slicer',
container_suffix,
Expand All @@ -131,8 +131,7 @@ def run(inputs, output_file, args=None, container_settings=None):
raise err

# Make the Python command to run within the container
# Let visualization be always true, parallelization be always 1 thread, and use_cache be always false.
domino_command = ['domino',
domino_command = ['python', '/DOMINO/src/runner.py',
'--active_genes_files', node_file,
'--network_file', network_file,
'--slices_file', mapped_slices_file,
Expand Down Expand Up @@ -183,7 +182,7 @@ def run(inputs, output_file, args=None, container_settings=None):
# Clean up DOMINO intermediate and pickle files
slices_file.unlink(missing_ok=True)
Path(out_dir, 'network.slices.pkl').unlink(missing_ok=True)
Path(str(inputs['network']) + '.pkl').unlink(missing_ok=True)
Path(str(inputs['network.sif']) + '.pkl').unlink(missing_ok=True)

@staticmethod
def parse_output(raw_pathway_file, standardized_pathway_file, params):
Expand Down Expand Up @@ -242,7 +241,8 @@ def parse_output(raw_pathway_file, standardized_pathway_file, params):

def pre_domino_id_transform(node_id):
"""
DOMINO requires module edges to have the 'ENSG0' string as a prefix for visualization.
DOMINO requires module edges to have the 'ENSG0' string (Ensembl format)
as a prefix for visualization.
Prepend each node id with this ID_PREFIX.
@param node_id: the node id to transform
@return the node id with the prefix added
Expand Down
10 changes: 10 additions & 0 deletions spras/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,3 +131,13 @@ def duplicate_edges(df: pd.DataFrame) -> tuple[pd.DataFrame, bool]:
unique_edges_df = df_sorted.drop_duplicates(subset=["Node1", "Node2", "Direction"], keep="first", ignore_index=True)

return unique_edges_df, not unique_edges_df.equals(df)

# https://stackoverflow.com/a/49689414/7589775
def extend_filename(file_name: str, extension=".txt") -> str:
    """
    Return the file name, appending a fallback extension when it has none.

    The name is split with os.path.splitext. If that yields an extension,
    the name is returned as given; otherwise `extension` is appended.
    @param file_name: the file name (or path) to check
    @param extension: the extension to append when file_name has none
    @return the file name, guaranteed to end in some extension
    """
    stem, suffix = os.path.splitext(file_name)
    # An empty suffix is falsy, so `or` selects the fallback extension.
    return f'{stem}{suffix or extension}'
Loading
Loading