Reed-CompBio · tristan-f-r · Jul 1, 2025 · Oct 31, 2024 · Nov 8, 2024 · Nov 15, 2024
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
@@ -0,0 +1 @@
+FROM mcr.microsoft.com/devcontainers/anaconda:1-3
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -0,0 +1,21 @@
+// Small devcontainer which loads anaconda. All postinstallation steps have to be done manually.
+// This comes with snakemake and docker-in-docker.
+
+// For format details, see https://aka.ms/devcontainer.json. For config options, see the
+// README at: https://github.com/devcontainers/templates/tree/main/src/anaconda
+{
+	"name": "Anaconda (Python 3)",
+	"build": { 
+		"context": "..",
+		"dockerfile": "Dockerfile"
+	},
+	"features": {
+		"ghcr.io/devcontainers/features/docker-in-docker:2": {},
+		// For yamlfmt
+		"ghcr.io/devcontainers/features/go:1": {},
+		// For web display
+		"ghcr.io/devcontainers/features/node:1": {},
+		// For scripting
+		"ghcr.io/va-h/devcontainers-features/uv:1": {}
+	}
+}
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -0,0 +1,105 @@
+name: Test SPRAS
+
+on:
+  pull_request:
+    branches: [main]
+  push:
+    branches: [main]
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# One active run per ref: a newer run replaces an older one on the same ref, and pull request runs cannot cancel a deployment from main
+concurrency:
+  group: 'pages-${{ github.ref }}'
+  cancel-in-progress: true
+
+jobs:
+  pre-commit:
+    name: Run pre-commit checks
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11' # keep in sync with default_language_version in .pre-commit-config.yaml
+      - name: Run pre-commit checks
+        uses: pre-commit/action@v3.0.1
+  checks:
+    name: Run workflow
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          submodules: true # the spras submodule provides environment.yml, the package and the Snakefile used below
+      - name: Install uv for scripting
+        uses: astral-sh/setup-uv@v6
+        with:
+          version: "0.7.13"
+      - name: Setup conda
+        uses: conda-incubator/setup-miniconda@v3
+        with:
+          activate-environment: spras
+          environment-file: spras/environment.yml
+          auto-activate-base: false
+          miniconda-version: 'latest'
+      # Install spras in the environment using pip
+      - name: Install spras in conda env
+        shell: bash --login {0}
+        run: pip install ./spras
+      # Log conda environment contents
+      - name: Log conda environment
+        shell: bash --login {0}
+        run: conda list
+      - name: Process raw data through Snakemake
+        run: sh run_snakemake.sh
+      - name: Run Snakemake workflow for DMMMs
+        shell: bash --login {0}
+        run: snakemake --cores 1 --configfile configs/dmmm.yaml --show-failed-logs -s spras/Snakefile
+      # TODO: re-enable PRAs once RN/synthetic data PRs are merged.
+      # - name: Run Snakemake workflow for PRAs
+      #   shell: bash --login {0}
+      #   run: snakemake --cores 1 --configfile configs/pra.yaml --show-failed-logs -s spras/Snakefile
+      - name: Setup PNPM
+        uses: pnpm/action-setup@v4
+        with:
+          version: 10
+      - name: Install web dependencies
+        working-directory: ./web
+        run: pnpm install
+      - name: Run web builder
+        working-directory: ./web
+        run: pnpm build
+      - name: Upload built website distribution folder
+        uses: actions/upload-artifact@v4
+        with:
+          name: build # downloaded under the same name by the pages job
+          path: web/dist
+  pages:
+    needs: checks
+    if: github.event_name != 'pull_request' # only pushes to main deploy; pull requests stop after the checks job
+    runs-on: ubuntu-latest
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - name: Download Artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: build # the artifact uploaded by the checks job
+          path: dist
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: dist
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.gitignore b/.gitignore
@@ -14,8 +14,6 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
-lib64/
 parts/
 sdist/
 var/
@@ -155,8 +153,14 @@ dmypy.json
 cython_debug/
 
 # PyCharm
-#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-#  and can be added to the global gitignore or merged into this file.  For a more nuclear
-#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/
+
+# Snakemake
+.snakemake
+
+# Output
+/output
+/web/output
+
+# pnpm
+.pnpm-store
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "spras"]
+	path = spras
+	url = https://github.com/Reed-CompBio/spras
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,30 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+# See https://pre-commit.com/ for documentation
+default_language_version:
+  # Match this to the version specified in environment.yml
+  python: python3.11
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0 # Use the ref you want to point at
+    hooks:
+      # Attempts to load all yaml files to verify syntax.
+      - id: check-yaml
+      # Attempts to load all TOML files to verify syntax.
+      - id: check-toml
+      # Trims trailing whitespace.
+      - id: trailing-whitespace
+        # Preserves Markdown hard linebreaks.
+        args: [--markdown-linebreak-ext=md]
+        # Do not trim whitespace from all files, input files may need trailing whitespace for empty values in columns.
+        types_or: [markdown, python, yaml]
+        # Skip this Markdown file, which has an example of an input text file within it.
+        exclude: input/README.md
+  - repo: https://github.com/astral-sh/ruff-pre-commit # formerly charliermarsh/ruff-pre-commit
+    rev: 'v0.0.269'
+    hooks:
+      - id: ruff
+  - repo: https://github.com/google/yamlfmt
+    rev: v0.17.0
+    hooks:
+      - id: yamlfmt
diff --git a/.python-version b/.python-version
@@ -0,0 +1 @@
+3.13
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
@@ -0,0 +1,4 @@
+{
+  "recommendations": ["astro-build.astro-vscode"],
+  "unwantedRecommendations": []
+}
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+    "editor.rulers": [
+        150
+    ]
+}
diff --git a/.yamlfmt.yaml b/.yamlfmt.yaml
@@ -0,0 +1,2 @@
+formatter:
+  retain_line_breaks_single: true
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -0,0 +1,23 @@
+# Contributing
+
+## Helping Out
+
+There are `TODOs` that would improve the reproducibility of datasets or the analysis of algorithm outputs, as well as
+[open resolvable issues](https://github.com/Reed-CompBio/spras-benchmarking/issues).
+
+## Adding a dataset
+
+To add a dataset (see `datasets/yeast-osmotic-stress` as an example of a dataset):
+1. Check that your dataset provider isn't already added (some of these datasets act as providers for multiple datasets)
+1. Create a new folder under `datasets/<your-dataset>`
+1. Add a `raw` folder containing your data
+1. Add an attached Snakefile that converts your `raw` data to `processed` data
+1. Add your snakefile to the top-level `run_snakemake.sh` file.
+1. If your dataset is a paper reproduction, add a `reproduction/raw` and `reproduction/processed` folder
+1. Add your datasets to the appropriate `configs`
+
+## Adding an algorithm
+
+If you want to add an algorithm, refer to the [SPRAS repository](https://github.com/Reed-CompBio/SPRAS) instead.
+If you want to test your new algorithm you PRed to SPRAS, you can swap out the `spras` submodule that this repository uses
+with your fork of SPRAS.
diff --git a/README.md b/README.md
@@ -1,2 +1,30 @@
-# spras-benchmarking
-Benchmarking datasets for the [SPRAS](https://github.com/Reed-CompBio/spras) project
+# SPRAS benchmarking
+
+![example workflow](https://github.com/Reed-CompBio/spras-benchmarking/actions/workflows/publish.yml/badge.svg)
+
+Benchmarking datasets for the [SPRAS](https://github.com/Reed-CompBio/spras) project. This repository contains gold standard datasets to evaluate on as well as paper reproductions & improvements incorporating new methodologies.
+
+## Setup
+
+This repository depends on SPRAS. If you want to reproduce the results of benchmarking locally,
+you will need to set up SPRAS. SPRAS depends on [Docker](https://www.docker.com/) and [Conda](https://docs.conda.io/projects/conda/en/stable/). If it is hard to install either of these tools,
+a [devcontainer](https://containers.dev/) is available for easy setup.
+
+```sh
+conda env create -f spras/environment.yml
+conda activate spras
+pip install ./spras
+```
+
+To run the postprocess output scripts, we have a `pyproject.toml` which can be used with your desired python package manager. This separates
+the `spras` conda environment from the small scripts we have. (on CI, we use [`uv`](https://docs.astral.sh/uv/).)
+
+To run the benchmarking pipeline, use:
+
+```sh
+snakemake --cores 1 --configfile configs/dmmm.yaml --show-failed-logs -s spras/Snakefile
+```
+
+> [!NOTE]
+> Each of the dataset categories (at the time of writing, DMMM and PRA) is split into its own configuration file.
+> Run each one as you would want.
diff --git a/configs/dmmm.yaml b/configs/dmmm.yaml
@@ -0,0 +1,56 @@
+# Base Settings
+hash_length: 7
+container_framework: docker
+unpack_singularity: false
+
+container_registry:
+  base_url: docker.io
+  owner: reedcompbio
+
+reconstruction_settings:
+  locations:
+    reconstruction_dir: "output"
+    run: true
+
+analysis:
+  summary:
+    include: false
+  graphspace:
+    include: false
+  cytoscape:
+    include: false
+  ml:
+    include: true
+    aggregate_per_algorithm: true
+  evaluation:
+    include: false
+
+# Custom settings
+algorithms:
+  - name: "omicsintegrator1"
+    params:
+      include: true
+      run1:
+        b: [2]
+        w: [.5]
+        d: [10]
+        mu: [2]
+  - name: "omicsintegrator2"
+    params:
+      include: true
+      run1:
+        b: [4]
+        g: [0]
+
+datasets:
+  - label: dmmmhiv060
+    node_files: ["processed_prize_060.txt"]
+    edge_files: ["phosphosite-irefindex13.0-uniprot.txt"]
+    # Placeholder
+    other_files: []
+    data_dir: "datasets/hiv/processed"
+  - label: dmmmhiv05
+    node_files: ["processed_prize_05.txt"]
+    edge_files: ["phosphosite-irefindex13.0-uniprot.txt"]
+    other_files: []
+    data_dir: "datasets/hiv/processed"
diff --git a/configs/pra.yaml b/configs/pra.yaml
@@ -0,0 +1,62 @@
+# Base Settings
+# TODO: (same for dmmm.yaml): can we deduplicate this using snakemake?
+hash_length: 7
+container_framework: docker
+unpack_singularity: false
+
+container_registry:
+  base_url: docker.io
+  owner: reedcompbio
+
+reconstruction_settings:
+  locations:
+    reconstruction_dir: "output"
+    run: true
+
+analysis:
+  summary:
+    include: false
+  graphspace:
+    include: false
+  cytoscape:
+    include: false
+  ml:
+    include: true
+    aggregate_per_algorithm: true
+  evaluation:
+    include: false
+
+# Custom settings
+algorithms:
+  - name: "omicsintegrator1"
+    params:
+      include: true
+      run1:
+        b: [2]
+        w: [.5]
+        d: [10]
+        mu: [2]
+  - name: "omicsintegrator2"
+    params:
+      include: true
+      run1:
+        b: [4]
+        g: [0]
+  - name: "pathlinker"
+    params:
+      include: true
+      run1:
+        k: [10, 20]
+  - name: "allpairs"
+    params:
+      include: true
+
+datasets:
+  - label: pramuscleskeletal2018
+    node_files: ["sources.txt", "targets.txt"]
+    # DataLoader.py can currently only load a single edge file, which is the primary network
+    edge_files: ["interactome.tsv"]
+    # Placeholder
+    other_files: []
+    # Relative path from the root of this repository (the directory snakemake is run from)
+    data_dir: "datasets/rn-muscle-skeletal/processed"
diff --git a/datasets/hiv/.gitignore b/datasets/hiv/.gitignore
@@ -0,0 +1 @@
+processed
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		FROM mcr.microsoft.com/devcontainers/anaconda:1-3