Merge pull request #49 from nf-core/local-module-proseg

heylf · web-flow · commit 10bec305b92b · 2025-01-28T11:03:45.000+01:00
Local module proseg
diff --git a/modules/local/proseg/main.nf b/modules/local/proseg/main.nf
@@ -0,0 +1,88 @@
+/*
+ * PROSEG: runs `proseg` on a transcript table using the platform preset taken from
+ * `params.preset` (xenium, cosmx or merscope) and collects its fixed-name outputs.
+ * Only the 2D cell polygons are emitted together with `meta`; all other outputs are bare paths.
+ * Conda / Mamba profiles are rejected; the module relies on the container image.
+ */
+process PROSEG {
+    tag "$meta.id"
+    label 'process_high'
+
+    container "nf-core/proseg:1.1.8"
+
+    input:
+    tuple val(meta), path(transcripts)
+
+    output:
+    // File names below are fixed by the --output-* flags passed in the script block
+    tuple val(meta), path("cell-polygons.geojson.gz"), emit: cell_polygons_2d
+    path("expected-counts.csv.gz"), emit: expected_counts
+    path("cell-metadata.csv.gz"), emit: cell_metadata
+    path("transcript-metadata.csv.gz"), emit: transcript_metadata
+    path("gene-metadata.csv.gz"), emit: gene_metadata
+    path("rates.csv.gz"), emit: rates
+    path("cell-polygons-layers.geojson.gz"), emit:  cell_polygons_layers
+    path("cell-hulls.geojson.gz"), emit: cell_hulls
+    path("versions.yml"), emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "PROSEG module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+    def args = task.ext.args ?: ''
+    // Read the preset from params and coerce it to a plain String: a GString never
+    // equals a String inside List.contains(), so the `in` check below would always fail.
+    def platform = (params.preset ?: '').toString()
+
+    // check for preset values
+    if (!(platform in ['xenium', 'cosmx', 'merscope'])) {
+        error "${platform} is an invalid platform (preset) type. Please specify xenium, cosmx, or merscope"
+    }
+
+    """
+    proseg \\
+        --${platform} \\
+        ${transcripts} \\
+        --nthreads ${task.cpus} \\
+        --output-expected-counts expected-counts.csv.gz \\
+        --output-cell-metadata cell-metadata.csv.gz \\
+        --output-transcript-metadata transcript-metadata.csv.gz \\
+        --output-gene-metadata gene-metadata.csv.gz \\
+        --output-rates rates.csv.gz \\
+        --output-cell-polygons cell-polygons.geojson.gz \\
+        --output-cell-polygon-layers cell-polygons-layers.geojson.gz \\
+        --output-cell-hulls cell-hulls.geojson.gz \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        proseg: \$(proseg --version | sed 's/proseg //')
+    END_VERSIONS
+    """
+
+    stub:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "PROSEG module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+
+    """
+    touch expected-counts.csv.gz
+    touch cell-metadata.csv.gz
+    touch transcript-metadata.csv.gz
+    touch gene-metadata.csv.gz
+    touch rates.csv.gz
+    touch cell-polygons.geojson.gz
+    touch cell-polygons-layers.geojson.gz
+    touch cell-hulls.geojson.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        proseg: \$(proseg --version | sed 's/proseg //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/proseg/meta.yml b/modules/local/proseg/meta.yml
@@ -0,0 +1,76 @@
+name: "proseg"
+description: Probabilistic cell segmentation for in situ spatial transcriptomics
+keywords:
+  - segmentation
+  - cell segmentation
+  - spatialomics
+  - probabilistic segmentation
+  - in situ spatial transcriptomics
+tools:
+  - "proseg":
+      description: "Proseg (probabilistic segmentation) is a cell segmentation method for in situ spatial transcriptomics. Xenium, CosMx, and MERSCOPE platforms are currently supported."
+      homepage: "https://github.com/dcjones/proseg/tree/main"
+      documentation: "https://github.com/dcjones/proseg/blob/main/README.md"
+      tool_dev_url: "https://github.com/dcjones/proseg"
+      doi: ""
+      licence: ["GNU Public License"]
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing run information
+          e.g. `[ id:'run_id']`
+    - transcripts:
+        type: file
+        description: |
+          File containing the transcript positions
+        pattern: "transcripts.csv.gz"
+
+output:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing run information
+          e.g. `[ id:'run_id']`
+    - cell_polygons_2d:
+        type: file
+        description: 2D polygons for each cell in GeoJSON format. These are flattened from 3D
+        pattern: "cell-polygons.geojson.gz"
+  - - expected_counts:
+        type: file
+        description: Cell-by-gene count matrix
+        pattern: "expected-counts.csv.gz"
+  - - cell_metadata:
+        type: file
+        description: Cell centroids, volume, and other information
+        pattern: "cell-metadata.csv.gz"
+  - - transcript_metadata:
+        type: file
+        description: Transcript ids, genes, revised positions, assignment probability
+        pattern: "transcript-metadata.csv.gz"
+  - - gene_metadata:
+        type: file
+        description: Per-gene summary statistics
+        pattern: "gene-metadata.csv.gz"
+  - - rates:
+        type: file
+        description: Cell-by-gene Poisson rate parameters
+        pattern: "rates.csv.gz"
+  - - cell_polygons_layers:
+        type: file
+        description: A separate, non-overlapping cell polygon for each z-layer, preserving 3D segmentation
+        pattern: "cell-polygons-layers.geojson.gz"
+  - - cell_hulls:
+        type: file
+        description: Convex hulls around assigned transcripts
+        pattern: "cell-hulls.geojson.gz"
+  - - versions:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+
+authors:
+  - "@khersameesh24"
+maintainers:
+  - "@khersameesh24"
diff --git a/modules/local/proseg/preprocess/main.nf b/modules/local/proseg/preprocess/main.nf
@@ -0,0 +1,61 @@
+/*
+ * PROSEG2BAYSOR: runs `proseg-to-baysor` on a proseg transcript-metadata table and a
+ * cell-polygon file, writing the converted `xr-*` outputs. Conda / Mamba profiles are rejected.
+ */
+process PROSEG2BAYSOR {
+    // No meta map is supplied to this process, so tag the task with the input file name
+    tag "$transcript_metadata"
+    label 'process_high'
+
+    container "nf-core/proseg:1.1.8"
+
+    input:
+    path(transcript_metadata)
+    path(cell_polygons)
+
+    output:
+    path("xr-transcript-metadata.csv"), emit: xr_metadata
+    path("xr-cell-polygons.geojson"), emit: xr_polygons
+    path("versions.yml"), emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "PROSEG2BAYSOR (preprocess) module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+    def args = task.ext.args ?: ''
+
+    """
+    proseg-to-baysor \\
+        ${transcript_metadata} \\
+        ${cell_polygons} \\
+        --output-transcript-metadata xr-transcript-metadata.csv \\
+        --output-cell-polygons xr-cell-polygons.geojson \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        proseg: \$(proseg --version | sed 's/proseg //')
+    END_VERSIONS
+    """
+
+    stub:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "PROSEG2BAYSOR (preprocess) module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+
+    """
+    touch xr-transcript-metadata.csv
+    touch xr-cell-polygons.geojson
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        proseg: \$(proseg --version | sed 's/proseg //')
+    END_VERSIONS
+    """
+}
+
diff --git a/modules/local/proseg/tests/main.nf.test b/modules/local/proseg/tests/main.nf.test
@@ -0,0 +1,81 @@
+// nf-test spec for the local PROSEG module: one real run and one stub run, both fed
+// with transcripts.csv taken from the unzipped tiny Xenium test bundle.
+nextflow_process {
+
+    name "Test Process PROSEG"
+    script "../main.nf"
+    process "PROSEG"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "proseg"
+    tag "segmentation"
+    tag "cell_segmentation"
+
+
+    setup {
+            run("UNZIP") {
+                script "modules/nf-core/unzip/main.nf"
+                process {
+                    """
+                    input[0] = [[], file('https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.zip', checkIfExists: true)]
+                    """
+                }
+            }
+    }
+
+    test("proseg - transcripts.csv") {
+
+        when {
+            // Resolve the file inside the map closure; a channel cannot be concatenated with a String
+            process {
+                """
+                input[0] = Channel.of([
+                            [id: "test_run_proseg"],
+                        ]).combine(UNZIP.out.unzipped_archive.map { it[1].resolve("transcripts.csv") })
+                """
+            }
+        }
+
+        then {
+            // Only cell_polygons_2d is emitted as a [meta, path] tuple; every other output is a bare path
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert file(process.out.expected_counts.get(0)).exists() },
+                { assert file(process.out.cell_metadata.get(0)).exists() },
+                { assert file(process.out.transcript_metadata.get(0)).exists() },
+                { assert file(process.out.gene_metadata.get(0)).exists() },
+                { assert file(process.out.rates.get(0)).exists() },
+                { assert file(process.out.cell_polygons_2d.get(0).get(1)).exists() },
+                { assert file(process.out.cell_polygons_layers.get(0)).exists() },
+                { assert file(process.out.cell_hulls.get(0)).exists() }
+            )
+        }
+
+    }
+
+    test("proseg stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                            [id: "test_run_proseg"],
+                        ]).combine(UNZIP.out.unzipped_archive.map { it[1].resolve("transcripts.csv") })
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/local/proseg/tests/tags.yml b/modules/local/proseg/tests/tags.yml
@@ -0,0 +1,2 @@
+proseg:
+  - "modules/local/proseg/**"

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+proseg:`
	`2`	`+ - "modules/local/proseg/**"`