nf-core · nictru · Apr 22, 2025 · Apr 22, 2025 · Apr 22, 2025 · Apr 22, 2025
diff --git a/conf/igenomes_ignored.config b/conf/igenomes_ignored.config
diff --git a/conf/modules.config b/conf/modules.config
@@ -524,6 +524,17 @@ process {
         ]
     }
 
+    withName: BLACKLIST { // settings for BLACKLIST, the alias under which bsj_detection.nf includes BEDTOOLS_INTERSECT
+        ext.args   = { "-v" } // bedtools intersect -v: keep only entries of the first file that overlap nothing in the second
+        ext.prefix = { "${meta.id}_${meta.tool}" }
+        ext.suffix = "blacklist.bed"
+        publishDir = [
+            path: { "${params.outdir}/3_bsj_detection/tools/${meta.tool}/blacklist" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, // returning null keeps versions.yml out of the published results
+        ]
+    }
+
     withName: FILTER_BSJS {
         ext.args   = { "-v FS='\\t' -v OFS='\\t' '{ if (\$5 >= ${params.bsj_reads}) { print } }'" }
         ext.suffix = { "${meta.tool}.filtered.bed" }

diff --git a/conf/test_igenomes.config b/conf/test_igenomes.config
diff --git a/main.nf b/main.nf
@@ -27,13 +27,14 @@ include { getGenomeAttribute      } from './subworkflows/local/utils_nfcore_circ
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-params.fasta   = getGenomeAttribute('fasta')
-params.gtf     = getGenomeAttribute('gtf')
-params.bwa     = getGenomeAttribute('bwa')
-params.star    = getGenomeAttribute('star')
-params.bowtie  = getGenomeAttribute('bowtie')
-params.bowtie2 = getGenomeAttribute('bowtie2')
-params.mature  = getGenomeAttribute('mature')
+params.fasta     = getGenomeAttribute('fasta')
+params.gtf       = getGenomeAttribute('gtf')
+params.bwa       = getGenomeAttribute('bwa')
+params.star      = getGenomeAttribute('star')
+params.bowtie    = getGenomeAttribute('bowtie')
+params.bowtie2   = getGenomeAttribute('bowtie2')
+params.mature    = getGenomeAttribute('mature')
+params.blacklist = getGenomeAttribute('blacklist')
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     RUN MAIN WORKFLOW
@@ -95,6 +96,7 @@ workflow NFCORE_CIRCRNA {
     //
     ch_fasta = Channel.value([[id: "fasta"], file(params.fasta, checkIfExists: true)])
     ch_gtf = Channel.value([[id: "gtf"], file(params.gtf, checkIfExists: true)])
+    ch_blacklist = params.blacklist ? Channel.value(file(params.blacklist, checkIfExists: true)) : Channel.empty()
     ch_mature = params.mature ? Channel.value([[id: "mature"], file(params.mature, checkIfExists: true)]) : Channel.empty()
     ch_phenotype = params.phenotype ? Channel.value([[id: "phenotype"], file(params.phenotype, checkIfExists: true)]) : Channel.empty()
     ch_annotation = params.annotation
@@ -109,6 +111,7 @@ workflow NFCORE_CIRCRNA {
         ch_phenotype,
         ch_fasta,
         ch_gtf,
+        ch_blacklist,
         ch_mature,
         ch_annotation,
         ch_versions,

diff --git a/modules/local/combinebeds/filter/templates/filter.py b/modules/local/combinebeds/filter/templates/filter.py
@@ -28,13 +28,31 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
             yaml_str += f"{spaces}{key}: {value}\\n"
     return yaml_str
 
+# Versions (written up front so versions.yml exists even if the script exits early on an empty result)
+
+versions = {
+    "${task.process}": {
+        "python": platform.python_version(),
+        "polars": pl.__version__,
+        "upsetplot": upsetplot.__version__,
+        "matplotlib": matplotlib.__version__
+    }
+}
+
+with open("versions.yml", "w") as f:
+    f.write(format_yaml_like(versions))
+
+# Parameters
+
 max_shift = int("${max_shift}")
 consider_strand = "${consider_strand}" == "true"
 min_tools = int("${min_tools}")
 min_samples = int("${min_samples}")
 meta_id = "${meta.id}"
 prefix = "${prefix}"
 
+# Logic
+
 df = pl.scan_csv("*.bed",
                  separator="\\t",
                  has_header=False,
@@ -67,8 +85,13 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
 df_filtered = df_aggregated[(df_aggregated["n_tools"] >= min_tools) & (df_aggregated["n_samples"] >= min_samples)]
 df_filtered = df_filtered[["chr", "start", "end", "name", "score", "strand"]]
 
+if len(df_filtered) == 0:
+    exit(0)
+
 df_filtered.to_csv("${prefix}.${suffix}", sep="\\t", header=False, index=False)
 
+# Plots
+
 for col in ["samples", "tools"]:
     series = df_aggregated[col]
     if series.explode().nunique() <= 1:
@@ -100,17 +123,3 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
 
     with open(f"{prefix}_{col}.upset_mqc.json", "w") as f:
         f.write(json.dumps(multiqc, indent=4))
-
-# Versions
-
-versions = {
-    "${task.process}": {
-        "python": platform.python_version(),
-        "polars": pl.__version__,
-        "upsetplot": upsetplot.__version__,
-        "matplotlib": matplotlib.__version__
-    }
-}
-
-with open("versions.yml", "w") as f:
-    f.write(format_yaml_like(versions))
diff --git a/nextflow.config b/nextflow.config
@@ -41,6 +41,7 @@ params {
     genome                     = null
     igenomes_base              = 's3://ngi-igenomes/igenomes/'
     igenomes_ignore            = false
+    blacklist                  = null
     bowtie                     = null
     bowtie2                    = null
     bwa                        = null

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -319,6 +319,16 @@
                     "help_text": "This parameter is *mandatory* if `--genome` is not specified. Needs to contain the following attributes: `gene_id`, `transcript_id` and `gene_name`.",
                     "pattern": "\\.gtf$"
                 },
+                "blacklist": {
+                    "type": "string",
+                    "fa_icon": "fas fa-ban",
+                    "format": "file-path",
+                    "exists": true,
+                    "mimetype": "text/plain",
+                    "pattern": "^\\S+\\.bed$",
+                    "description": "Path to a BED file of blacklisted regions; BSJ calls overlapping these regions are filtered out.",
+                    "default": null
+                },
                 "mature": {
                     "type": "string",
                     "description": "Path to FASTA file with mature miRNAs. This parameter needs to be specified to perform miRNA interaction analyses.",

diff --git a/subworkflows/local/bsj_detection.nf b/subworkflows/local/bsj_detection.nf
@@ -3,6 +3,7 @@ include { GAWK as EXTRACT_COUNTS                             } from '../../modul
 include { CSVTK_JOIN as COMBINE_COUNTS_PER_TOOL              } from '../../modules/nf-core/csvtk/join'
 include { GAWK as FILTER_BSJS                                } from '../../modules/nf-core/gawk'
 include { GAWK as BED_ADD_SAMPLE_TOOL                        } from '../../modules/nf-core/gawk'
+include { BEDTOOLS_INTERSECT as BLACKLIST                    } from '../../modules/nf-core/bedtools/intersect'
 include { COMBINEBEDS_READS                                  } from '../../modules/local/combinebeds/reads'
 include { COMBINEBEDS_FILTER as COMBINE_TOOLS_PER_SAMPLE     } from '../../modules/local/combinebeds/filter'
 include { COMBINEBEDS_SHIFTS as INVESTIGATE_SHIFTS           } from '../../modules/local/combinebeds/shifts'
@@ -28,6 +29,7 @@ workflow BSJ_DETECTION {
     reads
     ch_fasta
     ch_gtf
+    ch_blacklist
     ch_annotation
     bowtie_index
     bowtie2_index
@@ -109,6 +111,12 @@ workflow BSJ_DETECTION {
     ch_bsj_bed_per_sample_tool = ch_bsj_bed_per_sample_tool
         .filter{ _meta, bed -> !bed.isEmpty() }
 
+    if (params.blacklist) { // optional step: skipped entirely when no blacklist is configured
+        BLACKLIST( ch_bsj_bed_per_sample_tool.combine(ch_blacklist), [[], []] ) // append the blacklist file to every BED tuple; second argument is an empty placeholder tuple
+        ch_versions = ch_versions.mix(BLACKLIST.out.versions)
+        ch_bsj_bed_per_sample_tool = BLACKLIST.out.intersect // later steps consume the BLACKLIST output in place of the unfiltered BEDs
+    }
+
     //
     // Analyze read-level agreement
     //

diff --git a/workflows/circrna/main.nf b/workflows/circrna/main.nf
@@ -39,6 +39,7 @@ workflow CIRCRNA {
     ch_phenotype
     ch_fasta
     ch_gtf
+    ch_blacklist
     ch_mature
     ch_annotation
     ch_versions
@@ -108,6 +109,7 @@ workflow CIRCRNA {
         FASTQC_TRIMGALORE.out.reads,
         ch_fasta,
         ch_gtf,
+        ch_blacklist,
         ch_annotation,
         bowtie_index,
         bowtie2_index,