diff --git a/conf/igenomes_ignored.config b/conf/igenomes_ignored.config deleted file mode 100644 index b4034d82..00000000 --- a/conf/igenomes_ignored.config +++ /dev/null @@ -1,9 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for iGenomes paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Empty genomes dictionary to use when igenomes is ignored. ----------------------------------------------------------------------------------------- -*/ - -params.genomes = [:] diff --git a/conf/modules.config b/conf/modules.config index ccbb6dc5..e9ece254 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -524,6 +524,17 @@ process { ] } + withName: BLACKLIST { + ext.args = { "-v" } + ext.prefix = { "${meta.id}_${meta.tool}" } + ext.suffix = "blacklist.bed" + publishDir = [ + path: { "${params.outdir}/3_bsj_detection/tools/${meta.tool}/blacklist" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + withName: FILTER_BSJS { ext.args = { "-v FS='\\t' -v OFS='\\t' '{ if (\$5 >= ${params.bsj_reads}) { print } }'" } ext.suffix = { "${meta.tool}.filtered.bed" } diff --git a/conf/test_igenomes.config b/conf/test_igenomes.config deleted file mode 100644 index d23ddbe8..00000000 --- a/conf/test_igenomes.config +++ /dev/null @@ -1,27 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests using igenomes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a minimal pipeline test. 
- - Use as follows: - nextflow run nf-core/circrna -profile test_full, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Minimal igenomes profile' - config_profile_description = 'Minimal igenomes test dataset to check pipeline function' - - // Input data for minima test using igenomes - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/samples.csv' - - genome = 'ce10' - tool = 'circexplorer2' - phenotype = 'https://raw.githubusercontent.com/nf-core/test-datasets/circrna/phenotype.csv' - skip_trimming = false - star = null // igenomes STAR version is not compatible - outdir = 'results/' - bsj_reads = 2 -} diff --git a/main.nf b/main.nf index c73ce741..7610288b 100644 --- a/main.nf +++ b/main.nf @@ -27,13 +27,14 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_circ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = getGenomeAttribute('fasta') -params.gtf = getGenomeAttribute('gtf') -params.bwa = getGenomeAttribute('bwa') -params.star = getGenomeAttribute('star') -params.bowtie = getGenomeAttribute('bowtie') -params.bowtie2 = getGenomeAttribute('bowtie2') -params.mature = getGenomeAttribute('mature') +params.fasta = getGenomeAttribute('fasta') +params.gtf = getGenomeAttribute('gtf') +params.bwa = getGenomeAttribute('bwa') +params.star = getGenomeAttribute('star') +params.bowtie = getGenomeAttribute('bowtie') +params.bowtie2 = getGenomeAttribute('bowtie2') +params.mature = getGenomeAttribute('mature') +params.blacklist = getGenomeAttribute('blacklist') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -95,6 +96,7 @@ workflow NFCORE_CIRCRNA { // ch_fasta = Channel.value([[id: "fasta"], file(params.fasta, checkIfExists: true)]) ch_gtf = Channel.value([[id: "gtf"], file(params.gtf, checkIfExists: true)]) + 
ch_blacklist = params.blacklist ? Channel.value(file(params.blacklist, checkIfExists: true)) : Channel.empty() ch_mature = params.mature ? Channel.value([[id: "mature"], file(params.mature, checkIfExists: true)]) : Channel.empty() ch_phenotype = params.phenotype ? Channel.value([[id: "phenotype"], file(params.phenotype, checkIfExists: true)]) : Channel.empty() ch_annotation = params.annotation @@ -109,6 +111,7 @@ workflow NFCORE_CIRCRNA { ch_phenotype, ch_fasta, ch_gtf, + ch_blacklist, ch_mature, ch_annotation, ch_versions, diff --git a/modules/local/combinebeds/filter/templates/filter.py b/modules/local/combinebeds/filter/templates/filter.py index 4c3fdd40..911792ef 100644 --- a/modules/local/combinebeds/filter/templates/filter.py +++ b/modules/local/combinebeds/filter/templates/filter.py @@ -28,6 +28,22 @@ def format_yaml_like(data: dict, indent: int = 0) -> str: yaml_str += f"{spaces}{key}: {value}\\n" return yaml_str +# Versions + +versions = { + "${task.process}": { + "python": platform.python_version(), + "polars": pl.__version__, + "upsetplot": upsetplot.__version__, + "matplotlib": matplotlib.__version__ + } +} + +with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) + +# Parameters + max_shift = int("${max_shift}") consider_strand = "${consider_strand}" == "true" min_tools = int("${min_tools}") @@ -35,6 +51,8 @@ def format_yaml_like(data: dict, indent: int = 0) -> str: meta_id = "${meta.id}" prefix = "${prefix}" +# Logic + df = pl.scan_csv("*.bed", separator="\\t", has_header=False, @@ -67,8 +85,13 @@ def format_yaml_like(data: dict, indent: int = 0) -> str: df_filtered = df_aggregated[(df_aggregated["n_tools"] >= min_tools) & (df_aggregated["n_samples"] >= min_samples)] df_filtered = df_filtered[["chr", "start", "end", "name", "score", "strand"]] +if len(df_filtered) == 0: + raise SystemExit(0)  # no BSJ met the thresholds: skip BED and plots; versions.yml is already written above + df_filtered.to_csv("${prefix}.${suffix}", sep="\\t", header=False, index=False) +# Plots + for col in ["samples", "tools"]: series = df_aggregated[col] 
if series.explode().nunique() <= 1: @@ -100,17 +123,3 @@ def format_yaml_like(data: dict, indent: int = 0) -> str: with open(f"{prefix}_{col}.upset_mqc.json", "w") as f: f.write(json.dumps(multiqc, indent=4)) - -# Versions - -versions = { - "${task.process}": { - "python": platform.python_version(), - "polars": pl.__version__, - "upsetplot": upsetplot.__version__, - "matplotlib": matplotlib.__version__ - } -} - -with open("versions.yml", "w") as f: - f.write(format_yaml_like(versions)) diff --git a/nextflow.config b/nextflow.config index 57160ec7..32b831ef 100644 --- a/nextflow.config +++ b/nextflow.config @@ -41,6 +41,7 @@ params { genome = null igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false + blacklist = null bowtie = null bowtie2 = null bwa = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 7f316add..53d7f2b1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -319,6 +319,16 @@ "help_text": "This parameter is *mandatory* if `--genome` is not specified. Needs to contain the following attributes: `gene_id`, `transcript_id` and `gene_name`.", "pattern": "\\.gtf$" }, + "blacklist": { + "type": "string", + "fa_icon": "fas fa-ban", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.bed$", + "description": "Path to a BED file of blacklisted regions. Detected back-splice junctions that overlap any of these regions are discarded.", + "default": null + }, "mature": { "type": "string", "description": "Path to FASTA file with mature miRNAs. 
This parameter needs to be specified to perform miRNA interaction analyses.", diff --git a/subworkflows/local/bsj_detection.nf b/subworkflows/local/bsj_detection.nf index af52790c..41d054c0 100644 --- a/subworkflows/local/bsj_detection.nf +++ b/subworkflows/local/bsj_detection.nf @@ -3,6 +3,7 @@ include { GAWK as EXTRACT_COUNTS } from '../../modul include { CSVTK_JOIN as COMBINE_COUNTS_PER_TOOL } from '../../modules/nf-core/csvtk/join' include { GAWK as FILTER_BSJS } from '../../modules/nf-core/gawk' include { GAWK as BED_ADD_SAMPLE_TOOL } from '../../modules/nf-core/gawk' +include { BEDTOOLS_INTERSECT as BLACKLIST } from '../../modules/nf-core/bedtools/intersect' include { COMBINEBEDS_READS } from '../../modules/local/combinebeds/reads' include { COMBINEBEDS_FILTER as COMBINE_TOOLS_PER_SAMPLE } from '../../modules/local/combinebeds/filter' include { COMBINEBEDS_SHIFTS as INVESTIGATE_SHIFTS } from '../../modules/local/combinebeds/shifts' @@ -28,6 +29,7 @@ workflow BSJ_DETECTION { reads ch_fasta ch_gtf + ch_blacklist ch_annotation bowtie_index bowtie2_index @@ -109,6 +111,12 @@ workflow BSJ_DETECTION { ch_bsj_bed_per_sample_tool = ch_bsj_bed_per_sample_tool .filter{ _meta, bed -> !bed.isEmpty() } + if (params.blacklist) { + BLACKLIST( ch_bsj_bed_per_sample_tool.combine(ch_blacklist), [[], []] ) + ch_versions = ch_versions.mix(BLACKLIST.out.versions) + ch_bsj_bed_per_sample_tool = BLACKLIST.out.intersect.filter{ _meta, bed -> !bed.isEmpty() } // blacklisting can remove every junction; drop BEDs left empty, as done before this step + } + // // Analyze read-level agreement // diff --git a/workflows/circrna/main.nf b/workflows/circrna/main.nf index 21ea2da0..77da8006 100644 --- a/workflows/circrna/main.nf +++ b/workflows/circrna/main.nf @@ -39,6 +39,7 @@ workflow CIRCRNA { ch_phenotype ch_fasta ch_gtf + ch_blacklist ch_mature ch_annotation ch_versions @@ -108,6 +109,7 @@ workflow CIRCRNA { FASTQC_TRIMGALORE.out.reads, ch_fasta, ch_gtf, + ch_blacklist, ch_annotation, bowtie_index, bowtie2_index,