diff --git a/conf/modules.config b/conf/modules.config index 29bc420..82cc10f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -597,7 +597,7 @@ process { ] } - withName: SNIPPY_RUN { + withName: SNIPPY_RUN_READS { publishDir = [ path: { "${params.outdir}/${meta.id}/VARIANTS/SNIPPY" }, mode: params.publish_dir_mode, @@ -610,6 +610,19 @@ process { ext.prefix = { "${meta.id}_${meta.mode}" } } + withName: SNIPPY_RUN_CONTIGS { + publishDir = [ + path: { "${params.outdir}/${meta.id}/VARIANTS/SNIPPY" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + return filename.replaceAll('^.+/', '') + } + ] + ext.args = { "--mincov ${params.snp_mincov} --minqual ${params.snp_minqual}" + } + ext.prefix = { "${meta.id}_${meta.mode}" } + } + withName: IGVREPORTS_VCF { publishDir = [ path: { "${params.outdir}/AGGREGATE/VARIANTS/IGVREPORTS" }, diff --git a/modules/local/snippy/run/environment.yml b/modules/local/snippy/run/environment.yml new file mode 100644 index 0000000..e0f44bc --- /dev/null +++ b/modules/local/snippy/run/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::snippy=4.6.0 diff --git a/modules/local/snippy/run/main.nf b/modules/local/snippy/run/main.nf new file mode 100644 index 0000000..43f7476 --- /dev/null +++ b/modules/local/snippy/run/main.nf @@ -0,0 +1,59 @@ +// Call variants with snippy from assembled contigs (--ctgs) rather than sequencing reads. +process SNIPPY_RUN_CONTIGS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/snippy:4.6.0--hdfd78af_2' : + 'biocontainers/snippy:4.6.0--hdfd78af_2' }" + + input: + tuple val(meta), path(contigs) + path reference + + output: + tuple val(meta), path("${prefix}/${prefix}.tab") , emit: tab + tuple val(meta), path("${prefix}/${prefix}.csv") , emit: csv + tuple val(meta), path("${prefix}/${prefix}.html") , emit: html + tuple val(meta), path("${prefix}/${prefix}.vcf") , emit: vcf + tuple val(meta), path("${prefix}/${prefix}.bed") , emit: bed + tuple val(meta), path("${prefix}/${prefix}.gff") , emit: gff + tuple val(meta), path("${prefix}/${prefix}.bam") , emit: bam + tuple val(meta), path("${prefix}/${prefix}.bam.bai") , emit: bai + tuple val(meta), path("${prefix}/${prefix}.log") , emit: log + tuple val(meta), path("${prefix}/${prefix}.aligned.fa") , emit: aligned_fa + tuple val(meta), path("${prefix}/${prefix}.consensus.fa") , emit: consensus_fa + tuple val(meta), path("${prefix}/${prefix}.consensus.subs.fa"), emit: consensus_subs_fa + tuple val(meta), path("${prefix}/${prefix}.raw.vcf") , emit: raw_vcf + tuple val(meta), path("${prefix}/${prefix}.filt.vcf") , emit: filt_vcf + tuple val(meta), path("${prefix}/${prefix}.vcf.gz") , emit: vcf_gz + tuple val(meta), path("${prefix}/${prefix}.vcf.gz.csi") , emit: vcf_csi + tuple val(meta), path("${prefix}/${prefix}.txt") , emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + // snippy takes --ram as a plain number of GB; task.memory would render as e.g. "6 GB" (or "null" when unset) + def ram_gb = task.memory ? Math.max(1L, task.memory.toGiga()) : 8 + + """ + snippy \\ + $args \\ + --cpus $task.cpus \\ + --ram $ram_gb \\ + --outdir $prefix \\ + --reference $reference \\ + --prefix $prefix \\ + --ctgs $contigs + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snippy: \$(echo \$(snippy --version 2>&1) | sed 's/snippy //') + END_VERSIONS + """ +} diff --git a/modules/local/snippy/run/meta.yml b/modules/local/snippy/run/meta.yml new file mode 100644 index 0000000..5b3c907 --- 
/dev/null +++ b/modules/local/snippy/run/meta.yml @@ -0,0 +1,213 @@ +name: snippy_run +description: Rapid haploid variant calling from assembled contigs (snippy --ctgs) +keywords: + - variant + - contigs + - bacteria +tools: + - snippy: + description: "Rapid bacterial SNP calling and core genome alignments" + homepage: "https://github.com/tseemann/snippy" + documentation: "https://github.com/tseemann/snippy" + tool_dev_url: "https://github.com/tseemann/snippy" + licence: ["GPL v2"] + identifier: biotools:snippy +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - contigs: + type: file + description: | + Assembled contigs in FASTA format, handed to snippy through --ctgs + in place of sequencing reads. + pattern: "*.{fasta,fna,fa}" + - - reference: + type: file + description: Reference genome in FASTA format + pattern: "*.{fasta,fna,fa}" +output: + - tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.tab: + type: file + description: A simple tab-separated summary of all the variants + pattern: "*.tab" + - csv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.csv: + type: file + description: A comma-separated version of the .tab file + pattern: "*.csv" + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.html: + type: file + description: A HTML version of the .tab file + pattern: "*.html" + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.vcf: + type: file + description: The final annotated variants in VCF format + pattern: "*.vcf" + - bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}/${prefix}.bed: + type: file + description: The variants in BED format + pattern: "*.bed" + - gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.gff: + type: file + description: The variants in GFF3 format + pattern: "*.gff" + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.bam: + type: file + description: The alignments in BAM format. Includes unmapped, multimapping reads. + Excludes duplicates. + pattern: "*.bam" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.bam.bai: + type: file + description: Index for the .bam file + pattern: "*.bam.bai" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.log: + type: file + description: A log file with the commands run and their outputs + pattern: "*.log" + - aligned_fa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.aligned.fa: + type: file + description: A version of the reference but with - at position with depth=0 + and N for 0 < depth < --mincov (does not have variants) + pattern: "*.aligned.fa" + - consensus_fa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.consensus.fa: + type: file + description: A version of the reference genome with all variants instantiated + pattern: "*.consensus.fa" + - consensus_subs_fa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}/${prefix}.consensus.subs.fa: + type: file + description: A version of the reference genome with only substitution variants + instantiated + pattern: "*.consensus.subs.fa" + - raw_vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.raw.vcf: + type: file + description: The unfiltered variant calls from Freebayes + pattern: "*.raw.vcf" + - filt_vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.filt.vcf: + type: file + description: The filtered variant calls from Freebayes + pattern: "*.filt.vcf" + - vcf_gz: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.vcf.gz: + type: file + description: Compressed .vcf file via BGZIP + pattern: "*.vcf.gz" + - vcf_csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/${prefix}.vcf.gz.csi: + type: file + description: Index for the .vcf.gz via bcftools index + pattern: "*.vcf.gz.csi" + - txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}/${prefix}.txt: + type: file + description: Tab-separated columnar list of statistics + pattern: "*.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/local/snippy/run/tests/main.nf.test b/modules/local/snippy/run/tests/main.nf.test new file mode 100644 index 0000000..25a39ba --- /dev/null +++ b/modules/local/snippy/run/tests/main.nf.test @@ -0,0 +1,59 @@ + +nextflow_process { + + name "Test Process SNIPPY_RUN_CONTIGS" + script "../main.nf" + process "SNIPPY_RUN_CONTIGS" + + tag "modules" + tag "modules_nfcore" + tag "snippy" + tag "snippy/run" + + test("test-snippy-run") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + // SNIPPY_RUN_CONTIGS hands its input to `snippy --ctgs`, so it needs an + // assembly FASTA rather than FastQ reads. main.nf.test.snap has to be + // regenerated (nf-test test --update-snapshot) for this input. + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.tab, + process.out.csv, + process.out.html, + path(process.out.vcf[0][1]).vcf.summary, + file(process.out.bed[0][1]).name, // empty: d41d8cd98f00b204e9800998ecf8427e + process.out.gff, + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.bai[0][1]).name, + file(process.out.log[0][1]).name, + process.out.aligned_fa, + process.out.consensus_fa, + process.out.consensus_subs_fa, + path(process.out.raw_vcf[0][1]).vcf.summary, + path(process.out.filt_vcf[0][1]).vcf.summary, + path(process.out.vcf_gz[0][1]).vcf.summary, + file(process.out.vcf_csi[0][1]).name, + file(process.out.txt[0][1]).readLines()[3..5], + process.out.versions + ).match() + } + ) + } + } + +} diff 
--git a/modules/local/snippy/run/tests/main.nf.test.snap b/modules/local/snippy/run/tests/main.nf.test.snap new file mode 100644 index 0000000..e028bbe --- /dev/null +++ b/modules/local/snippy/run/tests/main.nf.test.snap @@ -0,0 +1,91 @@ +{ + "test-snippy-run": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.tab:md5,beb9bde3bce985e53e8feba9ec5b136e" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.csv:md5,322f942115e5945c2041a88246166703" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,1ccbf0ffcadae1a6b2e11681d24c9938" + ] + ], + "VcfFile [chromosomes=[], sampleCount=1, variantCount=0, phased=true, phasedAutodetect=true]", + "test.bed", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + ] + ], + "2d64e4363d9f3c0e2167fce49d5087cf", + "test.bam.bai", + "test.log", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.aligned.fa:md5,47e3390d4167edf1955d162d37aca5e3" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.consensus.fa:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.consensus.subs.fa:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "VcfFile [chromosomes=[], sampleCount=1, variantCount=0, phased=true, phasedAutodetect=true]", + "VcfFile [chromosomes=[], sampleCount=1, variantCount=0, phased=true, phasedAutodetect=true]", + "VcfFile [chromosomes=[], sampleCount=1, variantCount=0, phased=true, phasedAutodetect=true]", + "test.vcf.gz.csi", + [ + "ReferenceSize\t29829", + "Software\tsnippy 4.6.0", + "VariantTotal\t0" + ], + [ + "versions.yml:md5,210ffa28e95038dcb8bc4f1ea20adeb0" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-27T10:00:56.080676318" + } +} \ No newline at end of file diff --git a/subworkflows/local/variant_detection.nf b/subworkflows/local/variant_detection.nf index 
f77c44f..e5b4d80 100644 --- a/subworkflows/local/variant_detection.nf +++ b/subworkflows/local/variant_detection.nf @@ -30,24 +30,25 @@ workflow VARIANT_DETECTION { // VARIANT CALLING SUBWORKFLOW VARIANT_CALLING(reads, genome) ch_versions = ch_versions.mix(VARIANT_CALLING.out.versions) - ch_multiqc_files = ch_multiqc_files.mix(VARIANT_CALLING.out.txt_snippy) + ch_multiqc_files = ch_multiqc_files.mix(VARIANT_CALLING.out.txt_snippy_reads) + ch_multiqc_files = ch_multiqc_files.mix(VARIANT_CALLING.out.txt_snippy_contigs) ch_multiqc_files = ch_multiqc_files.mix(VARIANT_CALLING.out.core_txt) // VARIANT VIZ SUBWORKFLOW if (!params.skip_variant_viz) { - ch_vcf = VARIANT_CALLING.out.vcf_bgz_snippy + ch_vcf = VARIANT_CALLING.out.vcf_bgz_snippy_reads .concat(VARIANT_CALLING.out.vcf_bgz_medaka) - .concat(VARIANT_CALLING.out.vcf_bgz_nucmer) - ch_vci = VARIANT_CALLING.out.vci_snippy + .concat(VARIANT_CALLING.out.vcf_bgz_snippy_contigs) + ch_vci = VARIANT_CALLING.out.vci_snippy_reads .concat(VARIANT_CALLING.out.vci_medaka) - .concat(VARIANT_CALLING.out.vci_nucmer) - ch_bam = VARIANT_CALLING.out.bam_snippy + .concat(VARIANT_CALLING.out.vci_snippy_contigs) + ch_bam = VARIANT_CALLING.out.bam_snippy_reads .concat(VARIANT_CALLING.out.bam_medaka) - .concat(VARIANT_CALLING.out.bam_nucmer_sorted) - ch_bai = VARIANT_CALLING.out.bai_snippy + .concat(VARIANT_CALLING.out.bam_snippy_contigs) + ch_bai = VARIANT_CALLING.out.bai_snippy_reads .concat(VARIANT_CALLING.out.bai_medaka) - .concat(VARIANT_CALLING.out.bai_nucmer) + .concat(VARIANT_CALLING.out.bai_snippy_contigs) ch_aln_fa = VARIANT_CALLING.out.core_aln VARIANT_VIS( diff --git a/subworkflows/local/variantcalling.nf b/subworkflows/local/variantcalling.nf index 5204e67..c7f5f13 100644 --- a/subworkflows/local/variantcalling.nf +++ b/subworkflows/local/variantcalling.nf @@ -1,16 +1,15 @@ -include { SNIPPY_RUN } from '../../modules/nf-core/snippy/run' +include { SNIPPY_RUN as SNIPPY_RUN_READS } from '../../modules/nf-core/snippy/run' 
+include { SNIPPY_RUN_CONTIGS } from '../../modules/local/snippy/run' include { SNIPPY_CORE } from '../../modules/nf-core/snippy/core' -include { NUCMER } from '../../modules/nf-core/nucmer' include { GUBBINS } from '../../modules/nf-core/gubbins' include { MEDAKA_VARIANT } from '../../modules/local/medaka/variant' include { DELTA2VCF } from '../../modules/local/delta2vcf' include { CORESNPFILTER } from '../../modules/local/coresnpfilter' -include { NUCMER_SAM } from '../../modules/local/nucmersam' include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index' include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort' -include { TABIX_TABIX as TABIX_SNIPPY; TABIX_TABIX as TABIX_MEDAKA; TABIX_TABIX as TABIX_NUCMER; TABIX_TABIX as TABIX_CORE } from '../../modules/nf-core/tabix/tabix' -include { TABIX_BGZIP as BGZIP_SNIPPY; TABIX_BGZIP as BGZIP_MEDAKA; TABIX_BGZIP as BGZIP_NUCMER; TABIX_BGZIP as BGZIP_CORE } from '../../modules/nf-core/tabix/bgzip' +include { TABIX_TABIX as TABIX_SNIPPY_READS; TABIX_TABIX as TABIX_MEDAKA; TABIX_TABIX as TABIX_SNIPPY_CONTIGS; TABIX_TABIX as TABIX_CORE } from '../../modules/nf-core/tabix/tabix' +include { TABIX_BGZIP as BGZIP_SNIPPY_READS; TABIX_BGZIP as BGZIP_MEDAKA; TABIX_BGZIP as BGZIP_SNIPPY_CONTIGS; TABIX_BGZIP as BGZIP_CORE } from '../../modules/nf-core/tabix/bgzip' workflow VARIANT_CALLING { @@ -21,41 +20,43 @@ workflow VARIANT_CALLING { main: // initialize channels ch_versions = Channel.empty() - ch_vcf_nucmer = Channel.empty() - ch_vcf_snippy = Channel.empty() + ch_vcf_snippy_contigs = Channel.empty() + ch_vcf_snippy_reads = Channel.empty() ch_vcf_medaka = Channel.empty() ch_vcf_gubbins = Channel.empty() - ch_vcf_bgz_nucmer = Channel.empty() - ch_vcf_bgz_snippy = Channel.empty() + ch_vcf_bgz_snippy_contigs = Channel.empty() + ch_vcf_bgz_snippy_reads = Channel.empty() ch_vcf_bgz_medaka = Channel.empty() ch_vcf_gubbins = Channel.empty() - 
ch_sam_nucmer = Channel.empty() - ch_bam_nucmer = Channel.empty() - ch_bam_snippy = Channel.empty() + ch_sam_snippy_contigs = Channel.empty() + ch_bam_snippy_contigs = Channel.empty() + ch_bam_snippy_reads = Channel.empty() ch_bam_medaka = Channel.empty() - ch_bai_snippy = Channel.empty() + ch_bai_snippy_reads = Channel.empty() ch_bai_medaka = Channel.empty() - ch_bai_nucmer = Channel.empty() - ch_vci_snippy = Channel.empty() + ch_bai_snippy_contigs = Channel.empty() + ch_vci_snippy_reads = Channel.empty() ch_vci_medaka = Channel.empty() - ch_vci_nucmer = Channel.empty() + ch_vci_snippy_contigs = Channel.empty() // snippy output channels - ch_tab_snippy = Channel.empty() - ch_csv_snippy = Channel.empty() - ch_html_snippy = Channel.empty() - ch_bed_snippy = Channel.empty() - ch_gff_snippy = Channel.empty() - ch_log_snippy = Channel.empty() - ch_aligned_fa_snippy = Channel.empty() - ch_consensus_fa_snippy = Channel.empty() - ch_consensus_subs_fa_snippy = Channel.empty() - ch_raw_vcf_snippy = Channel.empty() - ch_filt_vcf_snippy = Channel.empty() - ch_vcf_csi_snippy = Channel.empty() - ch_vcf_gz_snippy = Channel.empty() - ch_vcf_csi_snippy = Channel.empty() - ch_vcf_gz_snippy = Channel.empty() - ch_txt_snippy = Channel.empty() + ch_tab_snippy_reads = Channel.empty() + ch_csv_snippy_reads = Channel.empty() + ch_html_snippy_reads = Channel.empty() + ch_bed_snippy_reads = Channel.empty() + ch_gff_snippy_reads = Channel.empty() + ch_log_snippy_reads = Channel.empty() + ch_aligned_fa_snippy_reads = Channel.empty() + ch_consensus_fa_snippy_reads = Channel.empty() + ch_consensus_subs_fa_snippy_reads = Channel.empty() + ch_raw_vcf_snippy_reads = Channel.empty() + ch_filt_vcf_snippy_reads = Channel.empty() + ch_vcf_csi_snippy_reads = Channel.empty() + ch_vcf_gz_snippy_reads = Channel.empty() + ch_vcf_csi_snippy_reads = Channel.empty() + ch_vcf_gz_snippy_reads = Channel.empty() + ch_txt_snippy_reads = Channel.empty() + ch_txt_snippy_contigs = Channel.empty() + // snippy core 
output channels ch_full_aln = Channel.empty() ch_core_aln = Channel.empty() @@ -75,44 +76,41 @@ workflow VARIANT_CALLING { // reference genome Channel.fromPath(params.reference_genome) .set { ch_reference_genome } + ch_genome.map{ + meta, path -> [meta, path[0]] + } + .set { ch_snippy_contig } + + // GENOME: RUN SNIPPY CONTIG + SNIPPY_RUN_CONTIGS(ch_snippy_contig, ch_reference_genome.first()) + // record the snippy version used for the contig-based calls + ch_versions = ch_versions.mix(SNIPPY_RUN_CONTIGS.out.versions) + + ch_vcf_snippy_contigs = SNIPPY_RUN_CONTIGS.out.vcf + ch_bam_snippy_contigs = SNIPPY_RUN_CONTIGS.out.bam + ch_bai_snippy_contigs = SNIPPY_RUN_CONTIGS.out.bai + ch_tab_snippy_contigs = SNIPPY_RUN_CONTIGS.out.tab + ch_csv_snippy_contigs = SNIPPY_RUN_CONTIGS.out.csv + ch_html_snippy_contigs = SNIPPY_RUN_CONTIGS.out.html + ch_bed_snippy_contigs = SNIPPY_RUN_CONTIGS.out.bed + ch_gff_snippy_contigs = SNIPPY_RUN_CONTIGS.out.gff + ch_log_snippy_contigs = SNIPPY_RUN_CONTIGS.out.log + ch_aligned_fa_snippy_contigs = SNIPPY_RUN_CONTIGS.out.aligned_fa + ch_consensus_fa_snippy_contigs = SNIPPY_RUN_CONTIGS.out.consensus_fa + ch_consensus_subs_fa_snippy_contigs = SNIPPY_RUN_CONTIGS.out.consensus_subs_fa + ch_raw_vcf_snippy_contigs = SNIPPY_RUN_CONTIGS.out.raw_vcf + ch_filt_vcf_snippy_contigs = SNIPPY_RUN_CONTIGS.out.filt_vcf + ch_vcf_csi_snippy_contigs = SNIPPY_RUN_CONTIGS.out.vcf_csi + ch_vcf_gz_snippy_contigs = SNIPPY_RUN_CONTIGS.out.vcf_gz + ch_txt_snippy_contigs = SNIPPY_RUN_CONTIGS.out.txt + + BGZIP_SNIPPY_CONTIGS(ch_vcf_snippy_contigs) // compress the VCF file + ch_versions = ch_versions.mix(BGZIP_SNIPPY_CONTIGS.out.versions) + ch_vcf_bgz_snippy_contigs = BGZIP_SNIPPY_CONTIGS.out.output + TABIX_SNIPPY_CONTIGS(ch_vcf_bgz_snippy_contigs) // index the VCF file + ch_versions = ch_versions.mix(TABIX_SNIPPY_CONTIGS.out.versions) + ch_vci_snippy_contigs = TABIX_SNIPPY_CONTIGS.out.tbi - // GENOME: RUN NUCMER - ch_genome - .combine(ch_reference_genome) - .map { meta, genome, 
ref -> [meta, ref, genome] } - .set { ch_nucmer } - NUCMER(ch_nucmer) - ch_versions = ch_versions.mix(NUCMER.out.versions) - NUCMER_SAM(ch_nucmer) - ch_versions = ch_versions.mix(NUCMER_SAM.out.versions) - ch_sam_nucmer = NUCMER_SAM.out.sam_fixed - reference_genome = ch_reference_genome.map { path -> - def meta = path.getName().replaceFirst(/\.[^.]+$/, '') - // Extract filename without extension - tuple(meta, path) - } - SAMTOOLS_VIEW( - ch_sam_nucmer.map { meta, sam -> [meta, sam, []] }, - reference_genome, - [], - ) - ch_bam_nucmer = SAMTOOLS_VIEW.out.bam - ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions) - SAMTOOLS_SORT(ch_bam_nucmer, reference_genome) - ch_sbam_nucmer = SAMTOOLS_SORT.out.bam - ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) - SAMTOOLS_INDEX(ch_sbam_nucmer) - ch_bai_nucmer = SAMTOOLS_INDEX.out.bai - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) - DELTA2VCF(NUCMER.out.delta) - ch_versions = ch_versions.mix(DELTA2VCF.out.versions) - ch_vcf_nucmer = DELTA2VCF.out.vcf - BGZIP_NUCMER(ch_vcf_nucmer) // compress the VCF file - ch_versions = ch_versions.mix(BGZIP_NUCMER.out.versions) - ch_vcf_bgz_nucmer = BGZIP_NUCMER.out.output - TABIX_NUCMER(ch_vcf_bgz_nucmer) // index the VCF file - ch_versions = ch_versions.mix(TABIX_NUCMER.out.versions) - ch_vci_nucmer = TABIX_NUCMER.out.tbi // NANOPORE: RUN MEDAKA ch_medaka = ch_input_seq.nanopore @@ -128,48 +126,49 @@ workflow VARIANT_CALLING { ch_vci_medaka = TABIX_MEDAKA.out.tbi // ILLUMINA: RUN SNIPPY - ch_snippy = ch_input_seq.illumina - SNIPPY_RUN(ch_snippy, ch_reference_genome.first()) - ch_vcf_snippy = SNIPPY_RUN.out.vcf - ch_bam_snippy = SNIPPY_RUN.out.bam - ch_bai_snippy = SNIPPY_RUN.out.bai - ch_tab_snippy = SNIPPY_RUN.out.tab - ch_csv_snippy = SNIPPY_RUN.out.csv - ch_html_snippy = SNIPPY_RUN.out.html - ch_bed_snippy = SNIPPY_RUN.out.bed - ch_gff_snippy = SNIPPY_RUN.out.gff - ch_log_snippy = SNIPPY_RUN.out.log - ch_aligned_fa_snippy = SNIPPY_RUN.out.aligned_fa - 
ch_consensus_fa_snippy = SNIPPY_RUN.out.consensus_fa - ch_consensus_subs_fa_snippy = SNIPPY_RUN.out.consensus_subs_fa - ch_raw_vcf_snippy = SNIPPY_RUN.out.raw_vcf - ch_filt_vcf_snippy = SNIPPY_RUN.out.filt_vcf - ch_vcf_csi_snippy = SNIPPY_RUN.out.vcf_csi - ch_vcf_gz_snippy = SNIPPY_RUN.out.vcf_gz - ch_vcf_csi_snippy = SNIPPY_RUN.out.vcf_csi - ch_vcf_gz_snippy = SNIPPY_RUN.out.vcf_gz - ch_txt_snippy = SNIPPY_RUN.out.txt - - BGZIP_SNIPPY(ch_vcf_snippy) // compress the VCF file - ch_versions = ch_versions.mix(BGZIP_SNIPPY.out.versions) - ch_vcf_bgz_snippy = BGZIP_SNIPPY.out.output - TABIX_SNIPPY(ch_vcf_bgz_snippy) // index the VCF file - ch_versions = ch_versions.mix(TABIX_SNIPPY.out.versions) - ch_vci_snippy = TABIX_SNIPPY.out.tbi - - ch_snippy_aligned_fa = SNIPPY_RUN.out.aligned_fa + ch_snippy_reads = ch_input_seq.illumina + SNIPPY_RUN_READS(ch_snippy_reads, ch_reference_genome.first()) + // record the snippy version used for the read-based calls + ch_versions = ch_versions.mix(SNIPPY_RUN_READS.out.versions) + ch_vcf_snippy_reads = SNIPPY_RUN_READS.out.vcf + ch_bam_snippy_reads = SNIPPY_RUN_READS.out.bam + ch_bai_snippy_reads = SNIPPY_RUN_READS.out.bai + ch_tab_snippy_reads = SNIPPY_RUN_READS.out.tab + ch_csv_snippy_reads = SNIPPY_RUN_READS.out.csv + ch_html_snippy_reads = SNIPPY_RUN_READS.out.html + ch_bed_snippy_reads = SNIPPY_RUN_READS.out.bed + ch_gff_snippy_reads = SNIPPY_RUN_READS.out.gff + ch_log_snippy_reads = SNIPPY_RUN_READS.out.log + ch_aligned_fa_snippy_reads = SNIPPY_RUN_READS.out.aligned_fa + ch_consensus_fa_snippy_reads = SNIPPY_RUN_READS.out.consensus_fa + ch_consensus_subs_fa_snippy_reads = SNIPPY_RUN_READS.out.consensus_subs_fa + ch_raw_vcf_snippy_reads = SNIPPY_RUN_READS.out.raw_vcf + ch_filt_vcf_snippy_reads = SNIPPY_RUN_READS.out.filt_vcf + ch_vcf_csi_snippy_reads = SNIPPY_RUN_READS.out.vcf_csi + ch_vcf_gz_snippy_reads = SNIPPY_RUN_READS.out.vcf_gz + ch_txt_snippy_reads = SNIPPY_RUN_READS.out.txt + + + BGZIP_SNIPPY_READS(ch_vcf_snippy_reads) // compress the 
VCF file + ch_versions = ch_versions.mix(BGZIP_SNIPPY_READS.out.versions) + ch_vcf_bgz_snippy_reads = BGZIP_SNIPPY_READS.out.output + TABIX_SNIPPY_READS(ch_vcf_bgz_snippy_reads) // index the VCF file + ch_versions = ch_versions.mix(TABIX_SNIPPY_READS.out.versions) + ch_vci_snippy_reads = TABIX_SNIPPY_READS.out.tbi + + ch_snippy_reads_aligned_fa = SNIPPY_RUN_READS.out.aligned_fa .map { it[1] } .collect() .map { [[id: 'core_aln'], it] } - ch_snippy_vcf = SNIPPY_RUN.out.vcf + ch_snippy_reads_vcf = SNIPPY_RUN_READS.out.vcf .map { it[1] } .collect() .map { [[id: 'core_aln'], it] } // generate core SNP alignment - ch_snippy_core = ch_snippy_vcf.combine(ch_snippy_aligned_fa, by: 0) - SNIPPY_CORE(ch_snippy_core, ch_reference_genome) + ch_snippy_reads_core = ch_snippy_reads_vcf.combine(ch_snippy_reads_aligned_fa, by: 0) + SNIPPY_CORE(ch_snippy_reads_core, ch_reference_genome) ch_versions = ch_versions.mix(SNIPPY_CORE.out.versions) ch_full_aln = SNIPPY_CORE.out.full_aln ch_core_tab = SNIPPY_CORE.out.tab @@ -205,39 +204,55 @@ workflow VARIANT_CALLING { emit: versions = ch_versions - vcf_nucmer = ch_vcf_nucmer - vcf_snippy = ch_vcf_snippy + vcf_snippy_contigs = ch_vcf_snippy_contigs + vcf_snippy_reads = ch_vcf_snippy_reads vcf_medaka = ch_vcf_medaka vcf_gubbins = ch_vcf_gubbins - vcf_bgz_nucmer = ch_vcf_bgz_nucmer - vcf_bgz_snippy = ch_vcf_bgz_snippy + vcf_bgz_snippy_contigs = ch_vcf_bgz_snippy_contigs + vcf_bgz_snippy_reads = ch_vcf_bgz_snippy_reads vcf_bgz_medaka = ch_vcf_bgz_medaka - sam_nucmer = ch_sam_nucmer - bam_snippy = ch_bam_snippy + bam_snippy_reads = ch_bam_snippy_reads bam_medaka = ch_bam_medaka - bam_nucmer = ch_bam_nucmer - bam_nucmer_sorted = ch_sbam_nucmer - bai_snippy = ch_bai_snippy + bam_snippy_contigs = ch_bam_snippy_contigs + bai_snippy_reads = ch_bai_snippy_reads bai_medaka = ch_bai_medaka - bai_nucmer = ch_bai_nucmer - vci_snippy = ch_vci_snippy + bai_snippy_contigs = ch_bai_snippy_contigs + vci_snippy_reads = ch_vci_snippy_reads vci_medaka = 
ch_vci_medaka - vci_nucmer = ch_vci_nucmer - // snippy outputs - tab_snippy = ch_tab_snippy - csv_snippy = ch_csv_snippy - html_snippy = ch_html_snippy - bed_snippy = ch_bed_snippy - gff_snippy = ch_gff_snippy - log_snippy = ch_log_snippy - aligned_fa_snippy = ch_aligned_fa_snippy - consensus_fa_snippy = ch_consensus_fa_snippy - consensus_subs_fa_snippy = ch_consensus_subs_fa_snippy - raw_vcf_snippy = ch_raw_vcf_snippy - filt_vcf_snippy = ch_filt_vcf_snippy - vcf_csi_snippy = ch_vcf_csi_snippy - vcf_gz_snippy = ch_vcf_gz_snippy - txt_snippy = ch_txt_snippy + vci_snippy_contigs = ch_vci_snippy_contigs + + // snippy reads outputs + tab_snippy_reads = ch_tab_snippy_reads + csv_snippy_reads = ch_csv_snippy_reads + html_snippy_reads = ch_html_snippy_reads + bed_snippy_reads = ch_bed_snippy_reads + gff_snippy_reads = ch_gff_snippy_reads + log_snippy_reads = ch_log_snippy_reads + aligned_fa_snippy_reads = ch_aligned_fa_snippy_reads + consensus_fa_snippy_reads = ch_consensus_fa_snippy_reads + consensus_subs_fa_snippy_reads = ch_consensus_subs_fa_snippy_reads + raw_vcf_snippy_reads = ch_raw_vcf_snippy_reads + filt_vcf_snippy_reads = ch_filt_vcf_snippy_reads + vcf_csi_snippy_reads = ch_vcf_csi_snippy_reads + vcf_gz_snippy_reads = ch_vcf_gz_snippy_reads + txt_snippy_reads = ch_txt_snippy_reads + + // snippy contigs outputs + tab_snippy_contigs = ch_tab_snippy_contigs + csv_snippy_contigs = ch_csv_snippy_contigs + html_snippy_contigs = ch_html_snippy_contigs + bed_snippy_contigs = ch_bed_snippy_contigs + gff_snippy_contigs = ch_gff_snippy_contigs + log_snippy_contigs = ch_log_snippy_contigs + aligned_fa_snippy_contigs = ch_aligned_fa_snippy_contigs + consensus_fa_snippy_contigs = ch_consensus_fa_snippy_contigs + consensus_subs_fa_snippy_contigs = ch_consensus_subs_fa_snippy_contigs + raw_vcf_snippy_contigs = ch_raw_vcf_snippy_contigs + filt_vcf_snippy_contigs = ch_filt_vcf_snippy_contigs + vcf_csi_snippy_contigs = ch_vcf_csi_snippy_contigs + vcf_gz_snippy_contigs = 
ch_vcf_gz_snippy_contigs + txt_snippy_contigs = ch_txt_snippy_contigs + // snippy core outputs core_tab = ch_core_tab core_vcf = ch_core_vcf