Skip to content
This repository was archived by the owner on Jan 27, 2020. It is now read-only.

Commit 5898973

Browse files
committed
Merge remote-tracking branch 'upstream/dev' into Dump
2 parents 4dbd4f8 + c4b7f39 commit 5898973

File tree

15 files changed

+106
-44
lines changed

15 files changed

+106
-44
lines changed

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,37 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
77

88
## [Unreleased]
99

10+
### `Changed`
11+
12+
- [#744](https://github.com/SciLifeLab/Sarek/pull/744) - Refactor `germlineVC.nf`
13+
- [#776](https://github.com/SciLifeLab/Sarek/pull/776) - Helper script now downloads annotations for the VEP CADD plugin
14+
1015
### `Added`
1116

1217
- [#753](https://github.com/SciLifeLab/Sarek/pull/753) - Update `binac`, `cfc` configuration
1318
- [#766](https://github.com/SciLifeLab/Sarek/pull/766) - Added `ps` in `r-base` and `runallelecount` containers
19+
- [#774](https://github.com/SciLifeLab/Sarek/pull/774) - Autogenerates memory requirements from MarkDuplicates when less than 8G is available. cf [nf-core/rnaseq#179](https://github.com/nf-core/rnaseq/pull/179)
20+
- [#775](https://github.com/SciLifeLab/Sarek/pull/775) - Update paths for munin configuration
21+
- [#777](https://github.com/SciLifeLab/Sarek/pull/777) - Add GeneSplicer `1.0` to container
22+
- [#777](https://github.com/SciLifeLab/Sarek/pull/777) - Add possibility to use VEP GeneSplicer plugin
23+
- [#777](https://github.com/SciLifeLab/Sarek/pull/777) - Add `reduceVCF()` function to remove `.ann`, `.gz` and `.vcf` from a VCF filename
1424

1525
### `Changed`
1626

1727
- [#741](https://github.com/SciLifeLab/Sarek/pull/741) - Use [dump](https://www.nextflow.io/docs/latest/operator.html#dump) operator
1828
- [#744](https://github.com/SciLifeLab/Sarek/pull/744) - Refactor `germlineVC.nf`
29+
- [#776](https://github.com/SciLifeLab/Sarek/pull/776) - Helper script now downloads annotations for the VEP CADD plugin
1930

2031
### `Fixed`
2132

33+
- [#747](https://github.com/SciLifeLab/Sarek/pull/747) - Exclude Manta `*candidateSV.vcf` from annotation
2234
- [#749](https://github.com/SciLifeLab/Sarek/pull/749) - Fix config problematic use of queue `core` for uppmax-slurm
2335
- [#760](https://github.com/SciLifeLab/Sarek/pull/760) - Fix undefined `task.mem`
2436
- [#751](https://github.com/SciLifeLab/Sarek/pull/751), [#756](https://github.com/SciLifeLab/Sarek/pull/756) - Typos in `igenomes.config`
2537
- [#757](https://github.com/SciLifeLab/Sarek/pull/757) - Typos in `binac`, `cfc` configuration
2638
- [#758](https://github.com/SciLifeLab/Sarek/pull/758) - Typos in `ASCAT` documentation
2739
- [#765](https://github.com/SciLifeLab/Sarek/pull/765) - Check only for references that are needed to fix [#754](https://github.com/SciLifeLab/Sarek/issues/754)
40+
- [#777](https://github.com/SciLifeLab/Sarek/pull/777) - Fix name collision in `annotate.nf`
2841

2942
### `Deprecated`
3043

annotate.nf

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ if (annotateVCF == []) {
7676
Channel.empty().mix(
7777
Channel.fromPath("${params.outDir}/VariantCalling/*/HaplotypeCaller/*.vcf.gz")
7878
.flatten().map{vcf -> ['haplotypecaller', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
79-
Channel.fromPath("${params.outDir}/VariantCalling/*/Manta/*SV.vcf.gz")
79+
Channel.fromPath("${params.outDir}/VariantCalling/*/Manta/*[!candidate]SV.vcf.gz")
8080
.flatten().map{vcf -> ['manta', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
8181
Channel.fromPath("${params.outDir}/VariantCalling/*/MuTect2/*.vcf.gz")
8282
.flatten().map{vcf -> ['mutect2', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
@@ -104,7 +104,7 @@ vcfForVep = vcfForVep.map {
104104
}
105105

106106
process RunBcftoolsStats {
107-
tag {"${idPatient} - ${vcf}"}
107+
tag {"${idPatient} - ${variantCaller} - ${vcf}"}
108108

109109
publishDir "${params.outDir}/Reports/BCFToolsStats", mode: params.publishDirMode
110110

@@ -130,11 +130,13 @@ process RunVcftools {
130130
set variantCaller, idPatient, file(vcf) from vcfForVCFtools
131131

132132
output:
133-
file ("${vcf.simpleName}.*") into vcfReport
133+
file ("${reducedVCF}.*") into vcfReport
134134

135135
when: !params.noReports
136136

137-
script: QC.vcftools(vcf)
137+
script:
138+
reducedVCF = SarekUtils.reduceVCF(vcf)
139+
QC.vcftools(vcf)
138140
}
139141

140142
vcfReport = vcfReport.dump(tag:'VCFTools')
@@ -143,7 +145,7 @@ process RunSnpeff {
143145
tag {"${idPatient} - ${variantCaller} - ${vcf}"}
144146

145147
publishDir params.outDir, mode: params.publishDirMode, saveAs: {
146-
if (it == "${vcf.simpleName}_snpEff.ann.vcf") null
148+
if (it == "${reducedVCF}_snpEff.ann.vcf") null
147149
else "Annotation/${idPatient}/snpEff/${it}"
148150
}
149151

@@ -153,25 +155,26 @@ process RunSnpeff {
153155
val snpeffDb from Channel.value(params.genomes[params.genome].snpeffDb)
154156

155157
output:
156-
set file("${vcf.simpleName}_snpEff.genes.txt"), file("${vcf.simpleName}_snpEff.csv"), file("${vcf.simpleName}_snpEff.summary.html") into snpeffOutput
157-
set val("snpEff"), variantCaller, idPatient, file("${vcf.simpleName}_snpEff.ann.vcf") into snpeffVCF
158+
set file("${reducedVCF}_snpEff.genes.txt"), file("${reducedVCF}_snpEff.csv"), file("${reducedVCF}_snpEff.summary.html") into snpeffOutput
159+
set val("snpEff"), variantCaller, idPatient, file("${reducedVCF}_snpEff.ann.vcf") into snpeffVCF
158160

159161
when: 'snpeff' in tools || 'merge' in tools
160162

161163
script:
164+
reducedVCF = SarekUtils.reduceVCF(vcf)
162165
cache = (params.snpEff_cache && params.annotation_cache) ? "-dataDir \${PWD}/${dataDir}" : ""
163166
"""
164167
snpEff -Xmx${task.memory.toGiga()}g \
165168
${snpeffDb} \
166-
-csvStats ${vcf.simpleName}_snpEff.csv \
169+
-csvStats ${reducedVCF}_snpEff.csv \
167170
-nodownload \
168171
${cache} \
169172
-canon \
170173
-v \
171174
${vcf} \
172-
> ${vcf.simpleName}_snpEff.ann.vcf
175+
> ${reducedVCF}_snpEff.ann.vcf
173176
174-
mv snpEff_summary.html ${vcf.simpleName}_snpEff.summary.html
177+
mv snpEff_summary.html ${reducedVCF}_snpEff.summary.html
175178
"""
176179
}
177180

@@ -194,7 +197,7 @@ process RunVEP {
194197
tag {"${idPatient} - ${variantCaller} - ${vcf}"}
195198

196199
publishDir params.outDir, mode: params.publishDirMode, saveAs: {
197-
if (it == "${vcf.simpleName}_VEP.summary.html") "Annotation/${idPatient}/VEP/${it}"
200+
if (it == "${reducedVCF}_VEP.summary.html") "Annotation/${idPatient}/VEP/${it}"
198201
else null
199202
}
200203

@@ -210,34 +213,40 @@ process RunVEP {
210213
])
211214

212215
output:
213-
set finalAnnotator, variantCaller, idPatient, file("${vcf.simpleName}_VEP.ann.vcf") into vepVCF
214-
file("${vcf.simpleName}_VEP.summary.html") into vepReport
216+
set finalAnnotator, variantCaller, idPatient, file("${reducedVCF}_VEP.ann.vcf") into vepVCF
217+
file("${reducedVCF}_VEP.summary.html") into vepReport
215218

216219
when: 'vep' in tools || 'merge' in tools
217220

218221
script:
222+
reducedVCF = SarekUtils.reduceVCF(vcf)
219223
finalAnnotator = annotator == "snpEff" ? 'merge' : 'VEP'
220224
genome = params.genome == 'smallGRCh37' ? 'GRCh37' : params.genome
221225
dir_cache = (params.vep_cache && params.annotation_cache) ? " \${PWD}/${dataDir}" : "/.vep"
222226
cadd = (params.cadd_cache && params.cadd_WG_SNVs && params.cadd_InDels) ? "--plugin CADD,whole_genome_SNVs.tsv.gz,InDels.tsv.gz" : ""
227+
genesplicer = params.genesplicer ? "--plugin GeneSplicer,/opt/conda/envs/sarek-2.3/bin/genesplicer,/opt/conda/envs/sarek-2.3/share/genesplicer-1.0-1/human,context=200,tmpdir=\$PWD/${reducedVCF}" : "--offline"
223228
"""
229+
mkdir ${reducedVCF}
230+
224231
vep \
225232
-i ${vcf} \
226-
-o ${vcf.simpleName}_VEP.ann.vcf \
233+
-o ${reducedVCF}_VEP.ann.vcf \
227234
--assembly ${genome} \
228235
${cadd} \
236+
${genesplicer} \
229237
--cache \
230238
--cache_version ${cache_version} \
231239
--dir_cache ${dir_cache} \
232240
--everything \
233241
--filter_common \
234242
--fork ${task.cpus} \
235243
--format vcf \
236-
--offline \
237244
--per_gene \
238-
--stats_file ${vcf.simpleName}_VEP.summary.html \
245+
--stats_file ${reducedVCF}_VEP.summary.html \
239246
--total_length \
240247
--vcf
248+
249+
rm -rf ${reducedVCF}
241250
"""
242251
}
243252

@@ -257,6 +266,7 @@ process CompressVCF {
257266
set annotator, variantCaller, idPatient, file("*.vcf.gz"), file("*.vcf.gz.tbi") into (vcfCompressed, vcfCompressedoutput)
258267

259268
script:
269+
reducedVCF = SarekUtils.reduceVCF(vcf)
260270
finalAnnotator = annotator == "merge" ? "VEP" : annotator
261271
"""
262272
bgzip < ${vcf} > ${vcf}.gz

build.nf

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -332,9 +332,10 @@ process BuildCache_VEP {
332332
}
333333

334334
caddFileToDownload = (params.cadd_version) && (params.genome == "GRCh37" || params.genome == "GRCh38") ?
335-
Channel.from("https://krishna.gs.washington.edu/download/CADD/${params.cadd_version}/${params.genome}/InDels.tsv.gz",
336-
"https://krishna.gs.washington.edu/download/CADD/${params.cadd_version}/${params.genome}/whole_genome_SNVs.tsv.gz")
337-
: Channel.empty()
335+
Channel.from(
336+
"https://krishna.gs.washington.edu/download/CADD/${params.cadd_version}/${params.genome}/InDels_inclAnno.tsv.gz",
337+
"https://krishna.gs.washington.edu/download/CADD/${params.cadd_version}/${params.genome}/whole_genome_SNVs_inclAnno.tsv.gz"
338+
) : Channel.empty()
338339

339340
process DownloadCADD {
340341
tag {caddFile}
@@ -352,7 +353,7 @@ process DownloadCADD {
352353
script:
353354
"""
354355
wget --quiet ${caddFile}
355-
tabix *.tsv.gz
356+
wget --quiet ${caddFile}.tbi
356357
"""
357358
}
358359

conf/munin.config

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,16 @@ env {
1111
}
1212

1313
params {
14-
cadd_InDels = "/data0/btb/cache/CADD/${params.genome}/InDels.tsv.gz"
15-
cadd_InDels_tbi = "/data0/btb/cache/CADD/${params.genome}/InDels.tsv.gz.tbi"
16-
cadd_WG_SNVs = "/data0/btb/cache/CADD/${params.genome}/whole_genome_SNVs.tsv.gz"
17-
cadd_WG_SNVs_tbi = "/data0/btb/cache/CADD/${params.genome}/whole_genome_SNVs.tsv.gz.tbi"
18-
containerPath = '/data0/btb/containers/'
19-
genome_base = params.genome == 'GRCh37' ? '/data0/btb/references/igenomes/Homo_sapiens/GATK/GRCh37/' : params.genome == 'GRCh38' ? '/data0/btb/references/igenomes/Homo_sapiens/GATK/GRCh38/' : 'References/smallGRCh37'
14+
cadd_InDels = "/data1/cache/CADD/${params.genome}/InDels.tsv.gz"
15+
cadd_InDels_tbi = "/data1/cache/CADD/${params.genome}/InDels.tsv.gz.tbi"
16+
cadd_WG_SNVs = "/data1/cache/CADD/${params.genome}/whole_genome_SNVs.tsv.gz"
17+
cadd_WG_SNVs_tbi = "/data1/cache/CADD/${params.genome}/whole_genome_SNVs.tsv.gz.tbi"
18+
containerPath = '/data1/containers/'
19+
genome_base = params.genome == 'smallGRCh37' ? 'References/smallGRCh37' : '/data1/references/igenomes/Homo_sapiens/GATK/${params.genome}/'
2020
singleCPUMem = 15.GB
21-
snpEff_cache = '/data0/btb/cache/snpEff'
21+
snpEff_cache = '/data1/cache/snpEff'
2222
totalMemory = 754.GB
23-
vep_cache = '/data0/btb/cache/VEP'
23+
vep_cache = '/data1/cache/VEP'
2424
}
2525

2626
executor.$local.cpus = 48

containers/vepgrch37/environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ channels:
88

99
dependencies:
1010
- ensembl-vep=95.2
11+
- genesplicer=1.0

containers/vepgrch38/environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ channels:
88

99
dependencies:
1010
- ensembl-vep=95.2
11+
- genesplicer=1.0

docs/ANNOTATION.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,13 @@ Such files are meant to be share between multiple users, so this script is mainl
5353
```
5454
nextflow run build.nf --cadd_cache /Path/To/CADDcache --genome <GENOME>
5555
```
56+
57+
## Using VEP GeneSplicer plugin
58+
59+
To enable the use of the VEP GeneSplicer plugin:
60+
- use the `--genesplicer` flag
61+
62+
Example:
63+
```
64+
nextflow run annotate.nf --tools VEP --annotateVCF file.vcf.gz --genome GRCh38 --genesplicer
65+
```

docs/CONTAINERS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Additional containers need to be downloaded for somatic variant calling with ASC
3535
- Contain **[FastQC][fastqc-link]** 0.11.8
3636
- Contain **[FreeBayes][freebayes-link]** 1.2.0
3737
- Contain **[GATK4][gatk4-link]** 4.0.9.0
38+
- Contain **[GeneSplicer][genesplicer-link]** 1.0
3839
- Contain **[HTSlib][htslib-link]** 1.9
3940
- Contain **[IGVtools][igvtools-link]** 2.3.93
4041
- Contain **[Manta][manta-link]** 1.4.0
@@ -62,12 +63,14 @@ Additional containers need to be downloaded for somatic variant calling with ASC
6263
### vepgrch37 [![vepgrch37-docker status][vepgrch37-docker-badge]][vepgrch37-docker-link]
6364

6465
- Based on `nfcore/base:latest`
66+
- Contain **[GeneSplicer][genesplicer-link]** 1.0
6567
- Contain **[VEP][vep-link]** 95.2
6668
- Contain cache for GRCh37 version 95
6769

6870
### vepgrch38 [![vepgrch38-docker status][vepgrch38-docker-badge]][vepgrch38-docker-link]
6971

7072
- Based on `nfcore/base:latest`
73+
- Contain **[GeneSplicer][genesplicer-link]** 1.0
7174
- Contain **[VEP][vep-link]** 95.2
7275
- Contain cache for GRCh38 version 95
7376

@@ -131,6 +134,7 @@ You'll just need to specify the correct repository either in command line or in
131134
[fastqc-link]: http://www.bioinformatics.babraham.ac.uk/projects/fastqc/
132135
[freebayes-link]: https://github.com/ekg/freebayes
133136
[gatk4-link]: https://github.com/broadinstitute/gatk
137+
[genesplicer-link]: https://ccb.jhu.edu/software/genesplicer/
134138
[htslib-link]: https://github.com/samtools/htslib
135139
[igvtools-link]: http://software.broadinstitute.org/software/igv/
136140
[manta-link]: https://github.com/Illumina/manta

docs/INSTALL.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,15 @@ This workflow itself needs little installation.
1616

1717
Nextflow will automatically fetch Sarek from GitHub when launched if `SciLifeLab/Sarek` is specified as the workflow name.
1818

19+
You can also tell Nextflow to pull Sarek explicitly using:
20+
```bash
21+
nextflow pull SciLifeLab/Sarek
22+
```
23+
1924
Sarek uses Singularity containers to package all the different tools.
2025

21-
If you plan to use the automatic pull of Singularity images, you can use the [`singularity.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/singularity.config) configuration file. You can also set up the Nextflow environnement variable `NXF_SINGULARITY_CACHEDIR` to choose where to store them.
26+
If you plan to use the automatic pull of Singularity images, you can use the [`singularity.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/singularity.config) configuration file.
27+
You can also set the Nextflow environment variable `NXF_SINGULARITY_CACHEDIR` to choose where to store them.
2228

2329
For example
2430
```bash

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ dependencies:
1515
- fontconfig=2.12.6 #for FastQC
1616
- freebayes=1.2.0
1717
- gatk4=4.0.9.0
18+
- genesplicer=1.0
1819
- htslib=1.9
1920
- igvtools=2.3.93
2021
- manta=1.4.0

0 commit comments

Comments
 (0)