-
Notifications
You must be signed in to change notification settings - Fork 1
/
gatk_genotype.sge
executable file
·60 lines (44 loc) · 1.13 KB
/
gatk_genotype.sge
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/bin/bash
#gatk_genotype.sge
#sge submission script to call joint genotypes across samples
#M. Supple
#last modified 9 August 2016
#usage
#qsub gatk_genotype.sge </path/to/gvcf/dir/> <path/to/reference/ref.fa>
#</path/to/gvcf/dir/> is a directory with the input g.vcf files
#<path/to/reference/ref.fa> is the reference sequence
#note: see hardcoded parameters below
#requires
#gatk
#output
#single genotyped VCF
#$ -N gatk_genotype
#$ -o gatk_genotype.output
#$ -q hugemem.q
#$ -l virtual_free=220g,h_vmem=221g
#$ -j y
#$ -cwd
#log the job name, start time, queue, job id and execution host
#(expansions are deliberately left unquoted so the line is whitespace-normalised)
echo Job $JOB_NAME started $(date) in queue $QUEUE with id=$JOB_ID on $(hostname)
#read the command line
#positional arguments: 1 = directory holding the g.vcf files, 2 = reference fasta
gvcfdir="$1"
reference="$2"
#hardcoded heterozygosity values, later handed to GATK as
#-hets and -indelHeterozygosity respectively
het='0.005'
indelhet='.0005'
#determine samples to use
#fail early on bad inputs instead of handing GATK an empty or broken command line
if [[ ! -d "$gvcfdir" ]]; then
  echo "error: gvcf directory '$gvcfdir' does not exist" >&2
  exit 1
fi
if [[ ! -f "$reference" ]]; then
  echo "error: reference '$reference' does not exist" >&2
  exit 1
fi
#collect the S*.g.vcf files with a glob rather than by parsing ls output, so
#paths containing spaces or glob characters are kept as single arguments
samples=("${gvcfdir%/}"/S*.g.vcf)
#an unmatched glob is left as the literal pattern, which is not a real file
if [[ ! -e "${samples[0]}" ]]; then
  echo "error: no S*.g.vcf files found in '$gvcfdir'" >&2
  exit 1
fi
#build one "--variant <file>" pair per sample as separate array elements
variant_args=()
for sample in "${samples[@]}"; do
  variant_args+=(--variant "$sample")
done
#keep the flattened string for the log line
variants="${variant_args[*]}"
echo "$variants"
#call variants
java -Xmx150G -jar /home/msupple/programs/GenomeAnalysisTK.jar \
  -T GenotypeGVCFs \
  -hets "$het" -indelHeterozygosity "$indelhet" \
  -R "$reference" \
  "${variant_args[@]}" \
  -o combined.vcf
status=$?
#propagate a GATK failure so the job does not report success over a bad VCF
if (( status != 0 )); then
  echo "error: GenotypeGVCFs exited with status $status" >&2
  exit "$status"
fi
#final log line: marks the end of the script with a timestamp
echo done $(date)