-
Notifications
You must be signed in to change notification settings - Fork 1
/
gatk_filter_prefinal.sge
executable file
·75 lines (58 loc) · 1.86 KB
/
gatk_filter_prefinal.sge
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/bin/bash
#gatk_filter_prefinal.sge
#sge submission script for basic filtering of a vcf to remove junk and
#generate a table of per-site values for thresholding
#M. Supple
#last updated 29 August 2016

#usage
#qsub gatk_filter_prefinal.sge <vcf> <path/to/reference/ref.fa>
#<vcf> is an input vcf to be filtered
#<path/to/reference/ref.fa> is the reference sequence

#optional environment
#GATK_JAR overrides the path to GenomeAnalysisTK.jar, e.g.
#qsub -v GATK_JAR=/path/to/GenomeAnalysisTK.jar gatk_filter_prefinal.sge <vcf> <ref.fa>

#requires
#java
#gatk (GenomeAnalysisTK.jar)

#output (prefix is <vcf> with a trailing ".vcf" removed)
#<prefix>.SNPs.vcf               SNPs only
#<prefix>.SNPs.prefilt.vcf       SNPs with the maxMAF filter annotated
#<prefix>.SNPs.prefilt.pass.vcf  unfiltered, biallelic SNPs
#stats.table                     table of values, written to the working directory
#                                (fixed name: a second run in the same directory overwrites it)

#exit status
#0 on success, 2 on bad usage, 1 if an input is unreadable or a gatk step fails

#sge submission info
#$ -N gatk_filter_prefinal
#$ -o gatk_filter_prefinal.output
#$ -l virtual_free=20g,h_vmem=20.1g
#$ -cwd
#$ -j y

#NOTE: only POSIX sh constructs are used below, so the script behaves the same
#whichever bourne-style shell ends up interpreting it

#report an error on stderr and abort the job
die() {
  printf '%s\n' "gatk_filter_prefinal: $*" >&2
  exit 1
}

#run one gatk tool with the given arguments; abort the job if it fails so that
#later steps never run on a missing or partial intermediate file
run_gatk() {
  java -jar "$gatk_jar" "$@"
  gatk_status=$?
  if [ "$gatk_status" -ne 0 ]; then
    die "gatk exited with status $gatk_status while running: $*"
  fi
}

#print some sge info (the sge variables are empty when run outside of sge)
echo "Job ${JOB_NAME:-} started $(date) in queue ${QUEUE:-} with id=${JOB_ID:-} on $(hostname)"

#read in input
if [ "$#" -lt 2 ]; then
  printf '%s\n' "usage: qsub gatk_filter_prefinal.sge <vcf> <path/to/reference/ref.fa>" >&2
  exit 2
fi
vcf=$1
reference=$2
gatk_jar=${GATK_JAR:-/home/msupple/programs/GenomeAnalysisTK.jar}

#fail early with a clear message instead of a java stack trace
[ -r "$vcf" ] || die "cannot read input vcf: $vcf"
[ -r "$reference" ] || die "cannot read reference: $reference"
[ -r "$gatk_jar" ] || die "cannot read gatk jar: $gatk_jar"

#common prefix for all intermediate and final vcf files
prefix=${vcf%.vcf}

#subset for just SNPs
run_gatk \
  -T SelectVariants \
  -R "$reference" \
  -V "$vcf" \
  -selectType SNP \
  -o "${prefix}.SNPs.vcf"

#flag sites where the alternate allele frequency is 1.0 (filter name "maxMAF")
run_gatk \
  -T VariantFiltration \
  -R "$reference" \
  -V "${prefix}.SNPs.vcf" \
  -filter "AF == 1.0" \
  -filterName "maxMAF" \
  -o "${prefix}.SNPs.prefilt.vcf"

#remove non-biallelic and failed loci from file
run_gatk \
  -T SelectVariants \
  -R "$reference" \
  -V "${prefix}.SNPs.prefilt.vcf" \
  --excludeFiltered \
  --restrictAllelesTo BIALLELIC \
  -o "${prefix}.SNPs.prefilt.pass.vcf"

#make table of values for thresholding
run_gatk \
  -T VariantsToTable \
  -R "$reference" \
  -V "${prefix}.SNPs.prefilt.pass.vcf" \
  -F CHROM -F POS -F QUAL -F AF -F ExcessHet -F InbreedingCoeff -F MQ -F MQRankSum -F QD \
  -F HET -F HOM-REF -F HOM-VAR -F NO-CALL -F NCALLED \
  --allowMissingData \
  -o stats.table

#print note that job completed (only reached if every gatk step succeeded)
echo "done $(date)"