diff --git a/analyses/input/palette.txt b/analyses/input/palette.txt new file mode 100644 index 0000000..716d107 --- /dev/null +++ b/analyses/input/palette.txt @@ -0,0 +1,3 @@ +#e40032 +cornflowerblue +#008000 diff --git a/analyses/input/test-input.tsv b/analyses/input/test-input.tsv index 239c379..272b50b 100644 --- a/analyses/input/test-input.tsv +++ b/analyses/input/test-input.tsv @@ -1,3 +1,5 @@ Kids_First_Participant_ID cohort_participant_id chr start end ref alt distance source Consequence Hugo_Symbol plot_group broad_histology -PT_JYZA0PNV C95571 chr11 108315911 108315911 G A 0 germline missense_variant&splice_region_variant ATM Low-grade glioma Low-grade astrocytic tumor -PT_BQFM8NH0 C2722113 chr11 44171609 44171609 A G -2 germline splice_acceptor_variant EXT2 Non-neoplastic tumor Benign tumor +PT_40HTPY49 C99753 chr17 31201036 31201036 G A -1 somatic splice_acceptor_variant NF1 DIPG or DMG Diffuse astrocytic and oligodendroglial tumor +PT_TF84B5J9 C3533913 chr1 240801514 240801514 G A -6 somatic splice_region_variant,intron_variant,splice_polypyrimidine_tract_variant RGS7 Oligodendroglioma Diffuse astrocytic and oligodendroglial tumor +PT_RDWE18AC C4114473 chr12 6951966 6951966 C T -17 somatic intron_variant,splice_polypyrimidine_tract_variant PTPN6 Other high-grade glioma Diffuse astrocytic and oligodendroglial tumor +PT_4347ZBEX C216603 chr11 108272506 108272506 A G -26 somatic intron_variant ATM Other high-grade glioma Diffuse astrocytic and oligodendroglial tumor diff --git a/analyses/plots/ATM-PT_4347ZBEX-chr11:108271506-108273506.pdf b/analyses/plots/ATM-PT_4347ZBEX-chr11:108271506-108273506.pdf new file mode 100644 index 0000000..5582fc1 Binary files /dev/null and b/analyses/plots/ATM-PT_4347ZBEX-chr11:108271506-108273506.pdf differ diff --git a/analyses/plots/NF1-PT_40HTPY49-chr17:31200036-31202036.pdf b/analyses/plots/NF1-PT_40HTPY49-chr17:31200036-31202036.pdf new file mode 100644 index 0000000..d27a5d7 Binary files /dev/null and b/analyses/plots/NF1-PT_40HTPY49-chr17:31200036-31202036.pdf differ diff --git a/analyses/plots/PTPN6-PT_RDWE18AC-chr12:6950966-6952966.pdf b/analyses/plots/PTPN6-PT_RDWE18AC-chr12:6950966-6952966.pdf new file mode 100644 index 0000000..812ce8d Binary files /dev/null and b/analyses/plots/PTPN6-PT_RDWE18AC-chr12:6950966-6952966.pdf differ diff --git a/analyses/plots/RGS7-PT_TF84B5J9-chr1:240800514-240802514.pdf b/analyses/plots/RGS7-PT_TF84B5J9-chr1:240800514-240802514.pdf new file mode 100644 index 0000000..1857c31 Binary files /dev/null and b/analyses/plots/RGS7-PT_TF84B5J9-chr1:240800514-240802514.pdf differ diff --git a/analyses/results/PT_40HTPY49-NF1-chr17:31200036-31202036.tsv b/analyses/results/PT_40HTPY49-NF1-chr17:31200036-31202036.tsv new file mode 100644 index 0000000..22c4345 --- /dev/null +++ b/analyses/results/PT_40HTPY49-NF1-chr17:31200036-31202036.tsv @@ -0,0 +1 @@ +PT_40HTPY49 bams/663ed64d-47e8-4bec-a0fb-79a61b9574c8-PT_40HTPY49-NF1-chr17:31200036-31202036.bam 663ed64d-47e8-4bec-a0fb-79a61b9574c8 diff --git a/analyses/results/PT_4347ZBEX-ATM-chr11:108271506-108273506.tsv b/analyses/results/PT_4347ZBEX-ATM-chr11:108271506-108273506.tsv new file mode 100644 index 0000000..3b186f7 --- /dev/null +++ b/analyses/results/PT_4347ZBEX-ATM-chr11:108271506-108273506.tsv @@ -0,0 +1,6 @@ +PT_4347ZBEX bams/1c03b784-4cd1-44a7-a089-d81045ecb7f5-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam 1c03b784-4cd1-44a7-a089-d81045ecb7f5 +PT_4347ZBEX bams/8c59e6b4-3553-4ac3-9fbb-2b0ff8225a57-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam 8c59e6b4-3553-4ac3-9fbb-2b0ff8225a57 +PT_4347ZBEX bams/0d9c68f8-a507-4a50-b52c-cc4f80a43e61-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam 0d9c68f8-a507-4a50-b52c-cc4f80a43e61 +PT_4347ZBEX bams/f1f38907-2974-42a5-a45c-9d3ac1c919df-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam f1f38907-2974-42a5-a45c-9d3ac1c919df +PT_4347ZBEX bams/9aa5f343-bea5-40a5-ad10-fc35b035db2c-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam 9aa5f343-bea5-40a5-ad10-fc35b035db2c +PT_4347ZBEX bams/07db69a8-5f22-4d6c-9b0c-3de147750de9-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam 07db69a8-5f22-4d6c-9b0c-3de147750de9 diff --git a/analyses/results/PT_RDWE18AC-PTPN6-chr12:6950966-6952966.tsv b/analyses/results/PT_RDWE18AC-PTPN6-chr12:6950966-6952966.tsv new file mode 100644 index 0000000..c671ed2 --- /dev/null +++ b/analyses/results/PT_RDWE18AC-PTPN6-chr12:6950966-6952966.tsv @@ -0,0 +1 @@ +PT_RDWE18AC bams/a511f66a-f51c-48b8-bba4-788f0a4024bb-PT_RDWE18AC-PTPN6-chr12:6950966-6952966.bam a511f66a-f51c-48b8-bba4-788f0a4024bb diff --git a/analyses/results/PT_TF84B5J9-RGS7-chr1:240800514-240802514.tsv b/analyses/results/PT_TF84B5J9-RGS7-chr1:240800514-240802514.tsv new file mode 100644 index 0000000..fe47545 --- /dev/null +++ b/analyses/results/PT_TF84B5J9-RGS7-chr1:240800514-240802514.tsv @@ -0,0 +1 @@ +PT_TF84B5J9 bams/8c898407-550a-4a12-8e02-70d3837475d5-PT_TF84B5J9-RGS7-chr1:240800514-240802514.bam 8c898407-550a-4a12-8e02-70d3837475d5 diff --git a/analyses/results/bams/md5sum.txt b/analyses/results/bams/md5sum.txt index 0278c9e..55b483b 100644 --- a/analyses/results/bams/md5sum.txt +++ b/analyses/results/bams/md5sum.txt @@ -1,4 +1,18 @@ -b813cb028bbacb56f7e86146dfcec3a2 results/bams/49629206-ef0a-4636-83cf-b37021e60def-PT_JYZA0PNV-ATM-chr11:108305911-108325911.bam -881cf3a5cf83ff062256184f072c5ca9 results/bams/49629206-ef0a-4636-83cf-b37021e60def-PT_JYZA0PNV-ATM-chr11:108305911-108325911.bam.bai -cd15f9a0ca11bf83e2682325004f031e results/bams/ccc08827-804f-4966-95bd-8fc960e45093-PT_BQFM8NH0-EXT2-chr11:44161609-44181609.bam -d3cb164ed575c1859886acac74ba6183 results/bams/ccc08827-804f-4966-95bd-8fc960e45093-PT_BQFM8NH0-EXT2-chr11:44161609-44181609.bam.bai +f62bf553c3db2b9fb144df554477489d results/bams/07db69a8-5f22-4d6c-9b0c-3de147750de9-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam +4f3e4ca851c4355d4035a3175c905a3c results/bams/07db69a8-5f22-4d6c-9b0c-3de147750de9-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam.bai +9b6df5998f8788ae28a7daa3fd6c5bf8 results/bams/0d9c68f8-a507-4a50-b52c-cc4f80a43e61-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam +25b4296848b1ce76e59176dd539d74e4 results/bams/0d9c68f8-a507-4a50-b52c-cc4f80a43e61-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam.bai +70dd448523195a0eb2bfc5548da7ee1b results/bams/1c03b784-4cd1-44a7-a089-d81045ecb7f5-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam +0fa550bdf250cc6992e39cce08296ba5 results/bams/1c03b784-4cd1-44a7-a089-d81045ecb7f5-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam.bai +0c037b4944eeb7ebb98fed6783c6fc6a results/bams/663ed64d-47e8-4bec-a0fb-79a61b9574c8-PT_40HTPY49-NF1-chr17:31200036-31202036.bam +1c4217a69be28488abafd02f7011cc6e results/bams/663ed64d-47e8-4bec-a0fb-79a61b9574c8-PT_40HTPY49-NF1-chr17:31200036-31202036.bam.bai +0d0489d210a8380b56472aea82a47ffb results/bams/8c59e6b4-3553-4ac3-9fbb-2b0ff8225a57-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam +12068a3499bebeb97b0ddaad92743536 results/bams/8c59e6b4-3553-4ac3-9fbb-2b0ff8225a57-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam.bai +123959c5f584da5df0fa7b223496e262 results/bams/8c898407-550a-4a12-8e02-70d3837475d5-PT_TF84B5J9-RGS7-chr1:240800514-240802514.bam +75c68e7eadb2ca948fc4b39d65dcf873 results/bams/8c898407-550a-4a12-8e02-70d3837475d5-PT_TF84B5J9-RGS7-chr1:240800514-240802514.bam.bai +0ed8b395e9181ba5774c6a42c09e3819 results/bams/9aa5f343-bea5-40a5-ad10-fc35b035db2c-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam +cbb569ea87983495397ff0a577453036 results/bams/9aa5f343-bea5-40a5-ad10-fc35b035db2c-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam.bai +0fd43e5e8a0391bec1e0c8c3f502d3e7 results/bams/a511f66a-f51c-48b8-bba4-788f0a4024bb-PT_RDWE18AC-PTPN6-chr12:6950966-6952966.bam +17cdd453ac057423c953502cb398845e results/bams/a511f66a-f51c-48b8-bba4-788f0a4024bb-PT_RDWE18AC-PTPN6-chr12:6950966-6952966.bam.bai +98868b9270b6fad2736bdf78aaac9a59 results/bams/f1f38907-2974-42a5-a45c-9d3ac1c919df-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam +5f4c6b31d9d67fd23772195a7254eb1c results/bams/f1f38907-2974-42a5-a45c-9d3ac1c919df-PT_4347ZBEX-ATM-chr11:108271506-108273506.bam.bai diff --git a/analyses/01-cram-to-bam.sh b/analyses/run-ggsashimi.sh similarity index 66% rename from analyses/01-cram-to-bam.sh rename to analyses/run-ggsashimi.sh index ab2fa68..b0f41cc 100644 --- a/analyses/01-cram-to-bam.sh +++ b/analyses/run-ggsashimi.sh @@ -5,7 +5,7 @@ kf_id_col=1 # KF patient ID column chr_col=3 # Chromosome pos_col=4 # Position label_col=11 # Additional label to add to plot for identification, i.e. gene -window=10000 # Bases to plot either side of the position given +window=1000 # Bases to plot either side of the position given ## Set up input files while getopts i:m:k:c:p:l: opt; do @@ -49,6 +49,26 @@ else fi fi +## Download GENCODE v39 annotations +URL="https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_39/gencode.v39.annotation.gtf.gz" +gencode="../data/gencode.v39.annotation.gtf" + +if [ -f "$gencode" ]; then + echo "Using gencode: $gencode" +else + echo "Downloading gencode..." + curl -L -o "$gencode.gz" "$URL" + + # Verify download succeeded + if [ $? -eq 0 ]; then + gunzip $gencode.gz + echo "Download complete: $gencode" + else + echo "Download failed." + exit 1 + fi +fi + #################################################### # Loop through variant file @@ -66,7 +86,8 @@ while read line; do ## get file id crams=$(grep "$KF_id" $manifest | grep "Aligned.out.sorted.cram" | grep -v "crai" | cut -f2) - + input_path="results/${KF_id}-${label}-${coordinates}.tsv" + ## loop through each CRAM per patient ## TODO: Make select from BS_ID an option? for cram in $crams; do @@ -76,17 +97,25 @@ while read line; do prefix=$(basename "$cram" .Aligned.out.sorted.cram) echo "Converting $cram_path" - bam_path="results/bams/${prefix}-${KF_id}-${label}-${coordinates}.bam" - # input_path="variants/${prefix}-${KF_id}-${label}-${coordinates}.tsv" - + bam_path="bams/${prefix}-${KF_id}-${label}-${coordinates}.bam" + samtools view \ -T $ref_genome \ -b \ "$cram_path" \ "$coordinates" \ - -o "$bam_path" + -o "results/$bam_path" - samtools index "$bam_path" + samtools index "results/$bam_path" + # create input tsv for ggsashimi + echo "$KF_id"$'\t'"$bam_path"$'\t'"$prefix" >> "$input_path" done -done < <(tail -n +2 $input_file) \ No newline at end of file + + # run ggsashimi + python3 ../ggsashimi.py -b "$input_path" -c "$coordinates" --shrink \ + -g $gencode -P input/palette.txt \ + -C 1 -O 1 -A median_j -M 3 \ + -o "plots/${label}-${KF_id}-${coordinates}" + +done < <(tail -n +2 $input_file)