Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/add_pr_labels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
# GITHUB_TOKEN: ${{ secrets.delphis_bot_org_token }}

- name: Add label if .janno files exist
if: env.has_janno == 'true'
if: env.has_janno == 'true' && github.event.pull_request.head.repo.full_name == 'poseidon-framework/minotaur-recipes'
uses: actions-ecosystem/action-add-labels@v1
with:
# github_token: ${{ secrets.delphis_bot_org_token }}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Keep track of config versions
minotaur_release='1.0.0' // The release tag of the poseidon-eager repository used for processing and config file retrieval
config_template_version='1.0.0'
package_config_version='1.0.0'
minotaur_config_base="https://raw.githubusercontent.com/poseidon-framework/poseidon-eager/${minotaur_release}/conf"

// This configuration file is designed to be used with the nf-core/eager pipeline.
// Instead of having to specify all other configurations for the Minotaur pipeline
// at runtime, they are all contained in this file and loaded automatically upon
// specifying this config file during runtime. Additionally, any parameters that
// need to be altered from the defaults can be specified here.
//
// The intention is to make it easy for users to understand and reproduce the output
// of processing with the Minotaur workflow from the contents of a
// single file.

// Load configuration profiles. They are loaded from the minotaur_config_base URL, which is pinned to the minotaur_release tag set at the top of this file.
// The loaded config includes code that loads the institutional configs from https://github.com/poseidon-framework/minotaur-institutional-configs.
includeConfig "${minotaur_config_base}/Minotaur.config" // Default nf-core/eager parameters for Minotaur processing.

// The following config file specifies BED files for on-target endogenous DNA calculation and mean coverage as well as pseudohaploid genotyping.
// TODO: Select the appropriate config for the CaptureType of the package.
includeConfig "${minotaur_config_base}/CaptureType_profiles/1240K.config"

params {
// Keep track of config file versions used when processing
config_profile_description = "${config_profile_description}\n - config_template_version: ${config_template_version}\n - package_config_version: ${package_config_version}"
config_profile_contact = "Thiseas C. Lamnidis (@TCLamnidis)"

/*
TODO: If you need to change any of the default processing parameters for this package
you can specify these parameters below.
Any parameters not specified in any of the config files default to their nf-core/eager default values.

For information on all available parameters and their default values see:
https://nf-co.re/eager/2.5.1/parameters

You can see the latest default values for parameters within poseidon-eager at:
https://github.com/poseidon-framework/poseidon-eager/blob/main/conf/Minotaur.config
*/
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Poseidon_ID Genetic_Sex Group_Name Alternative_IDs Relation_To Relation_Degree Relation_Type Relation_Note Collection_ID Country Country_ISO Location Site Latitude Longitude Date_Type Date_C14_Labnr Date_C14_Uncal_BP Date_C14_Uncal_BP_Err Date_BC_AD_Start Date_BC_AD_Median Date_BC_AD_Stop Date_Note MT_Haplogroup Y_Haplogroup Source_Tissue
I10859 F Mexico_Colonial_ n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a B2 Q1a2a1a1 bone_petrous
I10860 F Mexico_Colonial_African n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a L3e2b+152 n/a bone_petrous
I10861 M Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a B2+16278 Q1a2a1a1 bone_petrous
I10862 M Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a A2 Q1a2a1a bone_petrous
I10863 F Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a A2m n/a bone_petrous
I8558 M Mexico_Colonial_o n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a X2d2 I2a2a1b bone_petrous
I8559 M Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 C14 PSUAMS-4431 380 20 1446 1570 1624 n/a R n/a bone_petrous
I8555 F Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 C14 PSUAMS-4430 335 20 1481 1615 1639 n/a A2+(64) n/a bone_petrous
I8556 F Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a A2r n/a bone_petrous
I8557 F Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a A2+(64) n/a bone_petrous
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
poseidon_IDs udg library_built notes run_accession sample_accession study_accession sample_alias secondary_sample_accession first_public last_updated instrument_model library_layout library_source instrument_platform library_name library_strategy fastq_ftp fastq_aspera fastq_bytes fastq_md5 read_count submitted_ftp submitted_md5
I10859 half ds n/a ERR10024820 SAMEA110460290 PRJEB50901 I10859_auto ERS12558320 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558320 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/020/ERR10024820/ERR10024820.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/020/ERR10024820/ERR10024820.fastq.gz 299547010 b52e33953172c9cc125c0004717e36cc 10500049 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024820/I10859_auto.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024820/I10859_auto.bam.bai 6d8218d59d2b359a9eda9056fb2cd070;9033be2c8f161b8b8dff468bd2d7a788
I10860 half ds n/a ERR10024822 SAMEA110460292 PRJEB50901 I10860_auto ERS12558322 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558322 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/022/ERR10024822/ERR10024822.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/022/ERR10024822/ERR10024822.fastq.gz 173605539 ecd6e523cc8879911603c6b8233a5e8d 5845153 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024822/I10860_auto.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024822/I10860_auto.bam.bai 2f9e63dded86c72c9f0d058788deda81;e1252775805908d320e7130d8d9116d7
I10860 half ds n/a ERR10024823 SAMEA110460293 PRJEB50901 I10860_mt ERS12558323 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558323 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/023/ERR10024823/ERR10024823.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/023/ERR10024823/ERR10024823.fastq.gz 1710042 3a5c1f02190b55f7335c1396899b3dd8 97318 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024823/I10860_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024823/I10860_mt.bam.bai e294ec10218d8a714df665a17a912c03;8b08fe6051a428d1d63b7c669c0bfa3b
I10861 half ds n/a ERR10024824 SAMEA110460294 PRJEB50901 I10861_auto ERS12558324 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558324 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/024/ERR10024824/ERR10024824.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/024/ERR10024824/ERR10024824.fastq.gz 202652666 96a037f98e7b9be878aef98b9ddbe9b9 6608872 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024824/I10861_auto.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024824/I10861_auto.bam.bai efb6758fade0da4bef5effd1393ae132;4455ebc52988f33eb380bfe8095f1b58
I10861 half ds n/a ERR10024825 SAMEA110460295 PRJEB50901 I10861_mt ERS12558325 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558325 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/025/ERR10024825/ERR10024825.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/025/ERR10024825/ERR10024825.fastq.gz 1591243 f0cf33874043b27ad23f53606b134d74 88092 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024825/I10861_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024825/I10861_mt.bam.bai 35de16aa41f23efb3a973bd7b5d8f8f1;2ea76d6f28015939372f21465d8f64fb
I10862 half ds n/a ERR10024826 SAMEA110460296 PRJEB50901 I10862_auto ERS12558326 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558326 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/026/ERR10024826/ERR10024826.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/026/ERR10024826/ERR10024826.fastq.gz 172569326 ed36dbd332430655c97450986befd877 5911868 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024826/I10862_auto.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024826/I10862_auto.bam.bai c7ea6a786dcc8fa7660af56108f11a76;e8314f9614fe7a0cce4748228dd76535
I10863 half ds n/a ERR10024828 SAMEA110460298 PRJEB50901 I10863_auto ERS12558328 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558328 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/028/ERR10024828/ERR10024828.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/028/ERR10024828/ERR10024828.fastq.gz 399969993 3db35b7d6cc684c2811ac21b87d886ba 13805187 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024828/I10863_auto.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024828/I10863_auto.bam.bai 31ee33f822086e23d6ac1a3935cabaf2;afab3f2556b6992edcb2c0afef8fd8b8
I8558 half ds n/a ERR10024836 SAMEA110460306 PRJEB50901 I8558_auto ERS12558336 2022-08-07 2022-08-07 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558336 OTHER 0 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024836/I8558_auto.bam dbc563d88e769f01b640fb88fceac28b
I8558 half ds n/a ERR10024837 SAMEA110460307 PRJEB50901 I8558_mt ERS12558337 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558337 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/037/ERR10024837/ERR10024837.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/037/ERR10024837/ERR10024837.fastq.gz 443549 6ebac4d22bae77c80d4e807212872e61 20021 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024837/I8558_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024837/I8558_mt.bam.bai 42691afc59127a2bb097faf4fec0a442;186e3ef209e7585078971ee770ace5f8
I8559 half ds n/a ERR10024839 SAMEA110460309 PRJEB50901 I8559_mt ERS12558339 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558339 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/039/ERR10024839/ERR10024839.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/039/ERR10024839/ERR10024839.fastq.gz 395981 a71e771484127cedae344949313f6d8b 17877 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024839/I8559_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024839/I8559_mt.bam.bai ff622b7c39714f925802522f3d6ca72b;39fd548c8780a30e1f93cb6e783791ae
I10859 half ds n/a ERR10024821 SAMEA110460291 PRJEB50901 I10859_mt ERS12558321 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558321 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/021/ERR10024821/ERR10024821.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/021/ERR10024821/ERR10024821.fastq.gz 4784965 f29196418c329647e4400087740b0b60 263041 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024821/I10859_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024821/I10859_mt.bam.bai 361b1cb823c9c9cf0831e142520a6439;930782ca7105775f37811ff3b477321b
I10862 half ds n/a ERR10024827 SAMEA110460297 PRJEB50901 I10862_mt ERS12558327 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558327 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/027/ERR10024827/ERR10024827.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/027/ERR10024827/ERR10024827.fastq.gz 1378040 f88ecdca84065186eea8c26487f89abb 78650 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024827/I10862_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024827/I10862_mt.bam.bai ff36e70b76c92effc97943e88d74abf3;d1b5172cbc0e2f2d4c59adfa0ef54393
I10863 half ds n/a ERR10024829 SAMEA110460299 PRJEB50901 I10863_mt ERS12558329 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558329 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/029/ERR10024829/ERR10024829.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/029/ERR10024829/ERR10024829.fastq.gz 4258100 79a3b2e29a82da51060777b5bc9372ac 226286 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024829/I10863_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024829/I10863_mt.bam.bai 8298a549f5af819c38167a9074487772;9351a683caf3995e424caeb793d935b1
I8555 half ds n/a ERR10024830 SAMEA110460300 PRJEB50901 I8555_auto ERS12558330 2022-08-07 2022-08-07 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558330 OTHER 0 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024830/I8555_auto.bam 9a050bc3bf289482878d8d2a050b1c40
I8556 half ds n/a ERR10024832 SAMEA110460302 PRJEB50901 I8556_auto ERS12558332 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558332 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/032/ERR10024832/ERR10024832.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/032/ERR10024832/ERR10024832.fastq.gz 10069607 fc5eaef693aa8781b72a74838801f2d9 373844 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024832/I8556_auto.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024832/I8556_auto.bam.bai 4b070f9fd03730cf6f5a698658dba143;65517a967ff15291d5937c827c25286c
I8555 half ds n/a ERR10024831 SAMEA110460301 PRJEB50901 I8555_mt ERS12558331 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558331 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/031/ERR10024831/ERR10024831.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/031/ERR10024831/ERR10024831.fastq.gz 261250 67e3316bc126ae6c288e21221e086b69 11338 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024831/I8555_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024831/I8555_mt.bam.bai c4929a7c4a9499e395673b79f47a3df4;8fe761372b7d4c33601665636b38054f
I8556 half ds n/a ERR10024833 SAMEA110460303 PRJEB50901 I8556_mt ERS12558333 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558333 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/033/ERR10024833/ERR10024833.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/033/ERR10024833/ERR10024833.fastq.gz 111468 3bf94c8ac5dbf5cedbcb72b00eb44c3f 5006 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024833/I8556_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024833/I8556_mt.bam.bai 0db8191b86f143f72cb6c41ebc059236;dc3ac28aefd3964eb04058a83d6d4dc8
I8557 half ds n/a ERR10024834 SAMEA110460304 PRJEB50901 I8557_auto ERS12558334 2022-08-07 2022-08-07 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558334 OTHER 0 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024834/I8557_auto.bam c960aba6eed82c7da8ea69445d7d88d9
I8557 half ds n/a ERR10024835 SAMEA110460305 PRJEB50901 I8557_mt ERS12558335 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558335 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/035/ERR10024835/ERR10024835.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/035/ERR10024835/ERR10024835.fastq.gz 315440 b9be6056bf59344ed4a03c96f490fc7e 14220 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024835/I8557_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024835/I8557_mt.bam.bai de7d94f822891b6fc5f146e653e78c62;dba1653f35a2e9d526373a5228e1a7a9
I8559 half ds n/a ERR10024838 SAMEA110460308 PRJEB50901 I8559_auto ERS12558338 2022-08-07 2022-08-07 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558338 OTHER 0 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024838/I8559_auto.bam 3ce524a5460c0c903fbf3892c4dfcf28
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Sample_Name Library_ID Lane Colour_Chemistry SeqType Organism Strandedness UDG_Treatment R1 R2 BAM R1_target_file R2_target_file BAM_target
I10859 I10859_ERS12558320 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I10859_ERS12558320_L1_R1.fastq.gz NA NA ERR10024820.fastq.gz NA NA
I10860 I10860_ERS12558322 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I10860_ERS12558322_L1_R1.fastq.gz NA NA ERR10024822.fastq.gz NA NA
I10860 I10860_ERS12558323 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I10860_ERS12558323_L1_R1.fastq.gz NA NA ERR10024823.fastq.gz NA NA
I10861 I10861_ERS12558324 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I10861_ERS12558324_L1_R1.fastq.gz NA NA ERR10024824.fastq.gz NA NA
I10861 I10861_ERS12558325 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I10861_ERS12558325_L1_R1.fastq.gz NA NA ERR10024825.fastq.gz NA NA
I10862 I10862_ERS12558326 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I10862_ERS12558326_L1_R1.fastq.gz NA NA ERR10024826.fastq.gz NA NA
I10863 I10863_ERS12558328 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I10863_ERS12558328_L1_R1.fastq.gz NA NA ERR10024828.fastq.gz NA NA
I8558 I8558_ERS12558336 1 4 SE Homo sapiens (modern human) double half NA NA <PATH_TO_DATA>/I8558_ERS12558336_L1.bam NA NA I8558_auto.bam
I8558 I8558_ERS12558337 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I8558_ERS12558337_L1_R1.fastq.gz NA NA ERR10024837.fastq.gz NA NA
I8559 I8559_ERS12558339 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I8559_ERS12558339_L1_R1.fastq.gz NA NA ERR10024839.fastq.gz NA NA
I10859 I10859_ERS12558321 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I10859_ERS12558321_L1_R1.fastq.gz NA NA ERR10024821.fastq.gz NA NA
I10862 I10862_ERS12558327 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I10862_ERS12558327_L1_R1.fastq.gz NA NA ERR10024827.fastq.gz NA NA
I10863 I10863_ERS12558329 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I10863_ERS12558329_L1_R1.fastq.gz NA NA ERR10024829.fastq.gz NA NA
I8555 I8555_ERS12558330 1 4 SE Homo sapiens (modern human) double half NA NA <PATH_TO_DATA>/I8555_ERS12558330_L1.bam NA NA I8555_auto.bam
I8556 I8556_ERS12558332 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I8556_ERS12558332_L1_R1.fastq.gz NA NA ERR10024832.fastq.gz NA NA
I8555 I8555_ERS12558331 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I8555_ERS12558331_L1_R1.fastq.gz NA NA ERR10024831.fastq.gz NA NA
I8556 I8556_ERS12558333 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I8556_ERS12558333_L1_R1.fastq.gz NA NA ERR10024833.fastq.gz NA NA
I8557 I8557_ERS12558334 1 4 SE Homo sapiens (modern human) double half NA NA <PATH_TO_DATA>/I8557_ERS12558334_L1.bam NA NA I8557_auto.bam
I8557 I8557_ERS12558335 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/I8557_ERS12558335_L1_R1.fastq.gz NA NA ERR10024835.fastq.gz NA NA
I8559 I8559_ERS12558338 1 4 SE Homo sapiens (modern human) double half NA NA <PATH_TO_DATA>/I8559_ERS12558338_L1.bam NA NA I8559_auto.bam
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env bash
set -uo pipefail ## Pipefail, complain on new unassigned variables.

## Track the version of the TSV_patch template used
VERSION='0.2.1dev'

## This script is applied to the eager input TSV file locally to edit the dummy
##   path to the fastQ files added by `create_eager_input.sh` to a real local
##   path provided as a positional argument. Any further local tweaks to the
##   TSV before running eager should be added below that in the form of bash
##   commands to aid in reproducibility.
##
## Arguments:
##   $1  Local data directory; substituted for the <PATH_TO_DATA> placeholder.
##   $2  Eager input TSV to patch.
##   $3  Path to source_me.sh. This script uses `get_index_of` and
##       `HELPER_FUNCTION_VERSION` from it.
## Outputs:
##   <parent dir of $1>/<basename of $2 minus .tsv>.finalised.tsv
##   script_versions.txt (next to $2), updated with the versions used.

## usage tsv_patch.sh <local_data_dir> <input_tsv> <path/to/source_me.sh>
if [[ $# -lt 3 ]]; then
  echo "usage: $(basename "${0}") <local_data_dir> <input_tsv> <path/to/source_me.sh>" >&2
  exit 1
fi

local_data_dir="$(readlink -f "${1}")"
input_tsv="$(readlink -f "${2}")"
output_tsv="$(dirname "${local_data_dir}")/$(basename -s ".tsv" "${input_tsv}").finalised.tsv"
columns_to_keep=("Sample_Name" "Library_ID" "Lane" "Colour_Chemistry" "SeqType" "Organism" "Strandedness" "UDG_Treatment" "R1" "R2" "BAM")
source "$(readlink -f "${3}")" ## Path to helper function script should be provided as 3rd argument. https://github.com/poseidon-framework/poseidon-eager/blob/main/scripts/source_me.sh

if [[ ! -r "${input_tsv}" ]]; then
  echo "Input TSV '${input_tsv}' does not exist or is not readable." >&2
  exit 1
fi

## Index non-proliferated columns and exclude them from the finalised TSV
cut_selector=''
tsv_header=()
## Split the header on tabs only, so the lookup matches the fields `cut` sees.
IFS=$'\t' read -r -a tsv_header < <(head -n1 "${input_tsv}")
if [[ ${#tsv_header[@]} -eq 0 ]]; then
  echo "Could not read a header line from '${input_tsv}'." >&2
  exit 1
fi

for col_name in "${columns_to_keep[@]}"; do
  ## Look the column up in the header of the input TSV (not in columns_to_keep
  ##   itself), so the selector follows the real layout of the file.
  ## NOTE(review): assumes get_index_of prints the 0-based position of its first
  ##   argument within the remaining arguments -- confirm against source_me.sh.
  idx="$(get_index_of "${col_name}" "${tsv_header[@]}")"
  ## Anything that is not a plain non-negative integer (e.g. -1 or empty output)
  ##   is treated as "column not found" and skipped.
  if [[ "${idx}" =~ ^[0-9]+$ ]]; then
    cut_selector+="$((idx + 1))," ## cut uses 1-based field numbers
  fi
done

if [[ -z "${cut_selector}" ]]; then
  echo "None of the expected columns were found in '${input_tsv}'." >&2
  exit 1
fi

## Remove added columns. Note that `cut` always emits the selected fields in the
##   order they appear in the input file, regardless of the selector order.
cut -f "${cut_selector%,}" "${input_tsv}" > "${output_tsv}"
## Escape characters that are special in the sed replacement (`&`, `\`) or that
##   clash with the `|` delimiter, so any directory name is inserted literally.
escaped_data_dir="$(printf '%s' "${local_data_dir}" | sed -e 's/[&|\\]/\\&/g')"
sed -i -e "s|<PATH_TO_DATA>|${escaped_data_dir}|g" "${output_tsv}"

## Any further commands to edit the file before finalisation should be added below as shown
# sed -i -e 's/replace_this/with_this/g' "${output_tsv}"

## Keep track of versions
version_file="$(dirname "${input_tsv}")/script_versions.txt"
##    Remove versions from older run if there
if [[ -f "${version_file}" ]]; then
  grep -v -F -e "$(basename "${0}")" -e "source_me.sh for final TSV" "${version_file}" > "${version_file}.new"
else
  ## First run: no previous versions to carry over.
  : > "${version_file}.new"
fi
##    Then add new versions
printf '%s:\t%s\n' "$(basename "${0}")" "${VERSION}" >> "${version_file}.new"
printf '%s:\t%s\n' "source_me.sh for final TSV" "${HELPER_FUNCTION_VERSION}" >> "${version_file}.new"
mv "${version_file}.new" "${version_file}"
Loading