diff --git a/.github/workflows/add_pr_labels.yml b/.github/workflows/add_pr_labels.yml index d6a6a9c..9d70973 100644 --- a/.github/workflows/add_pr_labels.yml +++ b/.github/workflows/add_pr_labels.yml @@ -32,7 +32,7 @@ jobs: # GITHUB_TOKEN: ${{ secrets.delphis_bot_org_token }} - name: Add label if .janno files exist - if: env.has_janno == 'true' + if: env.has_janno == 'true' && github.event.pull_request.head.repo.full_name == 'poseidon-framework/minotaur-recipes' uses: actions-ecosystem/action-add-labels@v1 with: # github_token: ${{ secrets.delphis_bot_org_token }} diff --git a/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.config b/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.config new file mode 100644 index 0000000..bd778e1 --- /dev/null +++ b/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.config @@ -0,0 +1,41 @@ +// Keep track of config versions +minotaur_release='1.0.0' // The release tag of the poseidon-eager repository used for processing and config file retrieval +config_template_version='1.0.0' +package_config_version='1.0.0' +minotaur_config_base="https://raw.githubusercontent.com/poseidon-framework/poseidon-eager/${minotaur_release}/conf" + +// This configuration file is designed to be used with the nf-core/eager pipeline. +// Instead of having to specify all other configurations for the Minotaur pipeline +// at runtime, they are all contained in this file and loaded automatically upon +// specifying this config file at runtime. Additionally, any parameters that +// need to be altered from the defaults can be specified here. +// +// The intention is to make it easy for users to understand and reproduce the output +// of processing with the Minotaur workflow from the contents of a +// single file. + +// Load configuration profiles. They are loaded from the minotaur_config_base URL, at the release tag set in minotaur_release. 
+// The loaded config includes code that loads the institutional configs from https://github.com/poseidon-framework/minotaur-institutional-configs. +includeConfig "${minotaur_config_base}/Minotaur.config" // Default nf-core/eager parameters for Minotaur processing. + +// The following config file specifies BED files for on-target endogenous DNA calculation and mean coverage as well as pseudohaploid genotyping. +// TODO: Select the appropriate config for the CaptureType of the package. +includeConfig "${minotaur_config_base}/CaptureType_profiles/1240K.config" + +params { + // Keep track of config file versions used when processing + config_profile_description = "${config_profile_description}\n - config_template_version: ${config_template_version}\n - package_config_version: ${package_config_version}" + config_profile_contact = "Thiseas C. Lamnidis (@TCLamnidis)" + + /* + TODO: If you need to change any of the default processing parameters for this package + you can specify these parameters below. + Any parameters not specified in any of the config files default to their nf-core/eager default values. 
+ + For information on all available parameters and their default values see: + https://nf-co.re/eager/2.5.1/parameters + + You can see the latest default values for parameters within poseidon-eager at: + https://github.com/poseidon-framework/poseidon-eager/blob/main/conf/Minotaur.config + */ +} diff --git a/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.janno b/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.janno new file mode 100644 index 0000000..cdac922 --- /dev/null +++ b/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.janno @@ -0,0 +1,11 @@ +Poseidon_ID Genetic_Sex Group_Name Alternative_IDs Relation_To Relation_Degree Relation_Type Relation_Note Collection_ID Country Country_ISO Location Site Latitude Longitude Date_Type Date_C14_Labnr Date_C14_Uncal_BP Date_C14_Uncal_BP_Err Date_BC_AD_Start Date_BC_AD_Median Date_BC_AD_Stop Date_Note MT_Haplogroup Y_Haplogroup Source_Tissue +I10859 F Mexico_Colonial_ n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a B2 Q1a2a1a1 bone_petrous +I10860 F Mexico_Colonial_African n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a L3e2b+152 n/a bone_petrous +I10861 M Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a B2+16278 Q1a2a1a1 bone_petrous +I10862 M Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a A2 Q1a2a1a bone_petrous +I10863 F Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a A2m n/a bone_petrous +I8558 M Mexico_Colonial_o n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a X2d2 I2a2a1b bone_petrous +I8559 M Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 
20.24 -90.46 C14 PSUAMS-4431 380 20 1446 1570 1624 n/a R n/a bone_petrous +I8555 F Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 C14 PSUAMS-4430 335 20 1481 1615 1639 n/a A2+(64) n/a bone_petrous +I8556 F Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a A2r n/a bone_petrous +I8557 F Mexico_Colonial n/a n/a n/a n/a n/a n/a Mexico MX Campeche Campeche 20.24 -90.46 contextual n/a n/a n/a 1540 1610 1680 n/a A2+(64) n/a bone_petrous diff --git a/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.ssf b/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.ssf new file mode 100644 index 0000000..573637b --- /dev/null +++ b/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.ssf @@ -0,0 +1,21 @@ +poseidon_IDs udg library_built notes run_accession sample_accession study_accession sample_alias secondary_sample_accession first_public last_updated instrument_model library_layout library_source instrument_platform library_name library_strategy fastq_ftp fastq_aspera fastq_bytes fastq_md5 read_count submitted_ftp submitted_md5 +I10859 half ds n/a ERR10024820 SAMEA110460290 PRJEB50901 I10859_auto ERS12558320 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558320 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/020/ERR10024820/ERR10024820.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/020/ERR10024820/ERR10024820.fastq.gz 299547010 b52e33953172c9cc125c0004717e36cc 10500049 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024820/I10859_auto.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024820/I10859_auto.bam.bai 6d8218d59d2b359a9eda9056fb2cd070;9033be2c8f161b8b8dff468bd2d7a788 +I10860 half ds n/a ERR10024822 SAMEA110460292 PRJEB50901 I10860_auto ERS12558322 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558322 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/022/ERR10024822/ERR10024822.fastq.gz 
fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/022/ERR10024822/ERR10024822.fastq.gz 173605539 ecd6e523cc8879911603c6b8233a5e8d 5845153 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024822/I10860_auto.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024822/I10860_auto.bam.bai 2f9e63dded86c72c9f0d058788deda81;e1252775805908d320e7130d8d9116d7 +I10860 half ds n/a ERR10024823 SAMEA110460293 PRJEB50901 I10860_mt ERS12558323 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558323 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/023/ERR10024823/ERR10024823.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/023/ERR10024823/ERR10024823.fastq.gz 1710042 3a5c1f02190b55f7335c1396899b3dd8 97318 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024823/I10860_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024823/I10860_mt.bam.bai e294ec10218d8a714df665a17a912c03;8b08fe6051a428d1d63b7c669c0bfa3b +I10861 half ds n/a ERR10024824 SAMEA110460294 PRJEB50901 I10861_auto ERS12558324 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558324 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/024/ERR10024824/ERR10024824.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/024/ERR10024824/ERR10024824.fastq.gz 202652666 96a037f98e7b9be878aef98b9ddbe9b9 6608872 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024824/I10861_auto.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024824/I10861_auto.bam.bai efb6758fade0da4bef5effd1393ae132;4455ebc52988f33eb380bfe8095f1b58 +I10861 half ds n/a ERR10024825 SAMEA110460295 PRJEB50901 I10861_mt ERS12558325 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558325 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/025/ERR10024825/ERR10024825.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/025/ERR10024825/ERR10024825.fastq.gz 1591243 f0cf33874043b27ad23f53606b134d74 88092 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024825/I10861_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024825/I10861_mt.bam.bai 35de16aa41f23efb3a973bd7b5d8f8f1;2ea76d6f28015939372f21465d8f64fb +I10862 half ds n/a ERR10024826 
SAMEA110460296 PRJEB50901 I10862_auto ERS12558326 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558326 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/026/ERR10024826/ERR10024826.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/026/ERR10024826/ERR10024826.fastq.gz 172569326 ed36dbd332430655c97450986befd877 5911868 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024826/I10862_auto.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024826/I10862_auto.bam.bai c7ea6a786dcc8fa7660af56108f11a76;e8314f9614fe7a0cce4748228dd76535 +I10863 half ds n/a ERR10024828 SAMEA110460298 PRJEB50901 I10863_auto ERS12558328 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558328 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/028/ERR10024828/ERR10024828.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/028/ERR10024828/ERR10024828.fastq.gz 399969993 3db35b7d6cc684c2811ac21b87d886ba 13805187 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024828/I10863_auto.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024828/I10863_auto.bam.bai 31ee33f822086e23d6ac1a3935cabaf2;afab3f2556b6992edcb2c0afef8fd8b8 +I8558 half ds n/a ERR10024836 SAMEA110460306 PRJEB50901 I8558_auto ERS12558336 2022-08-07 2022-08-07 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558336 OTHER 0 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024836/I8558_auto.bam dbc563d88e769f01b640fb88fceac28b +I8558 half ds n/a ERR10024837 SAMEA110460307 PRJEB50901 I8558_mt ERS12558337 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558337 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/037/ERR10024837/ERR10024837.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/037/ERR10024837/ERR10024837.fastq.gz 443549 6ebac4d22bae77c80d4e807212872e61 20021 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024837/I8558_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024837/I8558_mt.bam.bai 42691afc59127a2bb097faf4fec0a442;186e3ef209e7585078971ee770ace5f8 +I8559 half ds n/a ERR10024839 SAMEA110460309 PRJEB50901 I8559_mt ERS12558339 2022-08-07 2022-08-08 Illumina HiSeq X 
SINGLE GENOMIC ILLUMINA ERS12558339 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/039/ERR10024839/ERR10024839.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/039/ERR10024839/ERR10024839.fastq.gz 395981 a71e771484127cedae344949313f6d8b 17877 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024839/I8559_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024839/I8559_mt.bam.bai ff622b7c39714f925802522f3d6ca72b;39fd548c8780a30e1f93cb6e783791ae +I10859 half ds n/a ERR10024821 SAMEA110460291 PRJEB50901 I10859_mt ERS12558321 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558321 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/021/ERR10024821/ERR10024821.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/021/ERR10024821/ERR10024821.fastq.gz 4784965 f29196418c329647e4400087740b0b60 263041 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024821/I10859_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024821/I10859_mt.bam.bai 361b1cb823c9c9cf0831e142520a6439;930782ca7105775f37811ff3b477321b +I10862 half ds n/a ERR10024827 SAMEA110460297 PRJEB50901 I10862_mt ERS12558327 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558327 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/027/ERR10024827/ERR10024827.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/027/ERR10024827/ERR10024827.fastq.gz 1378040 f88ecdca84065186eea8c26487f89abb 78650 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024827/I10862_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024827/I10862_mt.bam.bai ff36e70b76c92effc97943e88d74abf3;d1b5172cbc0e2f2d4c59adfa0ef54393 +I10863 half ds n/a ERR10024829 SAMEA110460299 PRJEB50901 I10863_mt ERS12558329 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558329 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/029/ERR10024829/ERR10024829.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/029/ERR10024829/ERR10024829.fastq.gz 4258100 79a3b2e29a82da51060777b5bc9372ac 226286 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024829/I10863_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024829/I10863_mt.bam.bai 
8298a549f5af819c38167a9074487772;9351a683caf3995e424caeb793d935b1 +I8555 half ds n/a ERR10024830 SAMEA110460300 PRJEB50901 I8555_auto ERS12558330 2022-08-07 2022-08-07 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558330 OTHER 0 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024830/I8555_auto.bam 9a050bc3bf289482878d8d2a050b1c40 +I8556 half ds n/a ERR10024832 SAMEA110460302 PRJEB50901 I8556_auto ERS12558332 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558332 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/032/ERR10024832/ERR10024832.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/032/ERR10024832/ERR10024832.fastq.gz 10069607 fc5eaef693aa8781b72a74838801f2d9 373844 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024832/I8556_auto.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024832/I8556_auto.bam.bai 4b070f9fd03730cf6f5a698658dba143;65517a967ff15291d5937c827c25286c +I8555 half ds n/a ERR10024831 SAMEA110460301 PRJEB50901 I8555_mt ERS12558331 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558331 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/031/ERR10024831/ERR10024831.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/031/ERR10024831/ERR10024831.fastq.gz 261250 67e3316bc126ae6c288e21221e086b69 11338 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024831/I8555_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024831/I8555_mt.bam.bai c4929a7c4a9499e395673b79f47a3df4;8fe761372b7d4c33601665636b38054f +I8556 half ds n/a ERR10024833 SAMEA110460303 PRJEB50901 I8556_mt ERS12558333 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558333 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/033/ERR10024833/ERR10024833.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/033/ERR10024833/ERR10024833.fastq.gz 111468 3bf94c8ac5dbf5cedbcb72b00eb44c3f 5006 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024833/I8556_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024833/I8556_mt.bam.bai 0db8191b86f143f72cb6c41ebc059236;dc3ac28aefd3964eb04058a83d6d4dc8 +I8557 half ds n/a ERR10024834 SAMEA110460304 
PRJEB50901 I8557_auto ERS12558334 2022-08-07 2022-08-07 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558334 OTHER 0 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024834/I8557_auto.bam c960aba6eed82c7da8ea69445d7d88d9 +I8557 half ds n/a ERR10024835 SAMEA110460305 PRJEB50901 I8557_mt ERS12558335 2022-08-07 2022-08-08 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558335 OTHER ftp.sra.ebi.ac.uk/vol1/fastq/ERR100/035/ERR10024835/ERR10024835.fastq.gz fasp.sra.ebi.ac.uk:/vol1/fastq/ERR100/035/ERR10024835/ERR10024835.fastq.gz 315440 b9be6056bf59344ed4a03c96f490fc7e 14220 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024835/I8557_mt.bam;ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024835/I8557_mt.bam.bai de7d94f822891b6fc5f146e653e78c62;dba1653f35a2e9d526373a5228e1a7a9 +I8559 half ds n/a ERR10024838 SAMEA110460308 PRJEB50901 I8559_auto ERS12558338 2022-08-07 2022-08-07 Illumina HiSeq X SINGLE GENOMIC ILLUMINA ERS12558338 OTHER 0 ftp.sra.ebi.ac.uk/vol1/run/ERR100/ERR10024838/I8559_auto.bam 3ce524a5460c0c903fbf3892c4dfcf28 diff --git a/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.tsv b/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.tsv new file mode 100644 index 0000000..c40d911 --- /dev/null +++ b/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.tsv @@ -0,0 +1,21 @@ +Sample_Name Library_ID Lane Colour_Chemistry SeqType Organism Strandedness UDG_Treatment R1 R2 BAM R1_target_file R2_target_file BAM_target +I10859 I10859_ERS12558320 1 4 SE Homo sapiens (modern human) double half /I10859_ERS12558320_L1_R1.fastq.gz NA NA ERR10024820.fastq.gz NA NA +I10860 I10860_ERS12558322 1 4 SE Homo sapiens (modern human) double half /I10860_ERS12558322_L1_R1.fastq.gz NA NA ERR10024822.fastq.gz NA NA +I10860 I10860_ERS12558323 1 4 SE Homo sapiens (modern human) double half /I10860_ERS12558323_L1_R1.fastq.gz NA NA ERR10024823.fastq.gz NA NA +I10861 I10861_ERS12558324 1 4 SE Homo sapiens (modern human) double half /I10861_ERS12558324_L1_R1.fastq.gz 
NA NA ERR10024824.fastq.gz NA NA +I10861 I10861_ERS12558325 1 4 SE Homo sapiens (modern human) double half /I10861_ERS12558325_L1_R1.fastq.gz NA NA ERR10024825.fastq.gz NA NA +I10862 I10862_ERS12558326 1 4 SE Homo sapiens (modern human) double half /I10862_ERS12558326_L1_R1.fastq.gz NA NA ERR10024826.fastq.gz NA NA +I10863 I10863_ERS12558328 1 4 SE Homo sapiens (modern human) double half /I10863_ERS12558328_L1_R1.fastq.gz NA NA ERR10024828.fastq.gz NA NA +I8558 I8558_ERS12558336 1 4 SE Homo sapiens (modern human) double half NA NA /I8558_ERS12558336_L1.bam NA NA I8558_auto.bam +I8558 I8558_ERS12558337 1 4 SE Homo sapiens (modern human) double half /I8558_ERS12558337_L1_R1.fastq.gz NA NA ERR10024837.fastq.gz NA NA +I8559 I8559_ERS12558339 1 4 SE Homo sapiens (modern human) double half /I8559_ERS12558339_L1_R1.fastq.gz NA NA ERR10024839.fastq.gz NA NA +I10859 I10859_ERS12558321 1 4 SE Homo sapiens (modern human) double half /I10859_ERS12558321_L1_R1.fastq.gz NA NA ERR10024821.fastq.gz NA NA +I10862 I10862_ERS12558327 1 4 SE Homo sapiens (modern human) double half /I10862_ERS12558327_L1_R1.fastq.gz NA NA ERR10024827.fastq.gz NA NA +I10863 I10863_ERS12558329 1 4 SE Homo sapiens (modern human) double half /I10863_ERS12558329_L1_R1.fastq.gz NA NA ERR10024829.fastq.gz NA NA +I8555 I8555_ERS12558330 1 4 SE Homo sapiens (modern human) double half NA NA /I8555_ERS12558330_L1.bam NA NA I8555_auto.bam +I8556 I8556_ERS12558332 1 4 SE Homo sapiens (modern human) double half /I8556_ERS12558332_L1_R1.fastq.gz NA NA ERR10024832.fastq.gz NA NA +I8555 I8555_ERS12558331 1 4 SE Homo sapiens (modern human) double half /I8555_ERS12558331_L1_R1.fastq.gz NA NA ERR10024831.fastq.gz NA NA +I8556 I8556_ERS12558333 1 4 SE Homo sapiens (modern human) double half /I8556_ERS12558333_L1_R1.fastq.gz NA NA ERR10024833.fastq.gz NA NA +I8557 I8557_ERS12558334 1 4 SE Homo sapiens (modern human) double half NA NA /I8557_ERS12558334_L1.bam NA NA I8557_auto.bam +I8557 I8557_ERS12558335 1 4 SE Homo sapiens 
(modern human) double half /I8557_ERS12558335_L1_R1.fastq.gz NA NA ERR10024835.fastq.gz NA NA
+I8559 I8559_ERS12558338 1 4 SE Homo sapiens (modern human) double half NA NA /I8559_ERS12558338_L1.bam NA NA I8559_auto.bam
diff --git a/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.tsv_patch.sh b/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.tsv_patch.sh
new file mode 100755
index 0000000..a73f9af
--- /dev/null
+++ b/packages/2022_Tiesler_colonialCampeche/2022_Tiesler_colonialCampeche.tsv_patch.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+set -uo pipefail ## Pipefail, complain on new unassigned variables.
+
+## Track the version of the TSV_patch template used
+VERSION='0.2.1dev'
+
+## This script is applied to the eager input TSV file locally to edit the dummy
+## path to the fastQ files added by `create_eager_input.sh` to a real local
+## path provided as a positional argument. Any further local tweaks to the
+## TSV before running eager should be added below that in the form of bash
+## commands to aid in reproducibility.
+
+## usage: tsv_patch.sh <local_data_dir> <input_tsv> <path/to/source_me.sh>
+
+## Print an error message to stderr and abort.
+tsv_patch_fail() {
+  printf '%s: %s\n' "$(basename "${0}")" "$*" >&2
+  exit 1
+}
+
+## All three positional arguments are required; say so instead of tripping `set -u`.
+if [[ $# -lt 3 ]]; then
+  tsv_patch_fail "usage: $(basename "${0}") <local_data_dir> <input_tsv> <path/to/source_me.sh>"
+fi
+
+local_data_dir="$(readlink -f "${1}")"
+input_tsv="$(readlink -f "${2}")"
+output_tsv="$(dirname "${local_data_dir}")/$(basename -s ".tsv" "${input_tsv}").finalised.tsv"
+columns_to_keep=("Sample_Name" "Library_ID" "Lane" "Colour_Chemistry" "SeqType" "Organism" "Strandedness" "UDG_Treatment" "R1" "R2" "BAM")
+## Dummy path prefix that is swapped for the real local data directory below.
+## NOTE(review): assumed to be the placeholder written by `create_eager_input.sh` -- confirm.
+data_path_placeholder='<PATH_TO_DATA>'
+## Path to helper function script should be provided as 3rd argument. https://github.com/poseidon-framework/poseidon-eager/blob/main/scripts/source_me.sh
+source "$(readlink -f "${3}")" || tsv_patch_fail "could not source helper script '${3}'"
+## The helper's version is recorded at the end of this script; fail early if it is missing.
+: "${HELPER_FUNCTION_VERSION:?not set by the sourced helper script}"
+
+## Read the column names from the (tab-separated) header row of the input TSV.
+tsv_header=()
+IFS=$'\t' read -r -a tsv_header < "${input_tsv}"
+[[ ${#tsv_header[@]} -gt 0 ]] || tsv_patch_fail "could not read a header from '${input_tsv}'"
+
+## Index non-proliferated columns and exclude them from the finalised TSV
+cut_selector=''
+for col_name in "${columns_to_keep[@]}"; do
+  ## Look the column up in the TSV header, not in the list of wanted columns.
+  idx="$(get_index_of "${col_name}" "${tsv_header[@]}")"
+  ## Anything that is not a non-negative integer (e.g. -1 or empty) is treated as "not found".
+  if [[ "${idx}" =~ ^[0-9]+$ ]]; then
+    cut_selector+="$((idx + 1))," ## cut uses 1-based indexing
+  fi
+done
+[[ -n "${cut_selector}" ]] || tsv_patch_fail "none of the expected columns were found in '${input_tsv}'"
+
+## Remove added columns. Note that cut always keeps the column order of the input file.
+cut -f "${cut_selector%,}" "${input_tsv}" > "${output_tsv}" || tsv_patch_fail "could not write '${output_tsv}'"
+## Escape the characters that are special on the replacement side of `s|...|...|`.
+escaped_data_dir="$(printf '%s' "${local_data_dir}" | sed -e 's/[\\&|]/\\&/g')"
+sed -i -e "s|${data_path_placeholder}|${escaped_data_dir}|g" "${output_tsv}" || tsv_patch_fail "could not patch data paths in '${output_tsv}'"
+
+## Any further commands to edit the file before finalisation should be added below as shown
+# sed -i -e 's/replace_this/with_this/g' "${output_tsv}"
+
+## Keep track of versions
+version_file="$(dirname "${input_tsv}")/script_versions.txt"
+## Remove versions from older run if there
+if [[ -f "${version_file}" ]]; then
+  grep -v -F -e "$(basename "${0}")" -e "source_me.sh for final TSV" "${version_file}" > "${version_file}.new"
+else
+  : > "${version_file}.new"
+fi
+## Then add new versions
+printf '%s:\t%s\n' "$(basename "${0}")" "${VERSION}" >> "${version_file}.new"
+printf 'source_me.sh for final TSV:\t%s\n' "${HELPER_FUNCTION_VERSION}" >> "${version_file}.new"
+mv "${version_file}.new" "${version_file}"
diff --git a/packages/2022_Tiesler_colonialCampeche/script_versions.txt b/packages/2022_Tiesler_colonialCampeche/script_versions.txt
new file mode 100644
index 0000000..13ace45
--- /dev/null
+++ b/packages/2022_Tiesler_colonialCampeche/script_versions.txt
@@ -0,0 +1,2 @@
+create_eager_input.sh: 0.5.1
+source_me.sh for initial TSV: 0.5.2