From 317d796171c07bcd111d288836ca1f2dae453308 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Fri, 3 Jun 2022 10:23:18 +1200 Subject: [PATCH] Handle cases where the WDL workflow output contains exprs --- wdl2cwl/main.py | 9 +- wdl2cwl/tests/cwl_files/ATAC.cwl | 8 +- .../tests/cwl_files/BuildCembaReferences.cwl | 12 +- wdl2cwl/tests/cwl_files/align_and_count.cwl | 6 +- .../align_and_count_multiple_report.cwl | 6 +- wdl2cwl/tests/cwl_files/blast.cwl | 217 ++++++++++++ wdl2cwl/tests/cwl_files/gwas.cwl | 4 +- wdl2cwl/tests/cwl_files/merge_svs.cwl | 10 +- wdl2cwl/tests/cwl_files/workflow_inputs.cwl | 8 +- wdl2cwl/tests/test_cwl.py | 1 + wdl2cwl/tests/wdl_files/blast.wdl | 325 ++++++++++++++++++ 11 files changed, 576 insertions(+), 30 deletions(-) create mode 100644 wdl2cwl/tests/cwl_files/blast.cwl create mode 100644 wdl2cwl/tests/wdl_files/blast.wdl diff --git a/wdl2cwl/main.py b/wdl2cwl/main.py index 290fa744..990c59d0 100644 --- a/wdl2cwl/main.py +++ b/wdl2cwl/main.py @@ -722,7 +722,10 @@ def get_workflow_outputs( with WDLSourceLine(item.info, ConversionException): output_name = item.name item_expr = item.info.expr - output_source = item_expr.expr.name[::-1].replace(".", "/", 1)[::-1] + if isinstance(item_expr, WDL.Expr.Base): + output_source = self.get_expr(item_expr) + else: + output_source = item_expr.expr.name[::-1].replace(".", "/", 1)[::-1] # replace just the last occurrence of a period with a slash # by first reversing the string and the replace the first occurrence # then reversing the result @@ -1019,14 +1022,14 @@ def get_expr_get(self, wdl_get_expr: WDL.Expr.Get) -> str: def get_expr_ident(self, wdl_ident_expr: WDL.Expr.Ident) -> str: """Translate WDL Ident Expressions.""" - id_name = wdl_ident_expr.name + id_name: str = wdl_ident_expr.name referee = wdl_ident_expr.referee optional = wdl_ident_expr.type.optional if referee: with WDLSourceLine(referee, ConversionException): if isinstance(referee, WDL.Tree.Call): return id_name - if referee.expr and ( + if hasattr(referee, 'expr') and referee.expr is not None and ( wdl_ident_expr.name in self.optional_cwl_null or wdl_ident_expr.name not in self.non_static_values ): diff --git a/wdl2cwl/tests/cwl_files/ATAC.cwl b/wdl2cwl/tests/cwl_files/ATAC.cwl index aa36ec2c..e23faacb 100644 --- a/wdl2cwl/tests/cwl_files/ATAC.cwl +++ b/wdl2cwl/tests/cwl_files/ATAC.cwl @@ -1106,14 +1106,14 @@ steps: - script.bash outputs: - id: ATAC.bam_chrM_reads_compliant_output - outputSource: MakeCompliantChrMBAM/compliant_bam_output + outputSource: MakeCompliantChrMBAM.compliant_bam_output type: File - id: ATAC.bam_filtered_and_sorted_compliant_output - outputSource: MakeCompliantFilteredAndSortedBAM/compliant_bam_output + outputSource: MakeCompliantFilteredAndSortedBAM.compliant_bam_output type: File - id: ATAC.snap_qc_output - outputSource: SnapPre/snap_qc_output + outputSource: SnapPre.snap_qc_output type: File - id: ATAC.snap_output - outputSource: SnapCellByBin/snap_output + outputSource: SnapCellByBin.snap_output type: File diff --git a/wdl2cwl/tests/cwl_files/BuildCembaReferences.cwl b/wdl2cwl/tests/cwl_files/BuildCembaReferences.cwl index 98e36baf..97819ec7 100644 --- a/wdl2cwl/tests/cwl_files/BuildCembaReferences.cwl +++ b/wdl2cwl/tests/cwl_files/BuildCembaReferences.cwl @@ -347,24 +347,24 @@ steps: - script.bash outputs: - id: BuildCembaReferences.reference_fasta_dict - outputSource: CreateReferenceDictionary/ref_dict_output + outputSource: CreateReferenceDictionary.ref_dict_output type: File - id: BuildCembaReferences.reference_fasta_index - outputSource: CreateReferenceFastaIndex/ref_index_output + outputSource: CreateReferenceFastaIndex.ref_index_output type: File - id: BuildCembaReferences.fwd_converted_reference_fasta - outputSource: Convert/fwd_converted_reference_fasta_output + outputSource: Convert.fwd_converted_reference_fasta_output type: File - id: BuildCembaReferences.rev_converted_reference_fasta - outputSource: Convert/rev_converted_reference_fasta_output + outputSource: Convert.rev_converted_reference_fasta_output type: File - id: BuildCembaReferences.fwd_bowtie2_index_files - outputSource: IndexForward/bowtie2_index_files + outputSource: IndexForward.bowtie2_index_files type: items: File type: array - id: BuildCembaReferences.rev_bowtie2_index_files - outputSource: IndexReverse/bowtie2_index_files + outputSource: IndexReverse.bowtie2_index_files type: items: File type: array diff --git a/wdl2cwl/tests/cwl_files/align_and_count.cwl b/wdl2cwl/tests/cwl_files/align_and_count.cwl index 913bffe3..d68dbab9 100644 --- a/wdl2cwl/tests/cwl_files/align_and_count.cwl +++ b/wdl2cwl/tests/cwl_files/align_and_count.cwl @@ -133,11 +133,11 @@ steps: - script.bash outputs: - id: align_and_count_report.report - outputSource: align_and_count/report + outputSource: align_and_count.report type: File - id: align_and_count_report.report_top_hits - outputSource: align_and_count/report_top_hits + outputSource: align_and_count.report_top_hits type: File - id: align_and_count_report.viral_core_version - outputSource: align_and_count/viralngs_version + outputSource: align_and_count.viralngs_version type: string diff --git a/wdl2cwl/tests/cwl_files/align_and_count_multiple_report.cwl b/wdl2cwl/tests/cwl_files/align_and_count_multiple_report.cwl index 980b708f..9a238165 100644 --- a/wdl2cwl/tests/cwl_files/align_and_count_multiple_report.cwl +++ b/wdl2cwl/tests/cwl_files/align_and_count_multiple_report.cwl @@ -274,11 +274,11 @@ steps: contain at least one item.";} else { return "";}} outputs: - id: align_and_count_multiple_report.report - outputSource: align_and_count_summary/count_summary + outputSource: align_and_count_summary.count_summary type: File - id: align_and_count_multiple_report.report_top_hits - outputSource: align_and_count_summary_top_hits/count_summary + outputSource: align_and_count_summary_top_hits.count_summary type: File - id: align_and_count_multiple_report.viral_core_version - outputSource: align_and_count_summary/viralngs_version + outputSource: align_and_count_summary.viralngs_version type: string diff --git a/wdl2cwl/tests/cwl_files/blast.cwl b/wdl2cwl/tests/cwl_files/blast.cwl new file mode 100644 index 00000000..a6848143 --- /dev/null +++ b/wdl2cwl/tests/cwl_files/blast.cwl @@ -0,0 +1,217 @@ +cwlVersion: v1.2 +id: blast +class: Workflow +requirements: + - class: InlineJavascriptRequirement +inputs: + - id: blast_docker_override + type: + - string + - 'null' + - id: blast_docker + type: + - string + - 'null' + - id: queryfa + type: File + - id: fname + default: /sfs/blastdb/2019_ncov/nucl/v6/ncov + type: string + - id: method + default: blastn + type: string + - id: outfmt + default: 7 + type: int + - id: evalue + default: 10.0 + type: float + - id: Outfile + type: + - string + - 'null' + - id: threads + default: 8 + type: int + - id: runblastp.max_target_seqs + default: 100 + type: int + - id: runblastp.word_size + default: 6 + type: int + - id: runblastp.seg + default: no + type: string + - id: runblastp.comp_based_stats + default: '2' + type: string + - id: runblastp.matrix + default: BLOSUM62 + type: string + - id: runblastp.gapopen + default: 11 + type: int + - id: runblastp.gapextend + default: 1 + type: int + - id: runblastp.max_hsps + type: + - int + - 'null' + - id: runblastp.taxids + type: + - string + - 'null' + - id: runblastp.negative_taxids + type: + - string + - 'null' + - id: runblastp.lcase_masking + default: false + type: boolean + - id: runblastn.max_target_seqs + default: 100 + type: int + - id: runblastn.word_size + default: 28 + type: int + - id: runblastn.reward + default: 1 + type: int + - id: runblastn.penalty + default: -2 + type: int + - id: runblastn.strand + default: both + type: string + - id: runblastn.gapopen + default: 0 + type: int + - id: runblastn.gapextend + default: 0 + type: int + - id: runblastn.dust + default: "'20 64 1'" + type: string + - id: runblastn.max_hsps + type: + - int + - 'null' + - id: runblastn.tasks + default: megablast + type: string + - id: runblastn.taxids + type: + - string + - 'null' + - id: runblastn.negative_taxids + type: + - string + - 'null' + - id: runblastn.lcase_masking + default: false + type: boolean + - id: runblastx.max_target_seqs + default: 100 + type: int + - id: runblastx.word_size + default: 6 + type: int + - id: runblastx.seg + default: "'12 2.2 2.5'" + type: string + - id: runblastx.comp_based_stats + default: '2' + type: string + - id: runblastx.matrix + default: BLOSUM62 + type: string + - id: runblastx.gapopen + default: 11 + type: int + - id: runblastx.gapextend + default: 1 + type: int + - id: runblastx.taxids + type: + - string + - 'null' + - id: runblastx.negative_taxids + type: + - string + - 'null' + - id: runblastx.max_hsps + type: + - int + - 'null' + - id: runblastx.lcase_masking + default: false + type: boolean + - id: runtblastn.max_target_seqs + default: 100 + type: int + - id: runtblastn.word_size + default: 6 + type: int + - id: runtblastn.seg + default: "'12 2.2 2.5'" + type: string + - id: runtblastn.comp_based_stats + default: '2' + type: string + - id: runtblastn.matrix + default: BLOSUM62 + type: string + - id: runtblastn.gapopen + default: 11 + type: int + - id: runtblastn.gapextend + default: 1 + type: int + - id: runtblastn.lcase_masking + default: false + type: boolean + - id: runtblastn.max_hsps + type: + - int + - 'null' + - id: runtblastn.taxids + type: + - string + - 'null' + - id: runtblastn.negative_taxids + type: + - string + - 'null' + - id: runtblastx.taxids + type: + - string + - 'null' + - id: runtblastx.word_size + default: 3 + type: int + - id: runtblastx.max_target_seqs + default: 100 + type: int + - id: runtblastx.seg + default: "'12 2.2 2.5'" + type: string + - id: runtblastx.matrix + default: BLOSUM62 + type: string + - id: runtblastx.lcase_masking + default: false + type: boolean + - id: runtblastx.negative_taxids + type: + - string + - 'null' + - id: runtblastx.max_hsps + type: + - int + - 'null' +steps: [] +outputs: + - id: blast.fina_output + outputSource: '[inputs["runtblastx.out"] === null ' + type: File diff --git a/wdl2cwl/tests/cwl_files/gwas.cwl b/wdl2cwl/tests/cwl_files/gwas.cwl index 69061dc2..eb4515d1 100644 --- a/wdl2cwl/tests/cwl_files/gwas.cwl +++ b/wdl2cwl/tests/cwl_files/gwas.cwl @@ -188,8 +188,8 @@ steps: - script.bash outputs: - id: gwas.logistic - outputSource: run_gwas/logistic + outputSource: run_gwas.logistic type: File - id: gwas.manhattan_plot - outputSource: create_plot/manhattan_plot + outputSource: create_plot.manhattan_plot type: File diff --git a/wdl2cwl/tests/cwl_files/merge_svs.cwl b/wdl2cwl/tests/cwl_files/merge_svs.cwl index 65f13e88..2253c127 100644 --- a/wdl2cwl/tests/cwl_files/merge_svs.cwl +++ b/wdl2cwl/tests/cwl_files/merge_svs.cwl @@ -574,17 +574,17 @@ steps: - script.bash outputs: - id: mergeSvs.bcftools_merged_sv_vcf - outputSource: filterBlocklistBcftools/filtered_sv_vcf + outputSource: filterBlocklistBcftools.filtered_sv_vcf type: File - id: mergeSvs.bcftools_merged_annotated_tsv - outputSource: bcftoolsAnnotateVariants/sv_variants_tsv + outputSource: bcftoolsAnnotateVariants.sv_variants_tsv type: File - id: mergeSvs.bcftools_merged_filtered_annotated_tsv - outputSource: bcftoolsAnnotsvFilter/filtered_tsv + outputSource: bcftoolsAnnotsvFilter.filtered_tsv type: File - id: mergeSvs.survivor_merged_sv_vcf - outputSource: filterBlocklistSurvivor/filtered_sv_vcf + outputSource: filterBlocklistSurvivor.filtered_sv_vcf type: File - id: mergeSvs.survivor_merged_annotated_tsv - outputSource: survivorAnnotateVariants/sv_variants_tsv + outputSource: survivorAnnotateVariants.sv_variants_tsv type: File diff --git a/wdl2cwl/tests/cwl_files/workflow_inputs.cwl b/wdl2cwl/tests/cwl_files/workflow_inputs.cwl index e50ea2c3..ff782b14 100644 --- a/wdl2cwl/tests/cwl_files/workflow_inputs.cwl +++ b/wdl2cwl/tests/cwl_files/workflow_inputs.cwl @@ -79,14 +79,14 @@ steps: stdout: _stdout outputs: - id: foo.first_result - outputSource: first + outputSource: inputs.first type: string - id: foo.third_result - outputSource: third + outputSource: inputs.third type: float - id: foo.echo_out - outputSource: echo/out + outputSource: echo.out type: string - id: foo.echo_result - outputSource: echo/result + outputSource: echo.result type: string diff --git a/wdl2cwl/tests/test_cwl.py b/wdl2cwl/tests/test_cwl.py index 1e2b981b..742c18ea 100644 --- a/wdl2cwl/tests/test_cwl.py +++ b/wdl2cwl/tests/test_cwl.py @@ -68,6 +68,7 @@ def test_meta(caplog: pytest.LogCaptureFixture) -> None: ("vt.wdl"), ("whatshap.wdl"), ("workflow_inputs.wdl"), + ("blast.wdl"), ], ) def test_wdls(description_name: str) -> None: diff --git a/wdl2cwl/tests/wdl_files/blast.wdl b/wdl2cwl/tests/wdl_files/blast.wdl new file mode 100644 index 00000000..450c1661 --- /dev/null +++ b/wdl2cwl/tests/wdl_files/blast.wdl @@ -0,0 +1,325 @@ +version 1.0 + +workflow blast { + input { + String? blast_docker_override + String blast_docker= select_first([blast_docker_override,"swr.cn-south-1.myhuaweicloud.com/cngbdb/blast:1.2"]) + File queryfa + String fname = '/sfs/blastdb/2019_ncov/nucl/v6/ncov' + String method = 'blastn' + Int outfmt = 7 + Float evalue = 10 + String Outfile = basename(queryfa)+'.blast_result.txt' + Int threads = 8 + } + if (method == 'blastp') { + call runblastp{ + input: + Fname = fname, + Queryfa = queryfa, + docker = blast_docker, + outfmt = outfmt, + evalue = evalue, + Outfile = Outfile, + threads = threads + } + } + if ( method == 'blastn'){ + call runblastn{ + input: + Fname = fname, + Queryfa = queryfa, + docker = blast_docker, + outfmt = outfmt, + evalue = evalue, + Outfile = Outfile, + threads = threads + } + } + if ( method == 'blastx'){ + call runblastx{ + input: + Fname = fname, + Queryfa = queryfa, + docker = blast_docker, + outfmt = outfmt, + evalue = evalue, + Outfile = Outfile, + threads = threads + } + } + if ( method == 'queryfa'){ + call runtblastn{ + input: + Fname = fname, + Queryfa = queryfa, + docker = blast_docker, + outfmt = outfmt, + evalue = evalue, + Outfile = Outfile, + threads = threads + } + } + if ( method == 'tblastx'){ + call runtblastx{ + input: + Fname = fname, + Queryfa = queryfa, + docker = blast_docker, + outfmt = outfmt, + evalue = evalue, + Outfile = Outfile, + threads = threads + } + } + output { + File fina_output =select_first([runtblastx.out,runblastp.out,runblastn.out,runblastx.out,runtblastn.out]) + } +} + +task runblastn { + input { + String docker + File Queryfa + String Fname + String Outfile + Int threads + #blast optional + Int outfmt + Int max_target_seqs = 100 + Float evalue + Int word_size = 28 + Int reward = 1 + Int penalty = -2 + String strand = 'both' + Int gapopen = 0 + Int gapextend = 0 + String dust = "'20 64 1'" + Int? max_hsps + String tasks = "megablast" + String? taxids + String? negative_taxids + Boolean lcase_masking = false + } + runtime{ + docker : docker + cpu : "8" + memory : "16G" + } + command { + set -e + blastn -db "${Fname}" \ + -show_gis \ + -query ${Queryfa} \ + -outfmt ${outfmt} \ + -out ${Outfile} \ + -max_target_seqs ${max_target_seqs} \ + -evalue ${evalue} \ + -word_size ${word_size} \ + -penalty ${penalty} \ + -reward ${reward} \ + -dust ${dust} \ + -gapopen ${gapopen} \ + -gapextend ${gapextend} \ + -task ${tasks} \ + -strand ${strand} \ + -num_threads ${threads} \ + ${true='-lcase_masking' false='' lcase_masking} ${"-max_hsps "+max_hsps} ${"-taxids " +taxids} ${"-negative_taxids " +negative_taxids}\ + + } + output { + File out = "${Outfile}" + } +} + +task runblastp { + input { + String docker + File Queryfa + String Fname + #blast optional + Int outfmt + String Outfile + Float evalue + Int threads + Int max_target_seqs = 100 + Int word_size = 6 + String seg = "no" + String comp_based_stats = "2" + String matrix = "BLOSUM62" + Int gapopen = 11 + Int gapextend = 1 + Int? max_hsps + String? taxids + String? negative_taxids + Boolean lcase_masking = false + } + runtime{ + docker : docker + cpu : "8" + memory : "16G" + } + command { + set -e + blastp -db "${Fname}" \ + -query ${Queryfa} \ + -outfmt ${outfmt} \ + -out ${Outfile} \ + -max_target_seqs ${max_target_seqs} \ + -comp_based_stats ${comp_based_stats} \ + -evalue ${evalue} \ + -word_size ${word_size} \ + -matrix ${matrix} \ + -seg ${seg} \ + -gapopen ${gapopen} \ + -gapextend ${gapextend} \ + -num_threads ${threads} \ + ${true='-lcase_masking' false='' lcase_masking} ${"-max_hsps "+max_hsps} ${"-taxids " +taxids} ${"-negative_taxids " +negative_taxids} \ + + } + output { + File out = "${Outfile}" + } +} + +task runblastx { + input { + File Queryfa + String Fname + Int outfmt + Float evalue + String Outfile + String docker + Int threads + Int max_target_seqs = 100 + Int word_size = 6 + String seg = "'12 2.2 2.5'" + String comp_based_stats = "2" + String matrix = "BLOSUM62" + Int gapopen = 11 + Int gapextend = 1 + String? taxids + String? negative_taxids + Int? max_hsps + Boolean lcase_masking = false + } + runtime{ + docker : docker + cpu : "8" + memory : "16G" + } + command { + set -e + blastx -db "${Fname}" \ + -query ${Queryfa} \ + -outfmt ${outfmt} \ + -out ${Outfile} \ + -max_target_seqs ${max_target_seqs} \ + -comp_based_stats ${comp_based_stats} \ + -evalue ${evalue} \ + -word_size ${word_size} \ + -matrix ${matrix} \ + -seg ${seg} \ + -gapopen ${gapopen} \ + -gapextend ${gapextend} \ + -num_threads ${threads} \ + ${true='-lcase_masking' false='' lcase_masking} ${"-max_hsps "+max_hsps} ${"-taxids " +taxids} ${"-negative_taxids " +negative_taxids}\ + + } + output { + File out = "$${Outfile}" + } +} + +task runtblastn { + input { + File Queryfa + String Fname + Int outfmt + Float evalue + String Outfile + String docker + Int threads + Int max_target_seqs = 100 + Int word_size = 6 + String seg = "'12 2.2 2.5'" + String comp_based_stats = "2" + String matrix = "BLOSUM62" + Int gapopen = 11 + Int gapextend = 1 + Boolean lcase_masking = false + Int? max_hsps + String? taxids + String? negative_taxids + } + runtime{ + docker :docker + cpu : "8" + memory : "16G" + } + command { + set -e + tblastn -db "${Fname}" \ + -query ${Queryfa} \ + -outfmt ${outfmt} \ + -out ${Outfile} \ + -max_target_seqs ${max_target_seqs} \ + -comp_based_stats ${comp_based_stats} \ + -evalue ${evalue} \ + -word_size ${word_size} \ + -matrix ${matrix} \ + -seg ${seg} \ + -gapopen ${gapopen} \ + -gapextend ${gapextend} \ + -num_threads ${threads} \ + ${true='-lcase_masking' false='' lcase_masking} ${"-max_hsps "+max_hsps} ${"-taxids " +taxids} ${"-negative_taxids " +negative_taxids}\ + + } + output { + File out = "${Outfile}" + } +} + +task runtblastx { + input { + File Queryfa + String Fname + Int outfmt + String Outfile + Int threads + Float evalue + String docker + String? taxids + Int word_size = 3 + Int max_target_seqs = 100 + String seg = "'12 2.2 2.5'" + String matrix = "BLOSUM62" + Boolean lcase_masking = false + String? negative_taxids + Int? max_hsps + } + runtime{ + docker :docker + cpu : "8" + memory : "16G" + } + command { + set -e + tblastx -db "${Fname}" \ + -query ${Queryfa} \ + -outfmt ${outfmt} \ + -out ${Outfile} \ + -max_target_seqs ${max_target_seqs} \ + -evalue ${evalue} \ + -word_size ${word_size} \ + -matrix ${matrix} \ + -seg ${seg} \ + -num_threads ${threads} \ + ${true='-lcase_masking' false='' lcase_masking} ${"-max_hsps "+max_hsps} ${"-taxids " +taxids} ${"-negative_taxids " +negative_taxids}\ + } + output { + File out = "${Outfile}" + } +} + +