add subsampling, update graphviz/rulegraph

sequana · May 12, 2023 · 9dfaaf8 · 9dfaaf8
1 parent 3d33d90
commit 9dfaaf8
Show file tree

Hide file tree

Showing 5 changed files with 42 additions and 7 deletions.
diff --git a/README.rst b/README.rst
@@ -147,6 +147,8 @@ Changelog
 ========= ====================================================================
 Version   Description
 ========= ====================================================================
+1.1.0     * add subsample option (default 1,000,000 reads) to handle large
+            runs such as PromethION
 1.0.1     * CSV can now handle sample or samplename column name in samplesheet.
           * Fix the pyco file paths, update requirements and doc
 1.0.0     Stable release ready for production

diff --git a/sequana_pipelines/nanomerge/config.yaml b/sequana_pipelines/nanomerge/config.yaml
@@ -25,11 +25,20 @@ samplesheet: "samplesheet.csv"
 # by uncommenting and filling the field extra_prefixes_to_strip.
 #
 # For instance, if you have files called prefix.mess.A.fastq.gz and prefix.B.fastq.gz
-# 'prefix.' will be removed automatically because it is common, but not 'mess'. 
+# 'prefix.' will be removed automatically because it is common, but not 'mess'.
 # use those prefixes in left-to-right order: ['prefix', 'mess'] or ['prefix.mess']
 #
 # extra_prefixes_to_strip: []
 
 apptainers:
-  pycoqc: https://zenodo.org/record/7746269/files/pycoqc_2.5.2.img
+  pycoqc: "https://zenodo.org/record/7746269/files/pycoqc_2.5.2.img"
+  graphviz: "https://zenodo.org/record/7928262/files/graphviz_7.0.5.img"
+
+
+###################################################################################
+#
+# subsample: if set (not None), this number of reads is randomly selected
+#     instead of using the entire dataset
+pycoqc:
+  subsample: 1000000
 
diff --git a/sequana_pipelines/nanomerge/nanomerge.rules b/sequana_pipelines/nanomerge/nanomerge.rules
@@ -105,11 +105,13 @@ if config["summary"]:
             "pyco/pyco.html"
         log:
             "pyco/pyco.log"
+        params:
+            sample=config["pycoqc"]["subsample"]
         container:
             config["apptainers"]["pycoqc"]
         shell:
             """
-            pycoQC --summary_file {input} -o {output} > {log} 2>&1
+            pycoQC --summary_file {input} -o {output} --sample {params.sample} > {log} 2>&1
             """
 
 
@@ -125,15 +127,27 @@ rule merge:
 
 
 rule rulegraph:
-    input: str(manager.snakefile)
+    input:
+        workflow.snakefile
     output:
-        svg = ".sequana/rulegraph.svg"
+        "rulegraph/rulegraph.dot"
     params:
         configname = "config.yaml"
     wrapper:
         f"{sequana_wrapper_branch}/wrappers/rulegraph"
 
 
+rule dot2svg:
+    input:
+        "rulegraph/rulegraph.dot"
+    output:
+        ".sequana/rulegraph.svg"
+    container:
+        config['apptainers']['graphviz']
+    shell:
+        """dot -Tsvg {input} -o {output}"""
+
+
 rule html_report:
     input:
         list(expected_fastqs) + qc_file

diff --git a/sequana_pipelines/nanomerge/schema.yaml b/sequana_pipelines/nanomerge/schema.yaml
@@ -22,3 +22,13 @@ mapping:
     "apptainers":
         type: any
         required: true
+
+    "pycoqc":
+        type: map
+        mapping:
+          "subsample":
+            type: int
+            required: False
+
+
+
diff --git a/setup.py b/setup.py
@@ -10,8 +10,8 @@
 
 
 _MAJOR               = 1
-_MINOR               = 0
-_MICRO               = 1
+_MINOR               = 1
+_MICRO               = 0
 version = f"{_MAJOR}.{_MINOR}.{_MICRO}"
 release = f"{_MAJOR}.{_MINOR}"