Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -697,7 +697,7 @@ process {
[
"-v FS='\\t'",
"-v OFS='\\t'",
"'{ \$4 = \$1 \":\" \$2 \"-\" \$3",
"'{ \$4 = \"circ_\" \$1 \":\" \$2 \"-\" \$3",
(params.consider_strand ? " \":\" \$6" : ""),
"; print }'",
].join(' ').trim()
Expand Down
3 changes: 2 additions & 1 deletion modules/local/annotation/bed2gtf/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ channels:
- conda-forge
- bioconda
dependencies:
- conda-forge::polars=1.24.0
- conda-forge::polars=1.31.0
- conda-forge::pyyaml=6.0.2
4 changes: 2 additions & 2 deletions modules/local/annotation/bed2gtf/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ process ANNOTATION_BED2GTF {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'oras://community.wave.seqera.io/library/polars:1.24.0--800cd3e4ff805434' :
'community.wave.seqera.io/library/polars:1.24.0--2d2d323e8514e707' }"
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/78/785e4d9624ef7fb24cfee74f815d3e69540e99e0ae299cbd333e047e08706f7e/data' :
'community.wave.seqera.io/library/polars_pyyaml:e53e9c9a38a99374' }"

input:
tuple val(meta), path(bed12), path(db_intersections)
Expand Down
47 changes: 18 additions & 29 deletions modules/local/annotation/bed2gtf/templates/bed2gtf.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,23 @@
#!/usr/bin/env python

import platform
import yaml

import polars as pl

def format_yaml_like(data: dict, indent: int = 0) -> str:
"""Formats a dictionary to a YAML-like string.
# Versions

versions = {
"${task.process}": {
"python": platform.python_version(),
"polars": pl.__version__,
}
}

Args:
data (dict): The dictionary to format.
indent (int): The current indentation level.
with open("versions.yml", "w") as f:
f.write(yaml.dump(versions))

Returns:
str: A string formatted as YAML.
"""
yaml_str = ""
for key, value in data.items():
spaces = " " * indent
if isinstance(value, dict):
yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
else:
yaml_str += f"{spaces}{key}: {value}\\n"
return yaml_str
# Main

exons_only = bool("${exons_only}")

Expand All @@ -31,7 +27,12 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
'readNumber', 'circType', 'gene', 'transcript',
'index', 'flankIntron'
]
df = pl.scan_csv('${bed12}', separator='\\t', has_header=False, new_columns=columns)
try:
df = pl.scan_csv('${bed12}', separator='\\t', has_header=False, new_columns=columns, raise_if_empty=True)
except pl.exceptions.NoDataError:
with open('${prefix}.${suffix}', 'w') as f:
f.write('')
exit(0)

df = df.with_columns(
attributes = pl.lit('gene_id "') + pl.col('gene') + pl.lit('"; transcript_id "') + pl.col('name') + pl.lit('";'),
Expand Down Expand Up @@ -77,15 +78,3 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
df_combined = df_combined.sort('chr', 'start', 'end')

df_combined.collect().write_csv('${prefix}.${suffix}', separator='\\t', include_header=False, quote_style="never")

# Versions

versions = {
"${task.process}": {
"python": platform.python_version(),
"polars": pl.__version__,
}
}

with open("versions.yml", "w") as f:
f.write(format_yaml_like(versions))
4 changes: 2 additions & 2 deletions modules/local/combinebeds/counts/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ channels:
- conda-forge
- bioconda
dependencies:
- conda-forge::polars=1.8.2
- conda-forge::upsetplot=0.9.0
- conda-forge::polars=1.31.0
- conda-forge::pyyaml=6.0.2
4 changes: 2 additions & 2 deletions modules/local/combinebeds/counts/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ process COMBINEBEDS_COUNTS {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'oras://community.wave.seqera.io/library/pandas_polars_pyarrow_upsetplot:8840b96e156438fc' :
'community.wave.seqera.io/library/pandas_polars_pyarrow_upsetplot:6982d93f61d3e2ff' }"
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/78/785e4d9624ef7fb24cfee74f815d3e69540e99e0ae299cbd333e047e08706f7e/data' :
'community.wave.seqera.io/library/polars_pyyaml:e53e9c9a38a99374' }"

input:
tuple val(meta), val(aggregation), path(candidates), path(beds)
Expand Down
73 changes: 38 additions & 35 deletions modules/local/combinebeds/counts/templates/counts.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,23 @@
#!/usr/bin/env python

import platform
import base64
import json
import yaml

import polars as pl

def format_yaml_like(data: dict, indent: int = 0) -> str:
    """Format a (possibly nested) dictionary as a YAML-like string.

    Each key is emitted on its own line. A value that is itself a dict is
    rendered recursively one indentation level deeper; any other value is
    interpolated after the key using its default string formatting.

    Args:
        data (dict): The dictionary to format.
        indent (int): The current indentation level (0 for the top level).

    Returns:
        str: The formatted text; an empty string when ``data`` is empty.
    """
    spaces = " " * indent  # indentation prefix shared by every key at this level
    # NOTE(review): the doubled backslash in the line terminators below is kept
    # exactly as in the original; this file lives under templates/ and appears
    # to be rendered by a template engine before execution -- confirm before
    # changing the escaping.
    parts = []
    for key, value in data.items():
        if isinstance(value, dict):
            # Nested mapping: header line for the key, then the rendered children.
            parts.append(f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}")
        else:
            parts.append(f"{spaces}{key}: {value}\\n")
    # Join once instead of growing a string with += inside the loop.
    return "".join(parts)
# Versions

versions = {
"${task.process}": {
"python": platform.python_version(),
"polars": pl.__version__
}
}

with open("versions.yml", "w") as f:
f.write(yaml.dump(versions))

# Main

max_shift = int("${max_shift}")
consider_strand = "${consider_strand}" == "true"
Expand All @@ -37,11 +31,25 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:

columns = ["chr", "start", "end", "name", "score", "strand"]

df_candidates = pl.scan_csv(candidate_path, has_header=False, separator="\\t", new_columns=columns)
try:
df_candidates = pl.scan_csv(candidate_path, has_header=False, separator="\\t", new_columns=columns, raise_if_empty=True)
except pl.exceptions.NoDataError:
print("No data in ${candidates}")
with open(f"{prefix}.{suffix}", "w") as f:
f.write('')
exit(0)

df_candidates = df_candidates.select(columns)
df_candidates = df_candidates.with_columns(sample=pl.lit("candidate"), tool=pl.lit("candidate"), score=pl.lit(None))

df = pl.scan_csv(bed_paths, has_header=False, separator="\\t", new_columns=columns + ["sample", "tool"])
try:
df = pl.scan_csv(bed_paths, has_header=False, separator="\\t", new_columns=columns + ["sample", "tool"], raise_if_empty=True)
except pl.exceptions.NoDataError:
print("No data in ${beds}")
with open(f"{prefix}.{suffix}", "w") as f:
f.write('')
exit(0)

df_combined = pl.concat([df, df_candidates])

df_combined = df_combined.sort("end" ).with_columns(end_group =pl.col("end" ).diff().fill_null(0).gt(max_shift).cum_sum())
Expand All @@ -62,7 +70,14 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
df = df.filter((pl.col("start") - pl.col("start_right")).abs() <= max_shift)
df = df.filter((pl.col("end") - pl.col("end_right")).abs() <= max_shift)
df = df.group_by(["chr", "start", "end", "strand", "start_group", "end_group", "sample", "tool"]).agg(score=pl.sum("score"))
df = df.collect().lazy()

try:
df = df.collect().lazy()
except pl.exceptions.NoDataError:
print("No data after processing")
with open(f"{prefix}.{suffix}", "w") as f:
f.write('')
exit(0)

samples = df.select("sample").group_by("sample").len().collect()["sample"].to_list()
df = df.collect().pivot(on="sample", values="score", index=["chr", "start", "end", "strand", "start_group", "end_group"], aggregate_function=aggregation).lazy()
Expand All @@ -73,15 +88,3 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
df = df.fill_null(0)

df.sink_csv(f"{prefix}.{suffix}", separator="\\t", include_header=True)

# Versions

versions = {
"${task.process}": {
"python": platform.python_version(),
"polars": pl.__version__
}
}

with open("versions.yml", "w") as f:
f.write(format_yaml_like(versions))
5 changes: 3 additions & 2 deletions modules/local/combinebeds/shifts/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ channels:
- conda-forge
- bioconda
dependencies:
- conda-forge::polars=1.8.2
- conda-forge::altair=5.5.0
- conda-forge::vl-convert-python==1.7.0
- conda-forge::polars=1.31.0
- conda-forge::pyyaml=6.0.2
- conda-forge::vl-convert-python=1.7.0
8 changes: 4 additions & 4 deletions modules/local/combinebeds/shifts/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ process COMBINEBEDS_SHIFTS {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'oras://community.wave.seqera.io/library/altair_polars_vl-convert-python:e6f1dca28de76d13' :
'community.wave.seqera.io/library/altair_polars_vl-convert-python:a6c5ee679445250d' }"
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/cb/cb4ec5e7ad6feeda9b1cd6d194043b6cc0d2952c93a28cddc98fc7d67c078141/data' :
'community.wave.seqera.io/library/altair_polars_pyyaml_vl-convert-python:c19053ed9a1a6146' }"

input:
tuple val(meta), path(beds)

output:
path "*.png" , emit: plots
path "*.json" , emit: multiqc
path "*.png" , emit: plots, optional: true
path "*.json" , emit: multiqc, optional: true
path "versions.yml", emit: versions

script:
Expand Down
58 changes: 23 additions & 35 deletions modules/local/combinebeds/shifts/templates/shifts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,36 @@
import base64
import json
from itertools import product
import yaml

import polars as pl
import altair as alt

def format_yaml_like(data: dict, indent: int = 0) -> str:
    """Format a (possibly nested) dictionary as a YAML-like string.

    Each key is emitted on its own line. A value that is itself a dict is
    rendered recursively one indentation level deeper; any other value is
    interpolated after the key using its default string formatting.

    Args:
        data (dict): The dictionary to format.
        indent (int): The current indentation level (0 for the top level).

    Returns:
        str: The formatted text; an empty string when ``data`` is empty.
    """
    spaces = " " * indent  # indentation prefix shared by every key at this level
    # NOTE(review): the doubled backslash in the line terminators below is kept
    # exactly as in the original; this file lives under templates/ and appears
    # to be rendered by a template engine before execution -- confirm before
    # changing the escaping.
    rendered = []
    for key, value in data.items():
        if isinstance(value, dict):
            # Nested mapping: header line for the key, then the rendered children.
            rendered.append(f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}")
        else:
            rendered.append(f"{spaces}{key}: {value}\\n")
    # Join once instead of growing a string with += inside the loop.
    return "".join(rendered)
# Versions

versions = {
"${task.process}": {
"python": platform.python_version(),
"polars": pl.__version__,
"altair": alt.__version__
}
}

with open("versions.yml", "w") as f:
f.write(yaml.dump(versions))

# Main

meta_id = "${meta.id}"

df = pl.scan_csv("${beds}".split(" "),
separator="\\t",
has_header=False,
new_columns=["chr", "start", "end", "name", "score", "strand", "sample", "tool"])
try:
df = pl.scan_csv("${beds}".split(" "),
separator="\\t",
has_header=False,
raise_if_empty=True,
new_columns=["chr", "start", "end", "name", "score", "strand", "sample", "tool"])
except pl.exceptions.NoDataError:
exit(0)

df = df.group_by("chr", "start", "end", "strand").agg(tools=pl.col("tool").unique(), samples=pl.col("sample").unique())

Expand Down Expand Up @@ -105,16 +106,3 @@ def get_group_sizes(df: pl.LazyFrame, max_shift: int, consider_strand: bool) ->

with open(f"{metric}.shifts_mqc.json", "w") as f:
f.write(json.dumps(multiqc, indent=4))

# Versions

versions = {
"${task.process}": {
"python": platform.python_version(),
"polars": pl.__version__,
"altair": alt.__version__
}
}

with open("versions.yml", "w") as f:
f.write(format_yaml_like(versions))
Loading