Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ exclude: |
)
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v1.2.3
rev: v6.0.0
hooks:
- id: check-added-large-files
args: ["--maxkb=10240"]
Expand All @@ -20,30 +20,30 @@ repos:
- id: check-json
# spell check
- repo: https://github.com/codespell-project/codespell
rev: v2.4.1
rev: v2.4.2
hooks:
- id: codespell
args: ["--ignore-words-list=bais"]
# Python formatting
- repo: https://github.com/psf/black
rev: 23.7.0
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 26.3.0
hooks:
- id: black
# R formatting
- repo: https://github.com/lorenzwalthert/precommit
rev: v0.1.2
rev: v0.4.3.9021
hooks:
- id: style-files
# general linting
- repo: https://github.com/pre-commit/mirrors-prettier
rev: "v3.1.0"
rev: "v4.0.0-alpha.8"
hooks:
- id: prettier
additional_dependencies:
- prettier@3.4.0
# enforce commit format
- repo: https://github.com/compilerla/conventional-pre-commit
rev: v2.3.0
rev: v4.4.0
hooks:
- id: conventional-pre-commit
stages: [commit-msg]
Expand Down
4 changes: 1 addition & 3 deletions bin/calc_effective_genome_fraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,7 @@ def test():
chr22 50818468
chr_X 156040895
chr_Y 57227415
chr_M 16569""".split(
"\n"
)
chr_M 16569""".split("\n")
effective_genome_size = 2700000000

assert calc_egf(effective_genome_size, chrom_sizes) == 0.9391299376153861
Expand Down
1 change: 1 addition & 0 deletions bin/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
adapted from: https://github.com/nf-core/chipseq/blob/51eba00b32885c4d0bec60db3cb0a45eb61e34c5/bin/check_samplesheet.py
"""

import collections
import os
import errno
Expand Down
1 change: 1 addition & 0 deletions bin/compute_scaling_factors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Example:
python compute_scaling_factors.py "id1,id2,id3" "100,200,300" scaling_factors.tsv
"""

import sys


Expand Down
1 change: 1 addition & 0 deletions bin/consensus_corces.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

https://github.com/CCBR/CHAMPAGNE/issues/159
"""

import sys


Expand Down
3 changes: 2 additions & 1 deletion bin/createtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@
structure is then converted into a pandas dataframe.
Example Usage:
--------------
cat sample1.qcmetrics sample2.qcmetrics sampleNth.qcmetrics | ./createQCTable > ChIPseq_QC_Table.txt
cat sample1.qcmetrics sample2.qcmetrics sampleNth.qcmetrics | ./createQCTable > ChIPseq_QC_Table.txt
Python version: 3+
"""

import pandas as pd
import sys

Expand Down
30 changes: 15 additions & 15 deletions bin/filterMetrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,29 @@

Example Usage:
--------------
1.) Find total number of reads
1.) Find total number of reads
# grep 'in total' H3k4me3_gran_1.sorted.bam.flagstat | awk '{print $1,$3}' | ./filterMetrics H3k4me3_gran_1 tnreads

2.) Find total number of mapped reads
# grep 'mapped (' H3k4me3_gran_1.sorted.bam.flagstat | awk '{print $1,$3}' | ./filterMetrics H3k4me3_gran_1 mnreads
2.) Find total number of mapped reads
# grep 'mapped (' H3k4me3_gran_1.sorted.bam.flagstat | awk '{print $1,$3}' | ./filterMetrics H3k4me3_gran_1 mnreads

3.) Find total number of uniquely mapped reads
# grep 'mapped (' H3k4me3_gran_1.sorted.Q5DD.bam.flagstat | awk '{print $1,$3}' | ./filterMetrics H3k4me3_gran_1 unreads
3.) Find total number of uniquely mapped reads
# grep 'mapped (' H3k4me3_gran_1.sorted.Q5DD.bam.flagstat | awk '{print $1,$3}' | ./filterMetrics H3k4me3_gran_1 unreads

4.) Find NRF, PCB1, PCB2
# cat H3k4me3_gran_1.nrf | ./filterMetrics H3k4me3_gran_1 nrf
4.) Find NRF, PCB1, PCB2
# cat H3k4me3_gran_1.nrf | ./filterMetrics H3k4me3_gran_1 nrf

7.) Find FRiP (in the second half of ChIP-seq pipeline)
# TO-DO
7.) Find FRiP (in the second half of ChIP-seq pipeline)
# TO-DO

8.) Find NGSQC statistics (delta RCIs)
# grep '<' NGSQC_report.txt | awk '{print $(NF)}' | xargs | ./filterMetrics H3k4me3_gran_1 ngsqc
8.) Find NGSQC statistics (delta RCIs)
# grep '<' NGSQC_report.txt | awk '{print $(NF)}' | xargs | ./filterMetrics H3k4me3_gran_1 ngsqc

9.) Find NSC, RSC, Qtag
# awk '{print $(NF-2),$(NF-1),$(NF)}' H3k4me3_gran_1.sorted.Q5DD.ppqt | ./filterMetrics H3k4me3_gran_1 ppqt
9.) Find NSC, RSC, Qtag
# awk '{print $(NF-2),$(NF-1),$(NF)}' H3k4me3_gran_1.sorted.Q5DD.ppqt | ./filterMetrics H3k4me3_gran_1 ppqt

10.) Find the Fragment Length
# awk -F '\t' '{print $3}' H3k4me3_gran_1.sorted.Q5DD.ppqt | sed -e 's/,/ /g' | ../Scripts/filterMetrics H3k4me3_gran_1 fragLen
10.) Find the Fragment Length
# awk -F '\t' '{print $3}' H3k4me3_gran_1.sorted.Q5DD.ppqt | sed -e 's/,/ /g' | ../Scripts/filterMetrics H3k4me3_gran_1 fragLen

Python version(s): 2.7 or 3.X

Expand Down
8 changes: 4 additions & 4 deletions bin/frip.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,15 @@ def process_files(bamfile, bedfiles, genome, filetypes, bedtool=None, bedsample=
]
]
nreads = count_reads_in_bam(bamfile)
(bamsample, condition) = clip_bamfile_name(bamfile)
bamsample, condition = clip_bamfile_name(bamfile)
for i in range(len(bedfileL)):
bed = bedfileL[i]
if len(filetypesL) > 1:
filetype = filetypesL[i]
else:
filetype = filetypesL[0]
if not bedtool and not bedsample:
(bedtool, bedsample) = clip_bedfile_name(bed, filetype)
bedtool, bedsample = clip_bedfile_name(bed, filetype)
noverlaps = count_reads_in_bed(bamfile, bed, genome)
frip = calculate_frip(nreads, noverlaps)
nbases = measure_bedfile_coverage(bed, genome) / 1000000
Expand All @@ -134,7 +134,7 @@ def process_files(bamfile, bedfiles, genome, filetypes, bedtool=None, bedsample=

def create_outfile_name(bamfile, outroot):
"""uses outroot to create the output file name"""
(bamsample, condition) = clip_bamfile_name(bamfile)
bamsample, condition = clip_bamfile_name(bamfile)
outtable = bamsample + "." + condition + "." + "FRiP_table.txt"
if outroot != "":
outtable = outroot + "." + outtable
Expand Down Expand Up @@ -200,7 +200,7 @@ def main():
"-s", dest="sample", default="", help="Sample name/ID of the bedfile(s)"
)

(options, args) = parser.parse_args()
options, args = parser.parse_args()
bedfiles = options.peakfiles
bamfile = options.bamfile
genomefile = options.genomefile
Expand Down
1 change: 1 addition & 0 deletions bin/get_consensus_peaks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
adapted from https://github.com/CCBR/ASPEN/blob/55f909d76500c3502c1c397ef3000908649b0284/workflow/scripts/ccbr_get_consensus_peaks.py
"""

import os
import argparse
import uuid
Expand Down
1 change: 1 addition & 0 deletions bin/make_sf_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Example:
python make_sf_table.py scaling_factors_1.tsv,scaling_factors_2.tsv id1,id2,id3 ab1,ab2 100,200,300 spike_sf.tsv
"""

from collections import defaultdict
import sys

Expand Down
2 changes: 1 addition & 1 deletion bin/ngsqc_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def main():
the output file name.",
)

(options, args) = parser.parse_args()
options, args = parser.parse_args()
directory = options.directory
ext = options.ext
group = options.group
Expand Down
13 changes: 8 additions & 5 deletions modules/local/check_contrasts/templates/check_contrasts.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ main <- function(contrasts_filename = "${contrasts}",
assert_that(all(colnames(contrasts_df) == c("contrast_name", "group1", "group2")))

samples_df <- readr::read_csv(samplesheet_filename)
sample_names <- samples_df %>% dplyr::pull(sample)
sample_names <- samples_df %>% dplyr::pull(sample)
# check individual contrasts
purrr::pmap(contrasts_df, check_contrast, sample_names = sample_names)

Expand All @@ -32,13 +32,16 @@ check_contrast <- function(contrast_name, group1, group2, sample_names) {
group2_samples <- unlist(strsplit(group2, ","))
# Ensure each group has at least 1 sample
assert_that(length(group1_samples) > 0,
msg = glue("group1 must have at least one sample for {contrast_name}"))
msg = glue("group1 must have at least one sample for {contrast_name}")
)
assert_that(length(group2_samples) > 0,
msg = glue("group2 must have at least one sample for {contrast_name}"))
msg = glue("group2 must have at least one sample for {contrast_name}")
)
# Ensure every sample is in the sample sheet
extra_samples <- setdiff(c(group1_samples,group2_samples), sample_names)
extra_samples <- setdiff(c(group1_samples, group2_samples), sample_names)
assert_that(length(extra_samples) == 0,
msg = glue("All samples in {contrast_name} must be in the sample sheet. Extra samples found: {paste(extra_samples, collapse = ',')}"))
msg = glue("All samples in {contrast_name} must be in the sample sheet. Extra samples found: {paste(extra_samples, collapse = ',')}")
)
# Ensure each sample is in only one group
assert_that(
length(intersect(group1_samples, group2_samples)) == 0,
Expand Down
Loading