diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6a6f1541..9450d63c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,7 +11,7 @@ exclude: | ) repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v1.2.3 + rev: v6.0.0 hooks: - id: check-added-large-files args: ["--maxkb=10240"] @@ -20,30 +20,30 @@ repos: - id: check-json # spell check - repo: https://github.com/codespell-project/codespell - rev: v2.4.1 + rev: v2.4.2 hooks: - id: codespell args: ["--ignore-words-list=bais"] # Python formatting - - repo: https://github.com/psf/black - rev: 23.7.0 + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 26.3.0 hooks: - id: black # R formatting - repo: https://github.com/lorenzwalthert/precommit - rev: v0.1.2 + rev: v0.4.3.9021 hooks: - id: style-files # general linting - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v3.1.0" + rev: "v4.0.0-alpha.8" hooks: - id: prettier additional_dependencies: - prettier@3.4.0 # enforce commit format - repo: https://github.com/compilerla/conventional-pre-commit - rev: v2.3.0 + rev: v4.4.0 hooks: - id: conventional-pre-commit stages: [commit-msg] diff --git a/bin/calc_effective_genome_fraction.py b/bin/calc_effective_genome_fraction.py index a0a9f034..e0ba5f0b 100755 --- a/bin/calc_effective_genome_fraction.py +++ b/bin/calc_effective_genome_fraction.py @@ -65,9 +65,7 @@ def test(): chr22 50818468 chr_X 156040895 chr_Y 57227415 -chr_M 16569""".split( - "\n" - ) +chr_M 16569""".split("\n") effective_genome_size = 2700000000 assert calc_egf(effective_genome_size, chrom_sizes) == 0.9391299376153861 diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 791a4db2..617e934d 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -3,6 +3,7 @@ """ adapted from: https://github.com/nf-core/chipseq/blob/51eba00b32885c4d0bec60db3cb0a45eb61e34c5/bin/check_samplesheet.py """ + import collections import os import errno diff --git 
a/bin/compute_scaling_factors.py b/bin/compute_scaling_factors.py index 5e6e1a40..16f4188f 100755 --- a/bin/compute_scaling_factors.py +++ b/bin/compute_scaling_factors.py @@ -8,6 +8,7 @@ Example: python compute_scaling_factors.py "id1,id2,id3" "100,200,300" scaling_factors.tsv """ + import sys diff --git a/bin/consensus_corces.py b/bin/consensus_corces.py index d4270731..a525069b 100755 --- a/bin/consensus_corces.py +++ b/bin/consensus_corces.py @@ -5,6 +5,7 @@ https://github.com/CCBR/CHAMPAGNE/issues/159 """ + import sys diff --git a/bin/createtable.py b/bin/createtable.py index a4ab4845..e5d8ed42 100755 --- a/bin/createtable.py +++ b/bin/createtable.py @@ -11,9 +11,10 @@ structure is then converted into a pandas dataframe. Example Usage: -------------- - cat sample1.qcmetrics sample2.qcmetrics sampleNth.qcmetrics | ./createQCTable > ChIPseq_QC_Table.txt + cat sample1.qcmetrics sample2.qcmetrics sampleNth.qcmetrics | ./createQCTable > ChIPseq_QC_Table.txt Python version: 3+ """ + import pandas as pd import sys diff --git a/bin/filterMetrics.py b/bin/filterMetrics.py index 38a7fe34..987db59a 100755 --- a/bin/filterMetrics.py +++ b/bin/filterMetrics.py @@ -12,29 +12,29 @@ Example Usage: -------------- - 1.) Find total number of reads + 1.) Find total number of reads # grep 'in total' H3k4me3_gran_1.sorted.bam.flagstat | awk '{print $1,$3}' | ./filterMetrics H3k4me3_gran_1 tnreads - 2.) Find total number of mapped reads - # grep 'mapped (' H3k4me3_gran_1.sorted.bam.flagstat | awk '{print $1,$3}' | ./filterMetrics H3k4me3_gran_1 mnreads + 2.) Find total number of mapped reads + # grep 'mapped (' H3k4me3_gran_1.sorted.bam.flagstat | awk '{print $1,$3}' | ./filterMetrics H3k4me3_gran_1 mnreads - 3.) Find total number of uniquely mapped reads - # grep 'mapped (' H3k4me3_gran_1.sorted.Q5DD.bam.flagstat | awk '{print $1,$3}' | ./filterMetrics H3k4me3_gran_1 unreads + 3.) 
Find total number of uniquely mapped reads + # grep 'mapped (' H3k4me3_gran_1.sorted.Q5DD.bam.flagstat | awk '{print $1,$3}' | ./filterMetrics H3k4me3_gran_1 unreads - 4.) Find NRF, PCB1, PCB2 - # cat H3k4me3_gran_1.nrf | ./filterMetrics H3k4me3_gran_1 nrf + 4.) Find NRF, PCB1, PCB2 + # cat H3k4me3_gran_1.nrf | ./filterMetrics H3k4me3_gran_1 nrf - 7.) Find FRiP (in the second half of ChIP-seq pipeline) - # TO-DO + 7.) Find FRiP (in the second half of ChIP-seq pipeline) + # TO-DO - 8.) Find NGSQC statistics (detla RCIs) - # grep '<' NGSQC_report.txt | awk '{print $(NF)}' | xargs | ./filterMetrics H3k4me3_gran_1 ngsqc + 8.) Find NGSQC statistics (delta RCIs) + # grep '<' NGSQC_report.txt | awk '{print $(NF)}' | xargs | ./filterMetrics H3k4me3_gran_1 ngsqc - 9.) Find NSC, RSC, Qtag - # awk '{print $(NF-2),$(NF-1),$(NF)}' H3k4me3_gran_1.sorted.Q5DD.ppqt | ./filterMetrics H3k4me3_gran_1 ppqt + 9.) Find NSC, RSC, Qtag + # awk '{print $(NF-2),$(NF-1),$(NF)}' H3k4me3_gran_1.sorted.Q5DD.ppqt | ./filterMetrics H3k4me3_gran_1 ppqt - 10.) Find the Fragment Length - # awk -F '\t' '{print $3}' H3k4me3_gran_1.sorted.Q5DD.ppqt | sed -e 's/,/ /g' | ../Scripts/filterMetrics H3k4me3_gran_1 fragLen + 10.)
Find the Fragment Length + # awk -F '\t' '{print $3}' H3k4me3_gran_1.sorted.Q5DD.ppqt | sed -e 's/,/ /g' | ../Scripts/filterMetrics H3k4me3_gran_1 fragLen Python version(s): 2.7 or 3.X diff --git a/bin/frip.py b/bin/frip.py index ae9cd3f8..acfcc1ba 100755 --- a/bin/frip.py +++ b/bin/frip.py @@ -113,7 +113,7 @@ def process_files(bamfile, bedfiles, genome, filetypes, bedtool=None, bedsample= ] ] nreads = count_reads_in_bam(bamfile) - (bamsample, condition) = clip_bamfile_name(bamfile) + bamsample, condition = clip_bamfile_name(bamfile) for i in range(len(bedfileL)): bed = bedfileL[i] if len(filetypesL) > 1: @@ -121,7 +121,7 @@ def process_files(bamfile, bedfiles, genome, filetypes, bedtool=None, bedsample= else: filetype = filetypesL[0] if not bedtool and not bedsample: - (bedtool, bedsample) = clip_bedfile_name(bed, filetype) + bedtool, bedsample = clip_bedfile_name(bed, filetype) noverlaps = count_reads_in_bed(bamfile, bed, genome) frip = calculate_frip(nreads, noverlaps) nbases = measure_bedfile_coverage(bed, genome) / 1000000 @@ -134,7 +134,7 @@ def process_files(bamfile, bedfiles, genome, filetypes, bedtool=None, bedsample= def create_outfile_name(bamfile, outroot): """uses outroot to create the output file name""" - (bamsample, condition) = clip_bamfile_name(bamfile) + bamsample, condition = clip_bamfile_name(bamfile) outtable = bamsample + "." + condition + "." + "FRiP_table.txt" if outroot != "": outtable = outroot + "." 
+ outtable @@ -200,7 +200,7 @@ def main(): "-s", dest="sample", default="", help="Sample name/ID of the bedfile(s)" ) - (options, args) = parser.parse_args() + options, args = parser.parse_args() bedfiles = options.peakfiles bamfile = options.bamfile genomefile = options.genomefile diff --git a/bin/get_consensus_peaks.py b/bin/get_consensus_peaks.py index 9946155b..bedec6ac 100755 --- a/bin/get_consensus_peaks.py +++ b/bin/get_consensus_peaks.py @@ -2,6 +2,7 @@ """ adapted from https://github.com/CCBR/ASPEN/blob/55f909d76500c3502c1c397ef3000908649b0284/workflow/scripts/ccbr_get_consensus_peaks.py """ + import os import argparse import uuid diff --git a/bin/make_sf_table.py b/bin/make_sf_table.py index 921122ea..d45bffd0 100755 --- a/bin/make_sf_table.py +++ b/bin/make_sf_table.py @@ -8,6 +8,7 @@ Example: python make_sf_table.py scaling_factors_1.tsv,scaling_factors_2.tsv id1,id2,id3 ab1,ab2 100,200,300 spike_sf.tsv """ + from collections import defaultdict import sys diff --git a/bin/ngsqc_plot.py b/bin/ngsqc_plot.py index 73ba9e34..40f5a9ab 100755 --- a/bin/ngsqc_plot.py +++ b/bin/ngsqc_plot.py @@ -145,7 +145,7 @@ def main(): the output file name.", ) - (options, args) = parser.parse_args() + options, args = parser.parse_args() directory = options.directory ext = options.ext group = options.group diff --git a/modules/local/check_contrasts/templates/check_contrasts.R b/modules/local/check_contrasts/templates/check_contrasts.R index 6391b329..20efbdd7 100644 --- a/modules/local/check_contrasts/templates/check_contrasts.R +++ b/modules/local/check_contrasts/templates/check_contrasts.R @@ -13,7 +13,7 @@ main <- function(contrasts_filename = "${contrasts}", assert_that(all(colnames(contrasts_df) == c("contrast_name", "group1", "group2"))) samples_df <- readr::read_csv(samplesheet_filename) - sample_names <- samples_df %>% dplyr::pull(sample) + sample_names <- samples_df %>% dplyr::pull(sample) # check individual contrasts purrr::pmap(contrasts_df, check_contrast, 
sample_names = sample_names) @@ -32,13 +32,16 @@ check_contrast <- function(contrast_name, group1, group2, sample_names) { group2_samples <- unlist(strsplit(group2, ",")) # Ensure each group has at least 1 sample assert_that(length(group1_samples) > 0, - msg = glue("group1 must have at least one sample for {contrast_name}")) + msg = glue("group1 must have at least one sample for {contrast_name}") + ) assert_that(length(group2_samples) > 0, - msg = glue("group2 must have at least one sample for {contrast_name}")) + msg = glue("group2 must have at least one sample for {contrast_name}") + ) # Ensure every sample is in the sample sheet - extra_samples <- setdiff(c(group1_samples,group2_samples), sample_names) + extra_samples <- setdiff(c(group1_samples, group2_samples), sample_names) assert_that(length(extra_samples) == 0, - msg = glue("All samples in {contrast_name} must be in the sample sheet. Extra samples found: {paste(extra_samples, collapse = ',')}")) + msg = glue("All samples in {contrast_name} must be in the sample sheet. Extra samples found: {paste(extra_samples, collapse = ',')}") + ) # Ensure each sample is in only one group assert_that( length(intersect(group1_samples, group2_samples)) == 0,