diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 550a0e5f..9aa02a39 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -67,6 +67,24 @@ jobs: with: name: nextflow-log path: .nextflow.log + + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: "3.11" + cache: "pip" + - name: Install pre-commit and dependencies + run: | + python -m pip install --upgrade pip setuptools + pip install pre-commit + - name: Run pre-commit hooks + run: | + pre-commit run --all-files --show-diff-on-failure + build-status: # https://github.com/orgs/community/discussions/4324#discussioncomment-3477871 runs-on: ubuntu-latest needs: [build] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6a6f1541..619a70ce 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,18 +6,24 @@ exclude: | ^docs/.*.html| ^_extensions/| ^nextflow_schema.json| - ^.*.svg | - ^.github/CONTRIBUTING.md + ^.*.svg| + ^.github/CONTRIBUTING.md| + ^tests/data/ ) repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v1.2.3 + rev: v6.0.0 hooks: - id: check-added-large-files args: ["--maxkb=10240"] - id: end-of-file-fixer - id: trailing-whitespace - id: check-json + - repo: https://github.com/CCBR/Tools + rev: precommit-hooks + hooks: + - id: detect-absolute-paths + exclude: '(^|/)\.gitignore$|\.diff$' # spell check - repo: https://github.com/codespell-project/codespell rev: v2.4.1 @@ -26,24 +32,24 @@ repos: args: ["--ignore-words-list=bais"] # Python formatting - repo: https://github.com/psf/black - rev: 23.7.0 + rev: 26.1.0 hooks: - id: black # R formatting - repo: https://github.com/lorenzwalthert/precommit - rev: v0.1.2 + rev: v0.4.3.9020 hooks: - id: style-files # general linting - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v3.1.0" + rev: "v4.0.0-alpha.8" hooks: - id: prettier 
additional_dependencies: - prettier@3.4.0 # enforce commit format - repo: https://github.com/compilerla/conventional-pre-commit - rev: v2.3.0 + rev: v4.4.0 hooks: - id: conventional-pre-commit stages: [commit-msg] diff --git a/README.md b/README.md index 1539e386..2fcff723 100644 --- a/README.md +++ b/README.md @@ -45,14 +45,14 @@ Initialize and run champagne with test data: ```sh # copy the champagne config files to your project directory. # --output is optional and defaults to your current working directory. -champagne init --output /data/$USER/champagne_project +champagne init --output path/to/champagne_project/ # preview the champagne jobs that will run with the test dataset -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ --mode local -profile test -preview # launch a champagne run on slurm with the test dataset -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ --mode slurm -profile test ``` @@ -66,7 +66,7 @@ Once you've created a samplesheet with paths to your fastq files, run champagne with the `--input` option to specify the path to your sample sheet: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ --mode slurm \ --input samplesheet.csv \ --genome hg38 @@ -91,7 +91,7 @@ run_qc: true You can then use these parameters with the `-params-file` option: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ --mode slurm \ -params-file assets/params.yml ``` @@ -112,7 +112,7 @@ If your experiment uses a spike-in control, you can specify the spike-in genome with the `--spike_genome` parameter: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ --mode slurm \ --input samplesheet.csv \ --genome hg38 \ diff --git a/conf/biowulf.config b/conf/biowulf.config index 
1d357a85..cdb9138b 100644 --- a/conf/biowulf.config +++ b/conf/biowulf.config @@ -7,13 +7,13 @@ params { max_cpus = 32 max_time = '72 h' - igenomes_base = '/fdb/igenomes/' + igenomes_base = '/fdb/igenomes/' // abs-path:ignore // CCBR shared resource paths - index_dir = '/data/CCBR_Pipeliner/db/PipeDB/Indices' + index_dir = "${env('CCBR_PIPELINER_DIR')}/db/PipeDB/Indices" fastq_screen_conf = "${projectDir}/assets/fastq_screen_biowulf.conf" - fastq_screen_db_dir = '/data/CCBR_Pipeliner/db/PipeDB/lib/fastq_screen_db/' + fastq_screen_db_dir = "${env('CCBR_PIPELINER_DIR')}/db/PipeDB/lib/fastq_screen_db/" publish_dir_mode = 'link' } @@ -21,14 +21,14 @@ params { singularity { enabled = true autoMounts = true - cacheDir = "/data/CCBR_Pipeliner/SIFs" + cacheDir = "${env('CCBR_PIPELINER_DIR')}/SIFs" envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' } -env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFs" +env.SINGULARITY_CACHEDIR = "${env('CCBR_PIPELINER_DIR')}/SIFs" process.clusterOptions = ' --gres=lscratch:200 ' -process.scratch = '/lscratch/$SLURM_JOB_ID' +process.scratch = '/lscratch/$SLURM_JOB_ID' // abs-path:ignore process.stageInMode = 'symlink' process.stageOutMode = 'rsync' // https://www.nextflow.io/docs/latest/reference/process.html#cache diff --git a/docs/_genomes_tail.md b/docs/_genomes_tail.md index 91a876e7..3b512e56 100644 --- a/docs/_genomes_tail.md +++ b/docs/_genomes_tail.md @@ -4,10 +4,10 @@ If you'd like to override the default blacklist used by one of the built-in geno you can provide a custom blacklist bed file or fasta file: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ --mode slurm \ --genome hg38 \ - --blacklist /path/to/blacklist.bed + --blacklist path/to/blacklist.bed ``` If you're providing a custom blacklist bed file, make sure its regions refer to @@ -26,7 +26,7 @@ If you'd like to use a custom genome, you'll need the following files: 
Prepare your custom reference genome with: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ --mode slurm \ -entry MAKE_REFERENCE \ --genome custom_genome \ @@ -40,7 +40,7 @@ The reference files and a config file for the genome will be written in `results Then you can run champagne using your custom genome: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ --mode slurm -profile biowulf \ --input samplesheet.csv \ --genome custom_genome \ diff --git a/docs/_params_head.md b/docs/_params_head.md index e3b867bb..fa5b6469 100644 --- a/docs/_params_head.md +++ b/docs/_params_head.md @@ -4,7 +4,7 @@ Any parameter can be set via the CLI using two hyphens (`--`) followed by the parameter name and value. For example: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ --input assets/samplesheet_full_mm10.csv \ --contrasts assets/contrasts_full_mm10.csv \ --genome mm10 \ @@ -31,7 +31,7 @@ run_qc: true You can then use these parameters with the `-params-file` option: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ -params-file assets/params.yml ``` diff --git a/docs/devs/contributors.md b/docs/devs/contributors.md index 4c6eba61..4d8d22d1 100644 --- a/docs/devs/contributors.md +++ b/docs/devs/contributors.md @@ -1,7 +1,7 @@ # Contributors -| | | | -| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| | | | +|---|---|---| | Kelly Sovacool, PhD
Kelly Sovacool, PhD
| Samantha
Samantha
| Vishal Koparde, PhD
Vishal Koparde, PhD
| -View the [contributors graph on GitHub](https://github.com/CCBR/CHAMPAGNE/graphs/contributors) for more details. +View the [contributors graph on GitHub](https://github.com/CCBR/CHAMPAGNE/graphs/contributors) for more details. \ No newline at end of file diff --git a/docs/guide/genomes.md b/docs/guide/genomes.md index d7fe00ae..f8cfeb02 100644 --- a/docs/guide/genomes.md +++ b/docs/guide/genomes.md @@ -80,17 +80,16 @@ These genomes can be passed to the `--spike_genome` parameter. - blacklist_bed: `NO_FILE` - effective_genome_size: `4641652` - chrom_sizes: `${params.index_dir}/ecoli_k12/Chromosomes/chrom.sizes` - ### Custom blacklist If you'd like to override the default blacklist used by one of the built-in genomes, you can provide a custom blacklist bed file or fasta file: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ --mode slurm \ --genome hg38 \ - --blacklist /path/to/blacklist.bed + --blacklist path/to/blacklist.bed ``` If you're providing a custom blacklist bed file, make sure its regions refer to @@ -109,7 +108,7 @@ If you'd like to use a custom genome, you'll need the following files: Prepare your custom reference genome with: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ --mode slurm \ -entry MAKE_REFERENCE \ --genome custom_genome \ @@ -123,7 +122,7 @@ The reference files and a config file for the genome will be written in `results Then you can run champagne using your custom genome: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ --mode slurm -profile biowulf \ --input samplesheet.csv \ --genome custom_genome \ diff --git a/docs/guide/output.md b/docs/guide/output.md index a7ad2622..6530becf 100644 --- a/docs/guide/output.md +++ b/docs/guide/output.md @@ -6,7 +6,7 @@ After running the pipeline, the output directory will also contain `results/`, 
`log/`, `work/`, and `submit_slurm.sh` (if using `--mode slurm`). ``` -/data/$USER/champagne_project +path/to/champagne_project/ ├── assets/ ├── conf/ ├── log/ @@ -30,7 +30,7 @@ After running the pipeline, the output directory will also contain `results/`, Workflow output files will be written to the `results/` directory by default in your pipeline run output directory. -For example, if you ran champagne with `champagne run --output /data/$USER/champagne_project`, the results files will be in `/data/$USER/champagne_project/results/`. +For example, if you ran champagne with `champagne run --output path/to/champagne_project/`, the results files will be in `path/to/champagne_project/results/`. All paths listed below are relative to the `results/` directory. diff --git a/docs/guide/params.md b/docs/guide/params.md index 7e8cbefe..a34ed363 100644 --- a/docs/guide/params.md +++ b/docs/guide/params.md @@ -4,7 +4,7 @@ Any parameter can be set via the CLI using two hyphens (`--`) followed by the parameter name and value. For example: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ --input assets/samplesheet_full_mm10.csv \ --contrasts assets/contrasts_full_mm10.csv \ --genome mm10 \ @@ -31,14 +31,13 @@ run_qc: true You can then use these parameters with the `-params-file` option: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ -params-file assets/params.yml ``` View the full list of pipeline parameters below. 
- # CCBR/CHAMPAGNE pipeline parameters CHromAtin iMmuno PrecipitAtion sequencinG aNalysis pipEline @@ -47,134 +46,144 @@ CHromAtin iMmuno PrecipitAtion sequencinG aNalysis pipEline The most commonly used pipeline options -| Parameter | Description | Type | Default | Required | Hidden | -| ------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | -------------------------- | -------- | ------ | -| `input` | Path to comma-separated file containing information about the samples in the experiment.
HelpYou will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.
| `string` | | True | | -| `contrasts` | Optional contrasts specification for differential analysis | `string` | | | | -| `genome` | Reference genome (e.g. hg38, mm10). This can be a genome in conf/genomes.config, or see 'Custom genome options' to build a custom reference from a fasta & gtf file. | `string` | | True | | -| `outputDir` | | `string` | ${launchDir}/results | | True | -| `tracedir` | | `string` | ${outputDir}/pipeline_info | | True | -| `publish_dir_mode` | How to publish files to the results directory. This parameter sets Nextflow's workflow.output.mode configuration option. (accepted: `link`\|`copy`\|`move`\|`copyNoFollow`\|`rellink`\|`symlink`) | `string` | link | | | +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `input` | Path to comma-separated file containing information about the samples in the experiment.
HelpYou will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.
| `string` | | True | | +| `contrasts` | Optional contrasts specification for differential analysis | `string` | | | | +| `genome` | Reference genome (e.g. hg38, mm10). This can be a genome in conf/genomes.config, or see 'Custom genome options' to build a custom reference from a fasta & gtf file. | `string` | | True | | +| `outputDir` | | `string` | ${launchDir}/results | | True | +| `tracedir` | | `string` | ${outputDir}/pipeline_info | | True | +| `publish_dir_mode` | How to publish files to the results directory. This parameter sets Nextflow's workflow.output.mode configuration option. (accepted: `link`\|`copy`\|`move`\|`copyNoFollow`\|`rellink`\|`symlink`) | `string` | link | | | ## Custom genome options Use these to build a custom reference genome not already listed in conf/genomes.config. For an example, see conf/test.config. -| Parameter | Description | Type | Default | Required | Hidden | -| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | -| `genome_fasta` | Genome fasta file | `string` | | | | -| `genes_gtf` | Genome gtf file | `string` | | | | -| `blacklist` | Custom blacklisted sequences as a fasta file or bed file. These will be filtered out of the trimmed reads before aligning to the reference genome. | `string` | | | | -| `read_length` | Read length used for counting unique kmers and computing the effective genome size. | `integer` | | | | -| `rename_contigs` | File with map to translate chromosome names (see assets/R64-1-1_ensembl2UCSC.txt as an example) | `string` | | | | +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `genome_fasta` | Genome fasta file | `string` | | | | +| `genes_gtf` | Genome gtf file | `string` | | | | +| `blacklist` | Custom blacklisted sequences as a fasta file or bed file. 
These will be filtered out of the trimmed reads before aligning to the reference genome. | `string` | | | | +| `read_length` | Read length used for counting unique kmers and computing the effective genome size. | `integer` | | | | +| `rename_contigs` | File with map to translate chromosome names (see assets/R64-1-1_ensembl2UCSC.txt as an example) | `string` | | | | ## General parameters -| Parameter | Description | Type | Default | Required | Hidden | -| --------------------- | ----------- | --------- | ------- | -------- | ------ | -| `max_memory` | | `string` | 224 GB | | | -| `max_cpus` | | `integer` | 32 | | | -| `max_time` | | `string` | 72 h | | | -| `align_min_quality` | | `integer` | 6 | | | -| `min_fragment_length` | | `integer` | 200 | | | + + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `max_memory` | | `string` | 224 GB | | | +| `max_cpus` | | `integer` | 32 | | | +| `max_time` | | `string` | 72 h | | | +| `align_min_quality` | | `integer` | 6 | | | +| `min_fragment_length` | | `integer` | 200 | | | ## Spike-in options Options for experiments that use a spike-in genome -| Parameter | Description | Type | Default | Required | Hidden | -| --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | --------- | -------- | ------ | -| `spike_genome` | Optional spike-in genome (e.g. dmelr6.32, ecoli_k12). If null, spike-in normalization will not be performed. | `string` | | | | -| `spike_norm_method` | Method to compute scaling factors for spike-in normalization. 
"guenther" uses a simple fraction of the reads aligning to the spike-in genome as described in . "delorenzi" uses deepTools multiBamSummary with --scalingFactors, which is similar to the method described in . (accepted: `delorenzi`\|`guenther`) | `string` | delorenzi | | | -| `spike_deeptools_bin_size` | When spike_norm_method is delorenzi, this sets --binSize in deepTools multiBamSummary | `integer` | 5000 | | | -| `spike_deeptools_min_map_quality` | When spike_norm_method is delorenzi, this sets --minMappingQuality in deepTools multiBamSummary | `integer` | 30 | | | +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `spike_genome` | Optional spike-in genome (e.g. dmelr6.32, ecoli_k12). If null, spike-in normalization will not be performed. | `string` | | | | +| `spike_norm_method` | Method to compute scaling factors for spike-in normalization. "guenther" uses a simple fraction of the reads aligning to the spike-in genome as described in . "delorenzi" uses deepTools multiBamSummary with --scalingFactors, which is similar to the method described in . (accepted: `delorenzi`\|`guenther`) | `string` | delorenzi | | | +| `spike_deeptools_bin_size` | When spike_norm_method is delorenzi, this sets --binSize in deepTools multiBamSummary | `integer` | 5000 | | | +| `spike_deeptools_min_map_quality` | When spike_norm_method is delorenzi, this sets --minMappingQuality in deepTools multiBamSummary | `integer` | 30 | | | ## QC options -| Parameter | Description | Type | Default | Required | Hidden | -| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | -------------------------- | -------- | ------ | -| `deeptools_normalize_using` | This parameter has been renamed to "deeptools_normalize_samples". 
It is kept for backward compatibility. | `string` | RPGC | | True | -| `deeptools_normalize_samples` | This normalization method is applied to the samples only, not the inputs. If using a spike-in genome, recommend setting this to "None" | `string` | RPGC | | | -| `deeptools_normalize_input` | Normalization method applied to inputs only. This way you can disable additional normalization for samples such as when using spike-in normalization, but still normalize the inputs. | `string` | RPGC | | | -| `deeptools_bin_size` | | `integer` | 25 | | | -| `deeptools_smooth_length` | | `integer` | 75 | | | -| `deeptools_corr_method` | Correlation method for deeptools plotCorrelation heatmap and scatterplot (accepted: `spearman`\|`pearson`) | `string` | spearman | | | -| `deeptools_excluded_chroms` | | `string` | chrM chrX chrY | | | -| `multiqc_config` | | `string` | assets/multiqc_config.yaml | | | -| `multiqc_logo` | | `string` | assets/ccbr_logo.png | | | + + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `deeptools_normalize_using` | This parameter has been renamed to "deeptools_normalize_samples". It is kept for backward compatibility. | `string` | RPGC | | True | +| `deeptools_normalize_samples` | This normalization method is applied to the samples only, not the inputs. If using a spike-in genome, recommend setting this to "None" | `string` | RPGC | | | +| `deeptools_normalize_input` | Normalization method applied to inputs only. This way you can disable additional normalization for samples such as when using spike-in normalization, but still normalize the inputs. 
| `string` | RPGC | | | +| `deeptools_bin_size` | | `integer` | 25 | | | +| `deeptools_smooth_length` | | `integer` | 75 | | | +| `deeptools_corr_method` | Correlation method for deeptools plotCorrelation heatmap and scatterplot (accepted: `spearman`\|`pearson`) | `string` | spearman | | | +| `deeptools_excluded_chroms` | | `string` | chrM chrX chrY | | | +| `multiqc_config` | | `string` | assets/multiqc_config.yaml | | | +| `multiqc_logo` | | `string` | assets/ccbr_logo.png | | | ## Peak callers -| Parameter | Description | Type | Default | Required | Hidden | -| ------------------- | ----------- | --------- | ---------------------------------------- | -------- | ------ | -| `macs_narrow_q` | | `number` | 0.01 | | | -| `macs_broad_q` | | `number` | 0.01 | | | -| `macs_broad_cutoff` | | `number` | 0.01 | | | -| `gem_read_dists` | | `string` | assets/gem/Read_Distribution_default.txt | | | -| `gem_fold` | | `integer` | 3 | | | -| `gem_k_min` | | `integer` | 6 | | | -| `gem_k_max` | | `integer` | 13 | | | -| `sicer_species` | | `string` | | | | + + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `macs_narrow_q` | | `number` | 0.01 | | | +| `macs_broad_q` | | `number` | 0.01 | | | +| `macs_broad_cutoff` | | `number` | 0.01 | | | +| `gem_read_dists` | | `string` | assets/gem/Read_Distribution_default.txt | | | +| `gem_fold` | | `integer` | 3 | | | +| `gem_k_min` | | `integer` | 6 | | | +| `gem_k_max` | | `integer` | 13 | | | +| `sicer_species` | | `string` | | | | ## motifs -| Parameter | Description | Type | Default | Required | Hidden | -| ----------------- | ----------- | --------- | ---------------------------------------------------------------- | -------- | ------ | -| `homer_de_novo` | | `boolean` | True | | | -| `homer_jaspar_db` | | `string` | assets/JASPAR2022_CORE_vertebrates_non-redundant_pfms_jaspar.txt | | | + + +| Parameter | Description | Type | Default | 
Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `homer_de_novo` | | `boolean` | True | | | +| `homer_jaspar_db` | | `string` | assets/JASPAR2022_CORE_vertebrates_non-redundant_pfms_jaspar.txt | | | ## run control Toggle various steps of the pipeline on/off -| Parameter | Description | Type | Default | Required | Hidden | -| ---------------------- | ----------- | --------- | ------- | -------- | ------ | -| `run_qc` | | `boolean` | True | | | -| `run_deeptools` | | `boolean` | True | | | -| `run_normalize_input` | | `boolean` | True | | | -| `run_call_peaks` | | `boolean` | True | | | -| `run_gem` | | `boolean` | True | | | -| `run_sicer` | | `boolean` | True | | | -| `run_macs_broad` | | `boolean` | True | | | -| `run_macs_narrow` | | `boolean` | True | | | -| `run_normalize_peaks` | | `boolean` | | | | -| `run_chipseeker` | | `boolean` | | | | -| `run_homer` | | `boolean` | True | | | -| `run_meme` | | `boolean` | True | | | -| `run_consensus_union` | | `boolean` | True | | | -| `run_consensus_corces` | | `boolean` | True | | | +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `run_qc` | | `boolean` | True | | | +| `run_deeptools` | | `boolean` | True | | | +| `run_normalize_input` | | `boolean` | True | | | +| `run_call_peaks` | | `boolean` | True | | | +| `run_gem` | | `boolean` | True | | | +| `run_sicer` | | `boolean` | True | | | +| `run_macs_broad` | | `boolean` | True | | | +| `run_macs_narrow` | | `boolean` | True | | | +| `run_normalize_peaks` | | `boolean` | | | | +| `run_chipseeker` | | `boolean` | | | | +| `run_homer` | | `boolean` | True | | | +| `run_meme` | | `boolean` | True | | | +| `run_consensus_union` | | `boolean` | True | | | +| `run_consensus_corces` | | `boolean` | True | | | ## Platform options Options for the platform or HPC on which the pipeline is run. 
These are set by platform-specific profiles, e.g. conf/biowulf.config. If you are running the pipeline on biowulf, these will be set by CHAMPAGNE automatically. -| Parameter | Description | Type | Default | Required | Hidden | -| --------------------- | --------------------------------------------------------------------------------------------- | -------- | ------- | -------- | ------ | -| `index_dir` | Absolute path to directory containing pre-built reference genomes. | `string` | | | | -| `fastq_screen_conf` | Path to the config file for fastq screen. See assets/fastq_screen_biowulf.conf as an example. | `string` | | | | -| `fastq_screen_db_dir` | Path to the directory containing fastq screen databases. | `string` | | | | +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `index_dir` | Absolute path to directory containing pre-built reference genomes. | `string` | | | | +| `fastq_screen_conf` | Path to the config file for fastq screen. See assets/fastq_screen_biowulf.conf as an example. | `string` | | | | +| `fastq_screen_db_dir` | Path to the directory containing fastq screen databases. 
| `string` | | | | ## containers -| Parameter | Description | Type | Default | Required | Hidden | -| -------------------------- | ----------- | -------- | ------------------------------------------------------------ | -------- | ------ | -| `containers_base` | | `string` | nciccbr/ccbr_ubuntu_base_20.04:v6.1 | | | -| `containers_deeptools` | | `string` | nciccbr/ccbr_deeptools_3.5.3:v1 | | | -| `containers_fastqc` | | `string` | nciccbr/ccrgb_qctools:v4.0 | | | -| `containers_fastq_screen` | | `string` | nciccbr/ccbr_fastq_screen_0.14.1:v1.0 | | | -| `containers_frip` | | `string` | nciccbr/ccbr_frip:v1 | | | -| `containers_gem` | | `string` | nciccbr/ccbr_gem_3.4:v1 | | | -| `containers_macs2` | | `string` | nciccbr/ccbr_macs2_2.2.9.1:v1 | | | -| `containers_multiqc` | | `string` | nciccbr/ccbr_multiqc_1.15:v1 | | | -| `containers_ngsqc` | | `string` | nciccbr/ccbr_ngsqc_0.31:v1 | | | -| `containers_phantom_peaks` | | `string` | quay.io/biocontainers/phantompeakqualtools:1.2.2--hdfd78af_1 | | | -| `containers_picard` | | `string` | nciccbr/ccbr_picard_2.27.5:v1 | | | -| `containers_preseq` | | `string` | nciccbr/ccbr_preseq_v2.0:v1 | | | -| `containers_r` | | `string` | nciccbr/ccbr_r_4.3.0:v1 | | | -| `containers_sicer` | | `string` | nciccbr/ccbr_sicer2_1.0.3:v1 | | | + + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `containers_base` | | `string` | nciccbr/ccbr_ubuntu_base_20.04:v6.1 | | | +| `containers_deeptools` | | `string` | nciccbr/ccbr_deeptools_3.5.3:v1 | | | +| `containers_fastqc` | | `string` | nciccbr/ccrgb_qctools:v4.0 | | | +| `containers_fastq_screen` | | `string` | nciccbr/ccbr_fastq_screen_0.14.1:v1.0 | | | +| `containers_frip` | | `string` | nciccbr/ccbr_frip:v1 | | | +| `containers_gem` | | `string` | nciccbr/ccbr_gem_3.4:v1 | | | +| `containers_macs2` | | `string` | nciccbr/ccbr_macs2_2.2.9.1:v1 | | | +| `containers_multiqc` | | `string` | 
nciccbr/ccbr_multiqc_1.15:v1 | | | +| `containers_ngsqc` | | `string` | nciccbr/ccbr_ngsqc_0.31:v1 | | | +| `containers_phantom_peaks` | | `string` | quay.io/biocontainers/phantompeakqualtools:1.2.2--hdfd78af_1 | | | +| `containers_picard` | | `string` | nciccbr/ccbr_picard_2.27.5:v1 | | | +| `containers_preseq` | | `string` | nciccbr/ccbr_preseq_v2.0:v1 | | | +| `containers_r` | | `string` | nciccbr/ccbr_r_4.3.0:v1 | | | +| `containers_sicer` | | `string` | nciccbr/ccbr_sicer2_1.0.3:v1 | | | ## Other parameters -| Parameter | Description | Type | Default | Required | Hidden | -| ----------------- | ----------- | -------- | -------------------------- | -------- | ------ | -| `diffbind_report` | | `string` | assets/diffbind_report.Rmd | | True | +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `diffbind_report` | | `string` | assets/diffbind_report.Rmd | | True | diff --git a/docs/guide/spike-in.md b/docs/guide/spike-in.md index 78b6ab0d..523fd984 100644 --- a/docs/guide/spike-in.md +++ b/docs/guide/spike-in.md @@ -45,7 +45,7 @@ Using the delorenzi normalization method with _E. coli_ as the spike-in genome: ```sh champagne run \ - --output /data/$USER/champagne_project/ \ + --output path/to/champagne_project/ \ --genome hg38 \ --input assets/samplesheet_full_spikein.csv \ --spike_genome ecoli_k12 \ @@ -57,7 +57,7 @@ Using the guenther normalization method with _D. 
melanogaster_ as the spike-in g ```sh champagne run \ - --output /data/$USER/champagne_project/ \ + --output path/to/champagne_project/ \ --genome hg38 \ --input assets/samplesheet_full_spikein.csv \ --spike_genome dmelr6.32 \ diff --git a/docs/guide/usage.md b/docs/guide/usage.md index 5ed84984..a57ce29b 100644 --- a/docs/guide/usage.md +++ b/docs/guide/usage.md @@ -56,7 +56,7 @@ Commands: Initialize your project directory: ```sh -champagne init --output /data/$USER/champagne_project +champagne init --output path/to/champagne_project/ ``` Or if you do not use `--output`, your current working directory will be used as default: @@ -87,10 +87,10 @@ Example for a single-end project: ``` sample,rep,fastq_1,fastq_2,antibody,control -sampleA,1,/path/to/sample_1.R1.fastq.gz,,Ab,inputA -sampleA,2,/path/to/sample_2.R1.fastq.gz,,Ab,inputA -inputA,1,/path/to/sample1.R1.fastq.gz,,, -inputA,2,/path/to/sample1.R1.fastq.gz,,, +sampleA,1,path/to/sample_1.R1.fastq.gz,,Ab,inputA +sampleA,2,path/to/sample_2.R1.fastq.gz,,Ab,inputA +inputA,1,path/to/sample1.R1.fastq.gz,,, +inputA,2,path/to/sample1.R1.fastq.gz,,, ``` Example for a paired-end project: @@ -99,10 +99,10 @@ Example for a paired-end project: ``` sample,rep,fastq_1,fastq_2,antibody,control -sample1,1,/path/to/sample_1.R1.fastq.gz,/path/to/sample_1.R2.fastq.gz,Ab,input1 -sample1,2,/path/to/sample_2.R1.fastq.gz,/path/to/sample_1.R2.fastq.gz,Ab,input1 -input1,1,/path/to/input_1.R1.fastq.gz,/path/to/input_1.R2.fastq.gz,, -input1,2,/path/to/input_2.R1.fastq.gz,/path/to/input_2.R2.fastq.gz,, +sample1,1,path/to/sample_1.R1.fastq.gz,path/to/sample_1.R2.fastq.gz,Ab,input1 +sample1,2,path/to/sample_2.R1.fastq.gz,path/to/sample_2.R2.fastq.gz,Ab,input1 +input1,1,path/to/input_1.R1.fastq.gz,path/to/input_1.R2.fastq.gz,, +input1,2,path/to/input_2.R1.fastq.gz,path/to/input_2.R2.fastq.gz,, ``` For more examples, view the sample sheet files in the [`assets/` directory on @@ -166,7 +166,7 @@ run_qc: true You will then pass this file to the 
`-params-file` option when running the pipeline: ```sh -champagne run --output /data/$USER/champagne_project \ +champagne run --output path/to/champagne_project/ \ -params-file assets/params.yml ``` @@ -222,7 +222,7 @@ Run a local preview: ```sh champagne run \ - --output /data/$USER/champagne_project \ + --output path/to/champagne_project/ \ --input assets/samplesheet_test_mm10.csv \ --contrasts assets/contrasts_test_mm10.tsv \ --genome mm10 \ @@ -237,7 +237,7 @@ download containers: ```sh champagne run \ - --output /data/$USER/champagne_project \ + --output path/to/champagne_project/ \ --input assets/samplesheet_test_mm10.csv \ --contrasts assets/contrasts_test_mm10.tsv \ --genome mm10 \ @@ -251,7 +251,7 @@ Launch a pipeline run with slurm: ```sh champagne run \ - --output /data/$USER/champagne_project \ + --output path/to/champagne_project/ \ --input assets/samplesheet_test_mm10.csv \ --contrasts assets/contrasts_test_mm10.tsv \ --genome mm10 \ diff --git a/main.nf b/main.nf index 9a1b7b98..e9f22354 100644 --- a/main.nf +++ b/main.nf @@ -43,10 +43,14 @@ workflow version { } workflow debug { - - sample_sheet = Channel.fromPath(file(params.input, checkIfExists: true)) - contrast_sheet = params.contrasts ? Channel.fromPath(file(params.contrasts, checkIfExists: true)) : params.contrasts - raw_fastqs = INPUT_CHECK(sample_sheet, contrast_sheet).reads + println "CCBR_PIPELINER_DIR: ${env('CCBR_PIPELINER_DIR')}" + println "GENOME FASTA: ${params.genomes[ params.genome ].fasta}" + println "INDEX DIR: ${params.index_dir}" + file("${params.genomes[ params.genome ].fasta}", checkIfExists: true) + + // sample_sheet = Channel.fromPath(file(params.input, checkIfExists: true)) + // contrast_sheet = params.contrasts ? 
Channel.fromPath(file(params.contrasts, checkIfExists: true)) : params.contrasts + // raw_fastqs = INPUT_CHECK(sample_sheet, contrast_sheet).reads } diff --git a/nextflow.config b/nextflow.config index 0ffcc6b7..8927b305 100644 --- a/nextflow.config +++ b/nextflow.config @@ -80,7 +80,6 @@ params { run_meme = true run_consensus_union = true run_consensus_corces = true - } @@ -137,9 +136,9 @@ includeConfig 'conf/modules.config' // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. env { PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" + R_PROFILE_USER = "/.Rprofile" // abs-path:ignore + R_ENVIRON_USER = "/.Renviron" // abs-path:ignore + JULIA_DEPOT_PATH = "/usr/local/share/julia" // abs-path:ignore } // Capture exit codes from upstream processes when piping