diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9e26ad77..ac38f2ff 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -86,7 +86,7 @@ jobs: uses: adamrtalbot/detect-nf-test-changes@v0.0.6 with: head: ${{ github.sha }} - base: ${{ github.base_ref }} + base: origin/${{ github.base_ref }} include: .github/include.yml - name: print list of nf-test files run: | diff --git a/.github/workflows/docker-auto.yml b/.github/workflows/docker-auto.yml deleted file mode 100644 index 0b142ca3..00000000 --- a/.github/workflows/docker-auto.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: docker-auto - -on: - push: - branches: - - main - paths: - - "docker/**" -permissions: - contents: read -jobs: - generate-matrix: - runs-on: ubuntu-latest - outputs: - matrix-metadata: ${{ steps.metadata.outputs.matrix }} - steps: - - uses: hellofresh/action-changed-files@v3 - id: metadata - with: - pattern: docker/(?P\w+)/.* - default-patterns: | - meta.yml - Dockerfile - environment.txt - - update-docker: - needs: [generate-matrix] - strategy: - matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix-metadata) }} - if: ${{ fromJson(needs.generate-matrix.outputs.matrix-metadata).include[0] }} # skip if the matrix is empty! - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: pietrobolcato/action-read-yaml@1.0.0 - id: metadata - with: - config: ${{ github.workspace }}/docker/${{ matrix.image_dir }}/meta.yml - - name: Get date - id: date - run: | - echo "DATE=$(date +"%Y-%m-%d")" >> $GITHUB_OUTPUT - - name: Login to DockerHub - if: github.event_name != 'pull_request' - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Build and push - uses: docker/build-push-action@v4 - # only try building & pushing the container if parsing the metadata worked - if: ${{ steps.metadata.outputs['container'] != '' }} - with: - context: docker/${{ matrix.image_dir }} - # only push container to docker hub if not triggered from a PR - push: ${{ github.event_name != 'pull_request' }} - tags: ${{ steps.metadata.outputs['container'] }} - build-args: | - BUILD_DATE=${{ steps.date.outputs.DATE }} - BUILD_TAG=${{ steps.metadata.outputs['version'] }} - REPONAME=${{ steps.metadata.outputs['image_name'] }} diff --git a/.github/workflows/docker-manual.yml b/.github/workflows/docker-manual.yml deleted file mode 100644 index 904cf05f..00000000 --- a/.github/workflows/docker-manual.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: docker-manual - -run-name: ${{ inputs.context_dir }} - -on: - workflow_dispatch: - inputs: - context_dir: - type: string - description: path to the directory containing the Dockerfile and meta.yml file (e.g. docker/r-quarto/) - required: true - push: - type: boolean - description: Push to DockerHub (uncheck to only build the container without pushing) - required: true - default: true -permissions: - contents: read -jobs: - build-docker: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: pietrobolcato/action-read-yaml@1.0.0 - id: metadata - with: - config: ${{ github.workspace }}/${{ github.event.inputs.context_dir }}/meta.yml - - name: Login to DockerHub - if: ${{ github.event.inputs.push == 'true' }} - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Prepare build-time variables - id: vars - run: | - echo "DATE=$(date +"%Y-%m-%d")" >> "$GITHUB_OUTPUT" - - name: Build and push - uses: docker/build-push-action@v4 - # only try building & pushing the container if parsing the metadata worked - if: ${{ steps.metadata.outputs['container'] != '' }} - with: - context: ${{ github.event.inputs.context_dir }} - push: ${{ github.event.inputs.push }} - tags: ${{ steps.metadata.outputs['container'] }} - build-args: | - BUILD_DATE=${{ steps.vars.outputs.DATE }} - BUILD_TAG=${{ steps.metadata.outputs['version'] }} - REPONAME=${{ steps.metadata.outputs['image_name'] }} diff --git a/.gitignore b/.gitignore index 95d4bda6..8b706e0f 100755 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,8 @@ log/ .nf-test/ results/ output +.singularity/ +submit_slurm.sh # tmp ignore testing fastqs test_dir/*/*/*.gz diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e20ef26..a78b7f50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ ## SINCLAIR development version +- All nextflow processes now use containers rather than environment modules for dependencies. (#95, @kelly-sovacool) + +### Bug fixes + +- Creates cache for celldex reference downloads in working directory (#203, @wong-nw) - Set main job memory to 2 GB for slurm mode. (#205, @kelly-sovacool) ## SINCLAIR 0.3.5 diff --git a/assets/slurm_header_biowulf.sh b/assets/slurm_header_biowulf.sh index 34be02aa..126e7b8d 100644 --- a/assets/slurm_header_biowulf.sh +++ b/assets/slurm_header_biowulf.sh @@ -6,7 +6,7 @@ #SBATCH -J "sinclair" #SBATCH --mail-type=BEGIN,END,FAIL #SBATCH --output "log/slurm_%j.log" -#SBATCH --output "log/slurm_%j.log" +#SBATCH --error "log/slurm_%j.log" module load ccbrpipeliner module load nextflow diff --git a/assets/slurm_header_frce.sh b/assets/slurm_header_frce.sh index e61fa725..66631cdc 100644 --- a/assets/slurm_header_frce.sh +++ b/assets/slurm_header_frce.sh @@ -6,7 +6,7 @@ #SBATCH -J "sinclair" #SBATCH --mail-type=BEGIN,END,FAIL #SBATCH --output "log/slurm_%j.log" -#SBATCH --output "log/slurm_%j.log" +#SBATCH --error "log/slurm_%j.log" module load nextflow NXF_SINGULARITY_CACHEDIR=/mnt/projects/CCBR-Pipelines/SIFs diff --git a/bin/batch_correction_cca.Rmd b/bin/batch_correction_cca.Rmd index 93ce5fc3..f076cc09 100755 --- a/bin/batch_correction_cca.Rmd +++ b/bin/batch_correction_cca.Rmd @@ -8,58 +8,44 @@ editor_options: params: species: "hg38" gid: "group1-group2" - mergedObj: "/data/sevillas2/sinclair/dev/results/seurat/merge/group1-group2_seurat_merged.rds" + mergedObj: "results/seurat/merge/group1-group2_seurat_merged.rds" resolution_list: "0.1,0.2,0.3,0.5,0.6,0.8,1" npcs: 50 vars_to_regress: NULL - Rlib_dir: "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_scRNA_RHEL8/" - Rpkg_config: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/conf/Rpack.config" - scRNA_functions: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/bin/scRNA_functions.R" + scRNA_functions: "bin/scRNA_functions.R" + celldex_cache: NULL testing: "Y" --- -```{r, prep_args, message=FALSE} +```{r setup, message=FALSE} +options(future.globals.maxSize = 96000 * 1024^2) # set up params species <- params$species gid <- params$gid mergedObj <- params$mergedObj resolution <- as.numeric(strsplit(params$resolution_list, ",")[[1]]) npcs <- as.numeric(params$npcs) + vars_to_regress <- params$vars_to_regress vars_to_regress_list <- if (is.null(vars_to_regress) || toupper(vars_to_regress) == "NULL") { NULL } else { unlist(strsplit(gsub(" ", "", vars_to_regress), ",")) } -Rlib_dir <- params$Rlib_dir -Rpkg_config <- params$Rpkg_config -testing <- params$testing -scRNA_functions <- params$scRNA_functions -``` - -```{r, handle_pkg, message=FALSE} -# source functions -source(scRNA_functions) -# set library dir, load this and remove any other dirs to avoid confusion -# between personally created pkgs and the pipeline package -## saving old path "/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library" -print(paste0("Using the lib.loc location: ", Rlib_dir)) -assign(".lib.loc", Rlib_dir, envir = environment(.libPaths)) -.libPaths() - -# read in package info -pkg_df <- read.csv(Rpkg_config) -pkg_df <- subset(pkg_df, cca == "Y") -pkg_df - -# for each package check installation, if present then load library -scRNA_handle_packages(pkg_df) +celldex_cache <- params$celldex_cache +testing <- params$testing -# additional options -# tinytex::install_tinytex(force = TRUE) -options(future.globals.maxSize = 96000 * 1024^2) +source(params$scRNA_functions) +library(Seurat) +library(SingleR) +library(tinytex) +library(AnnotationDbi) +library(org.Hs.eg.db) +library(org.Mm.eg.db) +library(Orthology.eg.db) +library(harmony) ``` ```{r, processing, message=FALSE} @@ -74,7 +60,7 @@ so[["RNA"]] <- split(so[["RNA"]], f = so$Sample) # integrate so_corrected <- MAIN_BATCH_CORRECTION(so, npcs, species, resolution, method_in = "CCAIntegration", reduction_in = "CCA", - v_list = vars_to_regress_list + v_list = vars_to_regress_list, cache_path = celldex_cache ) ``` diff --git a/bin/batch_correction_harmony.Rmd b/bin/batch_correction_harmony.Rmd index e2b8753a..f55e6b0b 100755 --- a/bin/batch_correction_harmony.Rmd +++ b/bin/batch_correction_harmony.Rmd @@ -7,56 +7,43 @@ editor_options: chunk_output_type: console params: gid: "group1_group2" - mergedObj: "/data/sevillas2/sinclair/dev/results/seurat/merge/group1-group2_seurat_merged.rds" + mergedObj: "results/seurat/merge/group1-group2_seurat_merged.rds" species: "hg38" resolution_list: "0.1,0.2,0.3,0.5,0.6,0.8,1" npcs: "50" vars_to_regress: NULL - Rlib_dir: "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_scRNA_RHEL8/" - Rpkg_config: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/conf/Rpack.config" - scRNA_functions: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/bin/scRNA_functions.R" + scRNA_functions: "bin/scRNA_functions.R" + celldex_cache: NULL testing: "Y" --- -```{r, prep_args, message=FALSE, echo=FALSE, include=FALSE} +```{r setup, message=FALSE, echo=FALSE, include=FALSE} +options(future.globals.maxSize = 96000 * 1024^2) # set up params species <- params$species gid <- params$gid mergedObj <- params$mergedObj resolution <- as.numeric(strsplit(params$resolution_list, ",")[[1]]) npcs <- as.numeric(params$npcs) + +source(params$scRNA_functions) +library(Seurat) +library(SeuratWrappers) +library(SingleR) +library(tinytex) +library(AnnotationDbi) +library(org.Hs.eg.db) +library(org.Mm.eg.db) +library(Orthology.eg.db) +library(harmony) vars_to_regress <- params$vars_to_regress vars_to_regress_list <- if (is.null(vars_to_regress) || toupper(vars_to_regress) == "NULL") { NULL } else { unlist(strsplit(gsub(" ", "", vars_to_regress), ",")) } -Rlib_dir <- params$Rlib_dir -Rpkg_config <- params$Rpkg_config -scRNA_functions <- params$scRNA_functions -``` - -```{r, handle_pkg, message=FALSE} -# source functions -source(scRNA_functions) - -# set library dir, load this and remove any other dirs to avoid confusion -# between personally created pkgs and the pipeline package -## saving old path "/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library" -print(paste0("Using the lib.loc location: ", Rlib_dir)) -assign(".lib.loc", Rlib_dir, envir = environment(.libPaths)) -.libPaths() - -# read in package info -pkg_df <- read.csv(Rpkg_config) -pkg_df <- subset(pkg_df, harmony == "Y") -pkg_df - -# for each package check installation, if present then load library -scRNA_handle_packages(pkg_df) +celldex_cache <- params$celldex_cache -# additional options -# tinytex::install_tinytex(force = TRUE) options(future.globals.maxSize = 96000 * 1024^2) ``` @@ -72,7 +59,7 @@ so[["RNA"]] <- split(so[["RNA"]], f = so$Sample) # integrate so_corrected <- MAIN_BATCH_CORRECTION(so, npcs, species, resolution, method_in = "HarmonyIntegration", reduction_in = "harmony", - v_list = vars_to_regress_list + v_list = vars_to_regress_list, cache_path = celldex_cache ) ``` diff --git a/bin/batch_correction_integration.Rmd b/bin/batch_correction_integration.Rmd index d7ab642b..3afd3d8f 100755 --- a/bin/batch_correction_integration.Rmd +++ b/bin/batch_correction_integration.Rmd @@ -2,29 +2,30 @@ title: "Batch Correction Analysis" author: "CCBR" date: '`r format(Sys.time(), "%a %b %d %Y - %X")`' -output: html_document +output: + html_document: + self_contained: true + editor_options: chunk_output_type: console always_allow_html: true params: gid: "text" - mergedObj: "/data/sevillas2/sinclair/dev/results/seurat/merge/group1-group2_seurat_merged.rds" - ccaObj: "/data/sevillas2/sinclair/dev/results/batch_correct/group1-group2_batch_correction_cca.rds" - rpcaObj: "/data/sevillas2/sinclair/dev/results/batch_correct/group1-group2_batch_correction_rpca.rds" - harmonyObj: "/data/sevillas2/sinclair/dev/results/batch_correct/group1-group2_batch_correction_harmony.rds" - # scviObj: "/data/sevillas2/sinclair/dev/results/batch_correct/group1-group2_batch_correction_scvi.rds" - ligerObj: "/data/sevillas2/sinclair/dev/results/batch_correct/group1-group2_batch_correction_liger.rds" + mergedObj: "results/seurat/merge/group1-group2_seurat_merged.rds" + ccaObj: "results/batch_correct/group1-group2_batch_correction_cca.rds" + rpcaObj: "results/batch_correct/group1-group2_batch_correction_rpca.rds" + harmonyObj: "results/batch_correct/group1-group2_batch_correction_harmony.rds" + scviObj: "results/batch_correct/group1-group2_batch_correction_scvi.rds" + ligerObj: "results/batch_correct/group1-group2_batch_correction_liger.rds" resolution_list: "0.1,0.2,0.3,0.5,0.6,0.8,1" citeseq: "text" annot: "text" - npcs: "20" - Rlib_dir: "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_scRNA_RHEL8" - Rpkg_config: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/conf/Rpack.config" - scRNA_functions: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/bin/scRNA_functions.R" + npcs: 20 + scRNA_functions: "bin/scRNA_functions.R" testing: "Y" --- -```{r, prep_args, message=FALSE, echo=FALSE, include=FALSE} +```{r setup, message=FALSE, echo=FALSE, include=FALSE} # set up params # species= params$species gid <- params$gid @@ -41,9 +42,18 @@ npcs <- as.numeric(params$npcs) citeseq <- params$citeseq annot <- params$annot -Rlib_dir <- params$Rlib_dir -Rpkg_config <- params$Rpkg_config -scRNA_functions <- params$scRNA_functions +source(params$scRNA_functions) +library(Seurat) +library(R.utils) +library(cluster) +library(SeuratWrappers) +library(ggplot2) +library(ggpubr) +library(DT) +library(scales) +library(png) +library(gridExtra) +library(cowplot) # create tmp image directory tmp_images <- "tmp_images" @@ -51,26 +61,6 @@ unlink(tmp_images, recursive = TRUE) dir.create(file.path("tmp_images"), showWarnings = FALSE) ``` -```{r, handle_pkg, message=FALSE, echo=FALSE, include=FALSE} -# source functions -source(scRNA_functions) - -# set library dir, load this and remove any other dirs to avoid confusion -# between personally created pkgs and the pipeline package -## saving old path "/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library" -print(paste0("Using the lib.loc location: ", Rlib_dir)) -assign(".lib.loc", Rlib_dir, envir = environment(.libPaths)) -.libPaths() - -# read in package info -pkg_df <- read.csv(Rpkg_config) -pkg_df <- subset(pkg_df, int == "Y") -pkg_df - -# for each package check installation, if present then load library -scRNA_handle_packages(pkg_df) -``` - ```{r, processing, message=FALSE, echo=FALSE, include=FALSE} # read in merged objects so_merged <- readRDS(mergedObj) @@ -314,7 +304,7 @@ for (i in 1:length(objList)) { # set coluumn resolution name slm_name <- paste0("SCT_snn_res.", res) - # if the resolution column is not avaialble, create this column + # if the resolution column is not available, create this column if ((slm_name %in% colnames(obj@meta.data)) == FALSE) { print(paste0("----Missing SCT_res column ", resMod, "; creating now")) obj <- FindClusters(obj, algorithm = 3, resolution = resMod) @@ -375,6 +365,34 @@ for (i in 1:length(objList)) { fileName <- paste0(tmp_images, "/clusters_", objList[[i]], "_res_", res, ".png") ggsave(fileName, clusterPlot) + # create silhouette plots + fileName <- paste0(tmp_images, "/silhouette_", objList[[i]], "_res_", res, ".png") + png(fileName) + plot(sil, + col = sil.cols, + border = sil.cols, + main = paste0(objList[[i]], " clustering | resolution: ", resMod), + lty = 2, + sub = paste( + "Average silhouette width:", + format(round(mean(sil[, 3]), 4), nsmall = 4) + ) + ) + abline(v = mean(sil[, 3]), col = "red4", lty = 2) + dev.off() + + # create cluster plots + clusterPlot <- DimPlot(obj, + group.by = slm_name, + label = T, repel = T, + order = sort(as.numeric(as.character(unique(obj[[slm_name]][, 1]))), + decreasing = T + ) + ) + + ggtitle(paste0(objList[[i]], " clusters | resolution: ", resMod)) + fileName <- paste0(tmp_images, "/clusters_", objList[[i]], "_res_", res, ".png") + ggsave(fileName, clusterPlot) + # Store resolution vs average silhouette score resSil_mat[k, 2] <- format(round(mean(sil[, 3]), 4), nsmall = 4) k <- k + 1 diff --git a/bin/batch_correction_liger.Rmd b/bin/batch_correction_liger.Rmd index 7df5d5d2..98e07eb8 100755 --- a/bin/batch_correction_liger.Rmd +++ b/bin/batch_correction_liger.Rmd @@ -8,18 +8,18 @@ editor_options: params: species: "hg38" gid: "group1-group2" - mergedObj: "/data/sevillas2/sinclair/dev/results/seurat/merge/group1-group2_seurat_merged.rds" + mergedObj: "results/seurat/merge/group1-group2_seurat_merged.rds" resolution_list: "0.1,0.2,0.3,0.5,0.6,0.8,1" npcs: 50 vars_to_regress: NULL - Rlib_dir: "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_scRNA_RHEL8/" - Rpkg_config: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/conf/Rpack.config" - scRNA_functions: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/bin/scRNA_functions.R" + scRNA_functions: "bin/scRNA_functions.R" + celldex_cache: NULL testing: "Y" --- -```{r, prep_args, message=FALSE} +```{r setup, message=FALSE} +options(future.globals.maxSize = 96000 * 1024^2) # set up params species <- params$species gid <- params$gid @@ -32,33 +32,14 @@ vars_to_regress_list <- if (is.null(vars_to_regress) || toupper(vars_to_regress) } else { unlist(strsplit(gsub(" ", "", vars_to_regress), ",")) } -Rlib_dir <- params$Rlib_dir -Rpkg_config <- params$Rpkg_config -testing <- params$testing -scRNA_functions <- params$scRNA_functions -``` - -```{r, handle_pkg, message=FALSE} -# source functions -source(scRNA_functions) - -# set library dir, load this and remove any other dirs to avoid confusion -# between personally created pkgs and the pipeline package -## saving old path "/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library" -print(paste0("Using the lib.loc location: ", Rlib_dir)) -assign(".lib.loc", Rlib_dir, envir = environment(.libPaths)) -.libPaths() - -# read in package info -pkg_df <- read.csv(Rpkg_config) -pkg_df <- subset(pkg_df, liger == "Y") -pkg_df +celldex_cache <- params$celldex_cache -# for each package check installation, if present then load library -scRNA_handle_packages(pkg_df) +source(params$scRNA_functions) +library(Seurat) +library(SeuratWrappers) +library(SingleR) +library(rliger) -# additional options -# tinytex::install_tinytex(force = TRUE) options(future.globals.maxSize = 96000 * 1024^2) ``` @@ -74,7 +55,7 @@ so[["RNA"]] <- split(so[["RNA"]], f = so$Sample) # integrate so_corrected <- MAIN_BATCH_CORRECTION(so, npcs, species, resolution, method_in = "LIGER", reduction_in = "iNMF", - v_list = vars_to_regress_list + v_list = vars_to_regress_list, cache_path = celldex_cache ) ``` diff --git a/bin/batch_correction_rpca.Rmd b/bin/batch_correction_rpca.Rmd index 7d15127e..e2d50780 100755 --- a/bin/batch_correction_rpca.Rmd +++ b/bin/batch_correction_rpca.Rmd @@ -8,17 +8,17 @@ editor_options: params: species: "hg38" gid: "group1_group2" - mergedObj: "/data/sevillas2/sinclair/dev/results/seurat/merge/group1-group2_seurat_merged.rds" + mergedObj: "results/seurat/merge/group1-group2_seurat_merged.rds" resolution_list: "0.1,0.2,0.3,0.5,0.6,0.8,1" npcs: "50" vars_to_regress: NULL - Rlib_dir: "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_scRNA_RHEL8/" - Rpkg_config: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/conf/Rpack.config" - scRNA_functions: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/bin/scRNA_functions.R" + scRNA_functions: "bin/scRNA_functions.R" + celldex_cache: NULL testing: "Y" --- -```{r, prep_args, message=FALSE, echo=FALSE, include=FALSE} +```{r setup, message=FALSE, echo=FALSE, include=FALSE} +options(future.globals.maxSize = 96000 * 1024^2) # set up params species <- params$species gid <- params$gid @@ -31,32 +31,19 @@ vars_to_regress_list <- if (is.null(vars_to_regress) || toupper(vars_to_regress) } else { unlist(strsplit(gsub(" ", "", vars_to_regress), ",")) } -Rlib_dir <- params$Rlib_dir -Rpkg_config <- params$Rpkg_config -scRNA_functions <- params$scRNA_functions -``` - -```{r, handle_pkg, message=FALSE} -# source functions -source(scRNA_functions) - -# set library dir, load this and remove any other dirs to avoid confusion -# between personally created pkgs and the pipeline package -## saving old path "/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library" -print(paste0("Using the lib.loc location: ", Rlib_dir)) -assign(".lib.loc", Rlib_dir, envir = environment(.libPaths)) -.libPaths() - -# read in package info -pkg_df <- read.csv(Rpkg_config) -pkg_df <- subset(pkg_df, rpca == "Y") -pkg_df +celldex_cache <- params$celldex_cache -# for each package check installation, if present then load library -scRNA_handle_packages(pkg_df) +source(params$scRNA_functions) +library(Seurat) +library(SeuratWrappers) +library(SingleR) +library(tinytex) +library(AnnotationDbi) +library(org.Hs.eg.db) +library(org.Mm.eg.db) +library(Orthology.eg.db) +library(harmony) -# additional options -# tinytex::install_tinytex(force = TRUE) options(future.globals.maxSize = 96000 * 1024^2) ``` @@ -72,7 +59,7 @@ so[["RNA"]] <- split(so[["RNA"]], f = so$Sample) # integrate so_corrected <- MAIN_BATCH_CORRECTION(so, npcs, species, resolution, method_in = "RPCAIntegration", reduction_in = "RPCA", - v_list = vars_to_regress_list + v_list = vars_to_regress_list, cache_path = celldex_cache ) ``` diff --git a/bin/batch_correction_scvi.Rmd b/bin/batch_correction_scvi.Rmd index d1e4a315..0b44b5c5 100755 --- a/bin/batch_correction_scvi.Rmd +++ b/bin/batch_correction_scvi.Rmd @@ -8,19 +8,16 @@ editor_options: params: species: "hg38" gid: "group1_group2" - mergedObj: "/data/sevillas2/sinclair/dev/results/seurat/merge/group1-group2_seurat_merged.rds" resolution_list: "0.1,0.2,0.3,0.5,0.6,0.8,1" npcs: "50" vars_to_regress: NULL - python_path: "/data/CCBR_Pipeliner/db/PipeDB/Conda/envs/scvi-env/bin" - conda_path: "/data/CCBR_Pipeliner/db/PipeDB/Conda/envs/scvi-env" - Rlib_dir: "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_scRNA_RHEL8/" - Rpkg_config: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/conf/Rpack.config" - scRNA_functions: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/bin/scRNA_functions.R" + celldex_cache: NULL testing: "Y" + scRNA_functions: "bin/scRNA_functions.R" --- -```{r, prep_args, message=FALSE, echo=FALSE, include=FALSE} +```{r setup, message=FALSE, echo=FALSE, include=FALSE} +options(future.globals.maxSize = 96000 * 1024^2) # set up params species <- params$species gid <- params$gid @@ -33,36 +30,13 @@ vars_to_regress_list <- if (is.null(vars_to_regress) || toupper(vars_to_regress) } else { unlist(strsplit(gsub(" ", "", vars_to_regress), ",")) } -python_path <- params$python_path -conda_path <- params$conda_path -Rlib_dir <- params$Rlib_dir -Rpkg_config <- params$Rpkg_config -scRNA_functions <- params$scRNA_functions -``` - -```{r, handle_pkg, message=FALSE, echo=FALSE, include=FALSE} -# source functions -source(scRNA_functions) - -# set library dir, load this and remove any other dirs to avoid confusion -# between personally created pkgs and the pipeline package -## saving old path "/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library" -print(paste0("Using the lib.loc location: ", Rlib_dir)) -assign(".lib.loc", Rlib_dir, envir = environment(.libPaths)) -.libPaths() - -# read in package info -pkg_df <- read.csv(Rpkg_config) -pkg_df <- subset(pkg_df, scvi == "Y") -pkg_df +celldex_cache <- params$celldex_cache -# for each package check installation, if present then load library -scRNA_handle_packages(pkg_df) -reticulate::use_condaenv(conda_path) -reticulate::use_python(python_path) +source(params$scRNA_functions) +library(Seurat) +library(SeuratWrappers) +library(SingleR) -# additional options -# tinytex::install_tinytex(force = TRUE) options(future.globals.maxSize = 96000 * 1024^2) ``` @@ -79,7 +53,7 @@ so[["RNA"]] <- split(so[["RNA"]], f = so$Sample) so_corrected <- MAIN_BATCH_CORRECTION(so, npcs, species, resolution, method_in = "scVIIntegration", reduction_in = "integrated.scvi", v_list = vars_to_regress_list, - conda_env = conda_path + cache_path = celldex_cache ) so_corrected ``` diff --git a/bin/install_biowulf.sh b/bin/install_biowulf.sh deleted file mode 100755 index b08d718a..00000000 --- a/bin/install_biowulf.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# Usage: -# bin/install_biowulf.sh -set -euxo pipefail - -repo_path=/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/ -version=`cat ${repo_path}/VERSION` -install_path=/data/CCBR_Pipeliner/Pipelines/SINCLAIR/.${version} -bin_path=${install_path}/bin/ - -. "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" -conda activate py311 - -# remove artifacts from prior builds -pushd ${repo_path} -rm -rf build/ *.egg-info -popd - -echo "Installing SINCLAIR to ${install_path}" -pip install ${repo_path} --target ${install_path} --upgrade -chmod a+rx ${install_path}/sinclair/bin/*.* -chmod -R a+r ${install_path} - -if [[ ":$PATH:" != *":${bin_path}:"* ]];then - export PATH="${PATH}:${bin_path}" -fi - -if [[ ":$PYTHONPATH:" != *":${install_path}:"* ]];then - export PYTHONPATH="${PYTHONPATH}:${install_path}" -fi diff --git a/bin/packageDownload.R b/bin/packageDownload.R deleted file mode 100755 index d05b0f5c..00000000 --- a/bin/packageDownload.R +++ /dev/null @@ -1,25 +0,0 @@ -install.packages("remotes", "devtools", "BiocManager", "pak") -devtools::install_github("satijalab/seurat", ref = "seurat5") -library(pak) -seuratPkgList <- gsub(".*/", "", pak::pkg_deps("Seurat")$ref) -seuratPkgList <- seuratPkgList[-which(SeuratPkgList == "Seurat")] -install.packages(seuratPkgList) -install.packages("fastDummies", "tinytex") -BiocManager::install(c( - "AnnotationDbi", "AnnotationFilter", "AnnotationHub", "beachmat", - "Biobase", "BiocFileCache", "BiocIO", "BiocNeighbors", "BiocParallel", "BiocSingular", "BiocVersion", "Biostrings", "BiocGenerics", - "biomaRt", "bit", "bit64", "blob", "celldex", "DBI", "dbplyr", "DelayedArray", "DelayedMatrixStats", "dotCall64", "DT", - "ensembldb", "ExperimentHub", "fields", "filelock", "flexmix", "formatR", "futile.logger", "futile.options", - "GenomeInfoDb", "GenomeInfoDbData", "GenomicAlignments", "GenomicFeatures", "GenomicRanges", "hdf5r", "hms", - "interactiveDisplayBase", "IRanges", "KEGGREST", "lambda.r", "maps", "MatrixGenerics", "modeltools", "nnet", - "Orthology.eg.db", "plogr", "prettyunits", "progress", "ProtGenerics", "RcppHNSW", "RCurl", "restfulr", "Rhtslib", - "rjson", "R.methodsS3", "R.oo", "Routliers", "Rsamtools", "RSpectra", "RSQLite", "rsvd", "rtracklayer", "R.utils", - "S4Arrays", "S4Vectors", "ScaledMatrix", "scRNASeq", "SeuratWrappers", "SingleCellExperiment", "SingleR", - "snow", "spam", "sparseMatrixStats", "SummarizedExperiment", "viridis", "XML", "xml2", "XVector", "zlibbioc" -)) -devtools::install_github("chris-mcginnis-ucsf/DoubletFinder") -devtools::install_github("satijalab/seurat-wrappers", ref = "seurat5") -devtools::install_github("satijalab/seurat-data", ref = "seurat5") -devtools::install_github("satijalab/azimuth", ref = "seurat5") -devtools::install_github("stuart-lab/signac", ref = "seurat5") -devtools::install_github("immunogenomics/harmony") diff --git a/bin/scRNA_functions.R b/bin/scRNA_functions.R index 344a2862..491a0dc5 100755 --- a/bin/scRNA_functions.R +++ b/bin/scRNA_functions.R @@ -1,34 +1,4 @@ -################################################################## -# Handle packages -################################################################## -scRNA_handle_packages <- function(pkg_df) { - for (rowid in rownames(pkg_df)) { - pkg <- pkg_df[rowid, "package"] - source <- pkg_df[rowid, "source"] - version <- pkg_df[rowid, "version"] - gh_name <- pkg_df[rowid, "gh_name"] - - need_install <- pkg[!(pkg %in% installed.packages()[, "Package"])] - if (length(need_install) != 0) { - print(paste0("Installing: ", pkg)) - if (source == "bc") BiocManager::install(pkg, ask = FALSE, update = FALSE) - if (source == "cr") { - install.packages(pkg, - version = version, repos = "http://cran.us.r-project.org", - local = FALSE, ask = FALSE, update = FALSE - ) - } - if (source == "gh") remotes::install_github(gh_name, version = version, local = FALSE, update = FALSE) - } - - print(paste0("Loading: ", pkg)) - invisible(lapply(pkg, library, character.only = TRUE)) - } -} - -################################################################## -# Seurat Pre-processing -################################################################## +#' Seurat pre-processing SEURAT_CLUSTERING <- function(so_in, npcs_in) { # Runs Principal Component Analysis, FindNeighbors, clustering with the Smart Local Moving algorithm, and UMAP dimensionality reduction so <- RunPCA( @@ -50,38 +20,7 @@ CONVERT_TO_HUMAN_GENELIST <- function(gns) { return(as.character(unlist(mapped$MUS))) } -MAIN_PROCESS_SO <- function(so_in, species, npcs_in) { - # assign genes depending on species input - if (species == "hg38" || species == "hg19") { - print("--proccesing human data") - s.genes <- cc.genes$s.genes - g2m.genes <- cc.genes$g2m.genes - } else if (species == "mm10") { - print("--proccesing mouse data") - s.genes <- CONVERT_TO_HUMAN_GENELIST(cc.genes$s.genes) - g2m.genes <- CONVERT_TO_HUMAN_GENELIST(cc.genes$g2m.genes) - } - # process - so_1 <- NormalizeData(so_in, - normalization.method = "LogNormalize", - scale.factor = 10000, - assay = "RNA" - ) - so_2 <- ScaleData(so_1, assay = "RNA") - so_3 <- CellCycleScoring(so_2, - s.features = s.genes, - g2m.features = g2m.genes, - set.ident = TRUE - ) - so_4 <- SCTransform(so_3) - so_out <- SEURAT_CLUSTERING(so_4, npcs_in) - return(so_out) -} - -################################################################## -# -################################################################## RUN_SINGLEr <- function(obj, refFile, fineORmain) { obj <- DietSeurat(obj, graphs = "umap") sce <- as.SingleCellExperiment(obj, assay = "SCT") @@ -90,39 +29,58 @@ RUN_SINGLEr <- function(obj, refFile, fineORmain) { return(s$pruned.labels) } -MAIN_SINGLER <- function(so_in, species) { +fetch_celldex_ref <- function(ref_name) { + ref <- switch(ref_name, + "hpca" = , + "HumanPrimaryCellAtlasData" = celldex::fetchReference("hpca", version = "2024-02-26", realize.assays = TRUE, cache = "./"), + "blueprint_encode" = , + "BP_encode" = , + "bpencode" = , + "BlueprintEncodeData" = celldex::fetchReference("blueprint_encode", "2024-02-26", realize.assays = TRUE, cache = "./"), + "monaco" = , + "MonacoImmuneData" = celldex::fetchReference("monaco_immune", "2024-02-26", realize.assays = TRUE, cache = "./"), + "immu_cell_exp" = , + "DatabaseImmuneCellExpressionData" = , + "dice" = celldex::fetchReference("dice", "2024-02-26", realize.assays = TRUE, cache = "./"), + "immgen" = , + "ImmGenData" = celldex::fetchReference("immgen", "2024-02-26", realize.assays = TRUE, cache = "./"), + "mouseRNAseq" = , + "MouseRNAseqData" = celldex::fetchReference("mouse_rnaseq", "2024-02-26", realize.assays = TRUE, cache = "./") + ) + return(ref) +} + +MAIN_SINGLER <- function(so_in, species, cache_path = NULL) { if (species == "hg38" || species == "hg19") { - so_in$HPCA_main <- RUN_SINGLEr(so_in, celldex::HumanPrimaryCellAtlasData(), "label.main") - so_in$HPCA <- RUN_SINGLEr(so_in, celldex::HumanPrimaryCellAtlasData(), "label.fine") - so_in$BP_encode_main <- RUN_SINGLEr(so_in, celldex::BlueprintEncodeData(), "label.main") - so_in$BP_encode <- RUN_SINGLEr(so_in, celldex::BlueprintEncodeData(), "label.fine") - so_in$monaco_main <- RUN_SINGLEr(so_in, celldex::MonacoImmuneData(), "label.main") - so_in$monaco <- RUN_SINGLEr(so_in, celldex::MonacoImmuneData(), "label.fine") + so_in$HPCA_main <- RUN_SINGLEr(so_in, fetch_celldex_ref("hpca"), "label.main") + so_in$HPCA <- RUN_SINGLEr(so_in, fetch_celldex_ref("hpca"), "label.fine") + so_in$BP_encode_main <- RUN_SINGLEr(so_in, fetch_celldex_ref("BP_encode"), "label.main") + so_in$BP_encode <- RUN_SINGLEr(so_in, fetch_celldex_ref("BP_encode"), "label.fine") + so_in$monaco_main <- RUN_SINGLEr(so_in, fetch_celldex_ref("monaco"), "label.main") + so_in$monaco <- RUN_SINGLEr(so_in, fetch_celldex_ref("monaco"), "label.fine") so_in$immu_cell_exp_main <- RUN_SINGLEr( - so_in, celldex::DatabaseImmuneCellExpressionData(), + so_in, fetch_celldex_ref("dice"), "label.main" ) so_in$immu_cell_exp <- RUN_SINGLEr( - so_in, celldex::DatabaseImmuneCellExpressionData(), + so_in, fetch_celldex_ref("dice"), "label.fine" ) so_in$annot <- so_in$HPCA_main } else if (species == "mm10") { - so_in$immgen_main <- RUN_SINGLEr(so_in, celldex::ImmGenData(), "label.main") - so_in$immgen <- RUN_SINGLEr(so_in, celldex::ImmGenData(), "label.fine") - so_in$mouseRNAseq_main <- RUN_SINGLEr(so_in, celldex::MouseRNAseqData(), "label.main") - so_in$mouseRNAseq <- RUN_SINGLEr(so_in, celldex::MouseRNAseqData(), "label.fine") + so_in$immgen_main <- RUN_SINGLEr(so_in, fetch_celldex_ref("immgen"), "label.main") + so_in$immgen <- RUN_SINGLEr(so_in, fetch_celldex_ref("immgen"), "label.fine") + so_in$mouseRNAseq_main <- RUN_SINGLEr(so_in, fetch_celldex_ref("mouseRNAseq"), "label.main") + so_in$mouseRNAseq <- RUN_SINGLEr(so_in, fetch_celldex_ref("mouseRNAseq"), "label.fine") so_in$annot <- so_in$immgen_main } return(so_in) } -################################################################## -# -################################################################## + MAIN_DOUBLETS <- function(so_in, run_doublet_finder) { if (run_doublet_finder == "Y") { - sweep.res.list_kidney <- paramSweep_v3(so_in, PCs = 1:10, sct = T) + sweep.res.list_kidney <- paramSweep(so_in, PCs = 1:10, sct = TRUE) sweep.stats_kidney <- summarizeSweep(sweep.res.list_kidney, GT = FALSE) bcmvn_kidney <- find.pK(sweep.stats_kidney) @@ -133,14 +91,16 @@ MAIN_DOUBLETS <- function(so_in, run_doublet_finder) { nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop)) ## Run DoubletFinder with varying classification stringencies - dfso <- doubletFinder_v3(so_in, + dfso <- doubletFinder(so_in, pN = 0.25, pK = 0.09, nExp = nExp_poi, - reuse.pANN = FALSE, PCs = 1:10, sct = T + reuse.pANN = NULL, # https://github.com/chris-mcginnis-ucsf/DoubletFinder/issues/244 + PCs = 1:10, + sct = TRUE ) pAAN <- tail(names(dfso@meta.data), 2)[1] - dfso <- doubletFinder_v3(dfso, + dfso <- doubletFinder(dfso, pN = 0.25, pK = 0.09, nExp = nExp_poi.adj, reuse.pANN = pAAN, PCs = 1:10, sct = T @@ -152,9 +112,8 @@ MAIN_DOUBLETS <- function(so_in, run_doublet_finder) { return(so_in) } -################################################################## -# run batch corrections -################################################################## + +#' run batch corrections RUN_SINGLEr_AVERAGE <- function(obj, refFile, fineORmain) { avg <- AverageExpression(obj, assays = "SCT") avg <- as.data.frame(avg) @@ -164,13 +123,16 @@ RUN_SINGLEr_AVERAGE <- function(obj, refFile, fineORmain) { clustAnnot <- s$labels names(clustAnnot) <- colnames(avg) names(clustAnnot) <- gsub("SCT.", "", names(clustAnnot)) + names(clustAnnot) <- gsub("^g", "", names(clustAnnot)) annotVect <- clustAnnot[match(obj$seurat_clusters, names(clustAnnot))] names(annotVect) <- colnames(obj) return(annotVect) } -MAIN_BATCH_CORRECTION <- function(so_in, npcs, species, resolution_list, method_in, reduction_in, v_list = NULL, conda_env = "") { +#' batch correction function used in multiple rmarkdown notebooks + +MAIN_BATCH_CORRECTION <- function(so_in, npcs, species, resolution_list, method_in, reduction_in, v_list = NULL, cache_path = NULL) { # set assay to RNA to avoid double transform/norm DefaultAssay(so_in) <- "RNA" @@ -189,7 +151,7 @@ MAIN_BATCH_CORRECTION <- function(so_in, npcs, species, resolution_list, method_ so_integrate <- IntegrateLayers( object = so_pca, method = scVIIntegration, new.reduction = "integrated.scvi", - conda_env = conda_path, dims = 1:npcs + dims = 1:npcs ) } else if (method_in == "LIGER") { print("--running LIGER") @@ -225,28 +187,31 @@ MAIN_BATCH_CORRECTION <- function(so_in, npcs, species, resolution_list, method_ # reduction so <- RunUMAP(so, reduction = reduction_in, dims = 1:npcs) - # relabel + + # relabel with cluster-level annotations (uses averaged expression within each cluster) + if (dir.exists(cache_path)) { + gypsum::cacheDirectory(cache_path) + } + if (species == "hg38" || species == "hg19") { - so$clustAnnot_HPCA_main <- RUN_SINGLEr_AVERAGE(so, celldex::HumanPrimaryCellAtlasData(), "label.main") - so$clustAnnot_HPCA <- RUN_SINGLEr_AVERAGE(so, celldex::HumanPrimaryCellAtlasData(), "label.fine") - so$clustAnnot_BP_encode_main <- RUN_SINGLEr_AVERAGE(so, celldex::BlueprintEncodeData(), "label.main") - so$clustAnnot_BP_encode <- RUN_SINGLEr_AVERAGE(so, celldex::BlueprintEncodeData(), "label.fine") - so$clustAnnot_monaco_main <- RUN_SINGLEr_AVERAGE(so, celldex::MonacoImmuneData(), "label.main") - so$clustAnnot_monaco <- RUN_SINGLEr_AVERAGE(so, celldex::MonacoImmuneData(), "label.fine") - so$clustAnnot_immu_cell_exp_main <- RUN_SINGLEr_AVERAGE(so, celldex::DatabaseImmuneCellExpressionData(), "label.main") - so$clustAnnot_immu_cell_exp <- RUN_SINGLEr_AVERAGE(so, celldex::DatabaseImmuneCellExpressionData(), "label.fine") + so$clustAnnot_HPCA_main <- RUN_SINGLEr_AVERAGE(so, fetch_celldex_ref("hpca"), "label.main") + so$clustAnnot_HPCA <- RUN_SINGLEr_AVERAGE(so, fetch_celldex_ref("hpca"), "label.fine") + so$clustAnnot_BP_encode_main <- RUN_SINGLEr_AVERAGE(so, fetch_celldex_ref("BP_encode"), "label.main") + so$clustAnnot_BP_encode <- RUN_SINGLEr_AVERAGE(so, fetch_celldex_ref("BP_encode"), "label.fine") + so$clustAnnot_monaco_main <- RUN_SINGLEr_AVERAGE(so, fetch_celldex_ref("monaco"), "label.main") + so$clustAnnot_monaco <- RUN_SINGLEr_AVERAGE(so, fetch_celldex_ref("monaco"), "label.fine") + so$clustAnnot_immu_cell_exp_main <- RUN_SINGLEr_AVERAGE(so, fetch_celldex_ref("dice"), "label.main") + so$clustAnnot_immu_cell_exp <- RUN_SINGLEr_AVERAGE(so, fetch_celldex_ref("dice"), "label.fine") } else if (species == "mm10") { - so$clustAnnot_immgen_main <- RUN_SINGLEr_AVERAGE(so, celldex::ImmGenData(), "label.main") - so$clustAnnot_immgen <- RUN_SINGLEr_AVERAGE(so, celldex::ImmGenData(), "label.fine") - so$clustAnnot_mouseRNAseq_main <- RUN_SINGLEr_AVERAGE(so, celldex::MouseRNAseqData(), "label.main") - so$clustAnnot_mouseRNAseq <- RUN_SINGLEr_AVERAGE(so, celldex::MouseRNAseqData(), "label.fine") + so$clustAnnot_immgen_main <- RUN_SINGLEr_AVERAGE(so, fetch_celldex_ref("immgen"), "label.main") + so$clustAnnot_immgen <- RUN_SINGLEr_AVERAGE(so, fetch_celldex_ref("immgen"), "label.fine") + so$clustAnnot_mouseRNAseq_main <- RUN_SINGLEr_AVERAGE(so, fetch_celldex_ref("mouseRNAseq"), "label.main") + so$clustAnnot_mouseRNAseq <- RUN_SINGLEr_AVERAGE(so, fetch_celldex_ref("mouseRNAseq"), "label.fine") } return(so) } -################################################################## -# Integration Report Functions -################################################################## +#' Integration Report Functions OBJECT_SELECT <- function(id) { obj <- switch(id, "merged" = so_merged, diff --git a/bin/scvi.R b/bin/scvi.R deleted file mode 100755 index 5039c414..00000000 --- a/bin/scvi.R +++ /dev/null @@ -1,29 +0,0 @@ -.libPaths(c("/data/CCBR_Pipeliner/db/PipeDB/scrna5", .libPaths())) - - -library(Seurat) -library(stringr) -library(reticulate) -library(SingleR) -library(scRNAseq) -library(SingleCellExperiment) -library(celldex) -library(Orthology.eg.db) -library(org.Mm.eg.db) -library(org.Hs.eg.db) -library(SeuratWrappers) - -source("source.R") -args <- commandArgs(trailingOnly = T) - - -so <- as.character(args[1]) -outDirSeurat <- as.character(args[2]) -ref <- as.character(args[3]) -pcs <- as.character(args[4]) -resolutions <- as.character(strsplit(gsub(",+", ",", as.character(args[5])), split = ",")[[1]]) -resolutions <- as.numeric(resolutions) -so <- readRDS(so) - - -saveRDS(so, outDirSeurat) diff --git a/bin/seurat_merge.Rmd b/bin/seurat_merge.Rmd index 49e52123..52caba85 100755 --- a/bin/seurat_merge.Rmd +++ b/bin/seurat_merge.Rmd @@ -9,16 +9,15 @@ params: species: "hg38" npcs: 50 vars_to_regress: NULL - samplesheet: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/assets/input_manifest_cellranger.csv" - rdsFiles: "/data/sevillas2/sinclair/dev/results/seurat/preprocess/sample2_seurat_preprocess.rds /data/sevillas2/sinclair/dev/results/seurat/preprocess/sample4_seurat_preprocess.rds" + samplesheet: "assets/input_manifest_cellranger.csv" + rdsFiles: "results/seurat/preprocess/sample2_seurat_preprocess.rds results/seurat/preprocess/sample4_seurat_preprocess.rds" gid: "group1-group2" - Rlib_dir: "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_scRNA_RHEL8/" - Rpkg_config: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/conf/Rpack.config" - scRNA_functions: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/bin/scRNA_functions.R" + scRNA_functions: "bin/scRNA_functions.R" testing: "Y" --- -```{r, prep_args, message=FALSE, include=FALSE} +```{r setup, message=FALSE, include=FALSE} +options(future.globals.maxSize = 1e12) # set up params species <- params$species vars_to_regress <- params$vars_to_regress @@ -31,61 +30,25 @@ rds_files <- strsplit(params$rdsFiles, " ")[[1]] gid <- params$gid samplesheet <- params$samplesheet npcs <- as.numeric(params$npcs) - -scRNA_functions <- params$scRNA_functions -Rlib_dir <- params$Rlib_dir -Rpkg_config <- params$Rpkg_config testing <- params$testing print("These variables are used:") print(paste0("species:", species)) print(paste0("vars_to_regress_list:", vars_to_regress_list)) print(paste0("gid:", gid)) -``` - -```{r, handle_pkg, message=FALSE, include=FALSE} -# set library dir, load this and remove any other dirs to avoid confusion -# between personally created pkgs and the pipeline package -## saving old path "/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library" -print(paste0("Using the lib.loc location: ", Rlib_dir)) -assign(".lib.loc", Rlib_dir, envir = environment(.libPaths)) -message(.libPaths()) - -# read in package info -pkg_df <- read.csv(Rpkg_config) -pkg_df <- subset(pkg_df, merge == "Y") -pkg_df - -# for each package check installation, if present then load library -for (rowid in rownames(pkg_df)) { - pkg <- pkg_df[rowid, "package"] - source <- pkg_df[rowid, "source"] - version <- pkg_df[rowid, "version"] - gh_name <- pkg_df[rowid, "gh_name"] - need_install <- pkg[!(pkg %in% installed.packages()[, "Package"])] - if (length(need_install) != 0) { - print(paste0("Installing: ", pkg)) - if (source == "bc") BiocManager::install(pkg) - if (source == "cr") { - install.packages(pkg, - version = version, repos = "http://cran.us.r-project.org", - local = FALSE - ) - } - if (source == "gh") remotes::install_github(gh_name, version = version, local = FALSE) - } - - print(paste0("Loading: ", pkg)) - invisible(lapply(pkg, library, character.only = TRUE)) -} - -# source functions -source(scRNA_functions) - -# additional options -# tinytex::install_tinytex(force = TRUE) -options(future.globals.maxSize = 1e12) +library(Seurat) +library(SingleCellExperiment) +library(SingleR) +library(celldex) +library(tinytex) +library(stringr) +library(scRNAseq) +library(Orthology.eg.db) +library(AnnotationDbi) +library(org.Hs.eg.db) +library(org.Mm.eg.db) +library(Orthology.eg.db) ``` ```{r, processing, message=FALSE} diff --git a/bin/seurat_preprocess.Rmd b/bin/seurat_preprocess.Rmd index 76fdd91f..f1485492 100755 --- a/bin/seurat_preprocess.Rmd +++ b/bin/seurat_preprocess.Rmd @@ -2,13 +2,13 @@ title: "scRNA Notebook" output: html_document: - toc: yes + toc: true editor_options: chunk_output_type: console params: species: "hg19" sampleid: "WB_Lysis_1" - h5: "/data/sevillas2/scRNA_test/cellranger_counts/sample1/outs/filtered_feature_bc_matrix.h5" + h5: "cellranger_counts/sample1/outs/filtered_feature_bc_matrix.h5" qc_filtering: "manual" nCount_RNA_max: 500000 nCount_RNA_min: 1000 @@ -18,13 +18,18 @@ params: percent_mt_min: 0 run_doublet_finder: "N" npcs: 30 - Rlib_dir: "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_scRNA_RHEL8/" - Rpkg_config: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/conf/Rpack.config" - scRNA_functions: "/data/CCBR_Pipeliner/Pipelines/SINCLAIR/dev/bin/scRNA_functions.R" + scRNA_functions: "bin/scRNA_functions.R" + celldex_cache: NULL testing: "N" --- -```{r, prep_args, message=FALSE} +```{r setup} +options(rlang_trace_top_env = rlang::current_env()) +options(error = function() { + sink() + print(rlang::trace_back(bottom = sys.frame(-1)), simplify = "none") +}) +knitr::opts_chunk$set(message = FALSE) # set up params species <- params$species sampleid <- params$sampleid @@ -39,56 +44,30 @@ percent_mt_max <- as.numeric(params$percent_mt_max) percent_mt_min <- as.numeric(params$percent_mt_min) run_doublet_finder <- params$run_doublet_finder # Y, N npcs_val <- as.numeric(params$npcs) - -Rlib_dir <- params$Rlib_dir -Rpkg_config <- params$Rpkg_config -scRNA_functions <- params$scRNA_functions - +celldex_cache <- params$celldex_cache testing <- params$testing -``` - -```{r, handle_pkg, message=FALSE} -# set library dir, load this and remove any other dirs to avoid confusion -# between personally created pkgs and the pipeline package -## saving old path "/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library" -print(paste0("Using the lib.loc location: ", Rlib_dir)) -assign(".lib.loc", Rlib_dir, envir = environment(.libPaths)) -.libPaths() - -# read in package info -pkg_df <- read.csv(Rpkg_config) -pkg_df <- subset(pkg_df, preprocess == "Y") -pkg_df - -# for each package check installation, if present then load library -for (rowid in rownames(pkg_df)) { - pkg <- pkg_df[rowid, "package"] - source <- pkg_df[rowid, "source"] - version <- pkg_df[rowid, "version"] - gh_name <- pkg_df[rowid, "gh_name"] - - need_install <- pkg[!(pkg %in% installed.packages()[, "Package"])] - if (length(need_install) != 0) { - print(paste0("Installing: ", pkg)) - if (source == "bc") BiocManager::install(pkg) - if (source == "cr") { - install.packages(pkg, - version = version, repos = "http://cran.us.r-project.org", - local = FALSE - ) - } - if (source == "gh") remotes::install_github(gh_name, version = version, local = FALSE) - } - - print(paste0("Loading: ", pkg)) - invisible(lapply(pkg, library, character.only = TRUE)) -} - -# source functions -source(scRNA_functions) -# additional options -# tinytex::install_tinytex(force = TRUE) +source(params$scRNA_functions) +library(AnnotationDbi) +library(celldex) +library(cluster) +library(DoubletFinder) +library(farver) +library(flexmix) +library(GenomeInfoDbData) +library(ggplot2) +library(hdf5r) +library(org.Hs.eg.db) +library(org.Mm.eg.db) +library(Orthology.eg.db) +library(Routliers) +library(RSQLite) +library(R.utils) +library(Seurat) +library(SeuratWrappers) +library(SingleCellExperiment) +library(SingleR) +library(tinytex) ``` ```{r, pre-processing, message=FALSE} @@ -116,7 +95,7 @@ FeatureScatter(so, feature1 = "nFeature_RNA", feature2 = "percent.mt") VlnPlot(so, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) ``` -```{r, QC, message=FALSE} +```{r QC} # subset so_filt <- subset(so, subset = nFeature_RNA > 200) so_filt @@ -132,7 +111,7 @@ so_qc <- RunMiQC(so_filt, # ggplot2::scale_color_gradient(low = "grey", high = "purple") ``` -```{r qc2, message=FALSE} +```{r qc2} # define feature info nCount_out <- outliers_mad(so_qc$nCount_RNA, threshold = 3)$LL_CI_MAD nFeature_out <- outliers_mad(so_qc$nFeature_RNA, threshold = 3)$LL_CI_MAD @@ -158,18 +137,51 @@ if (qc_filtering == "manual") { so_qc_select VlnPlot(so_qc_select, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) +``` + +```{r process} +MAIN_PROCESS_SO <- function(so_in, species, npcs_in) { + # assign genes depending on species input + if (species == "hg38" || species == "hg19") { + print("--proccesing human data") + s.genes <- cc.genes$s.genes + g2m.genes <- cc.genes$g2m.genes + } else if (species == "mm10") { + print("--proccesing mouse data") + s.genes <- CONVERT_TO_HUMAN_GENELIST(cc.genes$s.genes) + g2m.genes <- CONVERT_TO_HUMAN_GENELIST(cc.genes$g2m.genes) + } + + # process + so_1 <- NormalizeData(so_in, + normalization.method = "LogNormalize", + scale.factor = 10000, + assay = "RNA" + ) + so_2 <- ScaleData(so_1, assay = "RNA") + so_3 <- CellCycleScoring(so_2, + s.features = s.genes, + g2m.features = g2m.genes, + set.ident = TRUE + ) + so_4 <- SCTransform(so_3) + so_out <- SEURAT_CLUSTERING(so_4, npcs_in) + return(so_out) +} # Process data so_processed <- MAIN_PROCESS_SO(so_qc_select, species, npcs_val) +``` +```{r label} # Label data -so_labeled <- MAIN_SINGLER(so_processed, species) +so_labeled <- MAIN_SINGLER(so_processed, species, celldex_cache) # ID doublets so_doublet <- MAIN_DOUBLETS(so_labeled, run_doublet_finder) ``` -```{r, save, message=FALSE} +```{r, save} # clean final object so_output <- UpdateSeuratObject(so_doublet) so_output diff --git a/bin/source.R b/bin/source.R deleted file mode 100755 index dbe23838..00000000 --- a/bin/source.R +++ /dev/null @@ -1,82 +0,0 @@ -convertHumanGeneList <- function(gns) { - egs <- mapIds(org.Hs.eg.db, gns, "ENTREZID", "SYMBOL") - mapped <- select(Orthology.eg.db, egs, "Mus.musculus", "Homo.sapiens") - mapped$MUS <- mapIds(org.Mm.eg.db, as.character(mapped$Mus.musculus), "SYMBOL", "ENTREZID") - return(as.character(unlist(mapped$MUS))) -} - - -seuratSample <- function(so, npcs) { - so <- NormalizeData(so, normalization.method = "LogNormalize", scale.factor = 10000, assay = "RNA") - so <- ScaleData(so, assay = "RNA") - - so <- CellCycleScoring(so, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE) - - so <- SCTransform(so) - - so <- RunPCA(object = so, features = VariableFeatures(object = so), verbose = F, npcs = 50) - - so <- FindNeighbors(so, dims = 1:npcs) - so <- FindClusters(so, dims = 1:npcs, print.output = 0, resolution = 0.8, algorithm = 3) - so <- RunUMAP(so, dims = 1:npcs, n.components = 3) - - return(so) -} - - -runSingleRCell <- function(obj, refFile, fineORmain) { - obj <- DietSeurat(obj, graphs = "umap") - sce <- as.SingleCellExperiment(obj, assay = "SCT") - ref <- refFile - s <- SingleR(test = sce, ref = ref, labels = ref[[fineORmain]]) - return(s$pruned.labels) -} - - -doublets <- function(dfso) { - sweep.res.list_kidney <- paramSweep_v3(dfso, PCs = 1:10, sct = T) - sweep.stats_kidney <- summarizeSweep(sweep.res.list_kidney, GT = FALSE) - print("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") - bcmvn_kidney <- find.pK(sweep.stats_kidney) - ## pK Identification (ground-truth) ------------------------------------------------------------------------------------------ - - - ## Homotypic Doublet Proportion Estimate ------------------------------------------------------------------------------------- - homotypic.prop <- modelHomotypic(dfso$annot) - perc <- 0.005 * (length(colnames(dfso)) / 1000) - nExp_poi <- round(perc * length(colnames(dfso))) # dfso@cell.names - nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop)) - print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx") - ## Run DoubletFinder with varying classification stringencies ---------------------------------------------------------------- - dfso <- doubletFinder_v3(dfso, pN = 0.25, pK = 0.09, nExp = nExp_poi, reuse.pANN = FALSE, PCs = 1:10, sct = T) - pAAN <- tail(names(dfso@meta.data), 2)[1] - dfso <- doubletFinder_v3(dfso, pN = 0.25, pK = 0.09, nExp = nExp_poi.adj, reuse.pANN = pAAN, PCs = 1:10, sct = T) - - return(dfso) -} - -runClustIntegrated <- function(so, npcs, resolutions, reduction) { - so <- RunUMAP(so, reduction = reduction, dims = 1:npcs) - so <- FindNeighbors(so, reduction = reduction, dims = 1:npcs) - - for (res in resolutions) { - so <- FindClusters(so, dims = 1:npcs, resolution = res, algorithm = 3) - } - return(so) -} - -singleRClusters <- function(obj, refFile, fineORmain) { - avg <- AverageExpression(obj, assays = "SCT") - avg <- as.data.frame(avg) - ref <- refFile - s <- SingleR(test = as.matrix(avg), ref = ref, labels = ref[[fineORmain]]) - - clustAnnot <- s$labels - names(clustAnnot) <- colnames(avg) - names(clustAnnot) <- gsub("SCT.", "", names(clustAnnot)) - - clustAnnot <- clustAnnot[match(obj$seurat_clusters, names(clustAnnot))] - names(clustAnnot) <- colnames(obj) - obj$clustAnnot <- clustAnnot - return(obj$clustAnnot) -} diff --git a/conf/Rpack.config b/conf/Rpack.config deleted file mode 100644 index 9715214a..00000000 --- a/conf/Rpack.config +++ /dev/null @@ -1,34 +0,0 @@ -package,source,version,gh_name,preprocess,merge,cca,harmony,rpca,scvi,liger,int -BiocManager,cr,3.17,NA,Y,Y,Y,Y,Y,Y,Y,Y -remotes,cr,2.4.2,NA,Y,Y,Y,Y,Y,Y,Y,Y -Seurat,gh,seurat5,satijalab/seurat,Y,Y,Y,Y,Y,Y,Y,Y -hdf5r,cr,1.3.8,NA,Y,N,N,N,N,N,N,N -R.utils,cr,2.12.2,NA,Y,N,N,N,N,N,N,Y -cluster,cr,2.1.4,NA,Y,N,N,N,N,N,N,Y -SeuratWrappers,gh,seurat5,satijalab/seurat-wrappers,Y,N,N,Y,Y,Y,Y,Y -GenomeInfoDbData,bc,3.17,NA,Y,N,N,N,N,N,N,N -farver,cr,2.1.1,NA,Y,N,N,N,N,N,N,N -SingleCellExperiment,bc,1.22.0,NA,Y,Y,N,N,N,N,N,N -SingleR,bc,2.2.0,NA,Y,Y,Y,Y,Y,Y,Y,N -DoubletFinder,gh,DoubletFinder,chris-mcginnis-ucsf/DoubletFinder,Y,N,N,N,N,N,N,N -flexmix,cr,2.3-19,NA,Y,N,N,N,N,N,N,N -celldex,bc,1.10.1,NA,Y,Y,N,N,N,N,N,N -ggplot2,cr,3.4.2,NA,Y,N,N,N,N,N,N,Y -tinytex,cr,0.45,NA,Y,Y,Y,Y,Y,N,N,N -stringr,cr,1.5.0,NA,N,Y,N,N,N,N,N,N -scRNAseq,bc,2.14.0,NA,N,Y,N,N,N,N,N,N -Orthology.eg.db,bc,3.17.0,NA,N,Y,N,N,N,N,N,N -Routliers,gh,Routliers,mdelacre/Routliers,Y,N,N,N,N,N,N,N -AnnotationDbi,bc,1.62.2,NA,Y,Y,Y,Y,Y,N,N,N -org.Hs.eg.db,bc,3.17.0,NA,Y,Y,Y,Y,Y,N,N,N -org.Mm.eg.db,bc,3.17.0,NA,Y,Y,Y,Y,Y,N,N,N -Orthology.eg.db,bc,3.17.0,NA,Y,Y,Y,Y,Y,N,N,N -harmony,gh,0.1.1,immunogenomics/harmony,N,N,Y,Y,Y,N,N,N -ggpubr,cr,0.6.0,NA,N,N,N,N,N,N,N,Y -DT,cr,0.29,NA,N,N,N,N,N,N,N,Y -reticulate,cr,1.32.0,NA,N,N,N,N,N,N,N,N -scales,cr,1.2.1.0,NA,N,N,N,N,N,N,N,Y -png,cr,0.1-8,NA,N,N,N,N,N,N,N,Y -gridExtra,cr,0.9.1,NA,N,N,N,N,N,N,N,Y -cowplot,cr,1.1.1,NA,N,N,N,N,N,N,N,Y -rliger,gh,v1.1.0,welch-lab/liger,N,N,N,N,N,N,Y,N diff --git a/conf/base_stub.config b/conf/base_stub.config deleted file mode 100644 index e28cc5c6..00000000 --- a/conf/base_stub.config +++ /dev/null @@ -1,36 +0,0 @@ -process { - - // The defaults for all processes - cpus = { check_max( 16 * task.attempt, 'cpus' ) } - memory = { check_max( 10.GB * task.attempt, 'memory' ) } - time = { check_max( 10.h * task.attempt, 'time' ) } - - errorStrategy = { task.exitStatus in [140,143,137,104,134,139] ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' - - // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors - withLabel:process_low { - cpus = { check_max( 4 , 'cpus' ) } - memory = { check_max( 10.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h , 'time' ) } - } - - withLabel:process_medium { - cpus = { check_max( 4 , 'cpus' ) } - memory = { check_max( 10.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h , 'time' ) } - } - - withLabel:process_high { - cpus = { check_max( 4 , 'cpus' ) } - memory = { check_max( 10.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h , 'time' ) } - } - - withName:CELLRANGER_COUNT { - cpus = { check_max( 4 , 'cpus' ) } - memory = { check_max( 10.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h , 'time' ) } - } -} diff --git a/conf/biowulf.config b/conf/biowulf.config index fb836055..d42a7bfc 100644 --- a/conf/biowulf.config +++ b/conf/biowulf.config @@ -8,6 +8,8 @@ params { // CCBR shared resource paths genome_dir = "/data/CCBR_Pipeliner/db/PipeDB/cellranger_ref/${params.species}" + // Database path + celldex_path = "/data/CCBR_Pipeliner/db/PipeDB/celldex_cache" /* index_dir = '/data/CCBR_Pipeliner/db/PipeDB/Indices' fastq_screen { diff --git a/conf/modules.config b/conf/modules.config index 9573e6c3..77f673a7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -21,7 +21,6 @@ process { path: { "${params.outdir}/cellranger_counts/" }, mode: params.publish_dir_mode ] - module = ['cellranger/7.2.0'] } withName: SEURAT_PREPROCESS{ @@ -30,7 +29,6 @@ process { path: { "${params.outdir}/seurat/preprocess" }, mode: params.publish_dir_mode ] - module = ['R/4.3', 'tex'] } withName: SEURAT_MERGE{ @@ -39,7 +37,6 @@ process { path: { "${params.outdir}/seurat/merge" }, mode: params.publish_dir_mode ] - module = ['R/4.3', 'tex'] } withName: BATCH_CORRECT_HARMONY { @@ -48,7 +45,6 @@ process { path: { "${params.outdir}/batch_correct" }, mode: params.publish_dir_mode ] - module = ['R/4.3'] } withName: BATCH_CORRECT_RPCA { @@ -57,7 +53,6 @@ process { path: { "${params.outdir}/batch_correct" }, mode: params.publish_dir_mode ] - module = ['R/4.3'] } withName: BATCH_CORRECT_CCA { @@ -66,7 +61,6 @@ process { path: { "${params.outdir}/batch_correct" }, mode: params.publish_dir_mode ] - module = ['R/4.3'] } // withName: BATCH_CORRECT_SCVI { @@ -84,7 +78,6 @@ process { path: { "${params.outdir}/batch_correct" }, mode: params.publish_dir_mode ] - module = ['R/4.3'] } withName: BATCH_CORRECT_INTEGRATION{ @@ -93,7 +86,6 @@ process { path: { "${params.outdir}/batch_correct" }, mode: params.publish_dir_mode ] - module = ['R/4.3'] } } diff --git a/conf/test.config b/conf/test.config index 4b364ab7..ee418c37 100644 --- a/conf/test.config +++ b/conf/test.config @@ -16,6 +16,6 @@ params { input = "${projectDir}/assets/input_manifest.csv" contrast = "${projectDir}/assets/contrast_manifest.csv" - outdir = "${launchDir}/output/tests" + outdir = "${launchDir}/results/tests" } diff --git a/conf/test_h5.config b/conf/test_h5.config index a771cb9b..1f18a9ad 100644 --- a/conf/test_h5.config +++ b/conf/test_h5.config @@ -15,7 +15,7 @@ params { // Input data input = "${projectDir}/assets/input_manifest_cellranger.csv" contrast = "${projectDir}/assets/contrast_manifest.csv" - outdir = "${launchDir}/output/tests" + outdir = "${launchDir}/results/tests" run_cellranger = false } diff --git a/conf/test_pbmc.config b/conf/test_pbmc.config index b3e0f36f..8806be96 100644 --- a/conf/test_pbmc.config +++ b/conf/test_pbmc.config @@ -5,7 +5,7 @@ params { // Input and Output options input = "${projectDir}/tests/test_pbmc/input_manifest_cellranger.csv" contrast = "${projectDir}/tests/test_pbmc/contrast_manifest.csv" - outdir = "${launchDir}/output/test_pbmc" + outdir = "${launchDir}/results/test_pbmc" species = "hg38" run_cellranger = false vars_to_regress = "" // other options include "S.Score,G2M.Score,nCount_RNA" diff --git a/docker/README.md b/docker/README.md deleted file mode 100644 index 67fdc76e..00000000 --- a/docker/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# Containers for SINCLAIR - -Dockerfiles to create containers for processes in this nextflow workflow. -These containers are built and pushed to dockerhub automatically when files in this directory change. - -[![docker](https://github.com/CCBR/SINCLAIR/actions/workflows/docker.yml/badge.svg)](https://github.com/CCBR/SINCLAIR/actions/workflows/docker.yml) diff --git a/docker/baser/Dockerfile b/docker/baser/Dockerfile deleted file mode 100644 index 0b19618e..00000000 --- a/docker/baser/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -FROM nciccbr/ccbr_ubuntu_base_20.04:v7 - -# build time variables -ARG BUILD_DATE="000000" -ENV BUILD_DATE=${BUILD_DATE} -ARG BUILD_TAG="000000" -ENV BUILD_TAG=${BUILD_TAG} -ARG REPONAME="000000" -ENV REPONAME=${REPONAME} - -# install necessary library -COPY install_packages_v1.0.R /opt2 -COPY Rpack_v1.0.yml /opt2 -WORKDIR /opt2 -RUN Rscript install_packages_v1.0.R - -# cleanup etc -# Save Dockerfile in the docker -COPY Dockerfile /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} -RUN chmod a+r /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} -#COPY argparse.bash /opt2 -RUN chmod -R a+rx /opt2/argparse.bash -ENV PATH="/opt2/:$PATH" -RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ - apt-get autoclean && \ - apt-get autoremove -y && \ - rm -rf /var/lib/{apt,dpkg,cache,log}/ -WORKDIR /data2 diff --git a/docker/baser/README.md b/docker/baser/README.md deleted file mode 100644 index f6895a5d..00000000 --- a/docker/baser/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# baser - -Container for baser processes in SINCLAIR - - diff --git a/docker/baser/Rpack_v1.0.config b/docker/baser/Rpack_v1.0.config deleted file mode 100644 index 35429fc2..00000000 --- a/docker/baser/Rpack_v1.0.config +++ /dev/null @@ -1,35 +0,0 @@ -package,source,version,gh_name -BiocManager,cr,3.17,NA -remotes,cr,2.4.2,NA -Seurat,gh,seurat5,satijalab/seurat -hdf5r,cr,1.3.8,NA -R.utils,cr,2.12.2,NA -cluster,cr,2.1.4,NA -SeuratWrappers,gh,seurat5,satijalab/seurat-wrappers -GenomeInfoDbData,bc,3.17,NA -farver,cr,2.1.1,NA -SingleCellExperiment,bc,1.22.0,NA -SingleR,bc,2.2.0,NA -DoubletFinder,gh,DoubletFinder,chris-mcginnis-ucsf/DoubletFinder -flexmix,cr,2.3-19,NA -celldex,bc,1.10.1,NA -ggplot2,cr,3.4.2,NA -tinytex,cr,0.45,NA -stringr,cr,1.5.0,NA -scRNAseq,bc,2.14.0,NA -Orthology.eg.db,bc,3.17.0,NA -Routliers,gh,Routliers,mdelacre/Routliers -AnnotationDbi,bc,1.62.2,NA -org.Hs.eg.db,bc,3.17.0,NA -org.Mm.eg.db,bc,3.17.0,NA -Orthology.eg.db,bc,3.17.0,NA -harmony,gh,0.1.1,immunogenomics/harmony -ggpubr,cr,0.6.0,NA -DT,cr,0.29,NA -reticulate,cr,1.32.0,NA -scales,cr,1.2.1.0,NA -png,cr,0.1-8,NA -grid,cr,3.6.2,NA -gridExtra,cr,0.9.1,NA -cowplot,cr,1.1.1,NA -rliger,gh,v1.1.0,welch-lab/liger diff --git a/docker/baser/install_packages_v1.0.R b/docker/baser/install_packages_v1.0.R deleted file mode 100644 index e8ca6e2d..00000000 --- a/docker/baser/install_packages_v1.0.R +++ /dev/null @@ -1,27 +0,0 @@ -scRNA_handle_packages <- function(pkg_df) { - for (rowid in rownames(pkg_df)) { - pkg <- pkg_df[rowid, "package"] - source <- pkg_df[rowid, "source"] - version <- pkg_df[rowid, "version"] - gh_name <- pkg_df[rowid, "gh_name"] - - need_install <- pkg[!(pkg %in% installed.packages()[, "Package"])] - if (length(need_install) != 0) { - print(paste0("Installing: ", pkg)) - if (source == "bc") BiocManager::install(pkg) - if (source == "cr") { - install.packages(pkg, - version = version, repos = "http://cran.us.r-project.org", - local = FALSE - ) - } - if (source == "gh") remotes::install_github(gh_name, version = version, local = FALSE) - } - } -} - -# read in package info -pkg_df <- read.csv("Rpack_v1.0.config") - -# for each package check installation, if present then load library -scRNA_handle_packages(pkg_df) diff --git a/docker/baser/meta.yml b/docker/baser/meta.yml deleted file mode 100644 index 0b5b1b6c..00000000 --- a/docker/baser/meta.yml +++ /dev/null @@ -1,4 +0,0 @@ -dockerhub_namespace: nciccbr -image_name: sinclair_baser -version: 0.1.0 -container: "$(dockerhub_namespace)/$(image_name):$(version)" diff --git a/docker/scvi/Dockerfile b/docker/scvi/Dockerfile deleted file mode 100644 index 463a59cf..00000000 --- a/docker/scvi/Dockerfile +++ /dev/null @@ -1,46 +0,0 @@ -FROM nciccbr/ccbr_ubuntu_base_20.04:v5 - -# build time variables -ARG BUILD_DATE="000000" -ENV BUILD_DATE=${BUILD_DATE} -ARG BUILD_TAG="000000" -ENV BUILD_TAG=${BUILD_TAG} -ARG REPONAME="000000" -ENV REPONAME=${REPONAME} - -# install miniconda -ENV CONDA_DIR /opt2/conda -WORKDIR /opt2 -RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /opt2/miniconda.sh && \ - /bin/bash /opt2/miniconda.sh -b -p /opt2/conda -ENV PATH=$CONDA_DIR/bin:$PATH - -# install environment -RUN conda create -n scvi python=3.9 -RUN . /opt2/conda/etc/profile.d/conda.sh && \ - conda activate scvi && \ - cconda install scvi-tools -c conda-forge -RUN . /opt2/conda/etc/profile.d/conda.sh && \ - conda activate scvi && \ - conda install -c conda-forge r-base r-essentials r-reticulate -WORKDIR /opt2 - -# create wrapper for scvi conda env -COPY scvi /opt2 -RUN chmod a+x /opt2/scvi -ENV PATH=/opt2:$PATH - -# cleanup etc -# Save Dockerfile in the docker -COPY Dockerfile /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} -RUN chmod a+r /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} -RUN chmod -R a+rx /opt2/argparse.bash -ENV PATH="/opt2/:$PATH" -RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ - apt-get autoclean && \ - apt-get autoremove -y && \ - rm -rf /var/lib/{apt,dpkg,cache,log}/ - -COPY Dockerfile /opt2/Dockerfile -RUN chmod -R a+rX /opt2/Dockerfile -WORKDIR /data2 diff --git a/docker/scvi/README.md b/docker/scvi/README.md deleted file mode 100644 index aa50b159..00000000 --- a/docker/scvi/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# scvi - -Container for scvi processes in SINCLAIR - - diff --git a/docker/scvi/meta.yml b/docker/scvi/meta.yml deleted file mode 100644 index 43e1dfc2..00000000 --- a/docker/scvi/meta.yml +++ /dev/null @@ -1,4 +0,0 @@ -dockerhub_namespace: nciccbr -image_name: sinclair_scvi_1.0 -version: 0.1.0 -container: "$(dockerhub_namespace)/$(image_name):$(version)" diff --git a/docs/params.md b/docs/params.md index 77a4744a..83db5f05 100644 --- a/docs/params.md +++ b/docs/params.md @@ -19,9 +19,9 @@ Define where the pipeline should find input data and save output data. | Parameter | Description | Type | Default | Required | Hidden | |-----------|-----------|-----------|-----------|-----------|-----------| -| `species` | | `string` | hg19 | | | +| `species` | | `string` | hg38 | | | | `vars_to_regress` | | `string` | | | | -| `qc_filtering` | | `string` | manual | | | +| `qc_filtering` | | `string` | miqc | | | | `nCount_RNA_max` | | `integer` | 500000 | | | | `nCount_RNA_min` | | `integer` | 1000 | | | | `nFeature_RNA_max` | | `integer` | 5000 | | | @@ -32,7 +32,7 @@ Define where the pipeline should find input data and save output data. | `seurat_resolution` | | `string` | 0.1,0.2,0.3,0.5,0.6,0.8,1 | | | | `npcs` | | `integer` | 50 | | | | `resolution_list` | | `string` | 0.1,0.2,0.3,0.5,0.6,0.8,1 | | | -| `save_cellranger_extra_files` | Whether to save extra cellranger files (bam, bai, cloupe) in addition to h5 files | `boolean` | False | | | +| `save_cellranger_extra_files` | Whether to save extra cellranger files (bam, bai, cloupe) in addition to h5 files | `boolean` | | | | | `genome_dir` | Path to the genome references. Overridden by platform configs, e.g. conf/biowulf.config | `string` | | | | ## Institutional config options @@ -54,29 +54,17 @@ Define where the pipeline should find input data and save output data. |-----------|-----------|-----------|-----------|-----------|-----------| | `publish_dir_mode` | | `string` | link | True | | | `tracedir` | | `string` | ${params.outdir}/pipeline_info | True | | -| `max_memory` | | `string` | 128.GB | True | | +| `max_memory` | | `string` | 1500.GB | True | | | `max_cpus` | | `integer` | 48 | True | | | `max_time` | | `string` | 240.h | True | | -## Containers - -Docker/Singularity containers to use for processes. Must be available in dockerhub - -| Parameter | Description | Type | Default | Required | Hidden | -|-----------|-----------|-----------|-----------|-----------|-----------| -| `base_container` | | `string` | nciccbr/ccbr_ubuntu_base_20.04:v6.1 | | True | -| `baser_container` | | `string` | nciccbr/sinclair_baser:0.1.0 | | True | - ## Hidden options | Parameter | Description | Type | Default | Required | Hidden | |-----------|-----------|-----------|-----------|-----------|-----------| -| `Rlib_dir` | | `string` | /data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_scRNA_RHEL8 | | True | -| `conda_path` | | `string` | /data/CCBR_Pipeliner/db/PipeDB/Conda/envs/scvi-env | | True | -| `python_path` | | `string` | /data/CCBR_Pipeliner/db/PipeDB/Conda/envs/scvi-env/bin | | True | -| `Rpkg` | | `string` | ${projectDir}/conf/Rpack.config | | True | +| `celldex_path` | | `string` | /data/CCBR_Pipeliner/db/PipeDB/celldex_cache | | True | | `script_functions` | | `string` | ${projectDir}/bin/scRNA_functions.R | | True | | `script_preprocess` | | `string` | ${projectDir}/bin/seurat_preprocess.Rmd | | True | | `script_merge` | | `string` | ${projectDir}/bin/seurat_merge.Rmd | | True | @@ -85,4 +73,13 @@ Docker/Singularity containers to use for processes. Must be available in dockerh | `script_bc_cca` | | `string` | ${projectDir}/bin/batch_correction_cca.Rmd | | True | | `script_liger` | | `string` | ${projectDir}/bin/batch_correction_liger.Rmd | | True | | `script_bc_integration` | | `string` | ${projectDir}/bin/batch_correction_integration.Rmd | | True | + +## Containers + + + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `container_base` | | `string` | nciccbr/ccbr_ubuntu_base_20.04:v7 | | | +| `container_seurat` | | `string` | nciccbr/seurat_5:v1 | | | diff --git a/modules/local/batch_correction_cca.nf b/modules/local/batch_correction_cca.nf index c1f9fec0..cbd5abe6 100644 --- a/modules/local/batch_correction_cca.nf +++ b/modules/local/batch_correction_cca.nf @@ -2,16 +2,17 @@ process BATCH_CORRECT_CCA { tag "${gid}" label 'process_high' + container "${params.container_seurat}" + input: tuple val(gid), path(mergedObj) val(species) val(npcs) val(vars_to_regress) val(resolution_list) - val(Rlib_dir) - path(Rpkg_config) path(rmd) path(scRNA_functions) + path(celldex_path) output: tuple val(gid), path ("*.rds") , emit:rds @@ -27,10 +28,10 @@ process BATCH_CORRECT_CCA { npcs="$npcs", vars_to_regress="$vars_to_regress", resolution_list="$resolution_list", - Rlib_dir="$Rlib_dir", - Rpkg_config="$Rpkg_config", scRNA_functions="$scRNA_functions", - testing="N"), + testing="N", + celldex_cache="$celldex_path" + ), output_file = "${gid}_batch_correction_cca.html")' """ diff --git a/modules/local/batch_correction_harmony.nf b/modules/local/batch_correction_harmony.nf index d710aeec..cc05a557 100644 --- a/modules/local/batch_correction_harmony.nf +++ b/modules/local/batch_correction_harmony.nf @@ -2,16 +2,17 @@ process BATCH_CORRECT_HARMONY { tag "${gid}" label 'process_high' + container "${params.container_seurat}" + input: tuple val(gid), path(mergedObj) val(species) val(npcs) val(vars_to_regress) val(resolution_list) - val(Rlib_dir) - path(Rpkg_config) path(rmd) path(scRNA_functions) + path(celldex_path) output: tuple val(gid), path ("*.rds") , emit:rds @@ -27,10 +28,11 @@ process BATCH_CORRECT_HARMONY { npcs="$npcs", vars_to_regress="$vars_to_regress", resolution_list="$resolution_list", - Rlib_dir="$Rlib_dir", - Rpkg_config="$Rpkg_config", scRNA_functions="$scRNA_functions", - testing="N"), + testing="N", + celldex_cache="$celldex_path" + + ), output_file = "${gid}_batch_correction_harmony.html")' """ diff --git a/modules/local/batch_correction_integration.nf b/modules/local/batch_correction_integration.nf index 37e2aab7..137e6336 100644 --- a/modules/local/batch_correction_integration.nf +++ b/modules/local/batch_correction_integration.nf @@ -2,6 +2,8 @@ process BATCH_CORRECT_INTEGRATION { tag "${gid}" label 'process_high' + container "${params.container_seurat}" + input: tuple val(gid), path(rds_m) tuple val(gid), path(rds_h) @@ -12,8 +14,6 @@ process BATCH_CORRECT_INTEGRATION { val(species) val(npcs) val(resolution_list) - path(Rlib_dir) - path(Rpkg_config) path(rmd) path(scRNA_functions) @@ -34,8 +34,6 @@ process BATCH_CORRECT_INTEGRATION { resolution_list="$resolution_list", citeseq="", annot="", - Rlib_dir="$Rlib_dir", - Rpkg_config="$Rpkg_config", scRNA_functions="$scRNA_functions", testing="N"), output_file = "${gid}_batch_correction_integration.html")' diff --git a/modules/local/batch_correction_liger.nf b/modules/local/batch_correction_liger.nf index a7c7b0e1..eed47cb5 100644 --- a/modules/local/batch_correction_liger.nf +++ b/modules/local/batch_correction_liger.nf @@ -2,16 +2,17 @@ process BATCH_CORRECT_LIGER { tag "${gid}" label 'process_high' + container "${params.container_seurat}" + input: tuple val(gid), path(mergedObj) val(species) val(npcs) val(vars_to_regress) val(resolution_list) - val(Rlib_dir) - path(Rpkg_config) path(rmd) path(scRNA_functions) + path(celldex_path) output: tuple val(gid), path ("*.rds") , emit:rds @@ -27,10 +28,10 @@ process BATCH_CORRECT_LIGER { npcs="$npcs", vars_to_regress="$vars_to_regress", resolution_list="$resolution_list", - Rlib_dir="$Rlib_dir", - Rpkg_config="$Rpkg_config", scRNA_functions="$scRNA_functions", - testing="N"), + celldex_cache="$celldex_path", + testing="N" + ), output_file = "${gid}_batch_correction_liger.html")' """ diff --git a/modules/local/batch_correction_rpca.nf b/modules/local/batch_correction_rpca.nf index f1b2deb5..2f5a30d5 100644 --- a/modules/local/batch_correction_rpca.nf +++ b/modules/local/batch_correction_rpca.nf @@ -2,16 +2,17 @@ process BATCH_CORRECT_RPCA { tag "${gid}" label 'process_high' + container "${params.container_seurat}" + input: tuple val(gid), path(mergedObj) val(species) val(npcs) val(vars_to_regress) val(resolution_list) - val(Rlib_dir) - path(Rpkg_config) path(rmd) path(scRNA_functions) + path(celldex_path) output: tuple val(gid), path ("*.rds") , emit:rds @@ -27,10 +28,10 @@ process BATCH_CORRECT_RPCA { npcs="$npcs", vars_to_regress="$vars_to_regress", resolution_list="$resolution_list", - Rlib_dir="$Rlib_dir", - Rpkg_config="$Rpkg_config", scRNA_functions="$scRNA_functions", - testing="N"), + celldex_cache="$celldex_path", + testing="N" + ), output_file = "${gid}_batch_correction_rpca.html")' """ diff --git a/modules/local/batch_correction_scvi.nf b/modules/local/batch_correction_scvi.nf index f3da6d3f..46b92737 100644 --- a/modules/local/batch_correction_scvi.nf +++ b/modules/local/batch_correction_scvi.nf @@ -2,18 +2,17 @@ process BATCH_CORRECT_SCVI { tag "${gid}" label 'process_high' + container "${params.container_seurat}" + input: tuple val(gid), path(mergedObj) val(species) val(npcs) val(vars_to_regress) val(resolution_list) - val(conda_path) - val(python_path) - val(Rlib_dir) - path(Rpkg_config) path(rmd) path(scRNA_functions) + path(celldex_path) output: tuple val(gid), path ("*.rds") , emit:rds @@ -29,12 +28,10 @@ process BATCH_CORRECT_SCVI { npcs="$npcs", vars_to_regress="$vars_to_regress", resolution_list="$resolution_list", - conda_path="$conda_path", - python_path="$python_path", - Rlib_dir="$Rlib_dir", - Rpkg_config="$Rpkg_config", scRNA_functions="$scRNA_functions", - testing="N"), + celldex_cache="$celldex_path", + testing="N" + ), output_file = "${gid}_batch_correction_scvi.html")' """ diff --git a/modules/local/cellranger_count_gex.nf b/modules/local/cellranger_count_gex.nf index 079f6133..3a328d6a 100644 --- a/modules/local/cellranger_count_gex.nf +++ b/modules/local/cellranger_count_gex.nf @@ -2,6 +2,8 @@ process CELLRANGER_COUNT { tag "${id}" label 'process_high' + container 'litd/docker-cellranger:v7.2.0' + input: tuple val(id), val(inDir) val(genome_dir) diff --git a/modules/local/cp/main.nf b/modules/local/cp/main.nf new file mode 100644 index 00000000..9dd1f56b --- /dev/null +++ b/modules/local/cp/main.nf @@ -0,0 +1,15 @@ +process COPY_DIR { + // copy directory + // -r recursively + // -L, --dereference follow symbolic links + input: + path(input_dir) + + output: + path("dir/") + + script: + """ + cp -Lr ${input_dir} dir/ + """ +} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 739e2c59..e4a93159 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -2,7 +2,7 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" label 'process_low' - container "${params.base_container}" + container "${params.container_base}" input: path (samplesheet) @@ -29,5 +29,4 @@ process SAMPLESHEET_CHECK { project fi """ - } diff --git a/modules/local/seurat_merge.nf b/modules/local/seurat_merge.nf index 5cc0b804..510ff033 100644 --- a/modules/local/seurat_merge.nf +++ b/modules/local/seurat_merge.nf @@ -2,14 +2,14 @@ process SEURAT_MERGE { tag "${gid}" label 'process_high' + container "${params.container_seurat}" + input: tuple val(gid), path(rdsFiles) path(samplesheet) val(species) val(npcs) val(vars_to_regress) - val(Rlib_dir) - path(Rpkg_config) path(rmd) path(scRNA_functions) @@ -27,8 +27,6 @@ process SEURAT_MERGE { rdsFiles="$rdsFiles", gid="$gid", samplesheet="$samplesheet", - Rlib_dir="$Rlib_dir", - Rpkg_config="$Rpkg_config", scRNA_functions="$scRNA_functions", testing="N"), output_file = "${gid}_seurat_merged.html")' diff --git a/modules/local/seurat_preprocess.nf b/modules/local/seurat_preprocess.nf index 91bbb9dc..af11ee6a 100644 --- a/modules/local/seurat_preprocess.nf +++ b/modules/local/seurat_preprocess.nf @@ -2,8 +2,10 @@ process SEURAT_PREPROCESS { tag "${id}" label 'process_medium' + container "${params.container_seurat}" + input: - tuple val(id), val(inDir), path(h5) + tuple val(id), val(inDir), path(h5), path(celldex_path) val(species) val(qc_filtering) val(nCount_RNA_max) @@ -14,8 +16,6 @@ process SEURAT_PREPROCESS { val(percent_mt_min) val(run_doublet_finder) val(npcs) - val(Rlib_dir) - path(Rpkg_config) path(rmd) path(scRNA_functions) @@ -26,7 +26,8 @@ process SEURAT_PREPROCESS { script: def args = task.ext.args ?: '' """ - Rscript -e 'rmarkdown::render("${rmd}", + #!/usr/bin/env Rscript --vanilla + rmarkdown::render("${rmd}", params=list( species="$species", sampleid="$id", @@ -40,10 +41,11 @@ process SEURAT_PREPROCESS { percent_mt_min=$percent_mt_min, run_doublet_finder="$run_doublet_finder", npcs=$npcs, - Rlib_dir="$Rlib_dir", - Rpkg_config="$Rpkg_config", - scRNA_functions="$scRNA_functions"), - output_file = "${id}_seurat_preprocess.html")' + scRNA_functions="$scRNA_functions", + celldex_cache="$celldex_path" + ), + output_file = "${id}_seurat_preprocess.html" + ) """ stub: diff --git a/nextflow.config b/nextflow.config index a7f3340e..247ce320 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,10 +31,8 @@ params { npcs = 50 // number of principle components resolution_list = "0.1,0.2,0.3,0.5,0.6,0.8,1" - // Database paths - Rlib_dir = "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_scRNA_RHEL8" - conda_path = "/data/CCBR_Pipeliner/db/PipeDB/Conda/envs/scvi-env" - python_path = "/data/CCBR_Pipeliner/db/PipeDB/Conda/envs/scvi-env/bin" + // Database path + celldex_path = "/data/CCBR_Pipeliner/db/PipeDB/celldex_cache" // Other options tracedir = "${params.outdir}/pipeline_info" @@ -42,9 +40,6 @@ params { // References genome_dir = null // this is overridden by conf/biowulf.config - // R Config - Rpkg = "${projectDir}/conf/Rpack.config" - // Scripts script_functions = "${projectDir}/bin/scRNA_functions.R" script_preprocess = "${projectDir}/bin/seurat_preprocess.Rmd" @@ -65,8 +60,8 @@ params { max_time = '240.h' // Docker containers - base_container = 'nciccbr/ccbr_ubuntu_base_20.04:v6.1' - baser_container = 'nciccbr/sinclair_baser:0.1.0' + container_base = 'nciccbr/ccbr_ubuntu_base_20.04:v7' + container_seurat = 'nciccbr/seurat_5:v1-dev' // Config options config_profile_name = null @@ -89,20 +84,19 @@ profiles { charliecloud.enabled = false } docker { - docker.enabled = true - docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + docker.enabled = true + // Avoid this error: + // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. + // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 + // once this is established and works well, nextflow might implement this behavior as new default. + docker.runOptions = '-u \$(id -u):\$(id -g)' } singularity { - singularity.enabled = true + singularity.enabled = true singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + singularity.cacheDir = null // overridden by profiles: biowulf, frce, & slurmint + singularity.pullTimeout = "1h" + envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' } biowulf { includeConfig "conf/biowulf.config" @@ -126,10 +120,13 @@ profiles { includeConfig 'conf/modules.config' // Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. env { PYTHONNOUSERSITE = 1 R_PROFILE_USER = "/.Rprofile" R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" } // Capture exit codes from upstream processes when piping diff --git a/nextflow_schema.json b/nextflow_schema.json index 4ab2d967..c872e09e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -56,14 +56,14 @@ "properties": { "species": { "type": "string", - "default": "hg19" + "default": "hg38" }, "vars_to_regress": { "type": "string" }, "qc_filtering": { "type": "string", - "default": "manual" + "default": "miqc" }, "nCount_RNA_max": { "type": "integer", @@ -108,8 +108,7 @@ }, "save_cellranger_extra_files": { "type": "boolean", - "description": "Whether to save extra cellranger files (bam, bai, cloupe) in addition to h5 files", - "default": false + "description": "Whether to save extra cellranger files (bam, bai, cloupe) in addition to h5 files" }, "genome_dir": { "type": "string", @@ -156,7 +155,7 @@ }, "max_memory": { "type": "string", - "default": "128.GB" + "default": "1500.GB" }, "max_cpus": { "type": "integer", @@ -175,52 +174,16 @@ "max_time" ] }, - "containers": { - "title": "Containers", - "type": "object", - "description": "Docker/Singularity containers to use for processes. Must be available in dockerhub", - "default": "", - "properties": { - "base_container": { - "type": "string", - "default": "nciccbr/ccbr_ubuntu_base_20.04:v6.1", - "hidden": true - }, - "baser_container": { - "type": "string", - "default": "nciccbr/sinclair_baser:0.1.0", - "hidden": true - } - } - }, "hidden_options": { "title": "Hidden options", "type": "object", "description": "", "default": "", "properties": { - "Rlib_dir": { - "type": "string", - "format": "directory-path", - "default": "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_scRNA_RHEL8", - "hidden": true - }, - "conda_path": { - "type": "string", - "format": "directory-path", - "default": "/data/CCBR_Pipeliner/db/PipeDB/Conda/envs/scvi-env", - "hidden": true - }, - "python_path": { + "celldex_path": { "type": "string", "format": "directory-path", - "default": "/data/CCBR_Pipeliner/db/PipeDB/Conda/envs/scvi-env/bin", - "hidden": true - }, - "Rpkg": { - "type": "string", - "format": "file-path", - "default": "${projectDir}/conf/Rpack.config", + "default": "/data/CCBR_Pipeliner/db/PipeDB/celldex_cache", "hidden": true }, "script_functions": { @@ -272,6 +235,22 @@ "hidden": true } } + }, + "containers": { + "title": "Containers", + "type": "object", + "description": "", + "default": "", + "properties": { + "container_base": { + "type": "string", + "default": "nciccbr/ccbr_ubuntu_base_20.04:v7" + }, + "container_seurat": { + "type": "string", + "default": "nciccbr/seurat_5:v1" + } + } } }, "allOf": [ @@ -288,10 +267,10 @@ "$ref": "#/$defs/generic_options" }, { - "$ref": "#/$defs/containers" + "$ref": "#/$defs/hidden_options" }, { - "$ref": "#/$defs/hidden_options" + "$ref": "#/$defs/containers" } ] } diff --git a/workflows/gex.nf b/workflows/gex.nf index 9796ee79..a3cc1c94 100644 --- a/workflows/gex.nf +++ b/workflows/gex.nf @@ -15,6 +15,7 @@ Assign local subworkflows Assign Local Modules ======================================================================================================= */ +include { COPY_DIR as GYPSUM_CACHE } from '../modules/local/cp/main.nf' include { SEURAT_PREPROCESS } from '../modules/local/seurat_preprocess.nf' include { SEURAT_MERGE } from '../modules/local/seurat_merge.nf' include { BATCH_CORRECT_HARMONY } from '../modules/local/batch_correction_harmony.nf' @@ -45,11 +46,15 @@ workflow GEX_EXQC { // if vars_to_regress is null, set it to 'NULL' for R to evaluate def vars_to_regress = params.vars_to_regress ?: 'NULL' + + // Set celldex cache + ch_celldex_path = Channel.fromPath(params.celldex_path) | GYPSUM_CACHE + // Set output path to relative, species outdir_path = Channel.fromPath(params.outdir,relative:true) // Run Seurat for individual samples SEURAT_PREPROCESS ( - ch_fqdir_h5, + ch_fqdir_h5.combine(ch_celldex_path), params.species, params.qc_filtering, params.nCount_RNA_max, @@ -60,8 +65,6 @@ workflow GEX_EXQC { params.percent_mt_min, params.run_doublet_finder, params.npcs, - params.Rlib_dir, - params.Rpkg, params.script_preprocess, params.script_functions ) @@ -92,8 +95,6 @@ workflow GEX_EXQC { params.species, params.npcs, vars_to_regress, - params.Rlib_dir, - params.Rpkg, params.script_merge, params.script_functions ) @@ -105,10 +106,9 @@ workflow GEX_EXQC { params.npcs, vars_to_regress, params.resolution_list, - params.Rlib_dir, - params.Rpkg, params.script_bc_harmony, - params.script_functions + params.script_functions, + ch_celldex_path ) // Run batch corrections @@ -118,10 +118,9 @@ workflow GEX_EXQC { params.npcs, vars_to_regress, params.resolution_list, - params.Rlib_dir, - params.Rpkg, params.script_bc_rpca, - params.script_functions + params.script_functions, + ch_celldex_path ) // Run batch corrections @@ -131,10 +130,9 @@ workflow GEX_EXQC { params.npcs, vars_to_regress, params.resolution_list, - params.Rlib_dir, - params.Rpkg, params.script_bc_cca, - params.script_functions + params.script_functions, + ch_celldex_path ) /* BLOCKING SCVI FOR FUTURE RELEASE @@ -145,12 +143,9 @@ workflow GEX_EXQC { params.npcs, vars_to_regress, params.resolution_list, - params.conda_path, - params.python_path, - params.Rlib_dir, - params.Rpkg, params.script_scvi, - params.script_functions + params.script_functions, + ch_celldex_path ) */ @@ -161,10 +156,9 @@ workflow GEX_EXQC { params.npcs, vars_to_regress, params.resolution_list, - params.Rlib_dir, - params.Rpkg, params.script_liger, - params.script_functions + params.script_functions, + ch_celldex_path ) // Integrate batch corrections @@ -178,8 +172,6 @@ workflow GEX_EXQC { params.species, params.npcs, params.resolution_list, - params.Rlib_dir, - params.Rpkg, params.script_bc_integration, params.script_functions )