diff --git a/R/bootstrap_plot.R b/R/bootstrap_plot.R index 702301b..68d3c36 100755 --- a/R/bootstrap_plot.R +++ b/R/bootstrap_plot.R @@ -1,3 +1,116 @@ +#' Create a multi-page PDF with each cell type on a separate page +#' +#' This function creates a PDF where each unique value in a specified faceting +#' variable (typically "CellType") is plotted on a separate page. Each plot shows +#' expression data with points colored by expression levels. +#' +#' @param gene_data A data frame containing gene expression data with columns for +#' "boot", "hit", and a faceting variable (typically "CellType"). +#' @param facet_var Character string specifying the column name to use for +#' separating plots onto different pages (e.g., "CellType"). +#' @param output_file Character string specifying the path to save the PDF file. +#' @param add_labels Character string specifying the column name to use for point labels, +#' or NULL to disable labels (default: NULL). +#' @param verbose Logical indicating whether to print progress messages (default: TRUE). +#' +#' @return No return value, called for side effect of creating a PDF file. +#' +#' @details The function creates QQ plots of "boot" vs "hit" values, with points +#' colored by "hit" values using a reversed viridis color scale. Each plot includes +#' a title showing the cell type being displayed. If add_labels is provided, text labels +#' from the specified column will be added to the points using ggrepel. +#' +#' @examples +#' \dontrun{ +#' # Create plots without labels +#' .create_multipage_plot( +#' gene_data = my_gene_data, +#' facet_var = "CellType", +#' output_file = "cell_type_plots.pdf" +#' ) +#' +#' # Create plots with gene symbol labels +#' .create_multipage_plot( +#' gene_data = my_gene_data, +#' facet_var = "CellType", +#' output_file = "cell_type_plots_labeled.pdf", +#' add_labels = "gene_symbol" +#' ) +#' } +#' +#' @import ggplot2 +#' @importFrom viridis scale_color_viridis_c +#' @importFrom ggrepel geom_text_repel +#' +#' @keywords internal +.create_multipage_plot <- function(gene_data, facet_var, output_file, + add_labels = NULL, verbose = TRUE) { + # Check for required packages + if(!is.null(add_labels) && !requireNamespace("ggrepel", quietly = TRUE)) { + stop("Package 'ggrepel' is needed for adding labels. Please install it.", + call. = FALSE) + } + + if (verbose) messager("Saving plot -->", output_file) + # Check if the specified label column exists + if(!is.null(add_labels) && !(add_labels %in% colnames(gene_data))) { + stop(paste0("Label column '", add_labels, "' not found in data."), + call. = FALSE) + } + + # Get unique cell types + cell_types <- unique(gene_data[[facet_var]]) + + # Start PDF device + pdf(output_file, width = 5, height = 5) + + # Create and print a plot for each cell type + for (cell_type in cell_types) { + # Subset data for this cell type + subset_data <- gene_data[gene_data[[facet_var]] == cell_type, ] + + # Create base plot for this cell type + g <- ggplot(subset_data, aes(x = boot, y = hit, color = hit)) + + geom_point(size = 1, alpha = .75) + + xlab("Mean Bootstrap Expression") + + ylab("Expression in cell type (%)\n") + + scale_color_viridis_c(direction = -1) + + ggtitle(paste("Cell Type:", cell_type)) + + geom_abline( + intercept = 0, + slope = 1, + linetype = "dashed", + colour = ggplot2::alpha("red",.5) + ) + + theme_classic() + + # Add labels if a column name was provided + if(!is.null(add_labels)) { + g <- g + ggrepel::geom_text_repel( + aes(label = .data[[add_labels]]), # Use the column name dynamically + size = 3, + force_pull = 0.1, + box.padding = 0.35, + point.padding = 0.5, + segment.color = "grey50", + max.overlaps = 25 + ) + + if (verbose) messager("Added labels from column:", add_labels) + } + + # Print the plot (adds it to the PDF) + print(g) + + if (verbose) messager("Added plot for", cell_type) + } + + # Close the PDF device + dev.off() + +} + + #' Bootstrap plot #' #' Plot bootstrap enrichment results. @@ -27,6 +140,7 @@ bootstrap_plot <- function(gene_data, requireNamespace("ggplot2") requireNamespace("patchwork") + requireNamespace("ggrepel") Pos <- Rep <- Exp <- p <- significant <- CellType <- NULL; exp_mats_msg <- paste( @@ -36,8 +150,11 @@ bootstrap_plot <- function(gene_data, plots <- list() #### Set up save path #### dir.create(save_dir, showWarnings = FALSE, recursive = TRUE) - gene_data$symLab <- ifelse(gene_data$hit > 25, - sprintf(" %s", gene_data$gene), NA) + gene_data$symLab <- ifelse( + test = gene_data$hit - gene_data$boot > 1 | gene_data$hit > 75, + yes = gene_data$gene, + no = "" + ) #### Filter gene_data #### if(!is.null(signif_ct)){ @@ -55,46 +172,22 @@ bootstrap_plot <- function(gene_data, dir.create(dirname(f), showWarnings = FALSE, recursive = TRUE) } ## Plot several variants of the graph ## - add_line <- function(){ - geom_abline(intercept = 0, slope = 1, - linetype = "dashed", - colour = ggplot2::alpha("red",.5)) - } #### Plot 1: Plot without gene names #### - g1 <- ggplot(gene_data, - aes_string(x = "boot", y = "hit", color="hit")) + - geom_point(size = 1, alpha=.75) + - xlab("Mean Bootstrap Expression") + - ylab("Expression in cell type (%)\n") + - scale_color_viridis_c() + - facet_grid(facets = facets, - scales = scales) + - add_line() + - theme_graph() - plots[["plot1"]] <- g1 - messager("Saving plot -->", files[[1]], v=verbose) - ggplot2::ggsave(filename = files[[1]], - plot = g1, - width = 3.5, - height = 3.5) + .create_multipage_plot( + gene_data = gene_data, + facet_var = "CellType", # Column name containing cell types + output_file = files[[1]], + verbose = verbose + ) #### Plot 2: Plot with gene names #### - g2 <- g1 + - geom_text(aes_string(label = "symLab"), - # fill=ggplot2::alpha("black",.5), - color=ggplot2::alpha("black",.75), - na.rm = TRUE, - hjust = 0, vjust = 0, size = 3 - ) + - scale_x_discrete(expand = expansion(mult = c(0,.15))) + - scale_y_discrete(expand = expansion(mult = c(0,.15))) - plots[["plot2"]] <- g2 - messager("Saving plot -->", files[[2]], v=verbose) - ggplot2::ggsave(filename = files[[2]], - plot = g2, - width = 3.5, - height = 3.5) - + .create_multipage_plot( + gene_data = gene_data, + facet_var = "CellType", # Column name containing cell types + output_file = files[[2]], + add_labels = "symLab", + verbose = verbose + ) #### Plot 3 #### if(is.null(exp_mats)){ @@ -134,11 +227,13 @@ bootstrap_plot <- function(gene_data, scale_x_discrete(breaks = NULL) + theme_graph() + facet_wrap(facets=facets) + # cat("Names of melt boot:", paste(names(melt_boot), collapse = " ")) + # wd <- 1 + length(unique(melt_boot[, hit])) * 0.25 plots[["plot3"]] <- g3 messager("Saving plot -->", files[[3]], v=verbose) ggplot2::ggsave(filename = files[[3]], plot = g3, - width = 3.5, + width = 8, height = 3.5) @@ -182,12 +277,12 @@ bootstrap_plot <- function(gene_data, labs(x=expression("Least specific" %->% "Most specific"), y="Expression in cell type (%)\n") #### Save #### - wd <- 1 + length(unique(melt_boot[,4])) * 0.175 + # wd <- 1 + length(unique(melt_boot[,4])) * 0.25 plots[["plot4"]] <- g4 messager("Saving plot -->", files[[4]], v=verbose) ggplot2::ggsave(filename = files[[4]], plot = g4, - width = wd, + width = 8, height = 4) } @@ -195,3 +290,5 @@ bootstrap_plot <- function(gene_data, return(list(plots=plots, paths=files)) } + + diff --git a/R/check_full_results.R b/R/check_full_results.R index af06975..15bbddc 100755 --- a/R/check_full_results.R +++ b/R/check_full_results.R @@ -8,7 +8,8 @@ #' #' @keywords internal check_full_results <- function(full_results, - sct_data) { + sct_data, + annotLevel) { if (!is.null(full_results)) { err_msg <- paste0( "ERROR: full_results must be a list of length 3 or 4", @@ -23,13 +24,9 @@ check_full_results <- function(full_results, " sct_data. Perhaps the wrong annotLevel was used?" ) if (sum(!as.character(unique(full_results$results$CellType)) %in% - colnames(sct_data[[1]]$specificity)) == + colnames(sct_data[[annotLevel]]$specificity)) == length(as.character(unique(full_results$results$CellType)))) { stop(err_msg2) } - if (sum(!as.character(unique(full_results$results$CellType)) %in% - colnames(sct_data[[1]]$specificity)) > 0) { - stop(err_msg2) - } } } diff --git a/R/generate_bootstrap_plots.r b/R/generate_bootstrap_plots.r index 4cf3038..e97c8b1 100755 --- a/R/generate_bootstrap_plots.r +++ b/R/generate_bootstrap_plots.r @@ -153,7 +153,8 @@ generate_bootstrap_plots <- function(sct_data = NULL, ) check_full_results( full_results = full_results, - sct_data = sct_data + sct_data = sct_data, + annotLevel = annotLevel ) results <- full_results$results #### Check gene lists ####