Skip to content
181 changes: 139 additions & 42 deletions R/bootstrap_plot.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,116 @@
#' Create a multi-page PDF with each cell type on a separate page
#'
#' This function creates a PDF where each unique value in a specified faceting
#' variable (typically "CellType") is plotted on a separate page. Each plot shows
#' expression data with points colored by expression levels.
#'
#' @param gene_data A data frame containing gene expression data with columns for
#' "boot", "hit", and a faceting variable (typically "CellType").
#' @param facet_var Character string specifying the column name to use for
#' separating plots onto different pages (e.g., "CellType").
#' @param output_file Character string specifying the path to save the PDF file.
#' @param add_labels Character string specifying the column name to use for point labels,
#' or NULL to disable labels (default: NULL).
#' @param verbose Logical indicating whether to print progress messages (default: TRUE).
#'
#' @return No return value, called for side effect of creating a PDF file.
#'
#' @details The function creates QQ plots of "boot" vs "hit" values, with points
#' colored by "hit" values using a reversed viridis color scale. Each plot includes
#' a title showing the cell type being displayed. If add_labels is provided, text labels
#' from the specified column will be added to the points using ggrepel.
#'
#' @examples
#' \dontrun{
#' # Create plots without labels
#' .create_multipage_plot(
#' gene_data = my_gene_data,
#' facet_var = "CellType",
#' output_file = "cell_type_plots.pdf"
#' )
#'
#' # Create plots with gene symbol labels
#' .create_multipage_plot(
#' gene_data = my_gene_data,
#' facet_var = "CellType",
#' output_file = "cell_type_plots_labeled.pdf",
#' add_labels = "gene_symbol"
#' )
#' }
#'
#' @import ggplot2
#' @importFrom viridis scale_color_viridis_c
#' @importFrom ggrepel geom_text_repel
#'
#' @keywords internal
.create_multipage_plot <- function(gene_data, facet_var, output_file,
add_labels = NULL, verbose = TRUE) {
# Check for required packages
if(!is.null(add_labels) && !requireNamespace("ggrepel", quietly = TRUE)) {
stop("Package 'ggrepel' is needed for adding labels. Please install it.",
call. = FALSE)
}

if (verbose) messager("Saving plot -->", output_file)
# Check if the specified label column exists
if(!is.null(add_labels) && !(add_labels %in% colnames(gene_data))) {
stop(paste0("Label column '", add_labels, "' not found in data."),
call. = FALSE)
}

# Get unique cell types
cell_types <- unique(gene_data[[facet_var]])

# Start PDF device
pdf(output_file, width = 5, height = 5)

# Create and print a plot for each cell type
for (cell_type in cell_types) {
# Subset data for this cell type
subset_data <- gene_data[gene_data[[facet_var]] == cell_type, ]

# Create base plot for this cell type
g <- ggplot(subset_data, aes(x = boot, y = hit, color = hit)) +
geom_point(size = 1, alpha = .75) +
xlab("Mean Bootstrap Expression") +
ylab("Expression in cell type (%)\n") +
scale_color_viridis_c(direction = -1) +
ggtitle(paste("Cell Type:", cell_type)) +
geom_abline(
intercept = 0,
slope = 1,
linetype = "dashed",
colour = ggplot2::alpha("red",.5)
) +
theme_classic()

# Add labels if a column name was provided
if(!is.null(add_labels)) {
g <- g + ggrepel::geom_text_repel(
aes(label = .data[[add_labels]]), # Use the column name dynamically
size = 3,
force_pull = 0.1,
box.padding = 0.35,
point.padding = 0.5,
segment.color = "grey50",
max.overlaps = 25
)

if (verbose) messager("Added labels from column:", add_labels)
}

# Print the plot (adds it to the PDF)
print(g)

if (verbose) messager("Added plot for", cell_type)
}

# Close the PDF device
dev.off()

}


#' Bootstrap plot
#'
#' Plot bootstrap enrichment results.
Expand Down Expand Up @@ -27,6 +140,7 @@ bootstrap_plot <- function(gene_data,

requireNamespace("ggplot2")
requireNamespace("patchwork")
requireNamespace("ggrepel")
Pos <- Rep <- Exp <- p <- significant <- CellType <- NULL;

exp_mats_msg <- paste(
Expand All @@ -36,8 +150,11 @@ bootstrap_plot <- function(gene_data,
plots <- list()
#### Set up save path ####
dir.create(save_dir, showWarnings = FALSE, recursive = TRUE)
gene_data$symLab <- ifelse(gene_data$hit > 25,
sprintf(" %s", gene_data$gene), NA)
gene_data$symLab <- ifelse(
test = gene_data$hit - gene_data$boot > 1 | gene_data$hit > 75,
yes = gene_data$gene,
no = ""
)

#### Filter gene_data ####
if(!is.null(signif_ct)){
Expand All @@ -55,46 +172,22 @@ bootstrap_plot <- function(gene_data,
dir.create(dirname(f), showWarnings = FALSE, recursive = TRUE)
}
## Plot several variants of the graph ##
add_line <- function(){
geom_abline(intercept = 0, slope = 1,
linetype = "dashed",
colour = ggplot2::alpha("red",.5))
}
#### Plot 1: Plot without gene names ####
g1 <- ggplot(gene_data,
aes_string(x = "boot", y = "hit", color="hit")) +
geom_point(size = 1, alpha=.75) +
xlab("Mean Bootstrap Expression") +
ylab("Expression in cell type (%)\n") +
scale_color_viridis_c() +
facet_grid(facets = facets,
scales = scales) +
add_line() +
theme_graph()
plots[["plot1"]] <- g1
messager("Saving plot -->", files[[1]], v=verbose)
ggplot2::ggsave(filename = files[[1]],
plot = g1,
width = 3.5,
height = 3.5)
.create_multipage_plot(
gene_data = gene_data,
facet_var = "CellType", # Column name containing cell types
output_file = files[[1]],
verbose = verbose
)

#### Plot 2: Plot with gene names ####
g2 <- g1 +
geom_text(aes_string(label = "symLab"),
# fill=ggplot2::alpha("black",.5),
color=ggplot2::alpha("black",.75),
na.rm = TRUE,
hjust = 0, vjust = 0, size = 3
) +
scale_x_discrete(expand = expansion(mult = c(0,.15))) +
scale_y_discrete(expand = expansion(mult = c(0,.15)))
plots[["plot2"]] <- g2
messager("Saving plot -->", files[[2]], v=verbose)
ggplot2::ggsave(filename = files[[2]],
plot = g2,
width = 3.5,
height = 3.5)

.create_multipage_plot(
gene_data = gene_data,
facet_var = "CellType", # Column name containing cell types
output_file = files[[2]],
add_labels = "symLab",
verbose = verbose
)

#### Plot 3 ####
if(is.null(exp_mats)){
Expand Down Expand Up @@ -134,11 +227,13 @@ bootstrap_plot <- function(gene_data,
scale_x_discrete(breaks = NULL) +
theme_graph() +
facet_wrap(facets=facets)
# cat("Names of melt boot:", paste(names(melt_boot), collapse = " "))
# wd <- 1 + length(unique(melt_boot[, hit])) * 0.25
plots[["plot3"]] <- g3
messager("Saving plot -->", files[[3]], v=verbose)
ggplot2::ggsave(filename = files[[3]],
plot = g3,
width = 3.5,
width = 8,
height = 3.5)


Expand Down Expand Up @@ -182,16 +277,18 @@ bootstrap_plot <- function(gene_data,
labs(x=expression("Least specific" %->% "Most specific"),
y="Expression in cell type (%)\n")
#### Save ####
wd <- 1 + length(unique(melt_boot[,4])) * 0.175
# wd <- 1 + length(unique(melt_boot[,4])) * 0.25
plots[["plot4"]] <- g4
messager("Saving plot -->", files[[4]], v=verbose)
ggplot2::ggsave(filename = files[[4]],
plot = g4,
width = wd,
width = 8,
height = 4)
}

if(isTRUE(show_plot)) methods::show(plots)
return(list(plots=plots,
paths=files))
}


9 changes: 3 additions & 6 deletions R/check_full_results.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
#'
#' @keywords internal
check_full_results <- function(full_results,
sct_data) {
sct_data,
annotLevel) {
if (!is.null(full_results)) {
err_msg <- paste0(
"ERROR: full_results must be a list of length 3 or 4",
Expand All @@ -23,13 +24,9 @@ check_full_results <- function(full_results,
" sct_data. Perhaps the wrong annotLevel was used?"
)
if (sum(!as.character(unique(full_results$results$CellType)) %in%
colnames(sct_data[[1]]$specificity)) ==
colnames(sct_data[[annotLevel]]$specificity)) ==
length(as.character(unique(full_results$results$CellType)))) {
stop(err_msg2)
}
if (sum(!as.character(unique(full_results$results$CellType)) %in%
colnames(sct_data[[1]]$specificity)) > 0) {
stop(err_msg2)
}
}
}
3 changes: 2 additions & 1 deletion R/generate_bootstrap_plots.r
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,8 @@ generate_bootstrap_plots <- function(sct_data = NULL,
)
check_full_results(
full_results = full_results,
sct_data = sct_data
sct_data = sct_data,
annotLevel = annotLevel
)
results <- full_results$results
#### Check gene lists ####
Expand Down