tidyomics
diff --git a/‎DESCRIPTION‎
Lines changed: 1 addition & 1 deletion b/‎DESCRIPTION‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/reduce_dimensions.R‎
Lines changed: 25 additions & 44 deletions b/‎R/reduce_dimensions.R‎
Lines changed: 25 additions & 44 deletions
diff --git a/‎R/rotate_dimensions.R‎
Lines changed: 1 addition & 1 deletion b/‎R/rotate_dimensions.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎dev/N52_workflow.R‎
Lines changed: 2 additions & 2 deletions b/‎dev/N52_workflow.R‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎dev/benchmark/TCGA_workflow_ttBulk_for_comparison.R‎
Lines changed: 2 additions & 2 deletions b/‎dev/benchmark/TCGA_workflow_ttBulk_for_comparison.R‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎dev/benchmark/pasilla_workflow_ttBulk_for_comparison.R‎
Lines changed: 2 additions & 2 deletions b/‎dev/benchmark/pasilla_workflow_ttBulk_for_comparison.R‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎dev/comparison_with_base_R.Rmd‎
Lines changed: 3 additions & 2 deletions b/‎dev/comparison_with_base_R.Rmd‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎dev/manuscript_differential_transcript_abundance.Rmd‎
Lines changed: 3 additions & 3 deletions b/‎dev/manuscript_differential_transcript_abundance.Rmd‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎dev/pasilla_workflow.R‎
Lines changed: 2 additions & 2 deletions b/‎dev/pasilla_workflow.R‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎inst/NEWS.rd‎
Lines changed: 5 additions & 0 deletions b/‎inst/NEWS.rd‎
Lines changed: 5 additions & 0 deletions
@@ -1,7 +1,7 @@
 Type: Package
 Package: tidybulk
 Title: Brings transcriptomics to the tidyverse 
-Version: 2.1.2
+Version: 2.1.3
 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com",
                   role = c("aut", "cre")),
             person("Maria", "Doyle", email = "Maria.Doyle@petermac.org",
 
@@ -2,23 +2,21 @@
 #'
 #' `r lifecycle::badge("maturing")`
 #'
-#' @description reduce_dimensions() takes as input A `tbl` (with at least three columns for sample, feature and transcript abundance) or `SummarizedExperiment` (more convenient if abstracted to tibble with library(tidySummarizedExperiment)) and calculates the reduced dimensional space of the transcript abundance.
+#' @description reduce_dimensions() takes as input a `SummarizedExperiment` and calculates the reduced dimensional space of the transcript abundance.
+#'
 #'
-#' @importFrom rlang enquo quo_name
 #' @importFrom magrittr not
 #' @importFrom dplyr filter distinct select mutate rename
 #' @importFrom tidyr pivot_wider
 #' @importFrom tibble enframe
-#' @importFrom SummarizedExperiment colData rowData assays
+#' @importFrom SummarizedExperiment colData rowData assays assayNames
 #' @importFrom stats prcomp
 #'
 #'
 #' @name reduce_dimensions
 #'
-#' @param .data A `tbl` (with at least three columns for sample, feature and transcript abundance) or `SummarizedExperiment` (more convenient if abstracted to tibble with library(tidySummarizedExperiment))
-#' @param .element The name of the element column (normally samples).
-#' @param .feature The name of the feature column (normally transcripts/genes)
-#' @param .abundance The name of the column including the numerical value the clustering is based on (normally transcript abundance)
+#' @param .data A `SummarizedExperiment`
+#' @param assay Character string: the name of the assay to use for dimension reduction (must match `assayNames(.data)`). This argument must be explicitly specified so that the choice of abundance matrix is deliberate.
 #'
 #' @param method A character string. The dimension reduction algorithm to use (PCA, MDS, tSNE, UMAP).
 #' @param top An integer. How many top genes to select for dimensionality reduction
@@ -43,22 +41,7 @@
 #' Underlying method for tSNE:
 #' Rtsne::Rtsne(data, ...)
 #'
-#' Underlying method for UMAP:
-#'
-#'  df_source =
-#' .data |>
-#'
-#'   # Filter NA symbol
-#'   filter(!!.feature |> is.na() |> not()) |>
-#'
-#'   # Prepare data frame
-#'   distinct(!!.feature,!!.element,!!.abundance) |>
-#'
-#'   # Filter most variable genes
-#'   keep_variable_transcripts(top) |>
-#'   reduce_dimensions(method="PCA",  .dims = calculate_for_pca_dimensions ) |>
-#'   as_matrix(rownames = quo_name(.element)) |>
-#'   uwot::tumap(...)
+#' Underlying method for UMAP: variable features from the chosen \code{assay} are optionally PCA-reduced, then \code{uwot::tumap()} is applied to the sample coordinates matrix.
 #'
 #'
 #' @return A tbl object with additional columns for the reduced dimensions
@@ -79,13 +62,13 @@
 #' counts.MDS =
 #'  airway |>
 #'  identify_abundant() |>
-#'  reduce_dimensions( method="MDS", .dims = 3)
+#'  reduce_dimensions(assay = "counts", method="MDS", .dims = 3)
 #'
 #'
 #' counts.PCA =
 #'  airway |>
 #'  identify_abundant() |>
-#'  reduce_dimensions(method="PCA", .dims = 3)
+#'  reduce_dimensions(assay = "counts", method="PCA", .dims = 3)
 #'
 #' @references
 #' Mangiola, S., Molania, R., Dong, R., Doyle, M. A., & Papenfuss, A. T. (2021). tidybulk: an R tidy framework for modular transcriptomic data analysis. Genome Biology, 22(1), 42. doi:10.1186/s13059-020-02233-7
@@ -102,9 +85,7 @@
 #'
 #'
 setGeneric("reduce_dimensions", function(.data,
-                                         .element = NULL,
-                                         .feature = NULL,
-                                         .abundance = NULL,
+                                         assay,
                                          method,
                                          .dims = 2,
 
@@ -122,7 +103,7 @@ standardGeneric("reduce_dimensions"))
 
 
 .reduce_dimensions_se = function(.data,
-                                 .abundance = NULL,
+                                 assay,
 
                                  method,
                                  .dims = 2,
@@ -135,10 +116,19 @@ standardGeneric("reduce_dimensions"))
   # Fix NOTEs
   . = NULL
 
-  .abundance = enquo(.abundance)
+  if (missing(assay))
+    stop("tidybulk says: please specify `assay` explicitly as a character string (e.g. assay = \"counts_scaled\"). If needed, create an assay scaled proportionally to library size with `scale_abundance()` and pass that assay name via `assay`.", call. = FALSE)
+  
+  if (!is.character(assay) || length(assay) != 1L || !nzchar(assay))
+    stop("tidybulk says: `assay` must be a single non-empty character string naming an assay in assayNames(.data).", call. = FALSE)
 
-  if(.abundance |> quo_is_symbolic()) my_assay = quo_name(.abundance)
-  else my_assay = get_assay_scaled_if_exists_SE(.data)
+  my_assay <- assay[[1L]]
+  if (!my_assay %in% assayNames(.data))
+    stop(sprintf(
+      "tidybulk says: assay \"%s\" is not in assayNames(.data) (%s).",
+      my_assay,
+      paste(assayNames(.data), collapse = ", ")
+    ), call. = FALSE)
 
   # adjust top for the max number of features I have
   if(top > nrow(.data)){
@@ -292,11 +282,8 @@ setMethod("reduce_dimensions",
 #' @importFrom rlang :=
 #' @importFrom stats setNames
 #'
-#' @param .data A tibble
-#' @param .abundance A column symbol with the value the clustering is based on (e.g., `count`)
+#' @param .data Feature-by-sample matrix (from an assay)
 #' @param .dims An integer vector corresponding to principal components of interest (e.g., 1:6)
-#' @param .feature A column symbol. The column that is represents entities to cluster (i.e., normally genes)
-#' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples)
 #' @param top An integer. How many top genes to select
 #' @param of_samples A boolean
 #' @param transform A function that will transform the counts; by default it is log1p for RNA sequencing data, but to avoid transformation you can use identity
@@ -439,11 +426,8 @@ get_reduced_dimensions_MDS_bulk_SE <-
 #' @importFrom magrittr divide_by
 #' @importFrom Matrix t
 #'
-#' @param .data A tibble
-#' @param .abundance A column symbol with the value the clustering is based on (e.g., `count`)
+#' @param .data Feature-by-sample matrix (from an assay)
 #' @param .dims An integer vector corresponding to principal components of interest (e.g., 1:6)
-#' @param .feature A column symbol. The column that is represents entities to cluster (i.e., normally genes)
-#' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples)
 #' @param top An integer. How many top genes to select
 #' @param of_samples A boolean
 #' @param transform A function that will transform the counts; by default it is log1p for RNA sequencing data, but to avoid transformation you can use identity
@@ -533,11 +517,8 @@ we suggest to partition the dataset for sample clusters.
 #' @importFrom stats setNames
 #' @importFrom Matrix t
 #'
-#' @param .data A tibble
-#' @param .abundance A column symbol with the value the clustering is based on (e.g., `count`)
+#' @param .data Feature-by-sample matrix (from an assay)
 #' @param .dims An integer vector corresponding to principal components of interest (e.g., 1:6)
-#' @param .feature A column symbol. The column that is represents entities to cluster (i.e., normally genes)
-#' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples)
 #' @param top An integer. How many top genes to select
 #' @param of_samples A boolean
 #' @param transform A function that will transform the counts; by default it is log1p for RNA sequencing data, but to avoid transformation you can use identity
 
@@ -49,7 +49,7 @@
 #' counts.MDS =
 #'  airway |>
 #'  identify_abundant() |>
-#'  reduce_dimensions( method="MDS", .dims = 3)
+#'  reduce_dimensions(assay = "counts", method="MDS", .dims = 3)
 #'
 #' counts.MDS.rotated =  rotate_dimensions(counts.MDS, `Dim1`, `Dim2`, rotation_degrees = 45, .element = sample)
 #'
 
@@ -44,15 +44,15 @@ tt_scaled %>%
 
 # Visualise MDS for cell types
 tt_scaled %>%
-	reduce_dimensions(method="MDS", .dims = 3) %>%
+	reduce_dimensions(assay = "counts_scaled", method="MDS", .dims = 3) %>%
 	select(contains("Dim"), sample, CAPRA_TOTAL,  cell_type_formatted) %>%
 	distinct() %>%
 	GGally::ggpairs(columns = 1:3, ggplot2::aes(colour=cell_type_formatted))
 
 # Visualise MDS CAPRA
 tt_scaled %>%
 	group_by(cell_type_formatted) %>%
-	do(reduce_dimensions((.), method="MDS")) %>%
+	do(reduce_dimensions((.), assay = "counts_scaled", method="MDS")) %>%
 	select(contains("Dim"), sample, CAPRA_TOTAL,  cell_type_formatted) %>%
 	distinct() %>%
 	ggplot(aes(x=Dim1, y = Dim2, color=CAPRA_TOTAL)) +
 
@@ -25,7 +25,7 @@ tt_scaled = tt %>% identify_abundant(factor_of_interest = PFI.2 ) %>% scale_abun
 	tt_scaled
 }
 plot_MDS = function(){
-	tt_mds = tt_scaled %>% 	reduce_dimensions(method = "MDS", .dims = 3)
+	tt_mds = tt_scaled %>% 	reduce_dimensions(assay = "counts_scaled", method = "MDS", .dims = 3)
 # Visualise MDS for cell types
 p = tt_mds %>%
 	select(contains("Dim"), patient, PFI.2) %>%
@@ -45,7 +45,7 @@ plot_adjusted_MDS = function(){
 
 		adjust_abundance( ~  PFI.2 + batch) %>%
 		filter(`count_scaled_adjusted` %>% is.na %>% `!`) %>%
-		reduce_dimensions(.abundance = `count_scaled_adjusted`,	method = "MDS",	.dims = 3) %>%
+		reduce_dimensions(assay = "counts_scaled_adjusted",	method = "MDS",	.dims = 3) %>%
 
 		# Plot
 		select(contains("Dim"), patient, PFI.2,  batch) %>%
 
@@ -55,7 +55,7 @@ tt_scaled = tt %>% identify_abundant(factor_of_interest = condition) %>% scale_a
 	tt_scaled
 }
 plot_MDS = function(){
-	tt_mds = tt_scaled %>% 	reduce_dimensions(method = "MDS", .dims = 3)
+	tt_mds = tt_scaled %>% 	reduce_dimensions(assay = "counts_scaled", method = "MDS", .dims = 3)
 # Visualise MDS for cell types
 p = tt_mds %>%
 	select(contains("Dim"), sample, type,  condition) %>%
@@ -69,7 +69,7 @@ plot_adjusted_MDS = function(){
  p = tt_mds %>%
 	adjust_abundance( ~ condition + type) %>%
 	filter(`count_scaled_adjusted` %>% is.na %>% `!`) %>%
-	reduce_dimensions(.abundance = `count_scaled_adjusted`,	method = "MDS",	.dims = 3) %>%
+	reduce_dimensions(assay = "counts_scaled_adjusted",	method = "MDS",	.dims = 3) %>%
 
 	# Plot
 	select(contains("Dim"), sample, type,  condition) %>%
 
@@ -187,7 +187,7 @@ Tidy transcriptomics
 ```{r mds, cache=TRUE}
 tt.norm.MDS =
   tt.norm %>%
-  reduce_dimensions(method="MDS", .dims = 2)
+  reduce_dimensions(assay = "counts_scaled", method="MDS", .dims = 2)
 
 ```
 </div>
@@ -218,7 +218,7 @@ Tidy transcriptomics
 ```{r pca, cache=TRUE, message=FALSE, warning=FALSE, results='hide'}
 tt.norm.PCA =
   tt.norm %>%
-  reduce_dimensions(method="PCA", .dims = 2)
+  reduce_dimensions(assay = "counts_scaled", method="PCA", .dims = 2)
 ```
 </div>
 <div class="column-right">
@@ -245,6 +245,7 @@ tt.norm.tSNE =
 	tidybulk(		sample, ens, count_scaled) %>%
 	identify_abundant() %>%
 	reduce_dimensions(
+		assay = "count_scaled",
 		method = "tSNE",
 		perplexity=10,
 		pca_scale =TRUE
 
@@ -107,7 +107,7 @@ tt_scaled %>%
 
 ```{r}
 # Reduce data dimensionality with arbitrary number of dimensions
-tt_mds <- tt_scaled %>% reduce_dimensions(method="MDS", .dims = 3)
+tt_mds <- tt_scaled %>% reduce_dimensions(assay = "counts_scaled", method="MDS", .dims = 3)
 
 # Plot all-vs-all MDS dimensions 
 tt_mds %>%
@@ -127,8 +127,8 @@ tt_mds_adj_mds <-
 
     # Calculate reduced dimensions on the adjusted counts as well
     reduce_dimensions(
-       .abundance = count_scaled_adjusted, 
-       method="MDS", .dim = 3
+       assay = "counts_scaled_adjusted",
+       method="MDS", .dims = 3
     ) 
 
 
 
@@ -66,7 +66,7 @@ p1 =
 	ylab("Density") +
 	my_theme
 
-tt_mds = tt_scaled %>% 	reduce_dimensions(method="MDS", .dims = 3)
+tt_mds = tt_scaled %>% 	reduce_dimensions(assay = "counts_scaled", method="MDS", .dims = 3)
 
 # Visualise MDS for cell types
 p2 =
@@ -111,7 +111,7 @@ tt_adj = tt_mds %>%	adjust_abundance(~ condition + type)
 p3 =
 	tt_adj %>%
 	filter( `count_scaled_adjusted` %>% is.na %>% `!`) %>%
-	reduce_dimensions(.abundance = `count_scaled_adjusted`, method="MDS", .dims = 3) %>%
+	reduce_dimensions(assay = "counts_scaled_adjusted", method="MDS", .dims = 3) %>%
 
 	# Plot
 	select(contains("Dim"), sample, type,  condition ) %>%
 
@@ -1,6 +1,11 @@
 \name{NEWS}
 \title{News for Package \pkg{tidybulk}}
 
+\section{Changes in development version}{
+\itemize{
+    \item \strong{BREAKING CHANGE:} \code{reduce_dimensions()} no longer accepts \code{.abundance} (a tidy-eval assay symbol). The assay must be given explicitly as a character string via \code{assay = "..."}, matching \code{assayNames(object)}. Omitting \code{assay} or passing an unknown assay name raises an error, so users must deliberately choose which abundance matrix to use (commonly a library-size scaled assay from \code{scale_abundance()}). Update all calls, tests, and vignettes that used \code{.abundance = counts} to \code{assay = "counts"} (or the appropriate assay name).
+}}
+
 \section{Changes in version 1.2.0, Bioconductor 3.12 Release}{
 \itemize{
     \item Make gene filtering functionality `identify_abundant` explicit; a warning will be given if this has not been performed before the majority of workflow steps (e.g. `test_differential_abundance`).