Skip to content

Commit 7d03660

Browse files
authored
Merge pull request #111 from tidyomics/fix-slice
Fix slice
2 parents d16eb22 + cd62829 commit 7d03660

File tree

6 files changed

+329
-16
lines changed

6 files changed

+329
-16
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: tidySummarizedExperiment
33
Title: Brings SummarizedExperiment to the Tidyverse
4-
Version: 1.19.7
4+
Version: 1.19.8
55
Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
66
role = c("aut", "cre")) )
77
Author: Stefano Mangiola [aut, cre] <[email protected]>
@@ -60,7 +60,7 @@ Biarch: true
6060
biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics
6161
Encoding: UTF-8
6262
LazyData: true
63-
RoxygenNote: 7.3.2
63+
RoxygenNote: 7.3.3
6464
Roxygen: list(markdown = TRUE)
6565
LazyDataCompression: xz
6666
URL: https://github.com/stemangiola/tidySummarizedExperiment

R/dplyr_methods.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,9 +371,11 @@ rowwise.SummarizedExperiment <- function(data, ...) {
371371
#'
372372
#' Wickham, H., François, R., Henry, L., Müller, K., Vaughan, D. (2023). dplyr: A Grammar of Data Manipulation. R package version 2.1.4, https://CRAN.R-project.org/package=dplyr
373373
#' @export
374-
slice.SummarizedExperiment <- function(.data, ..., .preserve=FALSE) {
374+
slice.SummarizedExperiment <- function(.data, ..., .by = NULL, .preserve = FALSE) {
375375

376-
slice_optimised(.data, ..., .preserve=.preserve)
376+
.by <- enquo(.by)
377+
378+
slice_optimised(.data, ..., .by = !!.by, .preserve = .preserve)
377379

378380
# .data |>
379381
# as_tibble(skip_GRanges = T) |>

R/utilities.R

Lines changed: 157 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -437,23 +437,82 @@ update_SE_from_tibble <- function(.data_mutated, se, column_belonging = NULL) {
437437
se
438438
}
439439
#' @importFrom methods is
440-
slice_optimised <- function(.data, ..., .preserve=FALSE) {
440+
#' @keywords internal
441+
#' @param slice_args List of arguments passed to slice
442+
#' @param .data The data object to check for grouping
443+
#' @param .by The .by argument from slice
444+
#' @return Logical indicating if this is an ungrouped range slice
445+
#' @noRd
446+
is_range_slice_ungrouped_detected <- function(.data, slice_args, .by = NULL) {
447+
# First check if this is a range slice
448+
is_range_slice <- any(sapply(slice_args, function(x) {
449+
if (is.numeric(x)) {
450+
length(x) > 1
451+
} else {
452+
FALSE
453+
}
454+
}))
455+
456+
# If not a range slice, return FALSE (no error needed)
457+
if (!is_range_slice) {
458+
return(FALSE)
459+
}
460+
461+
# If it is a range slice, check if the data is grouped
462+
# Check both .by parameter and if the data itself is grouped
463+
is_grouped_by_param <- !quo_is_null(.by)
464+
is_grouped_by_data <- inherits(.data, "grouped_df") ||
465+
(inherits(.data, "SummarizedExperiment") && !is.null(attr(.data, "groups")))
466+
467+
is_grouped <- is_grouped_by_param || is_grouped_by_data
468+
469+
# Return TRUE if ungrouped (should throw error)
470+
!is_grouped
471+
}
472+
473+
slice_optimised <- function(.data, ..., .by = NULL, .preserve = FALSE) {
441474

442475
. <- NULL
443476

477+
.by <- enquo(.by)
478+
479+
# For slice operations that might select multiple rows, we need to handle the case where
480+
# the result cannot maintain the SummarizedExperiment structure
481+
# In this case, we should return a tibble with the equivalent data
482+
# Check if this is an ungrouped range slice
483+
slice_args <- list(...)
484+
if (is_range_slice_ungrouped_detected(.data, slice_args, .by)) {
485+
# For range slices on ungrouped data, throw an error with a helpful message
486+
stop("tidySummarizedExperiment says: slice using a range doesn't work on ungrouped data. Please use .by parameter or convert to tibble with as_tibble() before using slice with ranges.")
487+
}
488+
489+
490+
# For single row slices, use the original optimized approach
444491
# This simulated tibble only gets samples and features so we know those that have been completely omitted already
445492
# In order to save time for the as_tibble conversion
446493
simulated_slice =
447-
simulate_feature_sample_from_tibble(.data) %>%
448-
dplyr::slice(..., .preserve = .preserve)
494+
simulate_feature_sample_from_tibble(.data, !!.by) %>%
495+
dplyr::slice(..., .by = !!.by, .preserve = .preserve)
449496

497+
# Remove .by column only if it's not a special column
498+
if (!rlang::quo_is_null(.by) && !quo_name(.by) %in% c(f_(.data)$name, s_(.data)$name)) {
499+
simulated_slice <- simulated_slice %>% select(-!!.by)
500+
}
501+
450502
.data =
451503
.data %>%
452504

453505
# Subset the object for samples and features present in the simulated data
454-
.[rownames(.) %in% simulated_slice[,f_(.)$name],
455-
colnames(.) %in% simulated_slice[,s_(.)$name]] %>%
456-
inner_join(simulated_slice, by = c(f_(.)$name, s_(.)$name))
506+
.[rownames(.data) %in% simulated_slice[,f_(.data)$name][[1]],
507+
colnames(.data) %in% simulated_slice[,s_(.data)$name][[1]]
508+
]
509+
.data = .data %>%
510+
inner_join(simulated_slice, by = c(f_(.data)$name, s_(.data)$name))
511+
512+
# If the result is already a tibble (due to mixed scope join), return it directly
513+
if (.data %>% is("tbl")) {
514+
return(.data)
515+
}
457516

458517
# If order do not match with the one proposed by slice convert to tibble
459518
if (.data %>% is("tbl") %>% not()) {
@@ -1048,13 +1107,103 @@ get_special_column_name_symbol <- function(name) {
10481107
# This function produce artificially the feature ans sample column,
10491108
# to make optimisation before as_tibble is called
10501109
# for big datasets
1051-
simulate_feature_sample_from_tibble <- function(.data) {
1110+
simulate_feature_sample_from_tibble <- function(.data, ...) {
10521111

10531112
. <- NULL
10541113
r <- rownames(.data) %>% .[rep(1:length(.), ncol(.data) )]
10551114
c <- colnames(.data) %>% .[rep(1:length(.), each = nrow(.data) )]
10561115

1057-
tibble(!!f_(.data)$symbol := r, !!s_(.data)$symbol := c)
1116+
# Create base tibble with feature and sample columns
1117+
result_tibble <- tibble(!!f_(.data)$symbol := r, !!s_(.data)$symbol := c)
1118+
1119+
# BELOW IS THE MORE COMPLEX GROUP CASE
1120+
# THAT SLOWS DOWN 10x
1121+
# IN THE FUTURE THIS COULD BE OPTIMISED
1122+
1123+
# Store original dimensions for use in rowData/colData processing
1124+
n_features_orig <- nrow(.data)
1125+
n_samples_orig <- ncol(.data)
1126+
feature_names_orig <- rownames(.data)
1127+
sample_names_orig <- colnames(.data)
1128+
1129+
# Store column names to avoid issues with .data references
1130+
feature_col_name <- f_(.data)$name
1131+
sample_col_name <- s_(.data)$name
1132+
feature_col_symbol <- f_(.data)$symbol
1133+
sample_col_symbol <- s_(.data)$symbol
1134+
1135+
# Check if ... parameters reference columns in rowData or colData
1136+
# and attach only those columns to the output
1137+
1138+
# Extract column names from ... parameters
1139+
slice_colnames <- get_ellipse_colnames(...)
1140+
1141+
# Filter out any non-character elements (like quosures)
1142+
slice_colnames <- slice_colnames[is.character(slice_colnames)]
1143+
1144+
# Only proceed if there are slice parameters
1145+
if (length(slice_colnames) > 0) {
1146+
# Get available rowData and colData column names
1147+
rowdata_colnames <- if (.hasSlot(.data, "rowData") | .hasSlot(.data, "elementMetadata")) {
1148+
colnames(rowData(.data))
1149+
} else {
1150+
character(0)
1151+
}
1152+
1153+
coldata_colnames <- colnames(colData(.data))
1154+
1155+
# Check which slice columns are in rowData
1156+
rowdata_matches <- intersect(slice_colnames, rowdata_colnames)
1157+
1158+
# Check which slice columns are in colData
1159+
coldata_matches <- intersect(slice_colnames, coldata_colnames)
1160+
1161+
# Add rowData columns if they are referenced in ...
1162+
if (length(rowdata_matches) > 0) {
1163+
# Create rowData tibble with repeated rows for each sample
1164+
rowdata_df <- as.data.frame(rowData(.data)[, rowdata_matches, drop = FALSE])
1165+
rowdata_df[[feature_col_name]] <- rownames(rowData(.data))
1166+
1167+
# Create expanded tibble more efficiently
1168+
rowdata_tibble <- tibble(
1169+
!!feature_col_symbol := rep(rowdata_df[[feature_col_name]], each = n_samples_orig),
1170+
!!sample_col_symbol := rep(sample_names_orig, n_features_orig)
1171+
)
1172+
1173+
# Add the rowData columns
1174+
for (col in rowdata_matches) {
1175+
rowdata_tibble[[col]] <- rep(rowdata_df[[col]], each = n_samples_orig)
1176+
}
1177+
1178+
# Join with result tibble
1179+
result_tibble <- result_tibble %>%
1180+
left_join(rowdata_tibble, by = c(feature_col_name, sample_col_name))
1181+
}
1182+
1183+
# Add colData columns if they are referenced in ...
1184+
if (length(coldata_matches) > 0) {
1185+
# Create colData tibble with repeated columns for each feature
1186+
coldata_df <- as.data.frame(colData(.data)[, coldata_matches, drop = FALSE])
1187+
coldata_df[[sample_col_name]] <- rownames(colData(.data))
1188+
1189+
# Create expanded tibble more efficiently
1190+
coldata_tibble <- tibble(
1191+
!!feature_col_symbol := rep(feature_names_orig, each = n_samples_orig),
1192+
!!sample_col_symbol := rep(coldata_df[[sample_col_name]], n_features_orig)
1193+
)
1194+
1195+
# Add the colData columns
1196+
for (col in coldata_matches) {
1197+
coldata_tibble[[col]] <- rep(coldata_df[[col]], n_features_orig)
1198+
}
1199+
1200+
# Join with result tibble
1201+
result_tibble <- result_tibble %>%
1202+
left_join(coldata_tibble, by = c(feature_col_name, sample_col_name))
1203+
}
1204+
}
1205+
1206+
return(result_tibble)
10581207
}
10591208

10601209
feature__ <- get_special_column_name_symbol(".feature")

man/slice.Rd

Lines changed: 7 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/tidySummarizedExperiment-package.Rd

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)