@@ -437,23 +437,82 @@ update_SE_from_tibble <- function(.data_mutated, se, column_belonging = NULL) {
437437 se
438438}
439439# ' @importFrom methods is
440- slice_optimised <- function (.data , ... , .preserve = FALSE ) {
440+ # ' @keywords internal
441+ # ' @param slice_args List of arguments passed to slice
442+ # ' @param .data The data object to check for grouping
443+ # ' @param .by The .by argument from slice
444+ # ' @return Logical indicating if this is an ungrouped range slice
445+ # ' @noRd
446+ is_range_slice_ungrouped_detected <- function (.data , slice_args , .by = NULL ) {
447+ # First check if this is a range slice
448+ is_range_slice <- any(sapply(slice_args , function (x ) {
449+ if (is.numeric(x )) {
450+ length(x ) > 1
451+ } else {
452+ FALSE
453+ }
454+ }))
455+
456+ # If not a range slice, return FALSE (no error needed)
457+ if (! is_range_slice ) {
458+ return (FALSE )
459+ }
460+
461+ # If it is a range slice, check if the data is grouped
462+ # Check both .by parameter and if the data itself is grouped
463+ is_grouped_by_param <- ! quo_is_null(.by )
464+ is_grouped_by_data <- inherits(.data , " grouped_df" ) ||
465+ (inherits(.data , " SummarizedExperiment" ) && ! is.null(attr(.data , " groups" )))
466+
467+ is_grouped <- is_grouped_by_param || is_grouped_by_data
468+
469+ # Return TRUE if ungrouped (should throw error)
470+ ! is_grouped
471+ }
472+
473+ slice_optimised <- function (.data , ... , .by = NULL , .preserve = FALSE ) {
441474
442475 . <- NULL
443476
477+ .by <- enquo(.by )
478+
479+ # For slice operations that might select multiple rows, we need to handle the case where
480+ # the result cannot maintain the SummarizedExperiment structure
481+ # In this case, we should return a tibble with the equivalent data
482+ # Check if this is an ungrouped range slice
483+ slice_args <- list (... )
484+ if (is_range_slice_ungrouped_detected(.data , slice_args , .by )) {
485+ # For range slices on ungrouped data, throw an error with a helpful message
486+ stop(" tidySummarizedExperiment says: slice using a range doesn't work on ungrouped data. Please use .by parameter or convert to tibble with as_tibble() before using slice with ranges." )
487+ }
488+
489+
490+ # For single row slices, use the original optimized approach
444491 # This simulated tibble only gets samples and features so we know those that have been completely omitted already
445492 # In order to save time for the as_tibble conversion
446493 simulated_slice =
447- simulate_feature_sample_from_tibble(.data ) %> %
448- dplyr :: slice(... , .preserve = .preserve )
494+ simulate_feature_sample_from_tibble(.data , !! .by ) %> %
495+ dplyr :: slice(... , .by = !! .by , . preserve = .preserve )
449496
497+ # Remove .by column only if it's not a special column
498+ if (! rlang :: quo_is_null(.by ) && ! quo_name(.by ) %in% c(f_(.data )$ name , s_(.data )$ name )) {
499+ simulated_slice <- simulated_slice %> % select(- !! .by )
500+ }
501+
450502 .data =
451503 .data %> %
452504
453505 # Subset the object for samples and features present in the simulated data
454- . [rownames(. ) %in% simulated_slice [,f_(. )$ name ],
455- colnames(. ) %in% simulated_slice [,s_(. )$ name ]] %> %
456- inner_join(simulated_slice , by = c(f_(. )$ name , s_(. )$ name ))
506+ . [rownames(.data ) %in% simulated_slice [,f_(.data )$ name ][[1 ]],
507+ colnames(.data ) %in% simulated_slice [,s_(.data )$ name ][[1 ]]
508+ ]
509+ .data = .data %> %
510+ inner_join(simulated_slice , by = c(f_(.data )$ name , s_(.data )$ name ))
511+
512+ # If the result is already a tibble (due to mixed scope join), return it directly
513+ if (.data %> % is(" tbl" )) {
514+ return (.data )
515+ }
457516
458517 # If order do not match with the one proposed by slice convert to tibble
459518 if (.data %> % is(" tbl" ) %> % not()) {
@@ -1048,13 +1107,103 @@ get_special_column_name_symbol <- function(name) {
10481107# This function produce artificially the feature ans sample column,
10491108# to make optimisation before as_tibble is called
10501109# for big datasets
1051- simulate_feature_sample_from_tibble <- function (.data ) {
1110+ simulate_feature_sample_from_tibble <- function (.data , ... ) {
10521111
10531112 . <- NULL
10541113 r <- rownames(.data ) %> % . [rep(1 : length(. ), ncol(.data ) )]
10551114 c <- colnames(.data ) %> % . [rep(1 : length(. ), each = nrow(.data ) )]
10561115
1057- tibble(!! f_(.data )$ symbol : = r , !! s_(.data )$ symbol : = c )
1116+ # Create base tibble with feature and sample columns
1117+ result_tibble <- tibble(!! f_(.data )$ symbol : = r , !! s_(.data )$ symbol : = c )
1118+
1119+ # BELOW IS THE MORE COMPLEX GROUP CASE
1120+ # THAT SLOWS DOWN 10x
1121+ # IN THE FUTURE THIS COULD BE OPTIMISED
1122+
1123+ # Store original dimensions for use in rowData/colData processing
1124+ n_features_orig <- nrow(.data )
1125+ n_samples_orig <- ncol(.data )
1126+ feature_names_orig <- rownames(.data )
1127+ sample_names_orig <- colnames(.data )
1128+
1129+ # Store column names to avoid issues with .data references
1130+ feature_col_name <- f_(.data )$ name
1131+ sample_col_name <- s_(.data )$ name
1132+ feature_col_symbol <- f_(.data )$ symbol
1133+ sample_col_symbol <- s_(.data )$ symbol
1134+
1135+ # Check if ... parameters reference columns in rowData or colData
1136+ # and attach only those columns to the output
1137+
1138+ # Extract column names from ... parameters
1139+ slice_colnames <- get_ellipse_colnames(... )
1140+
1141+ # Filter out any non-character elements (like quosures)
1142+ slice_colnames <- slice_colnames [is.character(slice_colnames )]
1143+
1144+ # Only proceed if there are slice parameters
1145+ if (length(slice_colnames ) > 0 ) {
1146+ # Get available rowData and colData column names
1147+ rowdata_colnames <- if (.hasSlot(.data , " rowData" ) | .hasSlot(.data , " elementMetadata" )) {
1148+ colnames(rowData(.data ))
1149+ } else {
1150+ character (0 )
1151+ }
1152+
1153+ coldata_colnames <- colnames(colData(.data ))
1154+
1155+ # Check which slice columns are in rowData
1156+ rowdata_matches <- intersect(slice_colnames , rowdata_colnames )
1157+
1158+ # Check which slice columns are in colData
1159+ coldata_matches <- intersect(slice_colnames , coldata_colnames )
1160+
1161+ # Add rowData columns if they are referenced in ...
1162+ if (length(rowdata_matches ) > 0 ) {
1163+ # Create rowData tibble with repeated rows for each sample
1164+ rowdata_df <- as.data.frame(rowData(.data )[, rowdata_matches , drop = FALSE ])
1165+ rowdata_df [[feature_col_name ]] <- rownames(rowData(.data ))
1166+
1167+ # Create expanded tibble more efficiently
1168+ rowdata_tibble <- tibble(
1169+ !! feature_col_symbol : = rep(rowdata_df [[feature_col_name ]], each = n_samples_orig ),
1170+ !! sample_col_symbol : = rep(sample_names_orig , n_features_orig )
1171+ )
1172+
1173+ # Add the rowData columns
1174+ for (col in rowdata_matches ) {
1175+ rowdata_tibble [[col ]] <- rep(rowdata_df [[col ]], each = n_samples_orig )
1176+ }
1177+
1178+ # Join with result tibble
1179+ result_tibble <- result_tibble %> %
1180+ left_join(rowdata_tibble , by = c(feature_col_name , sample_col_name ))
1181+ }
1182+
1183+ # Add colData columns if they are referenced in ...
1184+ if (length(coldata_matches ) > 0 ) {
1185+ # Create colData tibble with repeated columns for each feature
1186+ coldata_df <- as.data.frame(colData(.data )[, coldata_matches , drop = FALSE ])
1187+ coldata_df [[sample_col_name ]] <- rownames(colData(.data ))
1188+
1189+ # Create expanded tibble more efficiently
1190+ coldata_tibble <- tibble(
1191+ !! feature_col_symbol : = rep(feature_names_orig , each = n_samples_orig ),
1192+ !! sample_col_symbol : = rep(coldata_df [[sample_col_name ]], n_features_orig )
1193+ )
1194+
1195+ # Add the colData columns
1196+ for (col in coldata_matches ) {
1197+ coldata_tibble [[col ]] <- rep(coldata_df [[col ]], n_features_orig )
1198+ }
1199+
1200+ # Join with result tibble
1201+ result_tibble <- result_tibble %> %
1202+ left_join(coldata_tibble , by = c(feature_col_name , sample_col_name ))
1203+ }
1204+ }
1205+
1206+ return (result_tibble )
10581207}
10591208
10601209feature__ <- get_special_column_name_symbol(" .feature" )
0 commit comments