diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 5477215..f023f74 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -34,6 +34,7 @@ jobs: r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true + extra-repositories: 'https://bnprks.r-universe.dev' - uses: r-lib/actions/setup-r-dependencies@v2 with: diff --git a/DESCRIPTION b/DESCRIPTION index 17830e9..5a6b958 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,7 +17,8 @@ RoxygenNote: 7.3.2 Suggests: testthat (>= 3.0.0), Matrix, - SeuratObject (>= 5.0.0) + SeuratObject (>= 5.0.0), + BPCells Config/testthat/edition: 3 Imports: methods, diff --git a/R/hdf5.R b/R/hdf5.R index 9a568d0..4ca879f 100644 --- a/R/hdf5.R +++ b/R/hdf5.R @@ -25,11 +25,14 @@ create_hdf5 <- function( if (file.exists(h5path)) { return(err(sprintf("cannot create h5 file as it already exists: %s", h5path))) } - - # create hdf5 file and matrix groups - f <- hdf5r::H5File$new(h5path, mode = "w") - - write_mat(f, count_mat, feature_ids) + if (inherits(count_mat, "IterableMatrix")) { + write_mat_bpcells(h5path, count_mat, feature_ids) + f <- hdf5r::H5File$new(h5path, mode = "r+") + } else { + # create hdf5 file and matrix groups + f <- hdf5r::H5File$new(h5path, mode = "w") + write_mat(f, count_mat, feature_ids) + } write_clusters(f, clusters) write_projections(f, projections) write_metadata(f, seurat_obj_version) @@ -79,6 +82,56 @@ write_mat <- function(f, count_mat, feature_ids) { features_group$close() } +#' Writes BPCells matrix to the H5 file +#' +#' @param h5path Path to a new H5 file +#' @param count_mat A sparse matrix inheriting from \code{IterableMatrix} from \pkg{BPCells}. +#' Rows are features, Columns are barcodes. +#' @param feature_ids optional character vector that specifies the feature ids of the count matrix. +#' Typically, these are the ensemble ids. +#' +#' @note +#' This function will check whether \pkg{BPCells} is installed. +#' +#' @noRd +write_mat_bpcells <- function(h5path, count_mat, feature_ids) { + if (!requireNamespace("BPCells", quietly = TRUE)) { + stop( + "Please install 'BPCells' to write IterableMatrix:\n", + " remotes::install_github('bnprks/BPCells/r')" + ) + } + features <- rownames(count_mat) + barcodes <- colnames(count_mat) + if (length(feature_ids) == 0) { + feature_ids <- rownames(count_mat) + } + if (!BPCells::matrix_type(count_mat) == "uint32_t") { + count_mat <- BPCells::convert_matrix_type(count_mat) + } + count_mat <- BPCells::write_matrix_10x_hdf5(count_mat, path = h5path) + + # Need to rewrite the barcodes and features to use strings with max length + f <- hdf5r::H5File$new(h5path, mode = "r+") + + matrix_group <- f$open("matrix") + hdf5r::h5unlink(matrix_group, "features") + hdf5r::h5unlink(matrix_group, "barcodes") + + create_str_dataset(matrix_group, "barcodes", barcodes) + features_group <- matrix_group$create_group("features") + + create_str_dataset(features_group, "name", features) + create_str_dataset(features_group, "id", as.character(feature_ids)) + create_str_dataset(features_group, "feature_type", rep("Gene Expression", length(features))) + create_str_dataset(features_group, "_all_tag_keys", as.character()) # required features + features_group$close() + matrix_group$close() + + f$close_all() # make sure everything closed +} + + #' Prints the metadata list to stdout. #' #' @param metadata The metadata list diff --git a/R/validate.R b/R/validate.R index 735d461..ea6fe99 100644 --- a/R/validate.R +++ b/R/validate.R @@ -15,8 +15,8 @@ #' #' @export validate_count_mat <- function(count_mat, feature_ids = NULL) { # nolint: cyclocomp_linter. - if (!is(count_mat, "dgCMatrix")) { - return(err("count_mat must be a dgCMatrix")) + if (!is(count_mat, "dgCMatrix") && !inherits(count_mat, "IterableMatrix")) { + return(err("count_mat must be a dgCMatrix or IterableMatrix")) } features <- rownames(count_mat) @@ -34,11 +34,13 @@ validate_count_mat <- function(count_mat, feature_ids = NULL) { # nolint: cycloc if (length(barcodes) == 0) { return(err("count_mat must have at least one barcode")) } - if (any(is.nan(count_mat@x))) { - return(err("matrix values must not be NaN")) - } - if (any(is.infinite(count_mat@x))) { - return(err("matrix values must not be infinite")) + if (inherits(count_mat, "dgCMatrix")) { + if (any(is.nan(count_mat@x))) { + return(err("matrix values must not be NaN")) + } + if (any(is.infinite(count_mat@x))) { + return(err("matrix values must not be infinite")) + } } if (!all(sapply(barcodes, nzchar))) { return(err("barcodes cannot be the empty string")) diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R index d678ebb..eacf0c8 100644 --- a/tests/testthat/helper.R +++ b/tests/testthat/helper.R @@ -27,6 +27,32 @@ create_count_mat <- function(rows, cols, valid_barcodes = FALSE) { mat } +#' Create a sparse count_mat with BPCells +#' +#' @importFrom Matrix rsparsematrix +create_count_mat_bpcells <- function(rows, cols, valid_barcodes = FALSE) { + mat <- Matrix::rsparsematrix(rows, cols, 0.5, rand.x = function(n) as.integer(100 * runif(n))) + + rownames <- as.character() + if (rows > 0) { + rownames <- paste0("row", 1:rows) + } + + colnames <- as.character() + if (cols > 0) { + if (valid_barcodes) { + colnames <- lapply(rep(14, cols), random_barcode) + } else { + colnames <- paste0("col", 1:cols) + } + } + + dimnames(mat) <- list(rownames, colnames) + + mat_dir <- tempfile(pattern = "bpcells") + BPCells::write_matrix_dir(mat, mat_dir) +} + #' Create a dimensional reduction (projection) object create_dim_reduction <- function(count_mat, key) { barcode_count <- dim(count_mat)[2] diff --git a/tests/testthat/test-hdf5.R b/tests/testthat/test-hdf5.R index 0c6ca29..4a782d0 100644 --- a/tests/testthat/test-hdf5.R +++ b/tests/testthat/test-hdf5.R @@ -51,6 +51,63 @@ test_that("can create hdf5", { expect(!is.null(hdf5r::readDataSet(louper_seurat_version)), "extra field is missing") }) +test_that("can create hdf5 with BPCells", { + skip_if_not_installed("BPCells") + + barcode_count <- 5 + proj <- create_dense_mat(barcode_count, 2) + count_mat <- create_count_mat_bpcells(100, barcode_count) + feature_ids <- paste0("Add_", rownames(count_mat)) + + clusters <- list("f1" = factor(c("a", "c", "b", "a", "b"), levels = c("a", "b", "c"), ordered = TRUE)) + projections <- list("p1" = proj) + h5path <- sprintf("%s.h5", tempfile()) + + seurat_obj_version <- "1.2.3" + create_hdf5(count_mat, clusters, projections, h5path, feature_ids, seurat_obj_version) + + count_mat <- as(count_mat, "dgCMatrix") + f <- hdf5r::h5file(h5path) + + # spot check matrix + matrix_group <- hdf5r::openGroup(f, "matrix") + barcodes <- hdf5r::openLocation(matrix_group, "barcodes") + data <- hdf5r::openLocation(matrix_group, "data") + expect_equal(hdf5r::readDataSet(barcodes), paste0("col", 1:barcode_count)) + expect_equal(hdf5r::readDataSet(data), count_mat@x) + + features_group <- hdf5r::openGroup(matrix_group, "features") + feature_names <- hdf5r::openGroup(features_group, "name") + feature_ids_group <- hdf5r::openGroup(features_group, "id") + expect_equal(hdf5r::readDataSet(feature_names), rownames(count_mat)) + expect_equal(hdf5r::readDataSet(feature_ids_group), feature_ids) + + # spot check projections + projs_group <- hdf5r::openGroup(f, "projections") + proj_group <- hdf5r::openGroup(projs_group, "p1") + proj_dataset <- hdf5r::openLocation(proj_group, "data") + expect_equal(proj, hdf5r::readDataSet(proj_dataset)) + + # spot check clusters + clusters_group <- hdf5r::openGroup(f, "clusters") + cluster_group <- hdf5r::openGroup(clusters_group, "f1") + assignments <- hdf5r::openLocation(cluster_group, "assignments") + group_names <- hdf5r::openLocation(cluster_group, "group_names") + expect_equal(hdf5r::readDataSet(assignments), clusters[[1]]@.Data - 1) + expect_equal(hdf5r::readDataSet(group_names), levels(clusters[[1]])) + + # spot check metadata + metadata <- hdf5r::openGroup(f, "metadata") + + tool <- hdf5r::openLocation(metadata, "tool") + expect_equal(hdf5r::readDataSet(tool), "loupeR") + + extra <- hdf5r::openGroup(metadata, "extra") + louper_seurat_version <- hdf5r::openLocation(extra, "loupeR_seurat_version") + val <- hdf5r::readDataSet(louper_seurat_version) + expect(!is.null(hdf5r::readDataSet(louper_seurat_version)), "extra field is missing") +}) + test_that("can create hdf5 custom feature ids", { barcode_count <- 5 count_mat <- create_count_mat(3, barcode_count) diff --git a/tests/testthat/test-lib.R b/tests/testthat/test-lib.R index db97b45..883aa04 100644 --- a/tests/testthat/test-lib.R +++ b/tests/testthat/test-lib.R @@ -59,6 +59,29 @@ test_that("can run create_loupe", { expect(x, "create_loupe returns TRUE") }) +test_that("can run create_loupe with BPCells", { + barcode_count <- 5 + count_mat <- create_count_mat_bpcells(100, barcode_count, valid_barcodes = TRUE) + proj <- create_dense_mat(barcode_count, 2) + projections <- list("p1" = proj) + + cluster <- factor( + c("a", "c", "b", "a", "b"), + levels = c("a", "b", "c"), + ordered = TRUE + ) + clusters <- list("f1" = cluster) + + x <- create_loupe( + count_mat, + clusters = clusters, + projections = projections, + executable_path = get_executable_path() + ) + + expect(x, "create_loupe returns TRUE") +}) + test_that("can run create_loupe with integer projection matrix", { barcode_count <- 5 count_mat <- create_count_mat(100, barcode_count, valid_barcodes = TRUE)