Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ jobs:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true
extra-repositories: 'https://bnprks.r-universe.dev'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I copied what Seurat-Object is doing to unbreak the CI builds


- uses: r-lib/actions/setup-r-dependencies@v2
with:
Expand Down
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ RoxygenNote: 7.3.2
Suggests:
testthat (>= 3.0.0),
Matrix,
SeuratObject (>= 5.0.0)
SeuratObject (>= 5.0.0),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might also need to add a REMOTES section so that the CI knows where/how to download bpcells.

Remotes:
    github::bnprks/BPCells/r

BPCells
Config/testthat/edition: 3
Imports:
methods,
Expand Down
63 changes: 58 additions & 5 deletions R/hdf5.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,14 @@ create_hdf5 <- function(
if (file.exists(h5path)) {
return(err(sprintf("cannot create h5 file as it already exists: %s", h5path)))
}

# create hdf5 file and matrix groups
f <- hdf5r::H5File$new(h5path, mode = "w")

write_mat(f, count_mat, feature_ids)
if (inherits(count_mat, "IterableMatrix")) {
write_mat_bpcells(h5path, count_mat, feature_ids)
f <- hdf5r::H5File$new(h5path, mode = "r+")
} else {
# create hdf5 file and matrix groups
f <- hdf5r::H5File$new(h5path, mode = "w")
write_mat(f, count_mat, feature_ids)
}
write_clusters(f, clusters)
write_projections(f, projections)
write_metadata(f, seurat_obj_version)
Expand Down Expand Up @@ -79,6 +82,56 @@ write_mat <- function(f, count_mat, feature_ids) {
features_group$close()
}

#' Writes BPCells matrix to the H5 file
#'
#' @param h5path Path to a new H5 file
#' @param count_mat A sparse matrix inheriting from \code{IterableMatrix} from \pkg{BPCells}.
#' Rows are features, Columns are barcodes.
#' @param feature_ids optional character vector that specifies the feature ids of the count matrix.
#' Typically, these are the ensemble ids.
#'
#' @note
#' This function will check whether \pkg{BPCells} is installed.
#'
#' @noRd
write_mat_bpcells <- function(h5path, count_mat, feature_ids) {
if (!requireNamespace("BPCells", quietly = TRUE)) {
stop(
"Please install 'BPCells' to write IterableMatrix:\n",
" remotes::install_github('bnprks/BPCells/r')"
)
}
features <- rownames(count_mat)
barcodes <- colnames(count_mat)
if (length(feature_ids) == 0) {
feature_ids <- rownames(count_mat)
}
if (!BPCells::matrix_type(count_mat) == "uint32_t") {
count_mat <- BPCells::convert_matrix_type(count_mat)
}
count_mat <- BPCells::write_matrix_10x_hdf5(count_mat, path = h5path)

# Need to rewrite the barcodes and features to use strings with max length
f <- hdf5r::H5File$new(h5path, mode = "r+")

matrix_group <- f$open("matrix")
hdf5r::h5unlink(matrix_group, "features")
hdf5r::h5unlink(matrix_group, "barcodes")

create_str_dataset(matrix_group, "barcodes", barcodes)
features_group <- matrix_group$create_group("features")

create_str_dataset(features_group, "name", features)
create_str_dataset(features_group, "id", as.character(feature_ids))
create_str_dataset(features_group, "feature_type", rep("Gene Expression", length(features)))
create_str_dataset(features_group, "_all_tag_keys", as.character()) # required features
features_group$close()
matrix_group$close()

f$close_all() # make sure everything closed
}


#' Prints the metadata list to stdout.
#'
#' @param metadata The metadata list
Expand Down
16 changes: 9 additions & 7 deletions R/validate.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
#'
#' @export
validate_count_mat <- function(count_mat, feature_ids = NULL) { # nolint: cyclocomp_linter.
if (!is(count_mat, "dgCMatrix")) {
return(err("count_mat must be a dgCMatrix"))
if (!is(count_mat, "dgCMatrix") && !inherits(count_mat, "IterableMatrix")) {
return(err("count_mat must be a dgCMatrix or IterableMatrix"))
}

features <- rownames(count_mat)
Expand All @@ -34,11 +34,13 @@ validate_count_mat <- function(count_mat, feature_ids = NULL) { # nolint: cycloc
if (length(barcodes) == 0) {
return(err("count_mat must have at least one barcode"))
}
if (any(is.nan(count_mat@x))) {
return(err("matrix values must not be NaN"))
}
if (any(is.infinite(count_mat@x))) {
return(err("matrix values must not be infinite"))
if (inherits(count_mat, "dgCMatrix")) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we avoiding this check because the matrix will be large? Or is this unsupported.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, only dgCMatrix has the slot x, while IterableMatrix from BPCells does not.

if (any(is.nan(count_mat@x))) {
return(err("matrix values must not be NaN"))
}
if (any(is.infinite(count_mat@x))) {
return(err("matrix values must not be infinite"))
}
}
if (!all(sapply(barcodes, nzchar))) {
return(err("barcodes cannot be the empty string"))
Expand Down
26 changes: 26 additions & 0 deletions tests/testthat/helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,32 @@ create_count_mat <- function(rows, cols, valid_barcodes = FALSE) {
mat
}

#' Create a sparse count_mat with BPCells
#'
#' @importFrom Matrix rsparsematrix
create_count_mat_bpcells <- function(rows, cols, valid_barcodes = FALSE) {
mat <- Matrix::rsparsematrix(rows, cols, 0.5, rand.x = function(n) as.integer(100 * runif(n)))

rownames <- as.character()
if (rows > 0) {
rownames <- paste0("row", 1:rows)
}

colnames <- as.character()
if (cols > 0) {
if (valid_barcodes) {
colnames <- lapply(rep(14, cols), random_barcode)
} else {
colnames <- paste0("col", 1:cols)
}
}

dimnames(mat) <- list(rownames, colnames)

mat_dir <- tempfile(pattern = "bpcells")
BPCells::write_matrix_dir(mat, mat_dir)
}

#' Create a dimensional reduction (projection) object
create_dim_reduction <- function(count_mat, key) {
barcode_count <- dim(count_mat)[2]
Expand Down
57 changes: 57 additions & 0 deletions tests/testthat/test-hdf5.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,63 @@ test_that("can create hdf5", {
expect(!is.null(hdf5r::readDataSet(louper_seurat_version)), "extra field is missing")
})

test_that("can create hdf5 with BPCells", {
skip_if_not_installed("BPCells")

barcode_count <- 5
proj <- create_dense_mat(barcode_count, 2)
count_mat <- create_count_mat_bpcells(100, barcode_count)
feature_ids <- paste0("Add_", rownames(count_mat))

clusters <- list("f1" = factor(c("a", "c", "b", "a", "b"), levels = c("a", "b", "c"), ordered = TRUE))
projections <- list("p1" = proj)
h5path <- sprintf("%s.h5", tempfile())

seurat_obj_version <- "1.2.3"
create_hdf5(count_mat, clusters, projections, h5path, feature_ids, seurat_obj_version)

count_mat <- as(count_mat, "dgCMatrix")
f <- hdf5r::h5file(h5path)

# spot check matrix
matrix_group <- hdf5r::openGroup(f, "matrix")
barcodes <- hdf5r::openLocation(matrix_group, "barcodes")
data <- hdf5r::openLocation(matrix_group, "data")
expect_equal(hdf5r::readDataSet(barcodes), paste0("col", 1:barcode_count))
expect_equal(hdf5r::readDataSet(data), count_mat@x)

features_group <- hdf5r::openGroup(matrix_group, "features")
feature_names <- hdf5r::openGroup(features_group, "name")
feature_ids_group <- hdf5r::openGroup(features_group, "id")
expect_equal(hdf5r::readDataSet(feature_names), rownames(count_mat))
expect_equal(hdf5r::readDataSet(feature_ids_group), feature_ids)

# spot check projections
projs_group <- hdf5r::openGroup(f, "projections")
proj_group <- hdf5r::openGroup(projs_group, "p1")
proj_dataset <- hdf5r::openLocation(proj_group, "data")
expect_equal(proj, hdf5r::readDataSet(proj_dataset))

# spot check clusters
clusters_group <- hdf5r::openGroup(f, "clusters")
cluster_group <- hdf5r::openGroup(clusters_group, "f1")
assignments <- hdf5r::openLocation(cluster_group, "assignments")
group_names <- hdf5r::openLocation(cluster_group, "group_names")
expect_equal(hdf5r::readDataSet(assignments), clusters[[1]]@.Data - 1)
expect_equal(hdf5r::readDataSet(group_names), levels(clusters[[1]]))

# spot check metadata
metadata <- hdf5r::openGroup(f, "metadata")

tool <- hdf5r::openLocation(metadata, "tool")
expect_equal(hdf5r::readDataSet(tool), "loupeR")

extra <- hdf5r::openGroup(metadata, "extra")
louper_seurat_version <- hdf5r::openLocation(extra, "loupeR_seurat_version")
val <- hdf5r::readDataSet(louper_seurat_version)
expect(!is.null(hdf5r::readDataSet(louper_seurat_version)), "extra field is missing")
})

test_that("can create hdf5 custom feature ids", {
barcode_count <- 5
count_mat <- create_count_mat(3, barcode_count)
Expand Down
23 changes: 23 additions & 0 deletions tests/testthat/test-lib.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,29 @@ test_that("can run create_loupe", {
expect(x, "create_loupe returns TRUE")
})

test_that("can run create_loupe with BPCells", {
barcode_count <- 5
count_mat <- create_count_mat_bpcells(100, barcode_count, valid_barcodes = TRUE)
proj <- create_dense_mat(barcode_count, 2)
projections <- list("p1" = proj)

cluster <- factor(
c("a", "c", "b", "a", "b"),
levels = c("a", "b", "c"),
ordered = TRUE
)
clusters <- list("f1" = cluster)

x <- create_loupe(
count_mat,
clusters = clusters,
projections = projections,
executable_path = get_executable_path()
)

expect(x, "create_loupe returns TRUE")
})

test_that("can run create_loupe with integer projection matrix", {
barcode_count <- 5
count_mat <- create_count_mat(100, barcode_count, valid_barcodes = TRUE)
Expand Down
Loading