diff --git a/R/td_create_metadata_file.R b/R/td_create_metadata_file.R index 4ef95bf..e75f6d0 100644 --- a/R/td_create_metadata_file.R +++ b/R/td_create_metadata_file.R @@ -61,9 +61,10 @@ td_create_metadata_file <- function( recursive = TRUE ) + metadata <- read_yaml_template() + if (format == "yaml") { - metadata <- metadata_as_yaml() |> - gsub("\\.dataset_key", name, x = _) + metadata <- gsub("\\.dataset_key", name, x = metadata) cat(metadata, file = filename, append = FALSE) } else { @@ -87,105 +88,3 @@ td_create_metadata_file <- function( invisible(NULL) } - - -#' Create default metadata as a YAML -#' -#' @noRd - -metadata_as_yaml <- function() { - glue::glue( - " - status: draft # One of 'draft', 'incomplete' or 'complete' - dataset: - id: .dataset_id # Dataset identifier - title: .dataset_title # Dataset title - description: .description # Short description - license: .license # Dataset license - bibtex: .filename # Dataset citation - doi: .doi # DOI of the dataset description (paper) - url: .url # URL of the dataset description (paper) - taxon: .taxon # Taxonomic group (mammals, birds, etc.) - taxonomic_level: species # Taxonomic resolution (species, genus, etc.) 
- type: static # One of 'static' or 'api' - file_url: .url # Full URL to download the static file - file_name: .filename # Name of the static file - file_extension: .ext # File extension of the static file - manual_download: no # One of 'yes' or 'no' - sheet: .number # Sheet number for xslx dataset - long_format: no # One of 'yes' or 'no' (traits in columns) - skip_rows: .na # Number of header rows to remove - col_separator: ',' # Character used to separate columns - na_value: .na # Character used for missing values - taxonomy: - genus: .na # Column name of the genus - species: .na # Column name of the species - binomial: .column # Column name of the binomial name - comment: .na - traits: - - variable: .col_name_1 # Column name of the trait - name: .trait_name # Full name of the trait - category: .na # Category of the trait - type: quantitative # One of 'quantitative' or 'categoric' - units: .unit # Original unit - - variable: .col_name_2 # Column name of the trait - name: .trait_name # Full name of the trait - category: .na # Category of the trait - type: categorical # One of 'quantitative' or 'categorical' - units: .na # Original unit - levels: - - value: .value # Value 1 for categorical trait - description: .descr # Description of the category - - value: .value # Value 2 for categorical trait - description: .descr # Description of the category - " - ) -} - - -#' Convert YAML to list of data.frame -#' -#' @noRd - -metadata_as_df <- function() { - metadata <- metadata_as_yaml() |> - yaml::read_yaml(text = _) - - sheets <- list() - - sheets[["status"]] <- data.frame("status" = "draft") - - sheets[["dataset"]] <- data.frame( - "key" = names(unlist(metadata$"dataset")), # to handle sublevel taxonomy - "value" = unlist(metadata$"dataset") - ) - - rownames(sheets[["dataset"]]) <- NULL - # replace '.' 
by '_' for spelling harmonization - sheets[["dataset"]]$key <- gsub("\\.", "_", sheets[["dataset"]]$key) - - sheets[["traits"]] <- as.data.frame(metadata$"traits"[[1]]) - sheets[["traits"]] <- rbind(sheets[["traits"]], sheets[["traits"]]) - - trait_q <- as.data.frame(metadata$"traits"[[1]]) - trait_q <- data.frame( - trait_q, - "levels_value" = NA, - "levels_description" = NA - ) - - trait_c <- as.data.frame(metadata$"traits"[[2]]) - trait_c <- trait_c[, -grep("^levels", colnames(trait_c))] - - trait_c <- data.frame( - trait_c, - "levels_value" = ".value", - "levels_description" = ".descr" - ) - - trait_c <- rbind(trait_c, trait_c) - - sheets[["traits"]] <- rbind(trait_q, trait_c) - - sheets -} diff --git a/R/utils_checks.R b/R/utils_checks.R index 88663f9..882e215 100644 --- a/R/utils_checks.R +++ b/R/utils_checks.R @@ -239,41 +239,3 @@ check_named_list <- function(x) { invisible(NULL) } - - -# #' Check if a YAML file (map metadata) exists -# #' -# #' @noRd - -# check_yaml_file <- function(path) { -# if (!file.exists(path)) { -# stop( -# "The file '", -# path, -# "' doesn't exist", -# call. = FALSE -# ) -# } - -# invisible(NULL) -# } - -#' Check if a key exists in a YAML file (named list) -#' -#' @noRd - -check_key_in_yaml <- function(metadata, key) { - check_named_list(metadata) - check_character_arg(key) - - if (!(key %in% names(metadata))) { - stop( - "No key '", - key, - "' found in the YAML file", - call. = FALSE - ) - } - - invisible(NULL) -} diff --git a/R/utils_yaml.R b/R/utils_yaml.R new file mode 100644 index 0000000..beed946 --- /dev/null +++ b/R/utils_yaml.R @@ -0,0 +1,97 @@ +# #' Check if a YAML file (map metadata) exists +# #' +# #' @noRd + +# check_yaml_file <- function(path) { +# if (!file.exists(path)) { +# stop( +# "The file '", +# path, +# "' doesn't exist", +# call. 
= FALSE +# ) +# } + +# invisible(NULL) +# } + +#' Check if a key exists in a YAML file (named list) +#' +#' @noRd + +check_key_in_yaml <- function(metadata, key) { + check_named_list(metadata) + check_character_arg(key) + + if (!(key %in% names(metadata))) { + stop( + "No key '", + key, + "' found in the YAML file", + call. = FALSE + ) + } + + invisible(NULL) +} + + +#' Import metadata template +#' +#' @noRd + +read_yaml_template <- function() { + yaml::read_yaml( + file = system.file( + file.path("templates", "metadata_template.yml"), + package = "traitdatabases" + ) + ) +} + + +#' Convert YAML to list of data.frame +#' +#' @noRd + +metadata_as_df <- function() { + metadata <- read_yaml_template() + + sheets <- list() + + sheets[["status"]] <- data.frame("status" = "draft") + + sheets[["dataset"]] <- data.frame( + "key" = names(unlist(metadata$"dataset")), # to handle sublevel taxonomy + "value" = unlist(metadata$"dataset") + ) + + rownames(sheets[["dataset"]]) <- NULL + # replace '.' by '_' for spelling harmonization + sheets[["dataset"]]$key <- gsub("\\.", "_", sheets[["dataset"]]$key) + + sheets[["traits"]] <- as.data.frame(metadata$"traits"[[1]]) + sheets[["traits"]] <- rbind(sheets[["traits"]], sheets[["traits"]]) + + trait_q <- as.data.frame(metadata$"traits"[[1]]) + trait_q <- data.frame( + trait_q, + "levels_value" = NA, + "levels_description" = NA + ) + + trait_c <- as.data.frame(metadata$"traits"[[2]]) + trait_c <- trait_c[, -grep("^levels", colnames(trait_c))] + + trait_c <- data.frame( + trait_c, + "levels_value" = ".value", + "levels_description" = ".descr" + ) + + trait_c <- rbind(trait_c, trait_c) + + sheets[["traits"]] <- rbind(trait_q, trait_c) + + sheets +} diff --git a/inst/templates/metadata_template.yml b/inst/templates/metadata_template.yml new file mode 100644 index 0000000..f8708c6 --- /dev/null +++ b/inst/templates/metadata_template.yml @@ -0,0 +1,42 @@ +status: draft # One of 'draft', 'incomplete' or 'complete' +dataset: + id: .dataset_id 
# Dataset identifier + title: .dataset_title # Dataset title + description: .description # Short description + license: .license # Dataset license + bibtex: .filename # Dataset citation + doi: .doi # DOI of the dataset description (paper) + url: .url # URL of the dataset description (paper) + taxon: .taxon # Taxonomic group (mammals, birds, etc.) + taxonomic_level: species # Taxonomic resolution (species, genus, etc.) + type: static # One of 'static' or 'api' + file_url: .url # Full URL to download the static file + file_name: .filename # Name of the static file + file_extension: .ext # File extension of the static file + manual_download: no # One of 'yes' or 'no' + sheet: .number # Sheet number for xlsx dataset + long_format: no # One of 'yes' or 'no' (traits in columns) + skip_rows: .na # Number of header rows to remove + col_separator: ',' # Character used to separate columns + na_value: .na # Character used for missing values + taxonomy: + genus: .na # Column name of the genus + species: .na # Column name of the species + binomial: .column # Column name of the binomial name + comment: .na +traits: +- variable: .col_name_1 # Column name of the trait + name: .trait_name # Full name of the trait + category: .na # Category of the trait + type: quantitative # One of 'quantitative' or 'categorical' + units: .unit # Original unit +- variable: .col_name_2 # Column name of the trait + name: .trait_name # Full name of the trait + category: .na # Category of the trait + type: categorical # One of 'quantitative' or 'categorical' + units: .na # Original unit + levels: + - value: .value # Value 1 for categorical trait + description: .descr # Description of the category + - value: .value # Value 2 for categorical trait + description: .descr # Description of the category diff --git a/tests/testthat/test-read_yaml_template.R b/tests/testthat/test-read_yaml_template.R new file mode 100644 index 0000000..a575c51 --- /dev/null +++ b/tests/testthat/test-read_yaml_template.R @@ -0,0 
+1,15 @@ +## read_yaml_template() ---- + +test_that("read_yaml_template() succeeds", { + metadata <- read_yaml_template() + + expect_true(is.list(metadata)) + + expect_true("status" %in% names(metadata)) + expect_true("traits" %in% names(metadata)) + expect_true("dataset" %in% names(metadata)) + expect_true("taxonomy" %in% names(metadata$"dataset")) + expect_true("binomial" %in% names(metadata$"dataset"$"taxonomy")) + + expect_true(metadata$"dataset"$"id" == ".dataset_id") +})