diff --git a/R/QueryNamespace.R b/R/QueryNamespace.R index aa6b1ab..989d945 100644 --- a/R/QueryNamespace.R +++ b/R/QueryNamespace.R @@ -14,6 +14,52 @@ # See the License for the specific language governing permissions and # limitations under the License. + +.typeCheckVariable <- function(varName, value, typeStr, isArray) { + # Map type strings to checking functions + typeChecks <- list( + INT = function(x) is.numeric(x) && all(x == as.integer(x)), + BIGINT = function(x) is.numeric(x) && all(x %% 1 == 0), + CHAR = function(x) is.character(x), + VARCHAR = function(x) is.character(x), + TEXT = function(x) is.character(x), + NUMERIC = function(x) is.numeric(x), + BOOLEAN = function (x) is.logical(x) || (is.numeric(x) && all(x %in% c(0, 1))) + ) + + # Check type exists + baseType <- gsub("\\[\\]", "", typeStr) + if (!baseType %in% names(typeChecks)) { + stop(sprintf("Unknown type check: %s", typeStr)) + } + checkFun <- typeChecks[[baseType]] + + # Check presence + if (is.null(value)) { + stop(sprintf("Required variable '%s' (type check %s) not supplied to render().", varName, typeStr)) + } + # Check NA + if (any(is.na(value))) { + stop(sprintf("Variable '%s' (type check %s) contains NA values, which are not allowed.", varName, typeStr)) + } + + # Array vs scalar + if (isArray) { + if (!checkFun(value)) { + stop(sprintf("Variable '%s' must be an array of type %s.", varName, baseType)) + } + # Must be vector of length >= 1 + if (!(is.vector(value) && length(value) >= 1)) { + stop(sprintf("Variable '%s' must be a non-empty vector of type %s.", varName, baseType)) + } + } else { + if (!checkFun(value) || length(value) != 1) { + stop(sprintf("Variable '%s' must be a scalar of type %s.", varName, baseType)) + } + } +} + + #' QueryNamespace #' @export #' @description @@ -174,13 +220,32 @@ QueryNamespace <- R6::R6Class( #' @param ... additional variables to be passed to SqlRender::render - will overwrite anything in namespace render = function(sql, ...) { params <- private$replacementVars$as_list() - addVars <- list(...) - for (k in names(addVars)) { params[[k]] <- addVars[[k]] } + # Regex for {TYPEC TYPE[@] @var_name} + typecPattern <- "\\{TYPEC\\s+([A-Z]+(\\[\\])?)\\s+@([a-zA-Z0-9_]+)\\}" + typecMatches <- gregexpr(typecPattern, sql, perl = TRUE) + matchPositions <- regmatches(sql, typecMatches)[[1]] + + if (length(matchPositions) > 0) { + for (matchStr in matchPositions) { + m <- regexec(typecPattern, matchStr, perl = TRUE) + parts <- regmatches(matchStr, m)[[1]] + if (length(parts) >= 4) { + typeStr <- parts[2] # e.g. INT, INT[], CHAR, CHAR[], NUMERIC, NUMERIC[] + isArray <- grepl("\\[\\]", typeStr) + varName <- parts[4] # <-- Should be parts[4], not parts[3] + value <- params[[varName]] + .typeCheckVariable(varName, value, typeStr, isArray) + } + } + # Remove all {TYPEC ...} lines from SQL + sql <- gsub(typecPattern, "", sql, perl = TRUE) + } + params$sql <- sql params$warnOnMissingParameters <- FALSE do.call(SqlRender::render, params) diff --git a/man/ResultModelManager-package.Rd b/man/ResultModelManager-package.Rd index b132006..e8c65b5 100644 --- a/man/ResultModelManager-package.Rd +++ b/man/ResultModelManager-package.Rd @@ -12,6 +12,7 @@ Database data model management utilities for R packages in the Observational Hea Useful links: \itemize{ \item \url{https://github.com/OHDSI/ResultModelManager} + \item \url{https://ohdsi.github.io/ResultModelManager/} \item Report bugs at \url{https://github.com/OHDSI/ResultModelManager/issues} } diff --git a/tests/testthat/test-QueryNamespace.R b/tests/testthat/test-QueryNamespace.R index 8341e0c..0b9ecac 100644 --- a/tests/testthat/test-QueryNamespace.R +++ b/tests/testthat/test-QueryNamespace.R @@ -128,3 +128,38 @@ test_that("create helper function works", { ) ) }) + + +test_that("render enforces type checks in SQL", { + cohortNamespace <- QueryNamespace$new( + connectionHandler = connectionHandler, + tableSpecification = tableSpecification, + result_schema = "main", + tablePrefix = "cd_" + ) + on.exit({ + cohortNamespace$closeConnection() + }, add = TRUE) + + # INT scalar OK + sql <- "{TYPEC INT @cohort_id} SELECT * FROM @result_schema.@cohort WHERE cohort_definition_id = @cohort_id" + expect_error(cohortNamespace$render(sql, cohort_id = 1.5), "scalar of type INT") + expect_error(cohortNamespace$render(sql, cohort_id = NA), "contains NA") + expect_error(cohortNamespace$render(sql), "not supplied") + + # BIGINT array OK + sql <- "{TYPEC BIGINT[] @ids} SELECT * FROM @result_schema.@cohort WHERE cohort_definition_id IN (@ids)" + expect_silent(cohortNamespace$render(sql, ids = c(1, 2, 3))) + expect_error(cohortNamespace$render(sql, ids = c(1, 2.5)), "array of type BIGINT") + + # CHAR scalar OK + sql <- "{TYPEC CHAR @cohort_name} SELECT * FROM @result_schema.@cohort WHERE cohort_name = @cohort_name" + expect_silent(cohortNamespace$render(sql, cohort_name = "test")) + expect_error(cohortNamespace$render(sql, cohort_name = c("a", "b")), "scalar of type CHAR") + expect_error(cohortNamespace$render(sql, cohort_name = NA), "contains NA") + + # CHAR array OK + sql <- "{TYPEC CHAR[] @names} SELECT * FROM @result_schema.@cohort WHERE cohort_name IN (@names)" + expect_silent(cohortNamespace$render(sql, names = c("a", "b"))) + expect_error(cohortNamespace$render(sql, names = c("a", NA)), "contains NA") +}) diff --git a/tests/testthat/test-TypeChecker.R b/tests/testthat/test-TypeChecker.R new file mode 100644 index 0000000..5f65054 --- /dev/null +++ b/tests/testthat/test-TypeChecker.R @@ -0,0 +1,87 @@ +test_that(".typeCheckVariable INT checks", { + expect_silent(.typeCheckVariable("x", 1L, "INT", FALSE)) + expect_silent(.typeCheckVariable("x", 1, "INT", FALSE)) + expect_error(.typeCheckVariable("x", 1.5, "INT", FALSE), "scalar of type INT") + expect_error(.typeCheckVariable("x", NA, "INT", FALSE), "contains NA") + expect_error(.typeCheckVariable("x", NULL, "INT", FALSE), "not supplied") + expect_error(.typeCheckVariable("x", c(1L, 2L), "INT", FALSE), "scalar of type INT") + expect_silent(.typeCheckVariable("x", c(1L, 2L), "INT[]", TRUE)) + expect_error(.typeCheckVariable("x", c(1.5, 2L), "INT[]", TRUE), "array of type INT") + expect_error(.typeCheckVariable("x", numeric(0), "INT[]", TRUE), "non-empty vector") +}) + +test_that(".typeCheckVariable BIGINT checks", { + expect_silent(.typeCheckVariable("x", 123456789012345, "BIGINT", FALSE)) + expect_silent(.typeCheckVariable("x", 1e11, "BIGINT", FALSE)) + expect_silent(.typeCheckVariable("x", 2, "BIGINT", FALSE)) + expect_error(.typeCheckVariable("x", 1.5, "BIGINT", FALSE), "scalar of type BIGINT") + expect_error(.typeCheckVariable("x", NA, "BIGINT", FALSE), "contains NA") + expect_error(.typeCheckVariable("x", NULL, "BIGINT", FALSE), "not supplied") + expect_error(.typeCheckVariable("x", c(1, 2), "BIGINT", FALSE), "scalar of type BIGINT") + expect_silent(.typeCheckVariable("x", c(1, 2, 3), "BIGINT[]", TRUE)) + expect_error(.typeCheckVariable("x", c(1, 2.5), "BIGINT[]", TRUE), "array of type BIGINT") + expect_error(.typeCheckVariable("x", numeric(0), "BIGINT[]", TRUE), "non-empty vector") +}) + +test_that(".typeCheckVariable CHAR checks", { + expect_silent(.typeCheckVariable("x", "a", "CHAR", FALSE)) + expect_error(.typeCheckVariable("x", 1, "CHAR", FALSE), "scalar of type CHAR") + expect_error(.typeCheckVariable("x", NA_character_, "CHAR", FALSE), "contains NA") + expect_error(.typeCheckVariable("x", NULL, "CHAR", FALSE), "not supplied") + expect_error(.typeCheckVariable("x", c("a", "b"), "CHAR", FALSE), "scalar of type CHAR") + expect_silent(.typeCheckVariable("x", c("a", "b"), "CHAR[]", TRUE)) + expect_error(.typeCheckVariable("x", c("a", NA_character_), "CHAR[]", TRUE), "contains NA") + expect_error(.typeCheckVariable("x", character(0), "CHAR[]", TRUE), "non-empty vector") +}) + +test_that(".typeCheckVariable VARCHAR checks", { + expect_silent(.typeCheckVariable("x", "hello", "VARCHAR", FALSE)) + expect_error(.typeCheckVariable("x", 1, "VARCHAR", FALSE), "scalar of type VARCHAR") + expect_error(.typeCheckVariable("x", NA_character_, "VARCHAR", FALSE), "contains NA") + expect_error(.typeCheckVariable("x", NULL, "VARCHAR", FALSE), "not supplied") + expect_error(.typeCheckVariable("x", c("a", "b"), "VARCHAR", FALSE), "scalar of type VARCHAR") + expect_silent(.typeCheckVariable("x", c("a", "b"), "VARCHAR[]", TRUE)) + expect_error(.typeCheckVariable("x", c("a", NA_character_), "VARCHAR[]", TRUE), "contains NA") + expect_error(.typeCheckVariable("x", character(0), "VARCHAR[]", TRUE), "non-empty vector") +}) + +test_that(".typeCheckVariable TEXT checks", { + expect_silent(.typeCheckVariable("x", "some text", "TEXT", FALSE)) + expect_error(.typeCheckVariable("x", 1, "TEXT", FALSE), "scalar of type TEXT") + expect_error(.typeCheckVariable("x", NA_character_, "TEXT", FALSE), "contains NA") + expect_error(.typeCheckVariable("x", NULL, "TEXT", FALSE), "not supplied") + expect_error(.typeCheckVariable("x", c("a", "b"), "TEXT", FALSE), "scalar of type TEXT") + expect_silent(.typeCheckVariable("x", c("a", "b"), "TEXT[]", TRUE)) + expect_error(.typeCheckVariable("x", c("a", NA_character_), "TEXT[]", TRUE), "contains NA") + expect_error(.typeCheckVariable("x", character(0), "TEXT[]", TRUE), "non-empty vector") +}) + +test_that(".typeCheckVariable NUMERIC checks", { + expect_silent(.typeCheckVariable("x", 3.14, "NUMERIC", FALSE)) + expect_silent(.typeCheckVariable("x", 1L, "NUMERIC", FALSE)) + expect_error(.typeCheckVariable("x", "a", "NUMERIC", FALSE), "scalar of type NUMERIC") + expect_error(.typeCheckVariable("x", NA, "NUMERIC", FALSE), "contains NA") + expect_error(.typeCheckVariable("x", NULL, "NUMERIC", FALSE), "not supplied") + expect_error(.typeCheckVariable("x", c(1, 2), "NUMERIC", FALSE), "scalar of type NUMERIC") + expect_silent(.typeCheckVariable("x", c(1.1, 2.2, 3.3), "NUMERIC[]", TRUE)) + expect_error(.typeCheckVariable("x", c(1.1, NA), "NUMERIC[]", TRUE), "contains NA") + expect_error(.typeCheckVariable("x", numeric(0), "NUMERIC[]", TRUE), "non-empty vector") +}) + +test_that(".typeCheckVariable BOOLEAN checks", { + expect_silent(.typeCheckVariable("x", TRUE, "BOOLEAN", FALSE)) + expect_silent(.typeCheckVariable("x", FALSE, "BOOLEAN", FALSE)) + expect_silent(.typeCheckVariable("x", 1, "BOOLEAN", FALSE)) + expect_silent(.typeCheckVariable("x", 0, "BOOLEAN", FALSE)) + expect_error(.typeCheckVariable("x", NA, "BOOLEAN", FALSE), "contains NA") + expect_error(.typeCheckVariable("x", NULL, "BOOLEAN", FALSE), "not supplied") + expect_error(.typeCheckVariable("x", c(TRUE, FALSE), "BOOLEAN", FALSE), "scalar of type BOOLEAN") + expect_silent(.typeCheckVariable("x", c(TRUE, FALSE), "BOOLEAN[]", TRUE)) + expect_silent(.typeCheckVariable("x", c(TRUE, 1, 0), "BOOLEAN[]", TRUE)) + expect_error(.typeCheckVariable("x", c(TRUE, NA), "BOOLEAN[]", TRUE), "contains NA") + expect_error(.typeCheckVariable("x", logical(0), "BOOLEAN[]", TRUE), "non-empty vector") +}) + +test_that(".typeCheckVariable unknown type", { + expect_error(.typeCheckVariable("x", 1, "UNKNOWN", FALSE), "Unknown type check") +}) \ No newline at end of file