Skip to content
117 changes: 68 additions & 49 deletions .dev/ast_fuzz_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ if (
}

contents[wrong_number_def_idx] <-
'wrong_number_fmt <- "got %d lints instead of %d%s\\nFile contents:\\n%s"'
' wrong_number_fmt <- "got %d lints instead of %d%s\\nFile contents:\\n%s"'
contents[wrong_number_use_idx] <-
gsub("\\)$", ", readChar(file, file.size(file)))", contents[wrong_number_use_idx])
writeLines(contents, expect_lint_file)
Expand All @@ -66,61 +66,81 @@ withr::defer({

suppressMessages(pkgload::load_all())

can_parse <- \(lines) !inherits(tryCatch(parse(text = lines), error = identity), "error")
get_str <- \(x) tail(unlist(strsplit(x, ": ", fixed = TRUE)), 1L)

# beware lazy eval: originally tried adding a withr::defer() in each iteration, but
# this effectively only runs the last 'defer' expression as the names are only
# evaluated at run-time. So instead keep track of all edits in this object.
# this approach to implementing 'nofuzz' feels painfully manual, but I couldn't
# figure out how else to get 'testthat' to give us what we need -- the failures
# object in the reporter is frustratingly inconsistent in whether the trace
# exists, and even if it does, we'd have to text-mangle to get the corresponding
# file names out. Also, the trace 'srcref' happens under keep.source=FALSE,
# so we lose any associated comments anyway. even that would not solve the issue
# of getting top-level exclusions done for 'nofuzz start|end' ranges, except
# maybe if it enabled us to reuse lintr's own exclude() system.
# therefore we take this approach: pass over the test suite first and comment out
# any tests/units that have been marked 'nofuzz'. restore later. one consequence
# is there's no support for fuzzer-specific exclusion, e.g. we fully disable
# the unnecessary_placeholder_linter() tests because |> and _ placeholders differ.
# these have to be enabled/disabled at runtime as it's not possible to disentagle which
# fuzzer caused the error ex-post (and it might be the interaction of >1 at issue).
# an earlier approach was like the current 'nofuzz' -- just comment out the troublesome
# tests from being run at all. But that led to a very quickly growing set of tests being
# skipped totally, which also hid some issues that are surfaced by the current approach.
# Another idea would be to just leave the enable/disable calls as code in the test suite,
# but I prefer the current approach of leaving them as comments: (1) it's more consistent
# with the 'nolint' exclusion system and (2) it doesn't distract the casual reader as much.
test_restorations <- list()
for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = TRUE)) {
xml <- read_xml(xmlparsedata::xml_parse_data(parse(test_file, keep.source = TRUE)))
# parent::* to catch top-level comments (exprlist). matches one-line nofuzz and start/end ranges.
nofuzz_lines <- xml_find_all(xml, "//COMMENT[contains(text(), 'nofuzz')]/parent::*")
if (length(nofuzz_lines) == 0L) next

test_original <- test_lines <- readLines(test_file)

for (nofuzz_line in nofuzz_lines) {
comments <- xml_find_all(nofuzz_line, "COMMENT[contains(text(), 'nofuzz')]")
comment_text <- xml_text(comments)
# handle start/end ranges first.
start_idx <- grep("nofuzz start", comment_text, fixed = TRUE)
end_idx <- grep("nofuzz end", comment_text, fixed = TRUE)
if (length(start_idx) != length(end_idx) || any(end_idx < start_idx)) {
stop(sprintf(
"Mismatched '# nofuzz start' (%s), '# nofuzz end' (%s) in %s",
toString(start_idx), toString(end_idx), test_file
))
test_lines <- readLines(test_file)
one_expr_idx <- grep("# nofuzz", test_lines, fixed = TRUE)
range_start_idx <- grep("^\\s*# fuzzer disable:", test_lines)
if (length(one_expr_idx) == 0L && length(range_start_idx) == 0L) next

test_original <- test_lines
pd <- getParseData(parse(test_file))

for (start_line in rev(one_expr_idx)) {
end_line <- start_line
while (end_line <= length(test_lines) && !can_parse(test_lines[start_line:end_line])) {
end_line <- end_line + 1L
}

comment_ranges <- Map(`:`,
as.integer(xml_attr(comments[start_idx], "line1")),
as.integer(xml_attr(comments[end_idx], "line1"))
)
for (comment_range in comment_ranges) {
test_lines[comment_range] <- paste("#", test_lines[comment_range])
if (end_line > length(test_lines)) {
stop("Unable to parse any expression starting from line ", start_line)
}
comment_txt <- subset(pd, line1 == start_line & token == "COMMENT", select = "text", drop = TRUE)
# blanket disable means the test cannot be run. this happens e.g. for tests of encoding
# that are too complicated to deal with in this GHA.
if (comment_txt == "# nofuzz") {
test_lines[start_line:end_line] <- ""
} else {
deactivated <- get_str(comment_txt)
test_lines <- c(
head(test_lines, start_line - 1L),
sprintf("deactivate_fuzzers('%s')", deactivated),
test_lines[start_line:end_line],
sprintf("activate_fuzzers('%s')", deactivated),
tail(test_lines, -end_line)
)
}
}

if (length(start_idx) > 0L && !any(!start_idx & !end_idx)) next
if (length(one_expr_idx)) {
writeLines(test_lines, test_file)
pd <- getParseData(parse(test_file))
range_start_idx <- grep("^\\s*# fuzzer disable:", test_lines)
}

# NB: one-line tests line expect_lint(...) # nofuzz are not supported,
# since the comment will attach to the parent test_that() & thus comment
# out the whole unit. Easiest solution is just to spread out those few tests for now.
comment_range <- as.integer(xml_attr(nofuzz_line, "line1")):as.integer(xml_attr(nofuzz_line, "line2"))
test_lines[comment_range] <- paste("#", test_lines[comment_range])
range_end_idx <- grep("^\\s*# fuzzer enable:", test_lines)

if (length(range_start_idx) != length(range_end_idx) || any(range_end_idx < range_start_idx)) {
stop(sprintf(
"Mismatched '# fuzzer disable' (%s), '# fuzzer enable' (%s) in %s",
toString(range_start_idx), toString(range_end_idx), test_file
))
}

for (ii in seq_along(range_start_idx)) {
start_line <- test_lines[range_start_idx[ii]]
test_lines[range_start_idx[ii]] <-
gsub("#.*", sprintf("deactivate_fuzzers('%s')", get_str(start_line)), start_line)
end_line <- test_lines[range_end_idx[ii]]
test_lines[range_end_idx[ii]] <-
gsub("#.*", sprintf("activate_fuzzers('%s')", get_str(end_line)), end_line)
}

writeLines(test_lines, test_file)
if (length(range_start_idx)) writeLines(test_lines, test_file)

test_restorations <- c(test_restorations, list(list(file = test_file, lines = test_original)))
}
withr::defer(for (restoration in test_restorations) writeLines(restoration$lines, restoration$file))
Expand All @@ -134,8 +154,7 @@ all_classes <- unlist(lapply(
reporter$get_results(),
\(test) lapply(test$results, \(x) class(x)[1L])
))
cat("Summary of test statuses:\n")
print(table(all_classes))
print(table(`Summary of test statuses:` = all_classes))

# ignore any test that failed for expected reasons, e.g. some known lint metadata changes
# about line numbers or the contents of the line. this saves us having to pepper tons of
Expand All @@ -160,7 +179,7 @@ if (length(invalid_failures) > 0L) {
\(x) sprintf("%s:%s", x$file, x$test),
character(1L)
)
cat("Some fuzzed tests failed unexpectedly!\n")
cat(sprintf("%d fuzzed tests failed unexpectedly!\n", length(invalid_failures)))
print(invalid_failures)
stop("Use # nofuzz [start|end] to mark false positives or fix any bugs.")
stop("Fix any bugs, or use '# nofuzz'/'# fuzzer [dis|en]able' to mark false positives.")
}
46 changes: 38 additions & 8 deletions .dev/maybe_fuzz_content.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,7 @@ maybe_fuzz_content <- function(file, lines) {
file.copy(file, new_file, copy.mode = FALSE)
}

apply_fuzzers(new_file, fuzzers = list(
function_lambda_fuzzer,
pipe_fuzzer,
dollar_at_fuzzer,
comment_injection_fuzzer,
assignment_fuzzer
))
apply_fuzzers(new_file, fuzzers = .fuzzers$active)

new_file
}
Expand Down Expand Up @@ -106,7 +100,7 @@ apply_fuzzers <- function(f, fuzzers) {
return(invisible())
}

unedited <- lines <- readLines(f)
unedited <- lines <- readLines(f, warn = FALSE)
for (fuzzer in fuzzers) {
updated_lines <- fuzzer(pd, lines)
if (is.null(updated_lines)) next # skip some I/O if we can
Expand All @@ -122,3 +116,39 @@ apply_fuzzers <- function(f, fuzzers) {

invisible()
}

.fuzzers <- new.env()
.fuzzers$active <- list(
assignment = assignment_fuzzer,
comment_injection = comment_injection_fuzzer,
dollar_at = dollar_at_fuzzer,
function_lambda = function_lambda_fuzzer,
pipe = pipe_fuzzer
)
.fuzzers$inactive <- list()

deactivate_fuzzers <- function(names_str) {
req <- unlist(strsplit(names_str, " ", fixed = TRUE))
if (!all(req %in% names(.fuzzers$active))) {
stop(sprintf(
"Invalid attempt to deactivate fuzzers: '%s'\n Currently active fuzzers: %s\n Currently inactive fuzzers: %s",
names_str, toString(names(.fuzzers$active)), toString(names(.fuzzers$inactive))
))
}
.fuzzers$inactive[req] <- .fuzzers$active[req]
.fuzzers$active[req] <- NULL
invisible()
}

activate_fuzzers <- function(names_str) {
req <- unlist(strsplit(names_str, " ", fixed = TRUE))
if (!all(req %in% names(.fuzzers$inactive))) {
stop(sprintf(
"Invalid attempt to activate fuzzers: '%s'\n Currently active fuzzers: %s\n Currently inactive fuzzers: %s",
names_str, toString(names(.fuzzers$active)), toString(names(.fuzzers$inactive))
))
}
.fuzzers$active[req] <- .fuzzers$inactive[req]
.fuzzers$inactive[req] <- NULL
invisible()
}
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
* `boolean_arithmetic_linter()` finds many more cases like `sum(x | y) == 0` where the total of a known-logical vector is compared to 0 (#1580, @MichaelChirico).
* New argument `include_s4_slots` for the `xml_find_function_calls()` entry in the `get_source_expressions()` to govern whether calls of the form `s4Obj@fun()` are included in the result (#2820, @MichaelChirico).
* General handling of logic around where comments can appear in code has been improved (#2822, @MichaelChirico). In many cases, this is a tiny robustness fix for weird edge cases unlikely to be found in practice, but in others, this improves practical linter precision (reduced false positives and/or false negatives). The affected linters (with annotations for changes noteworthy enough to have gotten a dedicated bug) are:
+ `any_duplicated_linter()`
+ `brace_linter()`
+ `coalesce_linter()`
+ `comparison_negation_linter()` #2826
Expand All @@ -42,6 +43,8 @@
+ `if_switch_linter()`
+ `ifelse_censor_linter()` #2826
+ `implicit_assignment_linter()`
+ `is_numeric_linter()`
+ `keyword_quote_linter()`
+ `length_test_linter()`
+ `literal_coercion_linter()` #2824
+ `matrix_apply_linter()` #2825
Expand Down
69 changes: 36 additions & 33 deletions R/any_duplicated_linter.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,50 +52,53 @@ any_duplicated_linter <- function() {
# the final parent::expr/expr gets us to the expr on the other side of EQ;
# this lets us match on either side of EQ, where following-sibling
# assumes we are before EQ, preceding-sibling assumes we are after EQ.
length_unique_xpath_parts <- glue("
//{ c('EQ', 'NE', 'GT', 'LT') }
/parent::expr
/expr[
expr[1][SYMBOL_FUNCTION_CALL[text() = 'length']]
and expr[expr[1][
SYMBOL_FUNCTION_CALL[text() = 'unique']
and (
following-sibling::expr =
parent::expr
/parent::expr
/parent::expr
/expr
/expr[1][SYMBOL_FUNCTION_CALL[text()= 'length']]
/following-sibling::expr
or
following-sibling::expr[OP-DOLLAR or LBB]/expr[1] =
parent::expr
/parent::expr
/parent::expr
/expr
/expr[1][SYMBOL_FUNCTION_CALL[text()= 'nrow']]
/following-sibling::expr
)
]]
]
")
length_unique_xpath <- paste(length_unique_xpath_parts, collapse = " | ")
length_comparison_xpath <- "
parent::expr
/parent::expr[expr/SYMBOL_FUNCTION_CALL[text() = 'length']]
/parent::expr[EQ or NE or GT or LT]
"
length_unique_xpath <- "
expr/expr/expr[1][
SYMBOL_FUNCTION_CALL[text() = 'unique']
and (
following-sibling::expr =
parent::expr
/parent::expr
/parent::expr
/expr
/expr[1][SYMBOL_FUNCTION_CALL[text() = 'length']]
/following-sibling::expr
or
following-sibling::expr[OP-DOLLAR or LBB]/expr[1] =
parent::expr
/parent::expr
/parent::expr
/expr
/expr[1][SYMBOL_FUNCTION_CALL[text() = 'nrow']]
/following-sibling::expr
)
]"

uses_nrow_xpath <- "./parent::expr/expr/expr[1]/SYMBOL_FUNCTION_CALL[text() = 'nrow']"
uses_nrow_xpath <- "./expr/expr[1]/SYMBOL_FUNCTION_CALL[text() = 'nrow']"

Linter(linter_level = "expression", function(source_expression) {
xml <- source_expression$xml_parsed_content
xml_calls <- source_expression$xml_find_function_calls("any")
any_calls <- source_expression$xml_find_function_calls("any")
unique_calls <- source_expression$xml_find_function_calls("unique")

any_duplicated_expr <- xml_find_all(xml_calls, any_duplicated_xpath)
any_duplicated_expr <- xml_find_all(any_calls, any_duplicated_xpath)
any_duplicated_lints <- xml_nodes_to_lints(
any_duplicated_expr,
source_expression = source_expression,
lint_message = "anyDuplicated(x, ...) > 0 is better than any(duplicated(x), ...).",
type = "warning"
)

length_unique_expr <- xml_find_all(xml, length_unique_xpath)
in_length_comparison <- !is.na(xml_find_first(unique_calls, length_comparison_xpath))
unique_calls <- strip_comments_from_subtree(
xml_parent(xml_parent(xml_parent(unique_calls[in_length_comparison])))
)
is_length_unique <- !is.na(xml_find_first(unique_calls, length_unique_xpath))
length_unique_expr <- unique_calls[is_length_unique]
lint_message <- ifelse(
is.na(xml_find_first(length_unique_expr, uses_nrow_xpath)),
"anyDuplicated(x) == 0L is better than length(unique(x)) == length(x).",
Expand Down
18 changes: 8 additions & 10 deletions R/is_numeric_linter.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,12 @@ is_numeric_linter <- function() {

# testing things like is.numeric(x) || is.integer(x)
or_xpath <- glue("
//OR2
/parent::expr[
expr/{is_numeric_expr}
and expr/{is_integer_expr}
and
expr/{is_numeric_expr}/following-sibling::expr[1]
= expr/{is_integer_expr}/following-sibling::expr[1]
]
//OR2/parent::expr[expr/{is_numeric_expr} and expr/{is_integer_expr}]
")
node_match_xpath <- glue("self::*[
expr/{is_numeric_expr}/following-sibling::expr[1]
= expr/{is_integer_expr}/following-sibling::expr[1]
]")

# testing class(x) %in% c("numeric", "integer")
class_xpath <- "
Expand All @@ -69,9 +66,10 @@ is_numeric_linter <- function() {
Linter(linter_level = "expression", function(source_expression) {
xml <- source_expression$xml_parsed_content

or_expr <- xml_find_all(xml, or_xpath)
or_expr <- strip_comments_from_subtree(xml_find_all(xml, or_xpath))
expr_match <- !is.na(xml_find_first(or_expr, node_match_xpath))
or_lints <- xml_nodes_to_lints(
or_expr,
or_expr[expr_match],
source_expression = source_expression,
lint_message = paste(
"Use `is.numeric(x)` instead of the equivalent `is.numeric(x) || is.integer(x)`.",
Expand Down
2 changes: 1 addition & 1 deletion R/keyword_quote_linter.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ keyword_quote_linter <- function() {
)

extraction_expr <- extraction_expr[invalid_extraction_quoting]
extractor <- xml_find_chr(extraction_expr, "string(preceding-sibling::*[1])")
extractor <- xml_find_chr(extraction_expr, "string(preceding-sibling::*[not(self::COMMENT)][1])")
gen_extractor <- ifelse(extractor == "$", "[[", "slot()")

extraction_lints <- xml_nodes_to_lints(
Expand Down
Loading