Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -256,9 +256,11 @@ export(hashupo)
export(hashupo.cache_integer64)
export(identical.integer64)
export(integer64)
export(intersect)
export(is.double)
export(is.double.default)
export(is.double.integer64)
export(is.element)
export(is.finite.integer64)
export(is.infinite.integer64)
export(is.integer64)
Expand Down Expand Up @@ -344,6 +346,8 @@ export(runif64)
export(scale.integer64)
export(seq.integer64)
export(setcache)
export(setdiff)
export(setequal)
export(shellorder.integer64)
export(shellsort.integer64)
export(shellsortorder.integer64)
Expand Down Expand Up @@ -382,6 +386,7 @@ export(summary.integer64)
export(table.integer64)
export(tiepos)
export(tiepos.integer64)
export(union)
export(unipos)
export(unipos.integer64)
export(unique.integer64)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
- Ignores leading/trailing whitespace (as does `as.integer()`; #232).
1. `sortcache`, `sortordercache` and `ordercache` get a new argument `na.last`.
1. `matrix`, `array`, `%*%` and `as.matrix` get an `integer64` method (#45). Thanks @hcirellu.
1. `union`, `setdiff`, `intersect`, `setequal` and `is.element` get an overload to work correctly with `integer64` (#182).

## BUG FIXES

Expand Down
121 changes: 118 additions & 3 deletions R/integer64.R
Original file line number Diff line number Diff line change
Expand Up @@ -1192,7 +1192,7 @@ seq.integer64 = function(from=NULL, to=NULL, by=NULL, length.out=NULL, along.wit
target_class_for_Ops = function(e1, e2) {
if(missing(e2)) {
if (!is.numeric(unclass(e1)) && !is.logical(e1) && !is.complex(e1))
stop(errorCondition(gettext("non-numeric argument to mathematical function", domain = "R"), call=sys.call(sys.nframe() - 1L)))
stop(errorCondition(gettext("non-numeric argument to mathematical function", domain="R"), call=sys.call(sys.nframe() - 1L)))

if (is.complex(e1)) {
"complex"
Expand All @@ -1201,9 +1201,9 @@ target_class_for_Ops = function(e1, e2) {
}
} else {
if (!is.numeric(unclass(e1)) && !is.logical(e1) && !is.complex(e1))
stop(errorCondition(gettext("non-numeric argument to binary operator", domain = "R"), call=sys.call(sys.nframe() - 1L)))
stop(errorCondition(gettext("non-numeric argument to binary operator", domain="R"), call=sys.call(sys.nframe() - 1L)))
if (!is.numeric(unclass(e2)) && !is.logical(e2) && !is.complex(e2))
stop(errorCondition(gettext("non-numeric argument to binary operator", domain = "R"), call=sys.call(sys.nframe() - 1L)))
stop(errorCondition(gettext("non-numeric argument to binary operator", domain="R"), call=sys.call(sys.nframe() - 1L)))

if (is.complex(e1) || is.complex(e2)) {
"complex"
Expand Down Expand Up @@ -1912,3 +1912,118 @@ as.list.integer64 = function(x, ...) {
anyNA.integer64 = function(x, recursive) {
.Call(C_r_ram_integer64_any_na, x=x)
}


#' @title Set Operations
#' @description Performs set union, intersection, (asymmetric!) difference, equality and membership on two vectors. As soon as an integer64 vector is involved, the operations are performed using integer64 semantics. Otherwise the \code{base} package functions are called.
#' @param x,y,el,set vectors (of the same mode) containing a sequence of items (conceptually) with no duplicated values.
#' @return
#' For union, a vector of a common mode or class.
#'
#' For intersect, a vector of a common mode or class, or NULL if x or y is NULL.
#'
#' For setdiff, a vector of the same mode or class as x.
#'
#' A logical scalar for setequal and a logical of the same length as x for is.element.
#' @seealso [base::union]
#' @examples
#' x <- as.integer64(1:5)
#' y <- c(1L, 3L, 5L, 7L)
#' union(x, y)
#' intersect(x, y)
#' setdiff(x, y)
#' setequal(x, y)
#' is.element(x, y)
#'
#' @export
#' @rdname sets
union = function(x, y) {
if (!(is.integer64(x) || is.integer64(y)))
return(base::union(x, y))

target_class = target_class_and_sample_value(list(x, y))$class
# try using the benefit of integer64 caching, if possible. I.e. call unique() before as().
x = unique(x)
if (class(x)[1L] != target_class)
x = as(x, target_class)
y = unique(y)
if (class(y)[1L] != target_class)
y = as(y, target_class)

unique(c(x, y))
}

#' @export
#' @rdname sets
intersect = function(x, y) {
if (!(is.integer64(x) || is.integer64(y)))
return(base::intersect(x, y))

target_class = target_class_and_sample_value(list(x, y))$class
x = unique(x)
if (class(x)[1L] != target_class)
x = as(x, target_class)
y = unique(y)
if (class(y)[1L] != target_class)
y = as(y, target_class)

x[match(x, y, 0L) > 0L]
}

#' @export
#' @rdname sets
setequal = function(x, y) {
if (!(is.integer64(x) || is.integer64(y)))
return(base::setequal(x, y))

target_class = target_class_and_sample_value(list(x, y))$class
x = unique(x)
if (class(x)[1L] != target_class)
x = as(x, target_class)
y = unique(y)
if (class(y)[1L] != target_class)
y = as(y, target_class)

length(x) == length(y) && !anyNA(match(x, y))
}

#' @export
#' @rdname sets
setdiff = function(x, y) {
if (!(is.integer64(x) || is.integer64(y)))
return(base::setdiff(x, y))

if (class(x)[1L] %in% c("POSIXct", "Date"))
x = unclass(x)
if (class(x)[1L] %in% c("factor", "ordered"))
x = as.character(x)
target_class = target_class_and_sample_value(list(x, y))$class
x = unique(x)
y = unique(y)
if (class(x)[1L] != target_class)
x_match = as(x, target_class)
else
x_match = x
if (class(y)[1L] != target_class)
y = as(y, target_class)

x[match(x_match, y, 0L) == 0L]
}

#' @export
#' @rdname sets
is.element = function(el, set) {
if (!(is.integer64(el) || is.integer64(set)))
return(base::is.element(el, set))

target_class = target_class_and_sample_value(list(el, set))$class
if (class(el)[1L] != target_class)
el = as(el, target_class)
set = unique(set)
if (class(set)[1L] != target_class)
set = as(set, target_class)

match(el, set, 0L) > 0L
}


31 changes: 31 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,34 @@ withCallingHandlers_and_choose_call = function(expr, function_names, name_to_dis
)
eval(wch, envir=parent.frame())
}

# function to determine target class and sample value for union, intersect, setdiff, setequal, min, max, range, sum, prod, c, cbind and rbind functions
target_class_and_sample_value = function(x, recursive=FALSE, errorClasses="") {

getClassesOfElements = function(x, recursive, errorClasses) {
classes = vapply(x, function(el) if (class(el)[1L] == "list" || "data.frame" %in% class(el)) "list" else class(el)[1L], character(1L))
if (recursive) {
union(classes[classes != "list"], unlist(lapply(x[classes == "list"], function(el) getClassesOfElements(el, recursive=TRUE, errorClasses=errorClasses))))
} else {
unique(classes)
}
}
classes = getClassesOfElements(x, recursive=isTRUE(recursive), errorClasses=errorClasses)
if (length(sel <- intersect(errorClasses, classes)))
stop(errorCondition(sprintf(gettext("invalid 'type' (%s) of argument", domain="R"), sel[1L]), call=sys.call(max(sys.nframe() - 1L, 1L))))

if (any(c("character", "factor", "ordered") %in% classes)) {
valueClass = "character"
funValue = character(1L)
} else if ("complex" %in% classes) {
valueClass = "complex"
funValue = complex(1L)
} else if (any(c("Date", "POSIXct", "POSIXlt", "difftime") %in% classes)) {
valueClass = "double"
funValue = numeric(1L)
} else {
valueClass = "integer64"
funValue = integer64(1L)
}
list(class = valueClass, sampleValue = funValue)
}
48 changes: 48 additions & 0 deletions man/sets.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading