Skip to content

Commit

Permalink
ARROW-8549: [R] Assorted post-0.17 release cleanups
Browse files Browse the repository at this point in the history
Functional changes in the R package installation:

* Downloading dependencies is made completely silent by default
* When downloading source, it now tries three Apache mirrors
* Limit parallelization in the C++ build to 2 CPUs (unless MAKEFLAGS is set), per CRAN repository policy

CI/test changes:

* Adds a `try()` around a non-essential dependency download to fix a source of occasional build flakiness
* Makes the R install logs always print out, not just on failure. It is useful to see exactly what the successful builds are doing, to confirm that they behave as expected.
* Makes the `--as-cran` checks build and check the docs/vignettes. This adds a lot of time to the build but is only done nightly, and something needs to check this, at least before release

Closes apache#6995 from nealrichardson/0.17.0-rpkg

Authored-by: Neal Richardson <[email protected]>
Signed-off-by: Neal Richardson <[email protected]>
  • Loading branch information
nealrichardson committed Apr 22, 2020
1 parent dee46a5 commit d1f7573
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 47 deletions.
3 changes: 2 additions & 1 deletion ci/scripts/r_deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ pushd ${source_dir}
# Install R package dependencies
${R_BIN} -e "install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck'))"
${R_BIN} -e "remotes::install_deps(dependencies = TRUE)"
${R_BIN} -e "remotes::install_github('nealrichardson/decor')"
# This isn't required for testing; it's only needed if you're using this script to build your dev environment
${R_BIN} -e "try(remotes::install_github('nealrichardson/decor'))"

popd

Expand Down
11 changes: 6 additions & 5 deletions ci/scripts/r_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,18 +41,19 @@ export TEST_R_WITH_ARROW=TRUE
export _R_CHECK_TESTS_NLINES_=0
export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE
export _R_CHECK_LIMIT_CORES_=FALSE
export VERSION=$(grep ^Version DESCRIPTION | sed s/Version:\ //)
# By default, aws-sdk tries to contact a non-existing local ip host
# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
export AWS_EC2_METADATA_DISABLED=TRUE

# Make sure we aren't writing to the home dir (CRAN _hates_ this but there is no official check)
BEFORE=$(ls -alh ~/)

# Conditionally run --as-cran because crossbow jobs aren't using _R_CHECK_COMPILATION_FLAGS_KNOWN_
# (maybe an R version thing, needs 3.6.2?)
# Also only --run-donttest if NOT_CRAN because Parquet example requires snappy (optional dependency)
${R_BIN} -e "cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true'); rcmdcheck::rcmdcheck(build_args = '--no-build-vignettes', args = c('--no-manual', '--ignore-vignettes', ifelse(cran, '--as-cran', '--run-donttest')), error_on = 'warning', check_dir = 'check')"
${R_BIN} -e "as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true')
if (as_cran) {
rcmdcheck::rcmdcheck(args = c('--as-cran', '--run-donttest'), error_on = 'warning', check_dir = 'check')
} else {
rcmdcheck::rcmdcheck(build_args = '--no-build-vignettes', args = c('--no-manual', '--ignore-vignettes', '--run-donttest'), error_on = 'warning', check_dir = 'check')
}"

AFTER=$(ls -alh ~/)
if [ "$BEFORE" != "$AFTER" ]; then
Expand Down
3 changes: 1 addition & 2 deletions dev/tasks/r/azure.linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,4 @@ jobs:
- script: |
set -ex
cat arrow/r/check/arrow.Rcheck/00install.out
displayName: Dump install logs on failure
condition: failed()
displayName: Dump install logs
3 changes: 1 addition & 2 deletions dev/tasks/r/github.linux.cran.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,5 @@ jobs:
- name: Docker Run
shell: bash
run: cd arrow && docker-compose run r
- name: Dump install logs on failure
if: failure()
- name: Dump install logs
run: cat arrow/r/check/arrow.Rcheck/00install.out
3 changes: 3 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,7 @@ services:
shm_size: *shm-size
environment:
<<: *ccache
NOT_CRAN: 'true'
volumes: *conda-volumes
command:
["/arrow/ci/scripts/cpp_build.sh /arrow /build &&
Expand All @@ -808,6 +809,7 @@ services:
<<: *ccache
ARROW_R_CXXFLAGS: '-Werror'
LIBARROW_BUILD: 'false'
NOT_CRAN: 'true'
volumes: *ubuntu-volumes
command: >
/bin/bash -c "
Expand All @@ -834,6 +836,7 @@ services:
environment:
LIBARROW_DOWNLOAD: "false"
ARROW_HOME: "/arrow"
# To test for CRAN release, delete ^^ these two env vars so we download the Apache release
ARROW_USE_PKG_CONFIG: "false"
volumes:
- .:/arrow:delegated
Expand Down
5 changes: 3 additions & 2 deletions r/cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
## Test environments
* Debian Linux, R-devel, GCC ASAN/UBSAN
* Debian Linux, GCC, R-devel/R-patched/R-release
* Fedora Linux, GCC/clang, R-devel
* Ubuntu Linux 16.04 LTS, R-release, GCC
* win-builder (R-devel and R-release)
* macOS (10.11, 10.14), R-release
* macOS 10.14, R-release

## R CMD check results

Expand Down
93 changes: 59 additions & 34 deletions r/tools/linuxlibs.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ VERSION <- args[1]
dst_dir <- paste0("libarrow/arrow-", VERSION)

arrow_repo <- "https://dl.bintray.com/ursalabs/arrow-r/libarrow/"
apache_src_url <- paste0(
"https://archive.apache.org/dist/arrow/arrow-", VERSION,
"/apache-arrow-", VERSION, ".tar.gz"
)

options(.arrow.cleanup = character()) # To collect dirs to rm on exit
on.exit(unlink(getOption(".arrow.cleanup")))
Expand All @@ -40,17 +36,23 @@ binary_ok <- !identical(tolower(Sys.getenv("LIBARROW_BINARY", "false")), "false"
# For local debugging, set ARROW_R_DEV=TRUE to make this script print more
quietly <- !env_is("ARROW_R_DEV", "true")

# Download `from_url` to `to_file` and report whether the transfer succeeded.
#
# Errors from download.file() are caught with try() and warnings are
# suppressed, so a failed download never aborts the script; callers are
# expected to fall back to another source. The file-level `quietly` flag
# controls both download.file()'s progress output and whether the caught
# error message is printed.
#
# Success is decided from download.file()'s status code (0 means success)
# instead of only checking that `to_file` exists: a file may already be
# present at `to_file` from an earlier attempt (apache_download() retries
# several URLs into the same destination), and that must not be reported
# as a successful download.
#
# Returns TRUE if the download succeeded and `to_file` exists, else FALSE.
try_download <- function(from_url, to_file) {
  status <- try(
    suppressWarnings(
      download.file(from_url, to_file, quiet = quietly)
    ),
    silent = quietly
  )
  succeeded <- !inherits(status, "try-error") &&
    identical(as.integer(status), 0L)
  succeeded && file.exists(to_file)
}

download_binary <- function(os = identify_os()) {
libfile <- tempfile()
if (!is.null(os)) {
# See if we can map this os-version to one we have binaries for
os <- find_available_binary(os)
binary_url <- paste0(arrow_repo, "bin/", os, "/arrow-", VERSION, ".zip")
try(
download.file(binary_url, libfile, quiet = quietly),
silent = quietly
)
if (file.exists(libfile)) {
if (try_download(binary_url, libfile)) {
cat(sprintf("*** Successfully retrieved C++ binaries for %s\n", os))
} else {
cat(sprintf("*** No C++ binaries found for %s\n", os))
Expand Down Expand Up @@ -133,31 +135,54 @@ find_available_binary <- function(os) {

download_source <- function() {
tf1 <- tempfile()
src_dir <- NULL
source_url <- paste0(arrow_repo, "src/arrow-", VERSION, ".zip")
try(
download.file(source_url, tf1, quiet = quietly),
silent = quietly
)
if (!file.exists(tf1)) {
# Try for an official release
try(
download.file(apache_src_url, tf1, quiet = quietly),
silent = quietly
)
}
if (file.exists(tf1)) {
src_dir <- tempfile()
if (bintray_download(tf1)) {
# First try from bintray
cat("*** Successfully retrieved C++ source\n")
src_dir <- tempfile()
unzip(tf1, exdir = src_dir)
unlink(tf1)
# These scripts need to be executable
system(sprintf("chmod 755 %s/cpp/build-support/*.sh", src_dir))
options(.arrow.cleanup = c(getOption(".arrow.cleanup"), src_dir))
# The actual src is in cpp
src_dir <- paste0(src_dir, "/cpp")
} else if (apache_download(tf1)) {
# If that fails, try for an official release
cat("*** Successfully retrieved C++ source\n")
untar(tf1, exdir = src_dir)
unlink(tf1)
src_dir <- paste0(src_dir, "/apache-arrow-", VERSION, "/cpp")
}
src_dir

if (dir.exists(src_dir)) {
options(.arrow.cleanup = c(getOption(".arrow.cleanup"), src_dir))
# These scripts need to be executable
system(
sprintf("chmod 755 %s/build-support/*.sh", src_dir),
ignore.stdout = quietly, ignore.stderr = quietly
)
return(src_dir)
} else {
return(NULL)
}
}

# Try to fetch the C++ source zip for VERSION from the bintray repository
# (`arrow_repo`) into `destfile`. Returns whatever try_download() reports.
bintray_download <- function(destfile) {
  zip_name <- paste0("arrow-", VERSION, ".zip")
  try_download(paste0(arrow_repo, "src/", zip_name), destfile)
}

# Try to fetch the official Apache source tarball for VERSION into `destfile`.
#
# The closer.lua endpoint is queried `n_mirrors` times (it returns a different
# mirror each time), followed by downloads.apache.org as the backup. URLs are
# tried in order and the first successful download stops the search.
#
# Returns the result of the last try_download() call: TRUE as soon as one URL
# succeeds, FALSE if every URL failed.
apache_download <- function(destfile, n_mirrors = 3) {
  mirror_picker <- "https://www.apache.org/dyn/closer.lua?action=download&filename="
  backup_host <- "https://downloads.apache.org/"
  tarball_path <- paste0(
    "arrow/arrow-", VERSION, "/apache-arrow-", VERSION, ".tar.gz"
  )
  base_urls <- c(rep(mirror_picker, n_mirrors), backup_host)

  fetched <- FALSE
  for (base in base_urls) {
    fetched <- try_download(paste0(base, tarball_path), destfile)
    if (fetched) {
      return(fetched)
    }
  }
  fetched
}

find_local_source <- function(arrow_home = Sys.getenv("ARROW_HOME", "..")) {
Expand All @@ -176,7 +201,10 @@ build_libarrow <- function(src_dir, dst_dir) {
# Set up make for parallel building
makeflags <- Sys.getenv("MAKEFLAGS")
if (makeflags == "") {
makeflags <- sprintf("-j%s", parallel::detectCores())
# CRAN policy says not to use more than 2 cores during checks
# If you have more and want to use more, set MAKEFLAGS
ncores <- min(parallel::detectCores(), 2)
makeflags <- sprintf("-j%s", ncores)
Sys.setenv(MAKEFLAGS = makeflags)
}
if (!quietly) {
Expand Down Expand Up @@ -212,10 +240,7 @@ ensure_cmake <- function() {
)
cmake_tar <- tempfile()
cmake_dir <- tempfile()
try(
download.file(cmake_binary_url, cmake_tar, quiet = quietly),
silent = quietly
)
try_download(cmake_binary_url, cmake_tar)
untar(cmake_tar, exdir = cmake_dir)
unlink(cmake_tar)
options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir))
Expand Down
9 changes: 8 additions & 1 deletion r/tools/winlibs.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,18 @@ if(!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))){
} else {
# Download static arrow from rwinlib
if (getRversion() < "3.3.0") setInternet2()
quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true")
get_file <- function(template, version) {
try(download.file(sprintf(template, version), "lib.zip", quiet = TRUE), silent = TRUE)
try(
suppressWarnings(
download.file(sprintf(template, version), "lib.zip", quiet = quietly)
),
silent = quietly
)
}
# URL templates
# TODO: don't hard-code RTools 3.5? Can we detect which toolchain we have?
# ifelse(nzchar(Sys.getenv("RTOOLS40_HOME")), "40", "35")
nightly <- "https://dl.bintray.com/ursalabs/arrow-r/libarrow/bin/windows-35/arrow-%s.zip"
rwinlib <- "https://github.com/rwinlib/arrow/archive/v%s.zip"
# First look for a nightly
Expand Down

0 comments on commit d1f7573

Please sign in to comment.