Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .buildlibrary
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ValidationKey: '67118392'
ValidationKey: '67319980'
AcceptedWarnings:
- 'Warning: package ''.*'' was built under R version'
- 'Warning: namespace ''.*'' is not available and has been replaced'
Expand Down
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ cff-version: 1.2.0
message: If you use this software, please cite it using the metadata from this file.
type: software
title: 'madrat: May All Data be Reproducible and Transparent (MADRaT) *'
version: 3.28.4
date-released: '2025-12-16'
version: 3.29.0
date-released: '2026-01-09'
abstract: Provides a framework which should improve reproducibility and transparency
in data processing. It provides functionality such as automatic meta data creation
and management, rudimentary quality management, data caching, work-flow management
Expand Down
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Type: Package
Package: madrat
Title: May All Data be Reproducible and Transparent (MADRaT) *
Version: 3.28.4
Date: 2025-12-16
Version: 3.29.0
Date: 2026-01-09
Authors@R: c(
person("Jan Philipp", "Dietrich", , "[email protected]", role = c("aut", "cre"),
comment = c(affiliation = "Potsdam Institute for Climate Impact Research", ORCID = "0000-0002-4309-6431")),
Expand Down Expand Up @@ -45,6 +45,7 @@ Depends:
Imports:
callr,
digest,
filelock (>= 1.0.3),
igraph (>= 2.1.1),
Matrix,
methods,
Expand Down
85 changes: 59 additions & 26 deletions R/retrieveData.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,10 @@ retrieveData <- function(model, rev = 0, dev = "", cachetype = "def", puc = iden

if (length(matchingPUCs) == 1) {
vcat(-2, " - data will be created from existing puc (", matchingPUCs, ").", fill = 300)
do.call(pucAggregate, c(list(puc = file.path(getConfig("pucfolder"), matchingPUCs)), renv = renv,
strict = NULL, cfg$input[cfg$pucArguments]))
.withLockedPuc(matchingPUCs, function() {
do.call(pucAggregate, c(list(puc = file.path(getConfig("pucfolder"), matchingPUCs)),
renv = renv, strict = NULL, cfg$input[cfg$pucArguments]))
})
return(invisible(paste0(outputfolder, ".tgz")))
}
}
Expand Down Expand Up @@ -176,31 +178,33 @@ retrieveData <- function(model, rev = 0, dev = "", cachetype = "def", puc = iden
}
pucName <- paste0(cfg$pucName, ".puc")
pucPath <- file.path(getConfig("pucfolder"), pucName)
if (!file.exists(pucPath)) {
cacheFiles <- readLines(pucFiles)
if (all(file.exists(cacheFiles))) {
vcat(1, " - create puc (", pucPath, ")", fill = 300, show_prefix = FALSE)
with_tempdir({
file.copy(cacheFiles, ".")
otherFiles <- c("config.rds", "diagnostics.log")
file.copy(file.path(outputfolder, otherFiles), ".")

.fillRenvCache(requiredPackages = attr(cfg$functionName, "package"))

missingFiles <- basename(cacheFiles)[!file.exists(basename(cacheFiles))]
if (length(missingFiles) == 0) {
# create the actual puc file: a tar gz archive containing config, diagnostics, renv.lock, and all
# required madrat cache files
.tarAndVerify(pucPath)
} else {
vcat(1, "puc file not created, some cache files are missing:\n",
paste(missingFiles, collapse = "\n"))
}
}, tmpdir = madTempDir())
} else {
vcat(1, "puc file not created: could not find all relevant files.")
.withLockedPuc(pucName, function() {
if (!file.exists(pucPath)) {
cacheFiles <- readLines(pucFiles)
if (all(file.exists(cacheFiles))) {
vcat(1, " - create puc (", pucPath, ")", fill = 300, show_prefix = FALSE)
with_tempdir({
file.copy(cacheFiles, ".")
otherFiles <- c("config.rds", "diagnostics.log")
file.copy(file.path(outputfolder, otherFiles), ".")

.fillRenvCache(requiredPackages = attr(cfg$functionName, "package"))

missingFiles <- basename(cacheFiles)[!file.exists(basename(cacheFiles))]
if (length(missingFiles) == 0) {
# create the actual puc file: a tar gz archive containing config, diagnostics, renv.lock, and all
# required madrat cache files
.tarAndVerify(pucPath)
} else {
vcat(1, "puc file not created, some cache files are missing:\n",
paste(missingFiles, collapse = "\n"))
}
}, tmpdir = madTempDir())
} else {
vcat(1, "puc file not created: could not find all relevant files.")
}
}
}
})
} else {
vcat(1, "puc file not created: could not find list of files to be added.")
}
Expand Down Expand Up @@ -394,3 +398,32 @@ retrieveData <- function(model, rev = 0, dev = "", cachetype = "def", puc = iden

return(returnCode)
}

.withLockedPuc <- function(pucName, fn) {
# This could be improved by using separate locks for read and
# write access to not block simultaneous reads. This would
# however significantly increase the complexity of this mechanism.

# Ensure .locks folder exists
lockFileFolder <- file.path(getConfig("pucfolder"), ".locks")
if (!dir.exists(lockFileFolder)) {
dir.create(lockFileFolder)
}

# Get the lock
lockFilePath <- file.path(getConfig("pucfolder"), ".locks", paste0(pucName, ".lock"))
lock <- filelock::lock(lockFilePath, timeout = 6 * 60 * 60 * 1000) # Wait for 6h
if (is.null(lock)) {
# This should really not happen, as the Inf timeout should
# ensure we always get a lock.
stop("Failed to acquire lock for puc file ", lockFilePath)
}

# Execute the fn with the lock
tryCatch({
return(fn())
},
finally = {
filelock::unlock(lock)
})
}
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# May All Data be Reproducible and Transparent (MADRaT) *

R package **madrat**, version **3.28.4**
R package **madrat**, version **3.29.0**

[![CRAN status](https://www.r-pkg.org/badges/version/madrat)](https://cran.r-project.org/package=madrat) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1115490.svg)](https://doi.org/10.5281/zenodo.1115490) [![R build status](https://github.com/pik-piam/madrat/workflows/check/badge.svg)](https://github.com/pik-piam/madrat/actions) [![codecov](https://codecov.io/gh/pik-piam/madrat/branch/master/graph/badge.svg)](https://app.codecov.io/gh/pik-piam/madrat) [![r-universe](https://pik-piam.r-universe.dev/badges/madrat)](https://pik-piam.r-universe.dev/builds)

Expand Down Expand Up @@ -55,7 +55,7 @@ In case of questions / problems please contact Jan Philipp Dietrich <dietrich@pi

To cite package **madrat** in publications use:

Dietrich J, Sauer P, Baumstark L, Wirth S, Giannousakis A, Rodrigues R, Bodirsky B, Leip D, Crawford M, Kreidenweis U, Klein D, Rein P (2025). "madrat: May All Data be Reproducible and Transparent (MADRaT) *." doi:10.5281/zenodo.1115490 <https://doi.org/10.5281/zenodo.1115490>, Version: 3.28.4, <https://github.com/pik-piam/madrat>.
Dietrich J, Sauer P, Baumstark L, Wirth S, Giannousakis A, Rodrigues R, Bodirsky B, Leip D, Crawford M, Kreidenweis U, Klein D, Rein P (2026). "madrat: May All Data be Reproducible and Transparent (MADRaT) *." doi:10.5281/zenodo.1115490 <https://doi.org/10.5281/zenodo.1115490>, Version: 3.29.0, <https://github.com/pik-piam/madrat>.

A BibTeX entry for LaTeX users is

Expand All @@ -64,9 +64,9 @@ A BibTeX entry for LaTeX users is
title = {madrat: May All Data be Reproducible and Transparent (MADRaT) *},
author = {Jan Philipp Dietrich and Pascal Sauer and Lavinia Baumstark and Stephen Wirth and Anastasis Giannousakis and Renato Rodrigues and Benjamin Leon Bodirsky and Debbora Leip and Michael Crawford and Ulrich Kreidenweis and David Klein and Patrick Rein},
doi = {10.5281/zenodo.1115490},
date = {2025-12-16},
year = {2025},
date = {2026-01-09},
year = {2026},
url = {https://github.com/pik-piam/madrat},
note = {Version: 3.28.4},
note = {Version: 3.29.0},
}
```
115 changes: 115 additions & 0 deletions tests/testthat/test-puc.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,121 @@ test_that("puc creation works", {
expect_message(pucAggregate("rev42_extra_example_tag.puc", extra = "blub", regionmapping = "regionmappingH12.csv",
renv = FALSE), "already available")
expect_true(file.exists(file.path(getConfig("outputfolder"), "rev42_h12_7a5441e5_example_customizable_tag.tgz")))

expect_message(retrieveData("example", rev = 42, extra = "test2", renv = FALSE), "Run pucAggregate")
expect_true(file.exists(file.path(getConfig("outputfolder"), "rev42_h12_5f3d77a0_example_customizable_tag.tgz")))
})

test_that("puc creation is thread-safe", {
skip_on_cran()

# Tip for debugging this test:
# If something breaks in one of the sub-processes, use p1$get_result()
# to check what happened.

# Store paths
lockDirName <- ".locks"
lockDir <- file.path(getConfig("pucfolder"), lockDirName)

# Set up utility functions
.waitForMessage <- function(process, message) {
while (!(length(messages <- process$read_output_lines()) > 0 && any(messages == message))) {
Sys.sleep(0.1)
}
}

# Set up PUC-folder as working directory
unlink(list.files(getConfig("pucfolder"), full.names = TRUE)) # To ensure no remaining pucs are in there
unlink(list.files(lockDir, full.names = TRUE)) # To ensure no remaining locks
dir.create(lockDir, showWarnings = FALSE)

# Set up locks
firstCheckpoint <- filelock::lock(file.path(lockDir, "checkpoint1.lock"))
secondCheckpoint <- filelock::lock(file.path(lockDir, "checkpoint2.lock"))

p1 <- callr::r_bg(function(madratConfig) {
# Set up environment
pkgload::load_all("../..")
do.call(madrat::setConfig, madratConfig)
lockDir <- file.path(getConfig("pucfolder"), ".locks")

# Set up .withLockedPuc wrapper to inject control logic
# into the passed function
originalWithLockedPuc <- madrat:::.withLockedPuc
assignInNamespace(".withLockedPuc", function(pucName, fn) {
originalWithLockedPuc(pucName, function() {
if (any(grepl("pucAggregate", deparse(fn), fixed = TRUE))) {
# Only interested in creation, so we do a quick return for reading the puc.
return(fn())
}

tryCatch({
cat("ready\n")
firstCheckpoint <- filelock::lock(file.path(lockDir, "checkpoint1.lock"))
fn()
cat("fn done\n")
secondCheckpoint <- filelock::lock(file.path(lockDir, "checkpoint2.lock"))
},
finally = {
filelock::unlock(firstCheckpoint)
filelock::unlock(secondCheckpoint)
})
})
}, ns = "madrat")

# Start
madrat::retrieveData("example", rev = 45)
}, args = list(madratConfig = getConfig()))

# Wait for p1 to signal that it is ready, i.e. it has entered the critical section
.waitForMessage(p1, "ready")

# This is a copy of testFunction1 except for the call at the end
p2 <- callr::r_bg(function(madratConfig) {
# Set up environment
pkgload::load_all("../..")
do.call(madrat::setConfig, madratConfig)

# Set up .withLockedPuc wrapper to inject control logic
# into the passed function
originalWithLockedPuc <- madrat:::.withLockedPuc
assignInNamespace(".withLockedPuc", function(pucName, fn) {
originalWithLockedPuc(pucName, function() {
if (any(grepl("pucAggregate", deparse(fn), fixed = TRUE))) {
# Only interested in creation, so we do a quick return for reading the puc.
return(fn())
}
fn()
})
}, ns = "madrat")

# Start
cat("ready\n")
madrat::retrieveData("example", rev = 45)
}, args = list(madratConfig = getConfig()))

# Wait for p2 to signal that it is ready, i.e. it was at the point where it could execute
# the critical section (there is no guarantee that it tried getting in yet, if this test
# is flaky, this is one of the critical spots).
.waitForMessage(p2, "ready")

filelock::unlock(firstCheckpoint)

# Wait for p1 to signal that it is done, i.e. it has executed fn
.waitForMessage(p1, "fn done")

Sys.sleep(1) # Give p2 some time to fall asleep (start waiting for the puc lock)

expect_true(p2$get_status() == "sleeping")
expect_true(file.exists(file.path(getConfig("pucfolder"), "rev45_extra_example_tag.puc")))
unlink("rev45_extra_example_tag.puc")

filelock::unlock(secondCheckpoint)
p1$wait()
p2$wait()

expect_true(file.exists(file.path(getConfig("pucfolder"), "rev45_extra_example_tag.puc")))

expect_false(p1$is_alive())
expect_false(p2$is_alive())
})