Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Group data by variable not month #272

Draft
wants to merge 31 commits into
base: dev-v2
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
a5f8102
Add timing information to driver
bpbond Feb 23, 2025
62a4bfc
Add timing data and change to using readr::write_csv
bpbond Feb 21, 2025
12a4f9e
Better handle release date and enforce clean repo (#254)
bpbond Feb 23, 2025
421b86d
Skip raw file if there's a corresponding L0 file (#256)
bpbond Feb 23, 2025
c8a909b
Don't check dir.create() return value (#255)
bpbond Feb 23, 2025
80bffe3
Fix reset() behavior so that files and folders are removed cleanly (#…
bpbond Feb 24, 2025
f91a6af
Embed resources in qmd-generated html files (#258)
bpbond Feb 24, 2025
abc57cb
Sapflow: fix deep sensor labeling and shallow depth value (#259)
bpbond Feb 25, 2025
fc0bf32
Check for error returned by parallel jobs (#260)
bpbond Feb 28, 2025
e71b855
Change L0 step to handle removed and edited raw files (#261)
bpbond Mar 3, 2025
873c47e
helpers.R cleanup (#266)
bpbond Mar 3, 2025
069ed70
Move functions from compasstools to helper file (#267)
bpbond Mar 4, 2025
478c917
Rework out-of-service to be more general and flexible (#269)
bpbond Mar 6, 2025
2c6b5f7
Update helpers.R
bpbond Mar 7, 2025
b23fbce
Change L1_normalize to write by research name
bpbond Mar 10, 2025
563d4a8
Change research names to use hyphens, not underlines
bpbond Mar 10, 2025
4cb51ae
Update L1 outputs to be by research name
bpbond Mar 10, 2025
f33d674
Update helpers.R
bpbond Mar 10, 2025
69a2818
Update helpers.R
bpbond Mar 13, 2025
31e6057
Update design_table.csv
bpbond Mar 13, 2025
0636cda
Create README_v2-0.txt
bpbond Mar 13, 2025
f37e91b
Change L1_normalize to write by research name
bpbond Mar 10, 2025
1e30174
Change research names to use hyphens, not underlines
bpbond Mar 10, 2025
13717d9
Update L1 outputs to be by research name
bpbond Mar 10, 2025
ea54ef9
Update helpers.R
bpbond Mar 10, 2025
5160370
Update helpers.R
bpbond Mar 13, 2025
8daea73
Update design_table.csv
bpbond Mar 13, 2025
fc01f0b
Create README_v2-0.txt
bpbond Mar 13, 2025
d487751
Update design_table.csv
bpbond Mar 15, 2025
e2bebab
Merge branch 'rn-not-month' of https://github.com/COMPASS-DOE/sensor-…
bpbond Mar 15, 2025
af49adb
Update design_table.csv
bpbond Mar 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 22 additions & 21 deletions pipeline/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,11 @@ read_csv_group <- function(files, col_types = NULL, quiet = FALSE, ...) {

# File data into sub-folders based on what level data it is:
# L1_normalize outputs
# Folders are site_plot_year_month
# Folders are site_plot_year_researchname
# Filenames are Site_logger_table_year_researchname_hash
# L1 outputs
# Folders are site_year
# Filenames are site_plot_timeperiod_L1_version
# Filenames are site_plot_timeperiod_researchname_L1_version
# L2 outputs
# Folders are site_year
# Filenames are site_timeperiod_table_L2_version
Expand All @@ -86,55 +86,56 @@ write_to_folders <- function(x, root_dir,
stopifnot(length(unique(x$Site)) == 1)
}
stopifnot("TIMESTAMP" %in% names(x))
stopifnot("research_name" %in% names(x))

# Prep: identify years and months, along with current date
# Prep: identify years and research names, along with current date
years <- year(x$TIMESTAMP)
months <- sprintf("%02i", month(x$TIMESTAMP)) # add leading zero if needed
research_names <- x$research_name
nowyr <- year(Sys.Date())
nowmo <- month(Sys.Date())
vversion <- paste0("v", version)

# Loop by years and months
# Loop by years and research names
lines_written <- list()
for(y in unique(years)) {
if(is.na(y)) {
stop(data_level, " invalid year ", y)
}

for(m in unique(months)) {
for(rn in unique(research_names)) {
write_this_plot <- FALSE

# Isolate the data to write
dat <- x[y == years & m == months,]
dat <- x[y == years & rn == research_names,]
if(!nrow(dat)) {
message("No data for ", y, "_", m, " - skipping")
message("No data for ", y, "_", rn, " - skipping")
next
}

# Sanity checks
if(is.na(m)) {
stop(data_level, " invalid month ", m)
if(is.na(rn)) {
stop(data_level, " invalid research_name ", m)
}
if(y > nowyr || (y == nowyr && m > nowmo)) {
if(y > nowyr) {
stop("I am being asked to write ", nrow(dat), " rows of future data: ",
paste(site, logger, table, y, m))
paste(site, logger, table, y))
}

# Construct folder and file names based on data_level
time_period <- paste(format(min(dat$TIMESTAMP), format = "%Y%m%d"),
format(max(dat$TIMESTAMP), format = "%Y%m%d"),
sep = "-")
if(data_level == "L1_normalize") {
folder <- file.path(root_dir, paste(site, plot, y, m, sep = "_"))
# A given month's data is usually split across two datalogger
# files; add a short hash to end of filename to ensure we don't
# overwrite anything that's already there
short_hash <- substr(digest::digest(dat, algo = "md5"), 1, 4)
filename <- paste(logger, table, y, m, short_hash, sep = "_")
folder <- file.path(root_dir, paste(site, plot, y, rn, sep = "_"))
# Add a short hash to end of filename to ensure we don't
# overwrite anything that's already there (e.g. another
# month's data)
short_hash <- substr(digest::digest(dat, algo = "md5"), 1, 6)
filename <- paste(logger, table, y, rn, short_hash, sep = "_")
na_string <- NA_STRING_L1
} else if(data_level == "L1") {
folder <- file.path(root_dir, paste(site, y, sep = "_"))
filename <- paste(site, plot, time_period, data_level, vversion, sep = "_")
filename <- paste(site, plot, time_period, rn, data_level, vversion, sep = "_")
na_string <- NA_STRING_L1
write_this_plot <- TRUE
p <- ggplot(x, aes(TIMESTAMP, Value, group = paste(Instrument_ID, Sensor_ID))) +
Expand Down Expand Up @@ -185,7 +186,7 @@ write_to_folders <- function(x, root_dir,
}

lines_written[[fqfn]] <- nrow(dat)
} # for m
} # for rn
} # for y
invisible(lines_written)
}
Expand All @@ -212,7 +213,7 @@ reset <- function(root = here::here("pipeline/data_TEST")) {
remove_items("L0/")
remove_items("L1_normalize/")
# remove L1_normalize folders
remove_items("L1_normalize/", pat = "[A-Z]{3}_[A-Z]+_[0-9]{4}_[0-9]{2}")
remove_items("L1_normalize/", pat = "[A-Z]{3}_[A-Z]+_[0-9]{4}_[a-z-]+")
remove_items("L1/")
# remove L1 folders
remove_items("L1/", pat = "[A-Z]{3}_[0-9]{4}")
Expand Down
Loading