Skip to content

Commit 5e74e57

Browse files
authored
Flexible time zone handling (#301)
* First pass at flexible time zone handling * Add timestamp test data * Write timestamp information to metadata * Update documentation
1 parent f048411 commit 5e74e57

File tree

11 files changed

+91
-13
lines changed

11 files changed

+91
-13
lines changed

pipeline/L1.qmd

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ params:
1111
L1_METADATA: "L1_metadata/"
1212
METADATA_VARS_TABLE: "L1_metadata_variables.csv"
1313
METADATA_COLUMNS_TABLE: "L1_metadata_columns.csv"
14+
# We use "Etc/GMT+5" rather than e.g. "America/New_York" for
15+
# L1_DATA_TIMEZONE because outputs should always be in STANDARD time
16+
# See https://en.wikipedia.org/wiki/List_of_tz_database_time_zones (note the inverted POSIX sign: "Etc/GMT+5" is UTC-5, i.e. EST)
17+
L1_DATA_TIMEZONE: "Etc/GMT+5"
1418
L1_VERSION: "???"
1519
L1_RELEASE_DATE: "???"
1620
write_plots: true
@@ -174,6 +178,7 @@ readme <- gsub("[VERSION]", params$L1_VERSION, readme, fixed = TRUE)
174178
readme <- gsub("[DATESTAMP]", params$L1_RELEASE_DATE, readme, fixed = TRUE)
175179
readme <- gsub("[OBSERVATIONS]", n_obs, readme, fixed = TRUE)
176180
readme <- gsub("[GIT_COMMIT]", GIT_COMMIT, readme, fixed = TRUE)
181+
readme <- gsub("[TIMEZONE]", params$L1_DATA_TIMEZONE, readme, fixed = TRUE)
177182
readme_outfn <- file.path(L1, basename(readme_fn))
178183
message("Writing overall README ", readme_outfn, "...")
179184
writeLines(readme, readme_outfn)
@@ -244,6 +249,8 @@ for(dd in data_dirs) {
244249
md <- md[-col_info_pos]
245250
# The NA code is an in-line replacement
246251
md <- gsub("[NA_STRING_L1]", NA_STRING_L1, md, fixed = TRUE)
252+
# The time zone is an in-line replacement
253+
md <- gsub("[TIMEZONE]", params$L1_DATA_TIMEZONE, md, fixed = TRUE)
247254
248255
# Insert variable metadata
249256
var_info_pos <- grep("[VARIABLE_INFO]", md, fixed = TRUE)

pipeline/L1_normalize.qmd

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,15 @@ title-block-banner: true
55
params:
66
html_outfile: "L1_normalize.html"
77
DATA_ROOT: "data_TEST/"
8-
METADATA_ROOT: "metadata/"
98
L0: "L0/"
109
L1_NORMALIZE: "L1_normalize/"
1110
DESIGN_TABLE: "design_table.csv"
11+
# We use "Etc/GMT+5" rather than e.g. "America/New_York" for
12+
# L1_DATA_TIMEZONE because outputs should always be in STANDARD time
13+
# See https://en.wikipedia.org/wiki/List_of_tz_database_time_zones (note the inverted POSIX sign: "Etc/GMT+5" is UTC-5, i.e. EST)
14+
L1_DATA_TIMEZONE: "Etc/GMT+5"
15+
METADATA_ROOT: "metadata/"
16+
METADATA_TIMEZONES_TABLE: "L1_metadata/L1_metadata_timezones.csv"
1217
METADATA_VARS_TABLE: "L1_metadata/L1_metadata_variables.csv"
1318
OOS: "out-of-service/"
1419
logfile: ""
@@ -73,6 +78,10 @@ if(any(duplicated(chk))) {
7378
}
7479
DESIGN_TABLE_ENTRIES <- with(dt_ex, paste(Logger, Table, loggernet_variable)) # save for later
7580
81+
# Read the site time zone table
82+
METADATA_TZ_TABLE <- file.path(params$METADATA_ROOT, params$METADATA_TIMEZONES_TABLE)
83+
tzt <- read_csv(METADATA_TZ_TABLE, col_types = "ccc")
84+
7685
# Read the variable metadata table
7786
METADATA_VARS_TABLE <- file.path(params$METADATA_ROOT, params$METADATA_VARS_TABLE)
7887
mt <- read_csv(METADATA_VARS_TABLE, col_types = "ccccccddc")
@@ -162,7 +171,7 @@ f <- function(fn, out_dir, design_table) {
162171
times = substr(dat$TIMESTAMP, 1, 10), # isolate YYYY-MM-DD
163172
valid_through = dat$valid_through)
164173
165-
message("\tDropping ", sum(!dat_retain), " out-of-date design links")
174+
if(sum(!dat_retain)) message("\tDropping ", sum(!dat_retain), " out-of-date design links")
166175
dat <- dat[dat_retain,]
167176
dat$valid_through <- NULL
168177
@@ -232,13 +241,31 @@ f <- function(fn, out_dir, design_table) {
232241
# all `oos_data` entries, OR'ing the results as we go,
233242
# instead of breaking out
234243
dat$F_OOS <- as.integer(oos(oos_data[[tbl]], dat))
235-
message("\tAdded ", sum(dat$F_OOS), " OOS flags for ", tbl)
244+
if(sum(dat$F_OOS)) message("\tAdded ", sum(dat$F_OOS), " OOS flags for ", tbl)
236245
oos_data_used[tbl] <<- TRUE # won't work when parallelized
237246
break
238247
}
239248
}
240249
smry$`OOS%` <- round(sum(dat$F_OOS) / nrow(dat) * 100, 1)
241250
251+
# ------------- Time zone change, if needed
252+
253+
site <- dat$Site[1]
254+
if(site %in% tzt$Site) {
255+
site_tz <- tzt$Datalogger_time_zone[which(site == tzt$Site)]
256+
if(site_tz != params$L1_DATA_TIMEZONE) {
257+
message("\tChanging timestamps from ", site_tz, " to ", params$L1_DATA_TIMEZONE)
258+
# The timestamps were recorded in the site's time zone, which differs
259+
# from the L1 timezone, so convert to POSIXct and change tz
260+
timestamps <- ymd_hms(dat$TIMESTAMP, tz = site_tz)
261+
timestamps <- with_tz(timestamps, tzone = params$L1_DATA_TIMEZONE)
262+
dat$TIMESTAMP <- format(timestamps, "%Y-%m-%d %H:%M:%S")
263+
}
264+
} else {
265+
stop("This site ", site, " is not in the time zone table ",
266+
params$METADATA_TIMEZONES_TABLE)
267+
}
268+
242269
# ------------- Write output files and clean up
243270
244271
write_to_folders(dat,
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
"TOA5","Compass_TZTEST_TR_999","CR1000X","28733","CR1000X.Std.06.00","CPU:COMPASS_v3.3.CR1X","12050","CheckTable"
2+
"TIMESTAMP","RECORD","BattV","SolarV","Batt_CHECK","Solar_CHECK","Flag(1)","Flag(2)","Flag(3)","Flag(4)","Flag(5)","Flag(6)","Flag(7)","Flag(8)","Flag(9)","Flag(10)","Flag(11)","Flag(12)","Flag(13)","Flag(14)","Statname","PB"
3+
"TS","RN","Volts","Volts","","","","","","","","","","","","","","","","","",""
4+
"","","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp"
5+
"2023-05-08 00:00:00",1598,11.93,25.42,"LOW","OKAY",0,0,-1,0,-1,0,0,-1,0,0,0,0,0,0,"Compass_TZTEST_TR",999
6+
"2023-05-08 00:15:00",1599,11.93,25.41,"LOW","OKAY",0,0,-1,0,-1,0,0,-1,0,0,0,0,0,0,"Compass_TZTEST_TR",999
7+
"2023-05-08 00:30:00",1600,11.93,25.4,"LOW","OKAY",0,0,-1,0,-1,0,0,-1,0,0,0,0,0,0,"Compass_TZTEST_TR",999
8+
"2023-05-08 00:45:00",1601,11.93,25.38,"LOW","OKAY",0,0,-1,0,-1,0,0,-1,0,0,0,0,0,0,"Compass_TZTEST_TR",999
9+
"2023-05-08 01:00:00",1602,11.93,25.37,"LOW","OKAY",0,0,-1,0,-1,0,0,-1,0,0,0,0,0,0,"Compass_TZTEST_TR",999
10+
"2023-05-08 01:15:00",1603,11.93,25.36,"LOW","OKAY",0,0,-1,0,-1,0,0,-1,0,0,0,0,0,0,"Compass_TZTEST_TR",999

pipeline/docs/making-a-new-release.md

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,18 +64,22 @@ site-specific contact information is correct and, if needed, update the
6464
various site files in `./pipeline/metadata/L1_metadata`. Also update the
6565
key publications for each site.
6666

67+
9. Confirm that the datalogger time zone information listed in
68+
`./pipeline/metadata/L1_metadata/L1_metadata_timezones.csv` is correct, and that
69+
the output time zone (`L1_DATA_TZ` in `driver.R`, or the `L1_DATA_TIMEZONE` parameter of the individual Quarto files)
70+
is what you want.
6771

6872
## Run the pipeline
6973

70-
9. Commit all your changes. Now set the `ROOT` variable in `driver.R`
74+
10. Commit all your changes. Now set the `ROOT` variable in `driver.R`
7175
(if that's what you're using) to "./data" instead of "./data_TEST".
7276
**This change does NOT get committed**, however, because you want GitHub
7377
Actions to continue to use the _test_ data.
7478

75-
10. From the `./pipeline` folder, run `reset("data/")` (sourced from
79+
11. From the `./pipeline` folder, run `reset("data/")` (sourced from
7680
`helpers.R`) to remove any previous files.
7781

78-
11. Run the processing pipeline. If you use `driver.R` it will be
82+
12. Run the processing pipeline. If you use `driver.R` it will be
7983
relatively fast, because highly parallelized, but you don't get
8084
informative html logs, because the parallel processes can't write to
8185
output. If you want the full, detailed logs, run without parallelism
@@ -89,14 +93,14 @@ for examples of how to resolve these.
8993

9094
## Check, clean up, upload
9195

92-
12. Double-check the final release README file.
96+
13. Double-check the final release README file.
9397

94-
13. You may want to clean up the resulting L1 folder; for example,
98+
14. You may want to clean up the resulting L1 folder; for example,
9599
remove unwanted hidden files (`find ./ -name ".DS_Store" | xargs rm`) or
96100
'stub' data (`find ./ -name "*202407*" | xargs rm`). (Before doing this,
97101
use find's `-print` option to make sure you know what you're deleting!)
98102

99-
14. Push the data (including `L1`, `Raw`, `L0`, and `Logs`) to the
103+
15. Push the data (including `L1`, `Raw`, `L0`, and `Logs`) to the
100104
COMPASS HPC. For example:
101105

102106
```
@@ -110,9 +114,9 @@ rsync -av --chown=:compass-fme-data --perms --chmod=g+rx --exclude=".*" L1/ <use
110114
# Homebrew (https://brew.sh) to install an up-to-date version of rsync.
111115
```
112116

113-
15. Upload to the Google Drive, renaming the folder to the correct
117+
16. Upload to the Google Drive, renaming the folder to the correct
114118
version number, for `L1` and `Raw`.
115119

116-
16. Make a Git release corresponding to the version number.
120+
17. Make a Git release corresponding to the version number.
117121

118-
17. Let everyone know!
122+
18. Let everyone know!

pipeline/docs/system-files.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ File | Description
8888
`GWI.txt` | Site description file for the GWI site
8989
`L1_metadata_columns.csv` | This specifies the names and ordering of the L1 data columns. Upon being generated, L1 files are checked against this list and the process will error if there's a discrepancy
9090
`L1_metadata_template.txt` | Template for the various metadata files in each site-year folder. Information placeholders in square brackets are replaced in the L1 metadata-generation step
91+
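`L1_metadata_timezones.csv` | Time zone that the dataloggers are set to at each site; used by `L1_normalize.qmd` to convert timestamps to the L1 output time zone. Every site must have an entry here, or the L1_normalize step will error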
9192
`L1_metadata_variables.csv` | Along with the design table, this is a key 'information center' for the system. Also known as the 'bounds and units table', it specifies unit conversions, expected bands, variable metadata, etc. Every output variable must have an entry here, or the L1_normalize step will error
9293
`MSM.txt` | Site description file for the MSM site
9394
`OWC.txt` | Site description file for the OWC site

pipeline/driver.R

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ source("helpers.R")
2424
ROOT <- "./data_TEST"
2525
VERSION <- "1-2"
2626
RELEASE_DATE <- "2024-02-21"
27+
# We use "Etc/GMT+5" rather than e.g. "America/New_York" because
28+
# outputs should always be in STANDARD time
29+
# See https://en.wikipedia.org/wiki/List_of_tz_database_time_zones (note the inverted POSIX sign: "Etc/GMT+5" is UTC-5, i.e. EST)
30+
L1_DATA_TZ <- "Etc/GMT+5"
2731

2832
# Log file ----------------------------------------------------
2933

@@ -87,6 +91,7 @@ begin <- Sys.time()
8791
driver_try(
8892
quarto_render("L1_normalize.qmd",
8993
execute_params = list(DATA_ROOT = ROOT,
94+
L1_DATA_TIMEZONE = L1_DATA_TZ,
9095
html_outfile = outfile,
9196
logfile = LOGFILE,
9297
run_parallel = TRUE))
@@ -113,6 +118,7 @@ driver_try(
113118
execute_params = list(DATA_ROOT = ROOT,
114119
L1_VERSION = VERSION,
115120
L1_RELEASE_DATE = RELEASE_DATE,
121+
L1_DATA_TIMEZONE = L1_DATA_TZ,
116122
html_outfile = outfile,
117123
logfile = LOGFILE,
118124
run_parallel = FALSE))

pipeline/metadata/L1_metadata/L1_metadata_columns.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Column,Description
33
Site,Site code; see site info above (character)
44
Plot,Plot code; see site info above (character)
5-
TIMESTAMP,Datalogger timestamp in EST (POSIXct)
5+
TIMESTAMP,Datalogger timestamp in [TIMEZONE] (POSIXct)
66
Instrument,Name of measurement instrument (character)
77
Instrument_ID,Identifier of instrument within plot (character)
88
Sensor_ID,"Identifier of individual sensor, tree, etc. being measured (character)"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Site,Datalogger_time_zone,Comment
2+
TZTEST,Etc/GMT+6,TEST ONLY
3+
CRC,Etc/GMT+5,This is EST
4+
PTR,Etc/GMT+5,This is EST
5+
OWC,Etc/GMT+5,This is EST
6+
GWI,Etc/GMT+5,This is EST
7+
MSM,Etc/GMT+5,This is EST
8+
GCW,Etc/GMT+5,This is EST
9+
SWH,Etc/GMT+5,This is EST
10+
TMP,Etc/GMT+5,This is EST

pipeline/metadata/L1_metadata/README_v???.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ COMPASS-FME Level 1 data
22
Version: [VERSION]
33
Date: [DATESTAMP]
44
Observations: [OBSERVATIONS]
5+
Data timestamps: [TIMEZONE]
56
Git commit: [GIT_COMMIT]
67

78
DESCRIPTION
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
THIS IS A DUMMY FILE FOR THE "TZTEST" SITE USED IN TIME ZONE TESTING.
2+

0 commit comments

Comments
 (0)