-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathagg_mean.Rd
301 lines (254 loc) · 12.2 KB
/
agg_mean.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Summarizing.R
\name{agg_mean}
\alias{agg_mean}
\alias{agg_fun}
\alias{agg_sum}
\alias{agg_fsd}
\alias{agg_DT_SD}
\title{Time series summarization}
\usage{
agg_mean(x, format, breaks = NULL, interval = NULL, tz = "GMT", ...)
agg_fun(x, format, fun, breaks = NULL, interval = NULL, tz = "GMT", ...)
agg_sum(
x,
format,
agg_per = NULL,
breaks = NULL,
interval = NULL,
NEE_scor = TRUE,
GPP_scor = FALSE,
quant = grep("^PAR|^PPFD|^APAR", names(x), value = TRUE),
power = grep("^GR|^Rg|^SW|^SR|^LW|^LR|^Rn|^NETRAD|^G$|^H|^LE", names(x), value = TRUE),
carbon = grep("^NEE|^GPP|^Reco", names(x), value = TRUE),
ET = grep("^ET", names(x), value = TRUE),
tz = "GMT",
...
)
agg_fsd(
x,
format,
agg_per = NULL,
breaks = NULL,
interval = NULL,
quant = grep("^PAR|^PPFD|^APAR", names(x), value = TRUE),
power = grep("^GR|^Rg|^SW|^SR|^LW|^LR|^Rn|^NETRAD|^G$|^H|^LE", names(x), value = TRUE),
carbon = grep("^NEE", names(x), value = TRUE),
ET = grep("^ET", names(x), value = TRUE),
tz = "GMT"
)
agg_DT_SD(
x,
format,
agg_per = NULL,
breaks = NULL,
interval = NULL,
carbon = grep("^Reco|^GPP", names(x), value = TRUE),
tz = "GMT"
)
}
\arguments{
\item{x}{A data frame with required timestamp column (\code{x$timestamp}) of
class \code{"POSIXt"}.}
\item{format}{A character string specifying \code{x$timestamp} formatting for
aggregation through internal \code{\link{strftime}} function.}
\item{breaks}{A vector of cut points or number giving the number of intervals
which \code{x$timestamp} is to be cut into or an interval specification,
one of \code{"sec"}, \code{"min"}, \code{"hour"}, \code{"day"},
\code{"DSTday"}, \code{"week"}, \code{"month"}, \code{"quarter"} or
\code{"year"}, optionally preceded by an integer and a space, or followed
by \code{"s"}.}
\item{interval}{A numeric value specifying the time interval (in seconds) of
the generated date-time sequence. If \code{NULL}, \code{interval}
autodetection is attempted.}
\item{tz}{A character string specifying the time zone to be used for the
conversion. System-specific (see \code{\link{as.POSIXlt}} or
\code{\link{timezones}}), but \code{""} is the current time zone, and
\code{"GMT"} is UTC. Invalid values are most commonly treated as UTC, on
some platforms with a warning.}
\item{...}{Further arguments to be passed to the internal
\code{\link{aggregate}} function.}
\item{fun}{Either a function or a non-empty character string naming the
function to be called.}
\item{agg_per}{A character string providing the time interval of aggregation
that will be appended to units (e.g. \code{"hh-1"}, \code{"week-1"} or
\code{"month-1"}).}
\item{NEE_scor, GPP_scor}{A logical value. Should sign correction of NEE (GPP)
be performed? See Sign Correction in Details.}
\item{quant}{A character vector listing variable names that require
conversion from quantum to energy units before aggregation.}
\item{power}{A character vector listing variable names that require
conversion from power to energy units before aggregation.}
\item{carbon}{A character vector listing variable names that require
conversion from CO2 concentration to C mass flux units before aggregation.}
\item{ET}{A character vector listing variable names that require conversion
from hourly interval to actual measurement interval before aggregation.
Designed for evapotranspiration (ET) typically reported in mm hour-1 for
half-hourly measurements.}
}
\value{
\code{agg_mean}, \code{agg_fun} and \code{agg_sum} produce a data
frame with attributes varnames and units assigned to each respective
column.
\code{agg_fsd} and \code{agg_DT_SD} produce a list with two data frames
\code{mean} and \code{sum} with attributes varnames and units assigned to
each respective column or \code{NULL} value if required columns are not
recognized.
Each produced data frame has a first column called "Intervals" with a vector
of labels describing the aggregation period provided as a factor, and a second
column "days" providing the fraction (or multiple) of days aggregated within
each period.
}
\description{
Utilities that simplify aggregation of data and their uncertainties over
defined time intervals.
}
\details{
\code{agg_mean} and \code{agg_sum} compute mean and sum over intervals
defined by \code{format} and/or \code{breaks} for all columns.
\code{agg_fun} allows applying any function over defined time intervals
(e.g. min, max, median). No unit conversions are attempted. Notice that
\code{agg_mean(x, format)} and \code{agg_fun(x, format, mean)} are
identical.
\code{agg_fsd} and \code{agg_DT_SD} estimate aggregated mean and summed
uncertainties over defined time periods for \code{REddyProc} package
gap-filling and daytime-based flux partitioning outputs, respectively. The
uncertainty aggregation accounts for autocorrelation among records. It is
performed only for autodetected columns with appropriate suffixes (see
further). Note that uncertainty products of \code{agg_fsd} and
\code{agg_DT_SD} are reported as standard deviations (\code{SD}) and require
further correction to represent uncertainty bounds for a given confidence
level (e.g. \code{SD * 1.96} for the 95\% confidence level).
The summarizations are done on a data frame \code{x} with required timestamp
column (\code{x$timestamp}) of class \code{"POSIXt"}. With the exception of
\code{agg_mean}, the timestamp must form a regular sequence without
\code{NA}s because of the time resolution estimation.
Change of aggregation interval can be achieved through \code{breaks} and
\code{format} arguments.
The data frame \code{x} can be \link[=cut.POSIXt]{cut} to custom intervals
using argument \code{breaks}. Note that labels are constructed from the
left-hand end of the intervals and converted to \code{"POSIXct"} class. This
can be useful when aggregating e.g. half-hourly data over hourly
(\code{breaks = "60 mins"}) or three-day (\code{breaks = "3 days"})
intervals.
The formatting of the timestamp (original or after cutting) using
\code{format} is another (preferable) way to change aggregation intervals.
For example changing original \code{"POSIXt"} time format (\code{"\%Y-\%m-\%d
\%H:\%M:\%S"}) to \code{"\%Y-\%m-\%d"}, \code{"\%W_\%y"}, \code{"\%m-\%y"} or
\code{"\%Y"} will result in daily, weekly, monthly or yearly aggregation
intervals, respectively. Note that an improper \code{format} can suppress the
expected effect of \code{breaks}.
\code{agg_fsd} and \code{agg_DT_SD} require certain columns with defined
suffixes in order to evaluate uncertainty correctly. These columns are a
product of \code{REddyProc} package gap-filling and flux partitioning methods
and are documented here:
\url{https://www.bgc-jena.mpg.de/bgi/index.php/Services/REddyProcWebOutput}.
Detailed description of uncertainty aggregation is available here:
\url{https://github.com/bgctw/REddyProc/blob/master/vignettes/aggUncertainty.md}.
\code{agg_fsd} requires columns with suffixes \code{_fall}, \code{_orig},
\code{_fqc} and \code{_fsd} for each variable.
\code{agg_DT_SD} requires corresponding columns with \code{\link{regexp}}
patterns \code{"^NEE_.*_orig$"}, \code{"^NEE_.*_fqc$"}, \code{"^Reco_DT_"},
\code{"^GPP_DT_"}, \code{"^Reco_DT_.*_SD$"} and \code{"^GPP_DT_.*_SD$"}.
}
\section{Unit Conversion}{
In case of aggregation using \code{sum}, i.e.
\code{agg_sum}, \code{agg_fsd} and \code{agg_DT_SD}, appropriate unit
conversion can be applied to columns defined by \code{quant}, \code{power},
\code{carbon} and \code{ET} arguments. The conversion factor used for
approximate PAR conversion from umol m-2 s-1 to W m-2 is 4.57 as proposed
by Thimijan and Heins (1983; Tab. 3, Lightsource - Sun and sky, daylight).
}
\section{Sign Correction}{
Although the sign convention used for measured NEE
(Net Ecosystem Exchange) typically denotes negative fluxes as CO2 uptake,
summed NEE is typically reported with the opposite sign convention and is
assumed to converge to NEP (Net Ecosystem Production), especially over
longer aggregation intervals. Similarly, estimated negative GPP (Gross
Primary Production) typically denotes carbon sink but should be corrected
to positive values if summed over a time period.
There is no reliable way to guess the sign convention used in the data set.
Thus \code{agg_sum} allows specifying whether NEE (\code{NEE_scor}) and/or
GPP (\code{GPP_scor}) sign correction is required. By default
\code{NEE_scor = TRUE} and \code{GPP_scor = FALSE} considering sign
conventions used in \code{REddyProc} package. \code{agg_sum} automatically
detects all NEE and GPP columns in \code{x} using regular expressions and
applies the sign correction settings.
}
\section{References}{
Bayley, G. and Hammersley, J., 1946. The "Effective"
Number of Independent Observations in an Autocorrelated Time Series.
Supplement to the Journal of the Royal Statistical Society, 8(2), 184-197.
doi: \url{https://doi.org/10.2307/2983560}
Thimijan, R.W. and Heins, R.D., 1983. Photometric, Radiometric, and Quantum
Light Units of Measure: A Review of Procedures for Interconversion.
HortScience, Vol. 18(6), 818-822.
Zieba, A. and Ramza, P., 2011. Standard Deviation of the Mean of
Autocorrelated Observations Estimated with the Use of the Autocorrelation
Function Estimated From the Data. Metrology and Measurement Systems, 18(4),
529-542. doi: \url{https://doi.org/10.2478/v10178-011-0052-x}
}
\examples{
\dontrun{
library(REddyProc)
library(bigleaf)
# Load example dataset from REddyProc package and use selected variables
DETha98 <- fConvertTimeToPosix(Example_DETha98, 'YDH', Year = 'Year',
Day = 'DoY', Hour = 'Hour')[-(2:4)]
EProc <- sEddyProc$new('DE-Tha', DETha98,
c('NEE', 'LE', 'Rg', 'Tair', 'VPD', 'Ustar'))
names(DETha98)[1] <- "timestamp"
# Center timestamp to represent the middle of the averaging period
# - necessary for reliable data aggregation
DETha98$timestamp <- DETha98$timestamp - 60*15
# Aggregate by averaging
# - by default any NA value in an aggregation period produces NA
agg_mean(DETha98, "\%b-\%y")
agg_mean(DETha98, "\%b-\%y", na.rm = TRUE)
# Aggregate by summation
# - sign and unit conversions are demonstrated
(zz <- agg_sum(DETha98, "\%b-\%y", agg_per = "month-1"))
openeddy::units(zz, names = TRUE)
# Extract minimum and maximum within the intervals
# - two notations possible: a function (min) or function name ("max")
agg_fun(DETha98, "\%b-\%y", min, na.rm = TRUE)
agg_fun(DETha98, "\%b-\%y", "max", na.rm = TRUE)
# Gap-fill NEE using approximate fixed uStar threshold
EProc$sMDSGapFillAfterUstar('NEE', uStarTh = 0.3, FillAll = TRUE)
# Gap-fill all other selected variables
for (i in c('LE', 'Rg', 'Tair', 'VPD')) EProc$sMDSGapFill(i, FillAll = TRUE)
# Export results and convert latent heat (LE) to evapotranspiration (ET)
# - typical ET units are mm hour-1 independent of actual measurement interval
results <- cbind(DETha98["timestamp"], EProc$sExportResults())
LE_vars <- c("LE_orig", "LE_f", "LE_fqc", "LE_fall", "LE_fsd")
ET_vars <- gsub("LE", "ET", LE_vars)
results[, ET_vars] <-
lapply(LE_vars,
function(x) LE.to.ET(results[, x], results$Tair_f) * 3600)
openeddy::units(results[ET_vars]) <- rep("mm hour-1", length(ET_vars))
# Overwrite ET_fqc with proper values
results$ET_fqc <- results$LE_fqc
openeddy::units(results$ET_fqc) <- "-"
# Aggregate uncertainty derived from look-up table standard deviation (SD)
# - sign and unit conversions are demonstrated
(unc <- agg_fsd(results, "\%b-\%y", agg_per = "month-1"))
lapply(unc, openeddy::units, names = TRUE)
# Perform Lasslop et al. (2010) flux partitioning based on DayTime (DT) data
# - Reco and GPP uncertainty evaluation is available only for this method
# - Reichstein et al. (2005) Reco model uncertainty is not exported and
# GPP is computed as residual (not modelled)
EProc$sSetLocationInfo(LatDeg = 51.0, LongDeg = 13.6, TimeZoneHour = 1)
EProc$sGLFluxPartition(suffix = "uStar")
# Aggregate uncertainty derived from SD of Reco and GPP models
# - unit conversions are demonstrated
results <- cbind(DETha98["timestamp"], EProc$sExportResults())
(unc_DT <- agg_DT_SD(results, "\%b-\%y", agg_per = "month-1"))
lapply(unc_DT, openeddy::units, names = TRUE)
}
}
\seealso{
\code{\link{aggregate}}, \code{\link{as.POSIXlt}},
\code{\link{cut.POSIXt}}, \code{\link{mean}}, \code{\link{regexp}},
\code{\link{strftime}}, \code{\link{sum}}, \code{\link{timezones}},
\code{\link{varnames}}
}