Skip to content
85 changes: 29 additions & 56 deletions docs/Multi-year_on_year_example.ipynb

Large diffs are not rendered by default.

30 changes: 11 additions & 19 deletions docs/sphinx/source/changelog/v3.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,33 @@ v3.2.0 (X, X, 2026)

Enhancements
------------
* :py:func:`~rdtools.degradation.degradation_year_on_year` has new parameter ``label=``
to return the calc_info['YoY_values'] as either right labeled (default), left or
center labeled. (:issue:`459`)
* :py:func:`~rdtools.plotting.degradation_timeseries_plot` now defaults to rolling
median, centered on the timestamp (pd.rolling(center=True)), and reduces
``min_periods`` from ``rolling_days//2`` to ``rolling_days//4``.
(:issue:`455`)
* :py:func:`~rdtools.degradation.degradation_year_on_year` has new parameter ``multi_yoy``
  (default False) to trigger multiple YoY degradation calculations similar to Hugo Quest et
  al. (2023). In this mode, instead of a series of 1-year duration slopes, 2-year, 3-year, etc.
  slopes are also included. ``calc_info['YoY_values']`` returns a non-monotonic index
  in this mode due to multiple overlapping annual slopes. (:issue:`394`)
* :py:func:`~rdtools.degradation.degradation_year_on_year` now returns
``calc_info['YoY_times']`` DataFrame with ``dt_right``, ``dt_center``, and ``dt_left``
columns for each YoY slope. (:issue:`459`)
* :py:func:`~rdtools.plotting.degradation_timeseries_plot` now supports ``multi_yoy=True``
data by resampling overlapping YoY values to their mean. A warning is issued when this
resampling occurs. (:issue:`394`)
data. To avoid over-filtering, only slopes on the order of 2 years (731 days) or
shorter are included in the center-labeled median. (:issue:`394`) (:pull:`498`)
* :py:func:`~rdtools.plotting.degradation_timeseries_plot` refactored to compute a rolling
  median over all slopes whose center timestamp falls inside the window.
  ``min_periods`` reduced to ``rolling_days//4``.
  (:issue:`455`) (:pull:`498`)
* :py:func:`~rdtools.plotting.degradation_summary_plots` ``detailed=True`` mode now
properly handles points used odd vs even number of times (not just 0, 1, 2).
(:issue:`394`)
* :py:func:`~rdtools.degradation.degradation_year_on_year` now returns
``calc_info['YoY_times']`` DataFrame with ``dt_right``, ``dt_center``, and ``dt_left``
columns for each YoY slope. (:issue:`459`)
* Added new example notebook ``docs/Multi-year_on_year_example.ipynb`` demonstrating the
``label='center'`` and ``multi_yoy=True`` features of
``multi_yoy=True`` features of
:py:func:`~rdtools.degradation.degradation_year_on_year`. (:issue:`394`)
* :py:meth:`~rdtools.analysis_chains.TrendAnalysis.sensor_analysis` and
:py:meth:`~rdtools.analysis_chains.TrendAnalysis.clearsky_analysis` now
explicitly default ``yoy_kwargs={"label": "right"}``.

Bug Fixes
---------
* Fixed ``usage_of_points`` calculation in :py:func:`~rdtools.degradation.degradation_year_on_year`
to properly handle ``multi_yoy=True`` mode with overlapping slopes. (:issue:`394`)


Maintenance
-----------
* Added ``_avg_timestamp_old_Pandas`` helper function for pandas <2.0 compatibility
Expand All @@ -63,8 +56,7 @@ Testing
decreasing).
* Added test for ``multi_yoy=True`` parameter in ``degradation_year_on_year``.
* Added tests for :py:func:`~rdtools.plotting.degradation_timeseries_plot`
covering ``label='center'``, ``label='left'``, multi-YoY duplicate index
handling, and ``KeyError`` path.
covering multi-YoY duplicate index handling, and ``KeyError`` path.
* Set matplotlib backend to ``Agg`` in test ``conftest.py`` to avoid tkinter issues.


Expand Down
6 changes: 2 additions & 4 deletions rdtools/analysis_chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,7 @@ def _clearsky_preprocess(self):
)

def sensor_analysis(
self, analyses=["yoy_degradation"], yoy_kwargs={"label": "right"}, srr_kwargs={}
self, analyses=["yoy_degradation"], yoy_kwargs={}, srr_kwargs={}
):
"""
Perform entire sensor-based analysis workflow.
Expand All @@ -1014,7 +1014,6 @@ def sensor_analysis(
and 'srr_soiling'
yoy_kwargs : dict
kwargs to pass to :py:func:`rdtools.degradation.degradation_year_on_year`
default is {"label": "right"}, which will right-label the YoY slope values.
srr_kwargs : dict
kwargs to pass to :py:func:`rdtools.soiling.soiling_srr`

Expand Down Expand Up @@ -1042,7 +1041,7 @@ def sensor_analysis(
self.results["sensor"] = sensor_results

def clearsky_analysis(
self, analyses=["yoy_degradation"], yoy_kwargs={"label": "right"}, srr_kwargs={}
self, analyses=["yoy_degradation"], yoy_kwargs={}, srr_kwargs={}
):
"""
Perform entire clear-sky-based analysis workflow. Results are stored
Expand All @@ -1055,7 +1054,6 @@ def clearsky_analysis(
and 'srr_soiling'
yoy_kwargs : dict
kwargs to pass to :py:func:`rdtools.degradation.degradation_year_on_year`.
default is {"label": "right"}, which will right-label the YoY slope values.
srr_kwargs : dict
kwargs to pass to :py:func:`rdtools.soiling.soiling_srr`

Expand Down
17 changes: 4 additions & 13 deletions rdtools/degradation.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,6 @@ def degradation_year_on_year(energy_normalized, recenter=True,
If `uncertainty_method` is 'circular_block', `block_length`
determines the length of the blocks used in the circular block bootstrapping
in number of days. Must be shorter than a third of the time series.
label : {'right', 'center', 'left'}, default 'right'
Which Year-on-Year slope edge to label.
multi_yoy : bool, default False
Whether to return the standard Year-on-Year slopes where each slope
is calculated over points separated by 365 days (default) or
Expand All @@ -226,8 +224,8 @@ def degradation_year_on_year(energy_normalized, recenter=True,
degradation rate estimate
calc_info : dict

* `YoY_values` - pandas series of year on year slopes, either right
left or center labeled, depending on the `label` parameter.
* `YoY_values` - pandas series of year on year slopes, right
labeled.
* `renormalizing_factor` - float of value used to recenter data
* `exceedance_level` - the degradation rate that was outperformed with
probability of `exceedance_prob`
Expand All @@ -242,10 +240,6 @@ def degradation_year_on_year(energy_normalized, recenter=True,
energy_normalized.name = 'energy'
energy_normalized.index.name = 'dt'

if label not in {"right", "left", "center"}:
raise ValueError(f"Unsupported value {label} for `label`."
" Must be 'right', 'left' or 'center'.")

# Detect less than 2 years of data. This is complicated by two things:
# - leap days muddle the precise meaning of "two years of data".
# - can't just check the number of days between the first and last
Expand Down Expand Up @@ -332,11 +326,8 @@ def degradation_year_on_year(energy_normalized, recenter=True,
YoY_times = YoY_times[['dt', 'dt_center', 'dt_left']]
YoY_times = YoY_times.rename(columns={'dt': 'dt_right'})

YoY_times.set_index(YoY_times[f'dt_{label}'], inplace=True)
YoY_times.index.name = 'dt'

# now apply either right, left, or center label index to the yoy_result
yoy_result.index = YoY_times[f'dt_{label}']
# now apply right label index to the yoy_result
yoy_result.index = YoY_times.index
yoy_result.index.name = 'dt'

# the following is throwing a futurewarning if infer_objects() isn't included here.
Expand Down
88 changes: 63 additions & 25 deletions rdtools/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,7 @@ def degradation_timeseries_plot(yoy_info, rolling_days=365, include_ci=True,
yoy_info : dict
a dictionary with keys:
* YoY_values - pandas series of year on year slopes
* YoY_times - pandas series of corresponding timestamps
rolling_days: int, default 365
Number of days for rolling window. Note that
the window must contain at least 25% of datapoints to be included in
Expand All @@ -465,6 +466,9 @@ def degradation_timeseries_plot(yoy_info, rolling_days=365, include_ci=True,
It should be noted that ``yoy_info`` is an output
from :py:func:`rdtools.degradation.degradation_year_on_year`.

Also, if a multi-YoY analysis is passed in, only slopes of length <=
731 days are considered in this time-series plot, to avoid over-smoothing.

Returns
-------
matplotlib.figure.Figure
Expand All @@ -477,49 +481,83 @@ def _bootstrap(x, percentile, reps):
mb1 = np.nanmedian(xb1, axis=0)
return np.percentile(mb1, percentile)

def _roll_median(df, win_right, rolling_days, min_periods):
"""
rolling median
df includes following columns: dt_center, yoy
win_right: Datetime of the right end of the rolling window
rolling_days: number of days in the rolling window
min_periods: minimum number of points in the rolling window to return a value

returns: median of yoy values if the center of the slope is in the rolling window,
or NaN if there are fewer than min_periods points. Time index of the returned
value is centered on the rolling window.
"""
win_left = win_right - pd.Timedelta(days=rolling_days)
in_window = (df['dt_center'] <= win_right) & (df['dt_center'] >= win_left)
if in_window.sum() < min_periods:
return np.nan
else:
return df.loc[in_window, 'yoy'].median()

try:
results_values = yoy_info['YoY_values']
results = yoy_info['YoY_times'].join(yoy_info['YoY_values'].rename('yoy'))
except KeyError:
raise KeyError("yoy_info input dictionary does not contain key `YoY_values`.")
raise KeyError("yoy_info input dictionary does not contain keys"
" `YoY_times` and `YoY_values`.")

if plot_color is None:
plot_color = 'tab:orange'
if ci_color is None:
ci_color = 'C0'

results_values = results_values.sort_index()
if results_values.index.has_duplicates:
# this occurs with degradation_year_on_year(multi_yoy=True). resample to daily mean
warnings.warn(
"Input `yoy_info['YoY_values']` appears to have multiple annual "
"slopes per day, which is the case if "
"degradation_year_on_year(multi_yoy=True). "
"Proceeding to plot with a daily mean which will average out the "
"time-series trend. Recommend re-running with "
"degradation_year_on_year(multi_yoy=False)."
)
roller = results_values.resample('D').mean().rolling(f'{rolling_days}d',
min_periods=rolling_days//4,
center=True)
else:
roller = results_values.rolling(f'{rolling_days}d', min_periods=rolling_days//4,
center=True)
# unfortunately it seems that you can't return multiple values in the rolling.apply() kernel.
# TODO: figure out some workaround to return both percentiles in a single pass
# filter to only 2 years + 1 day length slopes to avoid over-smoothing in the multi-yoy case
results = results[(results['dt_right'] - results['dt_left']) <= pd.Timedelta(days=365 * 2 + 1)]

# loop through results in a daily timeindex from min(dt_left) to max(dt_right)
# Apply rolling median and bootstrap confidence intervals

timeindex = pd.date_range(start=results['dt_left'].min(),
end=results['dt_right'].max(), freq='D')
results_median = pd.Series(index=timeindex, dtype=float)
for win_center in timeindex:
win_right = win_center + pd.Timedelta(days=rolling_days/2)
results_median.loc[win_center] = _roll_median(df=results,
win_right=win_right,
rolling_days=rolling_days,
min_periods=rolling_days//4)

# calculate confidence intervals for each point in the rolling median.
if include_ci:
ci_lower = roller.apply(_bootstrap, kwargs={'percentile': 2.5, 'reps': 100}, raw=True)
ci_upper = roller.apply(_bootstrap, kwargs={'percentile': 97.5, 'reps': 100}, raw=True)
# downsample the timeindex to every 2 days to speed up the bootstrap calculation,
# since it is very slow.
timeindex = timeindex[::2]
ci_lower = pd.Series(index=timeindex, dtype=float)
ci_upper = pd.Series(index=timeindex, dtype=float)
for win_center in timeindex:
win_right = win_center + pd.Timedelta(days=rolling_days/2)
win_left = win_center - pd.Timedelta(days=rolling_days/2)
in_window = (results['dt_center'] <= win_right) & (results['dt_center'] >= win_left)
if in_window.sum() < rolling_days//4:
ci_lower.loc[win_center] = np.nan
ci_upper.loc[win_center] = np.nan
else:
ci_lower.loc[win_center] = _bootstrap(results.loc[in_window, 'yoy'],
percentile=2.5, reps=50)
ci_upper.loc[win_center] = _bootstrap(results.loc[in_window, 'yoy'],
percentile=97.5, reps=50)

if fig is None:
fig, ax = plt.subplots()
else:
ax = fig.axes[0]
if include_ci:
ax.fill_between(ci_lower.index,
ci_lower, ci_upper, color=ci_color)
median = roller.median()
median = results_median.sort_index()
ax.plot(median.index,
median, color=plot_color, **kwargs)
ax.axhline(results_values.median(), c='k', ls='--')
ax.axhline(results_median.median(), c='k', ls='--')
plt.ylabel('Degradation trend (%/yr)')
fig.autofmt_xdate()

Expand Down
29 changes: 0 additions & 29 deletions rdtools/test/degradation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,35 +202,6 @@ def test_usage_of_points(self):
self.test_corr_energy[input_freq])
self.assertTrue((np.sum(rd_result[2]['usage_of_points'])) == 1462)

def test_degradation_year_on_year_label_center(self):
''' Test degradation_year_on_year with label="center". '''

funcName = sys._getframe().f_code.co_name
logging.debug('Running {}'.format(funcName))

# test YOY degradation calc with label='center'
input_freq = 'D'
rd_result = degradation_year_on_year(
self.test_corr_energy[input_freq], label='center')
self.assertAlmostEqual(rd_result[0], 100 * self.rd, places=1)
rd_result1 = degradation_year_on_year(
self.test_corr_energy[input_freq])
rd_result2 = degradation_year_on_year(
self.test_corr_energy[input_freq], label='right')
pd.testing.assert_index_equal(rd_result1[2]['YoY_values'].index,
rd_result2[2]['YoY_values'].index)
# 365/2 days difference between center and right label
assert (rd_result2[2]['YoY_values'].index -
rd_result[2]['YoY_values'].index).mean().days == \
pytest.approx(183, abs=1)

with pytest.raises(ValueError):
degradation_year_on_year(self.test_corr_energy[input_freq],
label='LEFT')
with pytest.raises(ValueError):
degradation_year_on_year(self.test_corr_energy[input_freq],
label=None)

def test_avg_timestamp_old_Pandas(self):
"""Test the _avg_timestamp_old_Pandas function for correct averaging."""
from rdtools.degradation import _avg_timestamp_old_Pandas
Expand Down
66 changes: 5 additions & 61 deletions rdtools/test/plotting_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import matplotlib
import plotly
import pytest
import warnings
import re
import copy

Expand Down Expand Up @@ -59,20 +58,6 @@ def degradation_info(degradation_power_signal):
return degradation_power_signal, rd, rd_ci, calc_info


@pytest.fixture()
def degradation_info_center(degradation_power_signal):
# center-labeled YOY output for time-series degradation plot
rd, rd_ci, calc_info = degradation_year_on_year(degradation_power_signal, label='center')
return degradation_power_signal, rd, rd_ci, calc_info


@pytest.fixture()
def degradation_info_left(degradation_power_signal):
# left-labeled YOY output for time-series degradation plot
rd, rd_ci, calc_info = degradation_year_on_year(degradation_power_signal, label='left')
return degradation_power_signal, rd, rd_ci, calc_info


def test_degradation_summary_plots(degradation_info):
power, yoy_rd, yoy_ci, yoy_info = degradation_info

Expand Down Expand Up @@ -265,49 +250,13 @@ def test_availability_summary_plots_empty(availability_analysis_object):
plt.close('all')


def test_degradation_timeseries_plot(degradation_info, degradation_info_center,
degradation_info_left):
def test_degradation_timeseries_plot(degradation_info):
power, yoy_rd, yoy_ci, yoy_info = degradation_info

# test defaults (label='right')
# test defaults
result_right = degradation_timeseries_plot(yoy_info)
assert_isinstance(result_right, plt.Figure)
xlim_right = result_right.get_axes()[0].get_xlim()[0]

# test label='center'
result_center = degradation_timeseries_plot(yoy_info=degradation_info_center[3],
include_ci=False)
assert_isinstance(result_center, plt.Figure)
xlim_center = result_center.get_axes()[0].get_xlim()[0]

# test label='left'
result_left = degradation_timeseries_plot(yoy_info=degradation_info_left[3],
include_ci=False)
assert_isinstance(result_left, plt.Figure)
xlim_left = result_left.get_axes()[0].get_xlim()[0]

# test default label matches label='right'
result_default = degradation_timeseries_plot(yoy_info=yoy_info, include_ci=False)
xlim_default = result_default.get_axes()[0].get_xlim()[0]
assert xlim_default == xlim_right

# Check that the xlim values are offset as expected
# right > center > left (since offset_days increases)
assert xlim_right > xlim_center > xlim_left

# The expected difference from right to left is 365 days (1 yrs), allow 5% tolerance
expected_diff = 365
actual_diff = (xlim_right - xlim_left)
tolerance = expected_diff * 0.05
assert abs(actual_diff - expected_diff) <= tolerance, \
f"difference of right-left xlim {actual_diff} not within 5% of 1 yr."

# The expected difference from right to center is 182 days, allow 5% tolerance
expected_diff2 = 182
actual_diff2 = (xlim_right - xlim_center)
tolerance2 = expected_diff2 * 0.05
assert abs(actual_diff2 - expected_diff2) <= tolerance2, \
f"difference of right-center xlim {actual_diff2} not within 5% of 1/2 year."
result_right.get_axes()[0].get_xlim()[0]

with pytest.raises(KeyError):
degradation_timeseries_plot({'a': 1}, include_ci=False)
Expand All @@ -318,12 +267,7 @@ def test_degradation_timeseries_plot(degradation_info, degradation_info_center,
new_val = yoy_multi['YoY_values'].iloc[100]
yoy_values_multi = pd.concat([yoy_multi['YoY_values'], pd.Series([new_val], index=[new_idx])])
yoy_multi['YoY_values'] = yoy_values_multi
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
result = degradation_timeseries_plot(yoy_info=yoy_multi, include_ci=False)
assert_isinstance(result, plt.Figure)
assert len(w) > 0, "Expected at least one warning to be raised"
assert any(issubclass(warn.category, UserWarning) for warn in w), \
"Expected a UserWarning to be raised for multi-YoY values"
result = degradation_timeseries_plot(yoy_info=yoy_multi, include_ci=False)
assert_isinstance(result, plt.Figure)

plt.close('all')
Loading