Skip to content

Commit bde314f

Browse files
committed
Implement weekday sensitive noise
1 parent 4a02fc6 commit bde314f

File tree

2 files changed

+137
-19
lines changed

2 files changed

+137
-19
lines changed

forecast_structs.py

+13-9
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
from scipy.interpolate import UnivariateSpline
88

99

10+
PD_WEEKDAY = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] # Weekday code labels as defined by Pandas. 0 = Mon, etc
11+
12+
1013
class CDCDataBunch:
1114

1215
def __init__(self):
@@ -33,11 +36,11 @@ class ForecastExecutionData:
3336
Data for the handling of the forecast pipeline for a single state.
3437
Handled inside the function forecast_state().
3538
"""
36-
3739
def __init__(self):
3840
# Input parameters and data
3941
self.state_name = None
4042
self.state_series: pd.Series = None
43+
self.preproc_series: pd.Seires = None
4144
self.nweeks_fore = None
4245
self.except_params: dict = None
4346
self.tg_params: dict = None
@@ -85,12 +88,12 @@ def __init__(self):
8588

8689
# Synthesis
8790
self.synth_name = None
88-
self.rt_fore2d: np.ndarray = None
91+
self.rt_fore2d: np.ndarray = None # Synthesized R(t) ensemble. a[i_sample, i_day]
8992
self.num_ct_samples = None
9093

9194
# Reconstruction
92-
self.ct_fore2d: np.ndarray = None
93-
self.ct_fore2d_weekly: np.ndarray = None
95+
self.ct_fore2d: np.ndarray = None # Synthesized cases time series ensemble. a[i_sample, i_day]
96+
self.ct_fore2d_weekly: np.ndarray = None # a[i_sample, i_week]
9497
# self.weekly_quantiles: np.ndarray = None
9598

9699

@@ -111,7 +114,7 @@ def __init__(self):
111114
# Preprocesing results
112115
self.t_daily: np.ndarray = None
113116
self.past_daily_tlabels: pd.DatetimeIndex = None # Daily time stamps for the ROI
114-
# self.fore_daily_tlabels: pd.DatetimeIndex = None # Daily time stamps for the forecast region
117+
self.fore_daily_tlabels: pd.DatetimeIndex = None # Daily time stamps for the forecast region
115118
self.ct_past: np.ndarray = None # Preprocessed daily data that goes down the forecast pipeline
116119
self.float_data_daily: np.ndarray = None # Float data after denoising process
117120
self.noise_obj: AbstractNoise = None
@@ -122,11 +125,11 @@ def __init__(self):
122125

123126
# Synthesis
124127
self.synth_name = None
125-
self.rt_fore2d: np.ndarray = None
128+
self.rt_fore2d: np.ndarray = None # Synthesized R(t) ensemble. a[i_sample, i_day]
126129
self.num_ct_samples = None
127130

128131
# Reconstruction
129-
self.ct_fore2d: np.ndarray = None
132+
self.ct_fore2d: np.ndarray = None # a[i_sample, i_day]
130133
# self.ct_fore2d_weekly: np.ndarray = None
131134
# self.weekly_quantiles: np.ndarray = None
132135

@@ -200,6 +203,7 @@ def __init__(self, **kwargs):
200203
def fit(self, data: pd.Series, denoised: pd.Series):
201204
"""Abstract method. Extracts parameters from the (noisy) data and denoised series. Feeds inner variables."""
202205

203-
def generate(self, new_denoised: np.ndarray):
204-
"""Abstract method. Must return a new data with calibrated noise incorporated."""
206+
def generate(self, new_denoised: np.ndarray, time_labels: pd.Index):
207+
"""Abstract method. Must return a new data with calibrated noise incorporated.
208+
"""
205209
raise NotImplementedError

preprocessing.py

+124-10
Original file line numberDiff line numberDiff line change
@@ -23,27 +23,102 @@ def apply_lowpass_filter(signal_array, cutoff=0.4, order=2):
2323
return filtfilt(butter_b, butter_a, signal_array, method="gust")
2424

2525

26+
def calc_weekday_noise_ensembles(data: pd.Series, denoised: pd.Series, relative=True):
27+
"""
28+
Produce an ensemble of noise values grouped by weekday.
29+
Output signature: a[i_weekday, i_week]
30+
"""
31+
# Group by weekdays
32+
data_gp = data.groupby(lambda d: d.weekday())
33+
deno_gp = denoised.groupby(lambda d: d.weekday())
34+
35+
# Create weekdau vs week 2D arrays (weekays) containing data grouped per weekday. Signature: a[i_weekday, i_week]
36+
data_weekay = np.array([g[1].values for g in data_gp]) # WARNING: MISSING DATA or INCOMPLETE WEEK WILL RAISE ERROR
37+
deno_weekay = np.array([g[1].values for g in deno_gp])
38+
39+
# For regular data, a 2D array should work well.
40+
result = data_weekay - deno_weekay
41+
if relative:
42+
result /= deno_weekay # May produce NAN for zero points
43+
44+
# #If data is irregular (not same amount of samples in each weekday), this should work. Results is a list, not array
45+
# result = [data - deno for data, deno in zip(data_weekay, deno_weekay)]
46+
# if relative:
47+
# result = [dif / deno for dif, deno in zip(result, deno_weekay)]
48+
49+
return result
50+
51+
52+
def generate_weekday_noise(noise_call, weekday_params: np.ndarray, time_labels: pd.DatetimeIndex, num_samples):
53+
"""
54+
Produce weekday sensitive noise for a generic distribution (given by noise_call callable).
55+
Weekday_params is either a 1D or 2D array with parameters of the distribution:
56+
* If it's 1D, noise_call must accept only one argument, and weekday_params must have size 7 (one for each weekday).
57+
* If 2D, then expected signature is a[i_weekday, i_param], where the number of parameters of noise_call is given by
58+
the second dimension size of weekday_params.
59+
60+
The signature of noise_call must be:
61+
noise_call(*params, size=num_samples)
62+
63+
Where the size of params is the same as the number of columns in weekday_params.
64+
65+
Output signature: noise[i_sample, i_t]
66+
"""
67+
weeklen = 7
68+
69+
# Weekday params check
70+
w_shape = weekday_params.shape
71+
if w_shape[0] != weeklen:
72+
raise ValueError(f"Hey, weekday_params must have size {weeklen} but has shape {w_shape}.")
73+
74+
if len(w_shape) == 1: # Convert 1D array
75+
weekday_params = np.array(weekday_params).T # Make it a column matrix
76+
77+
# -----------
78+
noise = np.ones(shape=(num_samples, time_labels.shape[0])) # a[i_sample, i_t]
79+
80+
for i_date, date in enumerate(time_labels):
81+
wkday = date.weekday()
82+
noise[:, i_date] = noise_call(*weekday_params[wkday], size=num_samples)
83+
84+
return noise
85+
86+
87+
def choose_roi_for_denoise(exd: ForecastExecutionData):
88+
if exd.preproc_params.get("use_pre_roi", "False"):
89+
return exd.preproc_series
90+
else:
91+
return exd.state_series
92+
93+
2694
# ----------------------------------------------------------------------------------------------------------------------
2795
# DENOISING METHODS (COVID-HOSP FOCUSED) (remember to update DENOISE CALLABLE dict)
2896
# ----------------------------------------------------------------------------------------------------------------------
2997
# noinspection PyUnusedLocal
3098
def denoise_lowpass(exd: ForecastExecutionData, fc: CovHospForecastOutput, filt_order=2, cutoff=0.4, **kwargs):
31-
return pd.Series(apply_lowpass_filter(exd.state_series.values, cutoff, filt_order),
32-
index=exd.state_series.index)
99+
roi = choose_roi_for_denoise(exd)
100+
return pd.Series(apply_lowpass_filter(roi.values, cutoff, filt_order),
101+
index=roi.index)
33102

34103

35104
# noinspection PyUnusedLocal
36105
def denoise_polyfit(exd: ForecastExecutionData, fc: CovHospForecastOutput, poly_degree=3, **kwargs):
37-
poly_coef, poly_resid = np.polyfit(fc.t_daily, exd.state_series.values, deg=poly_degree, full=True)[0:2]
106+
# Selects proper roi and its integer index
107+
roi = choose_roi_for_denoise(exd)
108+
t_daily = np.arange(roi.shape[0])
109+
110+
poly_coef, poly_resid = np.polyfit(t_daily, roi.values, deg=poly_degree, full=True)[0:2]
38111
poly_f = np.poly1d(poly_coef) # Polynomial callable class.
39-
return pd.Series(poly_f(fc.t_daily), index=exd.state_series.index)
112+
113+
return pd.Series(poly_f(t_daily), index=roi.index)
40114

41115

42116
# noinspection PyUnusedLocal
43117
def denoise_rolling_average(exd: ForecastExecutionData, fc: CovHospForecastOutput, rollav_window=4, **kwargs):
44-
denoised = exd.state_series.rolling(rollav_window).mean() # Rolling average
45-
denoised[:rollav_window-1] = exd.state_series[:rollav_window-1] # Fill NAN values with original ones
46-
# fc.denoised_weekly[:] *= exd.state_series[-1] / data_weekly[-1] if data_weekly[-1] else 1
118+
roi = choose_roi_for_denoise(exd)
119+
denoised = roi.rolling(rollav_window).mean() # Rolling average
120+
denoised[:rollav_window-1] = roi[:rollav_window-1] # Fill NAN values with original ones
121+
# fc.denoised_weekly[:] *= roi[-1] / data_weekly[-1] if data_weekly[-1] else 1
47122
# # ^ Rescale to match last day
48123

49124
return denoised
@@ -77,26 +152,65 @@ def __init__(self, **kwargs):
77152
self.mean = None
78153
self.std = None
79154
self.coef = kwargs.get("noise_coef", 1.0) # Multiply noise by this coefficient
155+
self.seed = kwargs.get("noise_seed", None) # Seed
156+
self._rng = np.random.default_rng(self.seed)
80157

81158
def fit(self, data: pd.Series, denoised: pd.Series):
82159
reldev = calc_relative_dev_sample(data, denoised)
83160
self.mean = reldev.mean()
84161
# self.std = reldev.std() / 2. # -()- Use appropriate standard deviation
85162
self.std = reldev.std() # Use doubled standard deviation
86163

87-
def generate(self, new_denoised: np.ndarray):
88-
noise = np.maximum(self.coef * np.random.normal(self.mean, self.std, size=new_denoised.shape), -1.) # Clamped above -1
164+
def generate(self, new_denoised: np.ndarray, time_labels):
165+
"""new_denoised: a[i_sample, i_t]"""
166+
noise = np.maximum(self.coef * self._rng.normal(self.mean, self.std, size=new_denoised.shape), -1.) # Clamped above -1
167+
return new_denoised * (1. + noise)
168+
169+
170+
class NormalWeekdayNoise(AbstractNoise):
171+
"""Weekday-sensitive Gaussian noise."""
172+
def __init__(self, **kwargs):
173+
super().__init__(**kwargs)
174+
self.mean_array = None
175+
self.std_array = None
176+
self.coef = kwargs.get("noise_coef", 1.0) # Multiply noise by this coefficient
177+
self.seed = kwargs.get("noise_seed", None) # Seed
178+
self._rng = np.random.default_rng(self.seed)
179+
180+
def fit(self, data: pd.Series, denoised: pd.Series):
181+
"""Calculate an array of means and standard deviations, one for each weekday."""
182+
dif_weekay = calc_weekday_noise_ensembles(data, denoised, relative=True) # Sample of differences
183+
184+
# One set of parameters for each day of the week, sorted by pd_weekday
185+
self.mean_array = np.fromiter((x.mean() for x in dif_weekay), dtype=float)
186+
self.std_array = np.fromiter((x.std() for x in dif_weekay), dtype=float)
187+
188+
def _noise_call(self, mean, std, size):
189+
"""Callable to generate the normal noise."""
190+
return self._rng.normal(mean, std, size=size)
191+
192+
def generate(self, new_denoised: np.ndarray, time_labels):
193+
"""new_denoised: a[i_sample, i_t]"""
194+
params = np.array([self.mean_array, self.std_array]).T
195+
num_samples = new_denoised.shape[0]
196+
noise = generate_weekday_noise(self._noise_call, params, time_labels, num_samples)
197+
noise = np.maximum(self.coef * noise, -1.) # Clamped above -1. Applies coefficient
198+
89199
return new_denoised * (1. + noise)
90200

201+
# def generate(self, new_denoised, time_labels):
202+
# return new_denoised
203+
91204

92205
class NoneNoise(AbstractNoise):
93206
"""Produces zero noise."""
94-
def generate(self, new_denoised):
207+
def generate(self, new_denoised, time_labels):
95208
return new_denoised
96209

97210

98211
NOISE_CLASS = {
99212
"normal": NormalMultNoise,
213+
"weekday_normal": NormalWeekdayNoise,
100214
"none": NoneNoise,
101215
}
102216

0 commit comments

Comments
 (0)