@@ -23,27 +23,102 @@ def apply_lowpass_filter(signal_array, cutoff=0.4, order=2):
23
23
return filtfilt (butter_b , butter_a , signal_array , method = "gust" )
24
24
25
25
26
def calc_weekday_noise_ensembles(data: pd.Series, denoised: pd.Series, relative=True):
    """
    Produce an ensemble of noise values grouped by weekday.

    The noise is ``data - denoised``; when ``relative`` is True it is further
    divided by ``denoised``.

    Both series are grouped by calling ``.weekday()`` on each index label, so
    the index labels must provide that method (e.g. a DatetimeIndex). Rows of
    the output follow the sorted weekday keys found in the index.

    Every weekday must hold the same number of samples in both series:
    missing data or an incomplete week makes the per-weekday groups ragged
    and a regular 2D array cannot be built from them.

    Output signature: a[i_weekday, i_week]
    """
    # Group by weekdays
    data_gp = data.groupby(lambda d: d.weekday())
    deno_gp = denoised.groupby(lambda d: d.weekday())

    # Create weekday vs week 2D arrays containing the values grouped per weekday.
    # Signature: a[i_weekday, i_week]
    data_weekday = np.array([group.values for _, group in data_gp])
    deno_weekday = np.array([group.values for _, group in deno_gp])

    result = data_weekday - deno_weekday
    if relative:
        # Out-of-place division: an in-place `/=` fails for integer inputs because
        # the float quotient cannot be cast back into the integer array.
        # Points where `denoised` is zero still yield NaN/inf.
        result = result / deno_weekday

    return result
50
+
51
+
52
def generate_weekday_noise(noise_call, weekday_params: np.ndarray, time_labels: pd.DatetimeIndex, num_samples):
    """
    Produce weekday sensitive noise for a generic distribution (given by noise_call callable).
    Weekday_params is either a 1D or 2D array with parameters of the distribution:
    * If it's 1D, noise_call must accept only one argument, and weekday_params must have size 7
      (one for each weekday).
    * If 2D, then expected signature is a[i_weekday, i_param], where the number of parameters of
      noise_call is given by the second dimension size of weekday_params.

    The signature of noise_call must be:
        noise_call(*params, size=num_samples)

    Where the size of params is the same as the number of columns in weekday_params.

    Each entry of time_labels must provide a ``.weekday()`` method; its result selects the row
    of weekday_params used for that time point. noise_call is invoked once per time label, in
    order.

    Raises ValueError if the first dimension of weekday_params is not 7.

    Output signature: noise[i_sample, i_t]
    """
    weeklen = 7

    # Weekday params check
    weekday_params = np.asarray(weekday_params)
    w_shape = weekday_params.shape
    if len(w_shape) == 0 or w_shape[0] != weeklen:
        raise ValueError(f"Hey, weekday_params must have size {weeklen} but has shape {w_shape}.")

    if len(w_shape) == 1:
        # Make it a column matrix so that each row unpacks into exactly one parameter.
        # (Transposing a 1D array is a no-op, hence the explicit new axis.)
        weekday_params = weekday_params[:, np.newaxis]

    # -----------
    noise = np.ones(shape=(num_samples, len(time_labels)))  # a[i_sample, i_t]

    for i_date, date in enumerate(time_labels):
        wkday = date.weekday()
        noise[:, i_date] = noise_call(*weekday_params[wkday], size=num_samples)

    return noise
85
+
86
+
87
def choose_roi_for_denoise(exd: ForecastExecutionData):
    """
    Select the series that the denoising methods should operate on.

    Returns ``exd.preproc_series`` when the "use_pre_roi" entry of
    ``exd.preproc_params`` is set, and ``exd.state_series`` otherwise
    (including when the entry is absent).
    """
    # The default must be the boolean False: the string "False" is truthy and would
    # make the pre-processing series the silent default.
    use_pre_roi = exd.preproc_params.get("use_pre_roi", False)

    # Tolerate flags given as text, so that e.g. "False" is not read as true.
    if isinstance(use_pre_roi, str):
        use_pre_roi = use_pre_roi.strip().lower() not in ("", "false", "0", "no", "none")

    if use_pre_roi:
        return exd.preproc_series
    return exd.state_series
92
+
93
+
26
94
# ----------------------------------------------------------------------------------------------------------------------
27
95
# DENOISING METHODS (COVID-HOSP FOCUSED) (remember to update DENOISE CALLABLE dict)
28
96
# ----------------------------------------------------------------------------------------------------------------------
29
97
# noinspection PyUnusedLocal
30
98
def denoise_lowpass(exd: ForecastExecutionData, fc: CovHospForecastOutput, filt_order=2, cutoff=0.4, **kwargs):
    """
    Denoise by passing the selected ROI series through apply_lowpass_filter.

    The series is picked by choose_roi_for_denoise(exd). `cutoff` and `filt_order`
    are forwarded to the filter; `fc` and extra keyword arguments are not used.
    Returns a pd.Series carrying the same index as the selected series.
    """
    series = choose_roi_for_denoise(exd)
    filtered_values = apply_lowpass_filter(series.values, cutoff, filt_order)
    return pd.Series(filtered_values, index=series.index)
33
102
34
103
35
104
# noinspection PyUnusedLocal
36
105
def denoise_polyfit(exd: ForecastExecutionData, fc: CovHospForecastOutput, poly_degree=3, **kwargs):
    """
    Denoise by fitting a polynomial of degree `poly_degree` to the selected ROI series.

    The fit uses the integer positions 0..n-1 of the series as abscissa, and the
    fitted polynomial is evaluated at those same positions. `fc` and extra keyword
    arguments are not used. Returns a pd.Series with the index of the selected series.
    """
    # Pick the series to denoise and build its integer time axis
    series = choose_roi_for_denoise(exd)
    positions = np.arange(series.shape[0])

    # full=True returns diagnostics too; only the coefficients (first item) are needed
    fit_output = np.polyfit(positions, series.values, deg=poly_degree, full=True)
    polynomial = np.poly1d(fit_output[0])  # Polynomial callable class.

    return pd.Series(polynomial(positions), index=series.index)
40
114
41
115
42
116
# noinspection PyUnusedLocal
43
117
def denoise_rolling_average(exd: ForecastExecutionData, fc: CovHospForecastOutput, rollav_window=4, **kwargs):
    """
    Denoise the selected ROI series with a rolling mean of width `rollav_window`.

    The first `rollav_window - 1` points, for which the rolling mean is undefined,
    are filled with the original values. `fc` and extra keyword arguments are not used.
    """
    series = choose_roi_for_denoise(exd)
    smoothed = series.rolling(rollav_window).mean()  # Rolling average

    # Leading points have an incomplete window (NaN): restore the original values
    warmup = slice(None, rollav_window - 1)
    smoothed[warmup] = series[warmup]

    # NOTE: no rescaling to match the last day is applied.
    return smoothed
@@ -77,26 +152,65 @@ def __init__(self, **kwargs):
77
152
self .mean = None
78
153
self .std = None
79
154
self .coef = kwargs .get ("noise_coef" , 1.0 ) # Multiply noise by this coefficient
155
+ self .seed = kwargs .get ("noise_seed" , None ) # Seed
156
+ self ._rng = np .random .default_rng (self .seed )
80
157
81
158
def fit(self, data: pd.Series, denoised: pd.Series):
    """
    Estimate the noise parameters from a data series and its denoised version.

    Stores the mean and the standard deviation of the sample returned by
    calc_relative_dev_sample(data, denoised) in self.mean and self.std.
    """
    deviation_sample = calc_relative_dev_sample(data, denoised)
    # The full sample standard deviation is used (not a halved one).
    self.mean, self.std = deviation_sample.mean(), deviation_sample.std()
86
163
87
def generate(self, new_denoised: np.ndarray, time_labels):
    """
    Apply multiplicative Gaussian noise to new_denoised.

    new_denoised: a[i_sample, i_t]
    time_labels is accepted for interface compatibility and is not used here.

    One normal deviate per element is drawn from self._rng with parameters
    (self.mean, self.std), scaled by self.coef and clamped from below at -1.
    Returns new_denoised * (1 + noise).
    """
    gaussian = self._rng.normal(self.mean, self.std, size=new_denoised.shape)
    scaled = self.coef * gaussian
    noise = np.maximum(scaled, -1.)  # Clamped above -1
    return new_denoised * (1. + noise)
168
+
169
+
170
class NormalWeekdayNoise(AbstractNoise):
    """
    Weekday-sensitive Gaussian noise.

    A separate (mean, std) pair is fitted for each weekday and used to draw
    multiplicative noise according to the weekday of each time label.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Per-weekday distribution parameters, filled in by fit()
        self.mean_array = None
        self.std_array = None
        # Keyword options
        self.coef = kwargs.get("noise_coef", 1.0)  # Multiply noise by this coefficient
        self.seed = kwargs.get("noise_seed", None)  # Seed
        self._rng = np.random.default_rng(self.seed)

    def fit(self, data: pd.Series, denoised: pd.Series):
        """Calculate an array of means and standard deviations, one for each weekday."""
        # Sample of relative differences, one row per weekday
        ensembles = calc_weekday_noise_ensembles(data, denoised, relative=True)

        # One set of parameters for each row, in the order given by the ensembles
        self.mean_array = np.array([row.mean() for row in ensembles], dtype=float)
        self.std_array = np.array([row.std() for row in ensembles], dtype=float)

    def _noise_call(self, mean, std, size):
        """Callable to generate the normal noise."""
        return self._rng.normal(mean, std, size=size)

    def generate(self, new_denoised: np.ndarray, time_labels):
        """
        Apply weekday-dependent multiplicative noise to new_denoised.

        new_denoised: a[i_sample, i_t]
        """
        # Parameter table with signature a[i_weekday, i_param], params = (mean, std)
        weekday_params = np.transpose(np.array((self.mean_array, self.std_array)))
        sample_count = new_denoised.shape[0]

        raw_noise = generate_weekday_noise(self._noise_call, weekday_params, time_labels, sample_count)
        # Applies coefficient, then clamps from below at -1
        noise = np.maximum(self.coef * raw_noise, -1.)

        return new_denoised * (1. + noise)
90
200
201
+ # def generate(self, new_denoised, time_labels):
202
+ # return new_denoised
203
+
91
204
92
205
class NoneNoise(AbstractNoise):
    """Produces zero noise."""

    def generate(self, new_denoised, time_labels):
        """Return new_denoised untouched; time_labels is ignored."""
        return new_denoised
96
209
97
210
98
211
# Maps each noise type name to the class that implements it.
NOISE_CLASS = {
    "normal": NormalMultNoise,  # Single Gaussian for all time points
    "weekday_normal": NormalWeekdayNoise,  # One Gaussian per weekday
    "none": NoneNoise,  # No noise
}
102
216
0 commit comments