26
26
from databricks .automl_runtime .forecast import OFFSET_ALIAS_MAP , DATE_OFFSET_KEYWORD_MAP
27
27
from databricks .automl_runtime .forecast .model import ForecastModel , mlflow_forecast_log_model
28
28
from databricks .automl_runtime import version
29
- from databricks .automl_runtime .forecast .utils import is_quaterly_alias , make_future_dataframe
29
+ from databricks .automl_runtime .forecast .utils import is_quaterly_alias , make_future_dataframe , apply_preprocess_func
30
30
31
31
32
32
PROPHET_ADDITIONAL_PIP_DEPS = [
@@ -110,26 +110,36 @@ def make_future_dataframe(self, horizon: int = None, include_history: bool = Tru
110
110
freq = pd .DateOffset (** offset_kwarg ),
111
111
include_history = include_history )
112
112
113
def _predict_impl(self, future_df: pd.DataFrame) -> pd.DataFrame:
    """
    Predict using the API from prophet model.

    The frame is forwarded unchanged to the underlying model's ``predict``;
    no future dates are generated and no columns are renamed here.

    :param future_df: future input dataframe. This dataframe should contain
                      the time series column and covariate columns if available.
                      It is used as the input for generating predictions.
    :return: A pd.DataFrame that represents the model's output. The predicted target
             column is named 'yhat'.
    """
    return self.model().predict(future_df)
123
123
124
def predict_timeseries(self, horizon: int = None, include_history: bool = True,
                       future_df: pd.DataFrame = None) -> pd.DataFrame:
    """
    Predict using the prophet model. The input dataframe is preprocessed when the
    model was trained with covariates (i.e. a preprocess function and a split
    column are both set).

    :param horizon: Int number of periods to forecast forward. Only used when
                    ``future_df`` is not provided; falls back to ``self._horizon``.
    :param include_history: Boolean to include the historical dates in the data
                            frame for predictions. Only used when ``future_df``
                            is not provided.
    :param future_df: Optional future input dataframe. This dataframe should contain
                      the time series column and covariate columns if available.
                      It is used as the input for generating predictions. The
                      caller's frame is not modified.
    :return: A pd.DataFrame that represents the model's output. The predicted target
             column is named 'yhat'.
    """
    if future_df is None:
        future_df = self.make_future_dataframe(horizon=horizon or self._horizon,
                                               include_history=include_history)
    else:
        # Work on a copy so that preprocessing and the column rename below never
        # leak into the caller's frame (``predict`` copies ``model_input`` for
        # the same reason).
        future_df = future_df.copy()

    if self._preprocess_func and self._split_col:
        future_df = apply_preprocess_func(future_df, self._preprocess_func, self._split_col)
    # Prophet expects the time column to be named "ds".
    future_df = future_df.rename(columns={self._time_col: "ds"})
    return self._predict_impl(future_df)
133
143
134
144
def predict (self , context : mlflow .pyfunc .model .PythonModelContext , model_input : pd .DataFrame ) -> pd .Series :
135
145
"""
@@ -143,15 +153,7 @@ def predict(self, context: mlflow.pyfunc.model.PythonModelContext, model_input:
143
153
test_df = model_input .copy ()
144
154
145
155
if self ._preprocess_func and self ._split_col :
146
- # Apply the same preprocessing pipeline to test_df. The preprocessing function requires the "y" column
147
- # and the split column to be present, as they are used in the trial notebook. These columns are added
148
- # temporarily and removed after preprocessing.
149
- # see https://src.dev.databricks.com/databricks-eng/universe/-/blob/automl/python/databricks/automl/core/sections/templates/preprocess/finish_with_transform.jinja?L3
150
- # and https://src.dev.databricks.com/databricks-eng/universe/-/blob/automl/python/databricks/automl/core/sections/templates/preprocess/select_columns.jinja?L8-10
151
- test_df ["y" ] = None
152
- test_df [self ._split_col ] = "prediction"
153
- test_df = self ._preprocess_func (test_df )
154
- test_df .drop (columns = ["y" , self ._split_col ], inplace = True , errors = "ignore" )
156
+ test_df = apply_preprocess_func (test_df , self ._preprocess_func , self ._split_col )
155
157
156
158
test_df .rename (columns = {self ._time_col : "ds" }, inplace = True )
157
159
predict_df = self .model ().predict (test_df )
@@ -260,28 +262,36 @@ def _predict_impl(self, df: pd.DataFrame, horizon: int = None, include_history:
260
262
future_pd [self ._id_cols ] = df [self ._id_cols ].iloc [0 ]
261
263
return future_pd
262
264
263
def predict_timeseries(self, horizon: int = None, include_history: bool = True,
                       future_df: pd.DataFrame = None) -> pd.DataFrame:
    """
    Predict using the prophet model.

    :param horizon: Int number of periods to forecast forward. Falls back to
                    ``self._horizon`` when not given.
    :param include_history: Boolean to include the historical dates in the data
                            frame for predictions.
    :param future_df: Optional future input dataframe. This dataframe should contain
                      the time series column and covariate columns if available.
                      It is used as the input for generating predictions. The
                      caller's frame is not modified.
    :return: A pd.DataFrame that represents the model's output. The predicted target
             column is named 'yhat'.
    """
    horizon = horizon or self._horizon
    if future_df is None:
        future_df = make_future_dataframe(
            start_time=self._timeseries_starts,
            end_time=pd.Timestamp(self._timeseries_end),
            horizon=horizon,
            frequency_unit=self._frequency_unit,
            frequency_quantity=self._frequency_quantity,
            include_history=include_history,
            groups=self._model_json.keys(),
            identity_column_names=self._id_cols
        )
    else:
        # Work on a copy: the "ts_id" column added below and the rename to "ds"
        # must not show up in the frame the caller handed in.
        future_df = future_df.copy()

    # One tuple per row built from the identity columns.
    future_df["ts_id"] = future_df[self._id_cols].apply(tuple, axis=1)
    if self._preprocess_func and self._split_col:
        future_df = apply_preprocess_func(future_df, self._preprocess_func, self._split_col)
    # Prophet expects the time column to be named "ds".
    future_df = future_df.rename(columns={self._time_col: "ds"})

    # Predict each series separately, then flatten the per-group results into a
    # single frame with a fresh positional index.
    per_series = future_df.groupby(self._id_cols).apply(
        lambda group_df: self._predict_impl(group_df, horizon, include_history)
    )
    return per_series.reset_index(drop=True)
285
295
286
296
@staticmethod
287
297
def get_reserved_cols () -> List [str ]:
@@ -354,7 +364,6 @@ def model_prediction(df):
354
364
return_df = test_df .merge (predict_df , how = "left" , on = ["ds" ] + self ._id_cols )
355
365
return return_df ["yhat" ]
356
366
357
-
358
367
def mlflow_prophet_log_model (prophet_model : Union [ProphetModel , MultiSeriesProphetModel ],
359
368
sample_input : pd .DataFrame = None ) -> None :
360
369
"""
0 commit comments