24
24
]
25
25
26
26
# The valid information criteria
27
- VALID_CRITERIA = {'aic' , 'bic' , 'hqic' }
27
+ VALID_CRITERIA = {'aic' , 'bic' , 'hqic' , 'oob' }
28
28
29
29
30
30
def auto_arima (y , exogenous = None , start_p = 2 , d = None , start_q = 2 , max_p = 5 , max_d = 2 , max_q = 5 ,
@@ -33,7 +33,8 @@ def auto_arima(y, exogenous=None, start_p=2, d=None, start_q=2, max_p=5, max_d=2
33
33
seasonal_test = 'ch' , n_jobs = 1 , start_params = None , trend = 'c' , method = None , transparams = True ,
34
34
solver = 'lbfgs' , maxiter = 50 , disp = 0 , callback = None , offset_test_args = None , seasonal_test_args = None ,
35
35
suppress_warnings = False , error_action = 'warn' , trace = False , random = False , random_state = None ,
36
- n_fits = 10 , return_valid_fits = False , ** fit_args ):
36
+ n_fits = 10 , return_valid_fits = False , out_of_sample_size = 0 , scoring = 'mse' , scoring_args = None ,
37
+ ** fit_args ):
37
38
"""The ``auto_arima`` function seeks to identify the most optimal parameters for an ``ARIMA`` model,
38
39
and returns a fitted ARIMA model. This function is based on the commonly-used R function,
39
40
``forecast::auto.arima`` [3].
@@ -45,8 +46,9 @@ def auto_arima(y, exogenous=None, start_p=2, d=None, start_q=2, max_p=5, max_d=2
45
46
conducting the Canova-Hansen to determine the optimal order of seasonal differencing, ``D``.
46
47
47
48
In order to find the best model, ``auto_arima`` optimizes for a given ``information_criterion``, one of
48
- {'aic', 'bic', 'hqic'} (Akaine Information Criterion, Bayesian Information Criterion or Hannan-Quinn
49
- Information Criterion, respectively) and returns the ARIMA which minimizes the value.
49
+ {'aic', 'bic', 'hqic', 'oob'} (Akaike Information Criterion, Bayesian Information Criterion, Hannan-Quinn
50
+ Information Criterion, or "out of bag"--for validation scoring--respectively) and returns the ARIMA which
51
+ minimizes the value.
50
52
51
53
Note that due to stationarity issues, ``auto_arima`` might not find a suitable model that will converge. If this
52
54
is the case, a ``ValueError`` will be thrown suggesting stationarity-inducing measures be taken prior
@@ -127,8 +129,7 @@ def auto_arima(y, exogenous=None, start_p=2, d=None, start_q=2, max_p=5, max_d=2
127
129
128
130
information_criterion : str, optional (default='aic')
129
131
The information criterion used to select the best ARIMA model. One of
130
- ``pyramid.arima.auto_arima.VALID_CRITERIA``, ('aic', 'bic'). Note that if
131
- n_samples <= 3, AIC will be used.
132
+ ``pyramid.arima.auto_arima.VALID_CRITERIA``, ('aic', 'bic', 'hqic', 'oob').
132
133
133
134
alpha : float, optional (default=0.05)
134
135
Level of the test for testing significance.
@@ -224,6 +225,17 @@ def auto_arima(y, exogenous=None, start_p=2, d=None, start_q=2, max_p=5, max_d=2
224
225
If True, will return all valid ARIMA fits. If False (by default), will only
225
226
return the best fit.
226
227
228
+ out_of_sample_size : int, optional (default=0)
229
+ The number of examples from the tail of the time series to use as validation
230
+ examples.
231
+
232
+ scoring : str, optional (default='mse')
233
+ If performing validation (i.e., if ``out_of_sample_size`` > 0), the metric
234
+ to use for scoring the out-of-sample data. One of {'mse', 'mae'}.
235
+
236
+ scoring_args : dict, optional (default=None)
237
+ A dictionary of key-word arguments to be passed to the ``scoring`` metric.
238
+
227
239
**fit_args : dict, optional (default=None)
228
240
A dictionary of keyword arguments to pass to the :func:`ARIMA.fit` method.
229
241
@@ -282,7 +294,9 @@ def auto_arima(y, exogenous=None, start_p=2, d=None, start_q=2, max_p=5, max_d=2
282
294
transparams = transparams , solver = solver , maxiter = maxiter ,
283
295
disp = disp , callback = callback , fit_params = fit_args ,
284
296
suppress_warnings = suppress_warnings , trace = trace ,
285
- error_action = error_action )),
297
+ error_action = error_action , scoring = scoring ,
298
+ out_of_sample_size = out_of_sample_size ,
299
+ scoring_args = scoring_args )),
286
300
return_valid_fits )
287
301
288
302
# test ic, and use AIC if n <= 3
@@ -396,7 +410,9 @@ def auto_arima(y, exogenous=None, start_p=2, d=None, start_q=2, max_p=5, max_d=2
396
410
transparams = transparams , solver = solver , maxiter = maxiter ,
397
411
disp = disp , callback = callback , fit_params = fit_args ,
398
412
suppress_warnings = suppress_warnings , trace = trace ,
399
- error_action = error_action )),
413
+ error_action = error_action , scoring = scoring ,
414
+ out_of_sample_size = out_of_sample_size ,
415
+ scoring_args = scoring_args )),
400
416
return_valid_fits )
401
417
402
418
# seasonality issues
@@ -442,7 +458,8 @@ def auto_arima(y, exogenous=None, start_p=2, d=None, start_q=2, max_p=5, max_d=2
442
458
start_params = start_params , trend = trend , method = method , transparams = transparams ,
443
459
solver = solver , maxiter = maxiter , disp = disp , callback = callback ,
444
460
fit_params = fit_args , suppress_warnings = suppress_warnings ,
445
- trace = trace , error_action = error_action )
461
+ trace = trace , error_action = error_action , out_of_sample_size = out_of_sample_size ,
462
+ scoring = scoring , scoring_args = scoring_args )
446
463
for order , seasonal_order in gen )
447
464
448
465
# filter the non-successful ones
@@ -461,12 +478,14 @@ def auto_arima(y, exogenous=None, start_p=2, d=None, start_q=2, max_p=5, max_d=2
461
478
462
479
def _fit_arima (x , xreg , order , seasonal_order , start_params , trend , method , transparams ,
463
480
solver , maxiter , disp , callback , fit_params , suppress_warnings , trace ,
464
- error_action ):
481
+ error_action , out_of_sample_size , scoring , scoring_args ):
465
482
try :
466
483
fit = ARIMA (order = order , seasonal_order = seasonal_order , start_params = start_params ,
467
484
trend = trend , method = method , transparams = transparams ,
468
485
solver = solver , maxiter = maxiter , disp = disp ,
469
- callback = callback , suppress_warnings = suppress_warnings )\
486
+ callback = callback , suppress_warnings = suppress_warnings ,
487
+ out_of_sample_size = out_of_sample_size , scoring = scoring ,
488
+ scoring_args = scoring_args )\
470
489
.fit (x , exogenous = xreg , ** fit_params )
471
490
472
491
# for non-stationarity errors, return None
0 commit comments