Skip to content

Commit f036124

Browse files
committed
Move updated SARIMA notebook
1 parent 0e8421b commit f036124

File tree

2 files changed

+54
-853
lines changed

2 files changed

+54
-853
lines changed

SARIMA.ipynb

-804
This file was deleted.

notebooks/SARIMA.ipynb

+54-49
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"colab": {
66
"name": "SARIMA.ipynb",
77
"provenance": [],
8+
"collapsed_sections": [],
89
"include_colab_link": true
910
},
1011
"kernelspec": {
@@ -39,6 +40,7 @@
3940
" - autoregression model i.e. regression of the time series onto itself. The basic assumption is that the current series values depend on its previous values with some lag (or several lags). The maximum lag in the model is referred to as p\n",
4041
" . To determine the initial p\n",
4142
" , you need to look at the PACF plot and find the biggest significant lag after which most other lags become insignificant.\n",
43+
" \n",
4244
"MA(q)\n",
4345
" - moving average model. Without going into too much detail, this models the error of the time series, again with the assumption that the current error depends on the previous with some lag, which is referred to as q\n",
4446
" . The initial value can be found on the ACF plot with the same logic as before.\n",
@@ -102,7 +104,7 @@
102104
"from sklearn.metrics import r2_score, median_absolute_error, mean_absolute_error\n",
103105
"from sklearn.metrics import median_absolute_error, mean_squared_error, mean_squared_log_error"
104106
],
105-
"execution_count": 0,
107+
"execution_count": null,
106108
"outputs": []
107109
},
108110
{
@@ -142,26 +144,26 @@
142144
" smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax)\n",
143145
" plt.tight_layout()"
144146
],
145-
"execution_count": 0,
147+
"execution_count": null,
146148
"outputs": []
147149
},
148150
{
149151
"cell_type": "code",
150152
"metadata": {
151153
"id": "j4sj2_tL78Ia",
152154
"colab_type": "code",
153-
"outputId": "5620b1e5-6bd5-4ab2-852f-9753e6d738dd",
154155
"colab": {
155156
"base_uri": "https://localhost:8080/",
156157
"height": 398
157-
}
158+
},
159+
"outputId": "5620b1e5-6bd5-4ab2-852f-9753e6d738dd"
158160
},
159161
"source": [
160162
"# Get datasets from github repository\n",
161163
"!wget https://raw.githubusercontent.com/Yorko/mlcourse.ai/master/data/ads.csv\n",
162164
"!wget https://raw.githubusercontent.com/Yorko/mlcourse.ai/master/data/currency.csv"
163165
],
164-
"execution_count": 0,
166+
"execution_count": null,
165167
"outputs": [
166168
{
167169
"output_type": "stream",
@@ -205,7 +207,7 @@
205207
"ads = pd.read_csv('ads.csv', index_col=['Time'], parse_dates=['Time'])\n",
206208
"currency = pd.read_csv('currency.csv', index_col=['Time'], parse_dates=['Time'])"
207209
],
208-
"execution_count": 0,
210+
"execution_count": null,
209211
"outputs": []
210212
},
211213
{
@@ -215,19 +217,21 @@
215217
"colab_type": "text"
216218
},
217219
"source": [
218-
"## Plot time series data"
220+
"## Plot time series data\n",
221+
"\n",
222+
"- Keep in mind that the ACF and PACF plots will be very useful for estimating the ARMA parameters."
219223
]
220224
},
221225
{
222226
"cell_type": "code",
223227
"metadata": {
224228
"id": "uzqzlw938luZ",
225229
"colab_type": "code",
226-
"outputId": "c6405669-db46-4387-83ef-722bd10c1b0b",
227230
"colab": {
228231
"base_uri": "https://localhost:8080/",
229232
"height": 389
230-
}
233+
},
234+
"outputId": "c6405669-db46-4387-83ef-722bd10c1b0b"
231235
},
232236
"source": [
233237
"plt.figure(figsize=(18, 6))\n",
@@ -236,7 +240,7 @@
236240
"plt.grid(True)\n",
237241
"plt.show()"
238242
],
239-
"execution_count": 0,
243+
"execution_count": null,
240244
"outputs": [
241245
{
242246
"output_type": "display_data",
@@ -257,11 +261,11 @@
257261
"metadata": {
258262
"id": "IlPQn7kB8oI7",
259263
"colab_type": "code",
260-
"outputId": "d997efb0-4c93-4015-bde5-a1225e390a5d",
261264
"colab": {
262265
"base_uri": "https://localhost:8080/",
263266
"height": 389
264-
}
267+
},
268+
"outputId": "d997efb0-4c93-4015-bde5-a1225e390a5d"
265269
},
266270
"source": [
267271
"plt.figure(figsize=(18, 6))\n",
@@ -270,7 +274,7 @@
270274
"plt.grid(True)\n",
271275
"plt.show()"
272276
],
273-
"execution_count": 0,
277+
"execution_count": null,
274278
"outputs": [
275279
{
276280
"output_type": "display_data",
@@ -291,16 +295,16 @@
291295
"metadata": {
292296
"id": "zKL_VIbl8vRL",
293297
"colab_type": "code",
294-
"outputId": "f82f11ab-0bee-4e04-8797-409d5e2fca1b",
295298
"colab": {
296299
"base_uri": "https://localhost:8080/",
297300
"height": 513
298-
}
301+
},
302+
"outputId": "f82f11ab-0bee-4e04-8797-409d5e2fca1b"
299303
},
300304
"source": [
301305
"tsplot(ads.Ads, lags=60)"
302306
],
303-
"execution_count": 0,
307+
"execution_count": null,
304308
"outputs": [
305309
{
306310
"output_type": "display_data",
@@ -321,18 +325,18 @@
321325
"metadata": {
322326
"id": "mpHij1ef803k",
323327
"colab_type": "code",
324-
"outputId": "94876629-05d5-4964-d734-de279c0ae720",
325328
"colab": {
326329
"base_uri": "https://localhost:8080/",
327330
"height": 513
328-
}
331+
},
332+
"outputId": "94876629-05d5-4964-d734-de279c0ae720"
329333
},
330334
"source": [
331335
"# The seasonal difference\n",
332336
"ads_diff = ads.Ads - ads.Ads.shift(24)\n",
333337
"tsplot(ads_diff[24:], lags=60)"
334338
],
335-
"execution_count": 0,
339+
"execution_count": null,
336340
"outputs": [
337341
{
338342
"output_type": "display_data",
@@ -363,17 +367,17 @@
363367
"metadata": {
364368
"id": "HIbu5nM688tm",
365369
"colab_type": "code",
366-
"outputId": "215d3ffb-8a6d-4665-826a-dfe8378daaeb",
367370
"colab": {
368371
"base_uri": "https://localhost:8080/",
369372
"height": 513
370-
}
373+
},
374+
"outputId": "215d3ffb-8a6d-4665-826a-dfe8378daaeb"
371375
},
372376
"source": [
373377
"ads_diff = ads_diff - ads_diff.shift(1)\n",
374378
"tsplot(ads_diff[24+1:], lags=60)"
375379
],
376-
"execution_count": 0,
380+
"execution_count": null,
377381
"outputs": [
378382
{
379383
"output_type": "display_data",
@@ -398,12 +402,12 @@
398402
"source": [
399403
"## SARIMA parameters\n",
400404
"\n",
401-
"- p\n",
402-
" is most probably 4 since it is the last significant lag on the PACF, after which, most others are not significant.\n",
405+
"- **p**\n",
406+
" is most probably 4 since it is the last significant lag on the **PACF**, after which most other lags are not significant.\n",
403407
"- d\n",
404408
" equals 1 because we had first differences\n",
405-
"- q\n",
406-
" should be somewhere around 4 as well as seen on the ACF\n",
409+
"- **q**\n",
410+
" should be somewhere around 4 as well, as seen on the **ACF**\n",
407411
"- P\n",
408412
" might be 2, since 24-th and 48-th lags are somewhat significant on the PACF\n",
409413
"- D\n",
@@ -417,11 +421,11 @@
417421
"metadata": {
418422
"id": "nFqLgQgB9FAR",
419423
"colab_type": "code",
420-
"outputId": "9468b952-e6dd-46e3-e9d4-c237843e7c9c",
421424
"colab": {
422425
"base_uri": "https://localhost:8080/",
423426
"height": 34
424-
}
427+
},
428+
"outputId": "9468b952-e6dd-46e3-e9d4-c237843e7c9c"
425429
},
426430
"source": [
427431
"# setting initial values and some bounds for them\n",
@@ -438,7 +442,7 @@
438442
"parameters_list = list(parameters)\n",
439443
"len(parameters_list)"
440444
],
441-
"execution_count": 0,
445+
"execution_count": null,
442446
"outputs": [
443447
{
444448
"output_type": "execute_result",
@@ -462,9 +466,10 @@
462466
"colab": {}
463467
},
464468
"source": [
465-
"def optimizeSARIMA(parameters_list, d, D, s):\n",
469+
"def optimizeSARIMA(y, parameters_list, d, D, s):\n",
466470
" \"\"\"Return dataframe with parameters and corresponding AIC\n",
467471
" \n",
472+
" y - time series\n",
468473
" parameters_list - list with (p, q, P, Q) tuples\n",
469474
" d - integration order in ARIMA model\n",
470475
" D - seasonal integration order \n",
@@ -477,7 +482,7 @@
477482
" for param in tqdm_notebook(parameters_list):\n",
478483
" # we need try-except because on some combinations model fails to converge\n",
479484
" try:\n",
480-
" model=sm.tsa.statespace.SARIMAX(ads.Ads, order=(param[0], d, param[1]), \n",
485+
" model=sm.tsa.statespace.SARIMAX(y, order=(param[0], d, param[1]), \n",
481486
" seasonal_order=(param[2], D, param[3], s)).fit(disp=-1)\n",
482487
" except:\n",
483488
" continue\n",
@@ -496,29 +501,29 @@
496501
" \n",
497502
" return result_table"
498503
],
499-
"execution_count": 0,
504+
"execution_count": null,
500505
"outputs": []
501506
},
502507
{
503508
"cell_type": "code",
504509
"metadata": {
505510
"id": "n2D_Pzxc9mtr",
506511
"colab_type": "code",
507-
"outputId": "a5cf28d8-2693-494f-dead-d3b533333265",
508512
"colab": {
509513
"base_uri": "https://localhost:8080/",
510514
"height": 69,
511515
"referenced_widgets": [
512516
"26fb1f95b0414c88a650b00f7dd7743c"
513517
]
514-
}
518+
},
519+
"outputId": "a5cf28d8-2693-494f-dead-d3b533333265"
515520
},
516521
"source": [
517522
"%%time\n",
518523
"warnings.filterwarnings(\"ignore\") \n",
519-
"result_table = optimizeSARIMA(parameters_list, d, D, s)"
524+
"result_table = optimizeSARIMA(ads.Ads, parameters_list, d, D, s)"
520525
],
521-
"execution_count": 0,
526+
"execution_count": null,
522527
"outputs": [
523528
{
524529
"output_type": "display_data",
@@ -552,16 +557,16 @@
552557
"metadata": {
553558
"id": "9GbyTEDb_y2m",
554559
"colab_type": "code",
555-
"outputId": "6cce2a2e-69c4-41aa-e570-41a844688c07",
556560
"colab": {
557561
"base_uri": "https://localhost:8080/",
558562
"height": 206
559-
}
563+
},
564+
"outputId": "6cce2a2e-69c4-41aa-e570-41a844688c07"
560565
},
561566
"source": [
562567
"result_table.head()"
563568
],
564-
"execution_count": 0,
569+
"execution_count": null,
565570
"outputs": [
566571
{
567572
"output_type": "execute_result",
@@ -640,11 +645,11 @@
640645
"metadata": {
641646
"id": "_vr3oGkxBarW",
642647
"colab_type": "code",
643-
"outputId": "c1f5b667-92da-4845-e29d-952889f3ca0f",
644648
"colab": {
645649
"base_uri": "https://localhost:8080/",
646650
"height": 537
647-
}
651+
},
652+
"outputId": "c1f5b667-92da-4845-e29d-952889f3ca0f"
648653
},
649654
"source": [
650655
"# set the parameters that give the lowest AIC\n",
@@ -654,7 +659,7 @@
654659
" seasonal_order=(P, D, Q, s)).fit(disp=-1)\n",
655660
"print(best_model.summary())"
656661
],
657-
"execution_count": 0,
662+
"execution_count": null,
658663
"outputs": [
659664
{
660665
"output_type": "stream",
@@ -699,16 +704,16 @@
699704
"metadata": {
700705
"id": "XqqXdHQ-BoDa",
701706
"colab_type": "code",
702-
"outputId": "737f202d-1161-401d-80c2-eac91701f1b0",
703707
"colab": {
704708
"base_uri": "https://localhost:8080/",
705709
"height": 513
706-
}
710+
},
711+
"outputId": "737f202d-1161-401d-80c2-eac91701f1b0"
707712
},
708713
"source": [
709714
"tsplot(best_model.resid[24+1:], lags=60)"
710715
],
711-
"execution_count": 0,
716+
"execution_count": null,
712717
"outputs": [
713718
{
714719
"output_type": "display_data",
@@ -762,24 +767,24 @@
762767
" plt.legend()\n",
763768
" plt.grid(True)"
764769
],
765-
"execution_count": 0,
770+
"execution_count": null,
766771
"outputs": []
767772
},
768773
{
769774
"cell_type": "code",
770775
"metadata": {
771776
"id": "9Iyjz_IfBtqU",
772777
"colab_type": "code",
773-
"outputId": "f21b389e-538e-440c-98d9-2f642722c15e",
774778
"colab": {
775779
"base_uri": "https://localhost:8080/",
776780
"height": 444
777-
}
781+
},
782+
"outputId": "f21b389e-538e-440c-98d9-2f642722c15e"
778783
},
779784
"source": [
780785
"plotSARIMA(ads, best_model, 50)"
781786
],
782-
"execution_count": 0,
787+
"execution_count": null,
783788
"outputs": [
784789
{
785790
"output_type": "display_data",

0 commit comments

Comments
 (0)