1
+ {
2
+ "cells" : [
3
+ {
4
+ "cell_type" : " markdown" ,
5
+ "metadata" : {},
6
+ "source" : [
7
+ " \n " ,
8
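+ " ### This is an example of a basic cross-validation on scikit-learn's Boston housing dataset using Ridge regression: the model is scored on 10 repeated random 80/20 train/test splits (repeated hold-out rather than a strict 10-fold partition) and the R2 scores are averaged."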
9
+ ]
10
+ },
11
+ {
12
+ "cell_type" : " code" ,
13
+ "execution_count" : 46 ,
14
+ "metadata" : {},
15
+ "outputs" : [],
16
+ "source" : [
17
+ " import pandas as pd\n " ,
18
+ " import numpy as np\n " ,
19
+ " from sklearn.datasets import load_boston\n " ,
20
+ " import matplotlib.pyplot as plt\n " ,
21
+ " from sklearn.linear_model import Ridge\n " ,
22
+ " from sklearn.model_selection import train_test_split\n " ,
23
+ " from sklearn.metrics import r2_score\n " ,
24
+ " from sklearn.preprocessing import Normalizer\n " ,
25
+ " \n " ,
26
+ " \n " ,
27
+ " boston_dataset = load_boston()\n " ,
28
+ " \n " ,
29
+ " X = boston_dataset.data \n " ,
30
+ " \n " ,
31
+ " transformer = Normalizer().fit(X)\n " ,
32
+ " \n " ,
33
+ " transformer.transform(X)\n " ,
34
+ " \n " ,
35
+ " y = boston_dataset.target.reshape(-1, 1) # Converting to Column vector "
36
+ ]
37
+ },
38
+ {
39
+ "cell_type" : " code" ,
40
+ "execution_count" : 47 ,
41
+ "metadata" : {},
42
+ "outputs" : [
43
+ {
44
+ "name" : " stdout" ,
45
+ "output_type" : " stream" ,
46
+ "text" : [
47
+ " Cost after iteration 1 - 0.7126916135010682\n " ,
48
+ " Cost after iteration 2 - 0.6580161986180606\n " ,
49
+ " Cost after iteration 3 - 0.6593572420098802\n " ,
50
+ " Cost after iteration 4 - 0.6827689513882875\n " ,
51
+ " Cost after iteration 5 - 0.7427883698752009\n " ,
52
+ " Cost after iteration 6 - 0.7037718042199794\n " ,
53
+ " Cost after iteration 7 - 0.6266438909778318\n " ,
54
+ " Cost after iteration 8 - 0.7455364640710955\n " ,
55
+ " Cost after iteration 9 - 0.7291283475880821\n " ,
56
+ " Cost after iteration 10 - 0.7353142325508967\n " ,
57
+ " Aggregate R2 Coeff 0.69960\n "
58
+ ]
59
+ }
60
+ ],
61
+ "source" : [
62
+ " r2_coeff = []\n " ,
63
+ " clf = Ridge(alpha=10)\n " ,
64
+ " for i in range(10):\n " ,
65
+ " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)\n " ,
66
+ " clf.fit(X_train, y_train)\n " ,
67
+ " y_predict=clf.predict(X_test)\n " ,
68
+ " \n " ,
69
+ " RSS = (np.mean((y_test-y_predict)**2)/np.std(y_test)**2)\n " ,
70
+ " r2_coeff.append(1-RSS)\n " ,
71
+ " print(\" Cost after iteration {} - {}\" .format(i+1, 1-RSS))\n " ,
72
+ " \n " ,
73
+ " average_r2 = np.mean(r2_coeff)\n " ,
74
+ " print(\" Aggregate R2 Coeff {0:.5f}\" .format(average_r2))"
75
+ ]
76
+ },
77
+ {
78
+ "cell_type" : " code" ,
79
+ "execution_count" : null ,
80
+ "metadata" : {},
81
+ "outputs" : [],
82
+ "source" : []
83
+ }
84
+ ],
85
+ "metadata" : {
86
+ "kernelspec" : {
87
+ "display_name" : " Python 3" ,
88
+ "language" : " python" ,
89
+ "name" : " python3"
90
+ },
91
+ "language_info" : {
92
+ "codemirror_mode" : {
93
+ "name" : " ipython" ,
94
+ "version" : 3
95
+ },
96
+ "file_extension" : " .py" ,
97
+ "mimetype" : " text/x-python" ,
98
+ "name" : " python" ,
99
+ "nbconvert_exporter" : " python" ,
100
+ "pygments_lexer" : " ipython3" ,
101
+ "version" : " 3.7.4-final"
102
+ }
103
+ },
104
+ "nbformat" : 4 ,
105
+ "nbformat_minor" : 2
106
+ }
0 commit comments