-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregression.py
65 lines (50 loc) · 2.74 KB
/
regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from itertools import combinations_with_replacement
class Model:
    """Regression model over a polynomial expansion of the regressors.

    ``model`` is a statsmodels-style callable ``model(y, x, **model_params)``
    whose object has a ``.fit(**fit_params)`` method, and whose fit result
    exposes ``.aic`` and ``.bic`` (e.g. ``statsmodels.api.OLS``).

    On construction the regressors in ``x`` are expanded to all monomials of
    total degree 1..``degree`` (plus a ``'const'`` intercept column), and the
    model is fitted once on the full design matrix.
    """

    def __init__(self, model, x, y, degree, model_params=None, fit_params=None):
        self.model = model
        self.degree = degree
        self.fit_params = fit_params if fit_params is not None else {}
        self.model_params = model_params if model_params is not None else {}
        # Build the polynomial design matrix and its column names from the
        # SAME combination sequence, so names and values cannot drift apart.
        # (Previously the names were computed independently and merely assumed
        # to match sklearn PolynomialFeatures' column ordering.)
        # Drop the incoming index to mirror the old array-based construction,
        # which always produced a fresh RangeIndex.
        base = x.reset_index(drop=True).astype(float)
        features = {'const': pd.Series(1.0, index=base.index)}
        for d in range(1, self.degree + 1):
            for comb in combinations_with_replacement(list(x.columns), d):
                # e.g. ('a', 'b') -> column 'a&b' holding the product a*b;
                # repeated labels give powers: ('a', 'a') -> 'a&a' == a**2.
                features['&'.join(comb)] = base[list(comb)].prod(axis=1)
        self.column_names = list(features)
        self.y = y
        self.x = pd.DataFrame(features, columns=self.column_names)
        # Combined frame without the intercept column, for convenience.
        self.df = pd.concat([self.y, self.x.drop(columns='const')], axis=1)
        # Fit once on the full design matrix; stepwise_selection starts here.
        self.results = self.model(self.y, self.x, **self.model_params).fit(**self.fit_params)

    def stepwise_selection(self, criteria: str = 'AIC'):
        """Backward stepwise feature elimination.

        Repeatedly refits the model with each single feature removed and
        drops the one whose removal most improves the chosen information
        criterion; stops when no removal improves it. The intercept
        ('const') is never a drop candidate. A trace of each step is printed.

        Parameters
        ----------
        criteria : str
            ``'AIC'`` (Akaike) or ``'BIC'`` (Bayesian) information criterion.

        Returns
        -------
        pandas.DataFrame
            ``y`` concatenated with the surviving features (without 'const').

        Raises
        ------
        ValueError
            If ``criteria`` is neither ``'AIC'`` nor ``'BIC'``.
        """
        if criteria not in ('AIC', 'BIC'):
            raise ValueError(f"criteria must be 'AIC' or 'BIC', got {criteria!r}")
        output = (f' STEPS {criteria} \n'
                  f'==============================================================================\n')
        remaining_features = list(self.x.columns)
        # 'aic' / 'bic' attribute on the fitted-results object.
        crit_attr = criteria.lower()
        best_criterion = getattr(self.results, crit_attr)
        improved = True
        drop_index = None
        while improved:
            improved = False
            output += (f'Selected Features: {remaining_features[1:]}\n'
                       f'{criteria}: {best_criterion}\n')
            # Start at 1 so the constant column is never removed.
            for index in range(1, len(remaining_features)):
                features = remaining_features[:index] + remaining_features[index + 1:]
                candidate = self.model(self.y, self.x[features], **self.model_params).fit(**self.fit_params)
                criterion = getattr(candidate, crit_attr)
                if criterion < best_criterion:
                    improved = True
                    best_criterion = criterion
                    drop_index = index
            if improved:
                remaining_features.pop(drop_index)
        print(output)
        return pd.concat([self.y, self.x[remaining_features[1:]]], axis=1)