gbdt.py (forked from microsoft/qlib)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from __future__ import division
from __future__ import print_function

import numpy as np
import lightgbm as lgb
from sklearn.metrics import roc_auc_score, mean_squared_error

from .base import Model
from ...utils import drop_nan_by_y_index


class LGBModel(Model):
    """LightGBM Model

    Parameters
    ----------
    loss : str
        loss function, either "mse" (regression) or "binary" (classification)
    kwargs : dict
        extra LightGBM parameters, forwarded to ``lgb.train``
    """

    def __init__(self, loss="mse", **kwargs):
        if loss not in {"mse", "binary"}:
            raise NotImplementedError("unsupported loss: {}".format(loss))
        self._scorer = mean_squared_error if loss == "mse" else roc_auc_score
        # Keep the parameters on the instance: a class-level dict would be
        # shared (and mutated) across every LGBModel instance.
        self._params = dict(objective=loss, **kwargs)
        self._model = None

    def fit(
        self,
        x_train,
        y_train,
        x_valid,
        y_valid,
        w_train=None,
        w_valid=None,
        num_boost_round=1000,
        early_stopping_rounds=50,
        verbose_eval=20,
        evals_result=None,
        **kwargs
    ):
        # Avoid a mutable default argument; callers can still pass their own
        # dict to collect the per-iteration evaluation results.
        if evals_result is None:
            evals_result = {}
        # LightGBM needs a 1D array as its label
        if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
            y_train_1d, y_valid_1d = np.squeeze(y_train.values), np.squeeze(y_valid.values)
        else:
            raise ValueError("LightGBM doesn't support multi-label training")
        w_train_weight = None if w_train is None else w_train.values
        w_valid_weight = None if w_valid is None else w_valid.values
        dtrain = lgb.Dataset(x_train.values, label=y_train_1d, weight=w_train_weight)
        dvalid = lgb.Dataset(x_valid.values, label=y_valid_1d, weight=w_valid_weight)
        # NOTE: ``early_stopping_rounds``, ``verbose_eval`` and ``evals_result``
        # were removed from ``lgb.train`` in LightGBM >= 4.0 in favour of
        # callbacks (``lgb.early_stopping``, ``lgb.log_evaluation``,
        # ``lgb.record_evaluation``); this code targets older LightGBM.
        self._model = lgb.train(
            self._params,
            dtrain,
            num_boost_round=num_boost_round,
            valid_sets=[dtrain, dvalid],
            valid_names=["train", "valid"],
            early_stopping_rounds=early_stopping_rounds,
            verbose_eval=verbose_eval,
            evals_result=evals_result,
            **kwargs
        )
        # Flatten {"train": {"l2": [...]}} into {"train": [...]} for convenience.
        evals_result["train"] = list(evals_result["train"].values())[0]
        evals_result["valid"] = list(evals_result["valid"].values())[0]

    def predict(self, x_test):
        if self._model is None:
            raise ValueError("model is not fitted yet!")
        return self._model.predict(x_test.values)

    def score(self, x_test, y_test, w_test=None):
        # Drop rows from x, y and w where y_test contains NaN in any column.
        x_test, y_test, w_test = drop_nan_by_y_index(x_test, y_test, w_test)
        preds = self.predict(x_test)
        w_test_weight = None if w_test is None else w_test.values
        return self._scorer(y_test.values, preds, sample_weight=w_test_weight)

    def save(self, filename):
        if self._model is None:
            raise ValueError("model is not fitted yet!")
        self._model.save_model(filename)

    def load(self, buffer):
        # ``buffer`` holds the raw bytes of a saved model file; rebuild the
        # booster from its string representation.
        self._model = lgb.Booster(model_str=buffer.decode("utf-8"))
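
# Minimal usage sketch (not part of the original module): assumes pandas
# DataFrames ``x_train``/``x_valid`` of features and single-column DataFrames
# ``y_train``/``y_valid`` of labels, as qlib's data handlers would provide.
# The names and hyperparameters below are illustrative only.
#
#     model = LGBModel(loss="mse", learning_rate=0.05, num_leaves=64)
#     evals = {}
#     model.fit(x_train, y_train, x_valid, y_valid, evals_result=evals)
#     preds = model.predict(x_valid)       # 1D numpy array of predictions
#     mse = model.score(x_valid, y_valid)  # mean squared error on the hold-out set
#     model.save("lgb_model.txt")          # plain-text LightGBM model file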