@@ -359,7 +359,7 @@ class CartModel(core.CoreModel):
359359 split_axis: What structure of split to consider for numerical features. -
360360 `AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
361361 is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
362- Sparse oblique splits (i.e. random splits one a small number of features)
362+ Sparse oblique splits (i.e. random splits on a small number of features)
363363 from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
364364 `MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
365365 "Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
@@ -1030,6 +1030,9 @@ class GradientBoostedTreesModel(core.CoreModel):
10301030 variable importance of the model at the end of the training using the
10311031 validation dataset. Enabling this feature can increase the training time
10321032 significantly. Default: False.
1033+ cross_entropy_ndcg_truncation: Truncation of the cross-entropy NDCG loss
1034+ (default 5). Only used with cross-entropy NDCG loss i.e.
1035+ `loss="XE_NDCG_MART"`. Default: 5.
10331036 dart_dropout: Dropout rate applied when using DART, i.e. when
10341037 forest_extraction=DART. Default: None.
10351038 early_stopping: Early stopping detects the overfitting of the model and
@@ -1048,12 +1051,12 @@ class GradientBoostedTreesModel(core.CoreModel):
10481051 Default: 10.
10491052 early_stopping_num_trees_look_ahead: Rolling number of trees used to detect
10501053 validation loss increase and trigger early stopping. Default: 30.
1051- focal_loss_alpha: EXPERIMENTAL. Weighting parameter for focal loss, positive
1052- samples weighted by alpha, negative samples by (1-alpha). The default 0.5
1053- value means no active class-level weighting. Only used with focal loss
1054- i.e. `loss="BINARY_FOCAL_LOSS"` Default: 0.5.
1055- focal_loss_gamma: EXPERIMENTAL. Exponent of the misprediction exponent term
1056- in focal loss, corresponds to gamma parameter in
1054+ focal_loss_alpha: EXPERIMENTAL, default 0.5. Weighting parameter for focal
1055+ loss: positive samples are weighted by alpha, negative samples by (1-alpha).
1056+ The default 0.5 value means no active class-level weighting. Only used
1057+ with focal loss i.e. `loss="BINARY_FOCAL_LOSS"`. Default: 0.5.
1058+ focal_loss_gamma: EXPERIMENTAL, default 2.0. Exponent of the misprediction
1059+ term in focal loss; corresponds to the gamma parameter in
10571060 https://arxiv.org/pdf/1708.02002.pdf. Only used with focal loss i.e.
10581061 `loss="BINARY_FOCAL_LOSS"`. Default: 2.0.
10591062 forest_extraction: How to construct the forest: - MART: For Multiple
@@ -1122,12 +1125,13 @@ class GradientBoostedTreesModel(core.CoreModel):
11221125 likelihood loss. Mainly used for counting problems. Only valid for
11231126 regression. - `MULTINOMIAL_LOG_LIKELIHOOD`: Multinomial log likelihood
11241127 i.e. cross-entropy. Only valid for binary or multi-class classification. -
1125- `LAMBDA_MART_NDCG5 `: LambdaMART with NDCG5 . - `XE_NDCG_MART`: Cross
1128+ `LAMBDA_MART_NDCG`: LambdaMART with NDCG@5. - `XE_NDCG_MART`: Cross
11261129 Entropy Loss NDCG. See arxiv.org/abs/1911.09798. - `BINARY_FOCAL_LOSS`:
11271130 Focal loss. Only valid for binary classification. See
11281131 https://arxiv.org/pdf/1708.02002.pdf. - `POISSON`: Poisson log likelihood.
11291132 Only valid for regression. - `MEAN_AVERAGE_ERROR`: Mean absolute error
1130- a.k.a. MAE.
1133+ a.k.a. MAE. - `LAMBDA_MART_NDCG5`: DEPRECATED, use LAMBDA_MART_NDCG.
1134+ LambdaMART with NDCG@5.
11311135 Default: "DEFAULT".
11321136 max_depth: Maximum depth of the tree. `max_depth=1` means that all trees
11331137 will be roots. `max_depth=-1` means that tree depth is not restricted by
@@ -1170,6 +1174,8 @@ class GradientBoostedTreesModel(core.CoreModel):
11701174 et al. in "Random Survival Forests"
11711175 (https://projecteuclid.org/download/pdfview_1/euclid.aoas/1223908043).
11721176 Default: "GLOBAL_IMPUTATION".
1177+ ndcg_truncation: Truncation of the NDCG loss (default 5). Only used with
1178+ NDCG loss i.e. `loss="LAMBDA_MART_NDCG"`. Default: 5.
11731179 num_candidate_attributes: Number of unique valid attributes tested for each
11741180 node. An attribute is valid if it has at least a valid split. If
11751181 `num_candidate_attributes=0`, the value is set to the classical default
@@ -1266,7 +1272,7 @@ class GradientBoostedTreesModel(core.CoreModel):
12661272 split_axis: What structure of split to consider for numerical features. -
12671273 `AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
12681274 is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
1269- Sparse oblique splits (i.e. random splits one a small number of features)
1275+ Sparse oblique splits (i.e. random splits on a small number of features)
12701276 from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
12711277 `MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
12721278 "Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
@@ -1336,6 +1342,7 @@ def __init__(
13361342 categorical_set_split_max_num_items : Optional [int ] = - 1 ,
13371343 categorical_set_split_min_item_frequency : Optional [int ] = 1 ,
13381344 compute_permutation_variable_importance : Optional [bool ] = False ,
1345+ cross_entropy_ndcg_truncation : Optional [int ] = 5 ,
13391346 dart_dropout : Optional [float ] = None ,
13401347 early_stopping : Optional [str ] = "LOSS_INCREASE" ,
13411348 early_stopping_initial_iteration : Optional [int ] = 10 ,
@@ -1364,6 +1371,7 @@ def __init__(
13641371 mhld_oblique_sample_attributes : Optional [bool ] = None ,
13651372 min_examples : Optional [int ] = 5 ,
13661373 missing_value_policy : Optional [str ] = "GLOBAL_IMPUTATION" ,
1374+ ndcg_truncation : Optional [int ] = 5 ,
13671375 num_candidate_attributes : Optional [int ] = - 1 ,
13681376 num_candidate_attributes_ratio : Optional [float ] = - 1.0 ,
13691377 num_trees : Optional [int ] = 300 ,
@@ -1407,6 +1415,7 @@ def __init__(
14071415 "compute_permutation_variable_importance" : (
14081416 compute_permutation_variable_importance
14091417 ),
1418+ "cross_entropy_ndcg_truncation" : cross_entropy_ndcg_truncation ,
14101419 "dart_dropout" : dart_dropout ,
14111420 "early_stopping" : early_stopping ,
14121421 "early_stopping_initial_iteration" : early_stopping_initial_iteration ,
@@ -1439,6 +1448,7 @@ def __init__(
14391448 "mhld_oblique_sample_attributes" : mhld_oblique_sample_attributes ,
14401449 "min_examples" : min_examples ,
14411450 "missing_value_policy" : missing_value_policy ,
1451+ "ndcg_truncation" : ndcg_truncation ,
14421452 "num_candidate_attributes" : num_candidate_attributes ,
14431453 "num_candidate_attributes_ratio" : num_candidate_attributes_ratio ,
14441454 "num_trees" : num_trees ,
@@ -2369,7 +2379,7 @@ class RandomForestModel(core.CoreModel):
23692379 split_axis: What structure of split to consider for numerical features. -
23702380 `AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
23712381 is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
2372- Sparse oblique splits (i.e. random splits one a small number of features)
2382+ Sparse oblique splits (i.e. random splits on a small number of features)
23732383 from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
23742384 `MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
23752385 "Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
0 commit comments