-
Notifications
You must be signed in to change notification settings - Fork 1.3k
[MRG] FIX geometric_mean_score: macro/weighted average is mean of per-class G-means (#1096) #1173
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -662,15 +662,47 @@ class is unrecognized by the classifier, G-mean resolves to zero. To | |
| >>> geometric_mean_score(y_true, y_pred, correction=0.001) | ||
| 0.010... | ||
| >>> geometric_mean_score(y_true, y_pred, average='macro') | ||
| 0.471... | ||
| 0.288... | ||
| >>> geometric_mean_score(y_true, y_pred, average='micro') | ||
| 0.471... | ||
| >>> geometric_mean_score(y_true, y_pred, average='weighted') | ||
| 0.471... | ||
| 0.288... | ||
| >>> geometric_mean_score(y_true, y_pred, average=None) | ||
| array([0.866..., 0. , 0. ]) | ||
| """ | ||
| if average is None or average != "multiclass": | ||
| # For macro/weighted averaging the previous implementation passed | ||
| # ``average`` directly to sensitivity_specificity_support, which | ||
| # returns the macro/weighted average of sensitivity and | ||
| # specificity, and then took ``sqrt(sen * spe)`` of those scalars. | ||
| # That is the geometric mean of mean-sensitivity and | ||
| # mean-specificity, NOT the mean of per-class G-means — and the two | ||
| # disagree on binary inputs (see #1096). Compute the per-class | ||
| # G-mean first and aggregate, so that | ||
| # ``geometric_mean_score(..., average='macro')`` equals | ||
| # ``np.mean(geometric_mean_score(..., average=None))`` as users | ||
| # rightly expect. | ||
| if average in ("macro", "weighted"): | ||
| sen, spe, sup = sensitivity_specificity_support( | ||
| y_true, | ||
| y_pred, | ||
| labels=labels, | ||
| pos_label=pos_label, | ||
| average=None, | ||
| warn_for=("specificity", "specificity"), | ||
| sample_weight=sample_weight, | ||
| ) | ||
| per_class = np.sqrt(sen * spe) | ||
| if average == "macro": | ||
| return np.mean(per_class) | ||
| # weighted: mean weighted by support (true samples per class). | ||
| # Mirror sklearn's behaviour and return 0 when the total | ||
| # support is zero rather than producing a NaN from 0/0. | ||
|
Comment on lines
+698
to
+700
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also no need for this. |
||
| total = sup.sum() | ||
| if total == 0: | ||
| return 0.0 | ||
| return np.average(per_class, weights=sup) | ||
|
|
||
| sen, spe, _ = sensitivity_specificity_support( | ||
| y_true, | ||
| y_pred, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -229,9 +229,13 @@ def test_geometric_mean_multiclass(y_true, y_pred, correction, expected_gmean): | |
| @pytest.mark.parametrize( | ||
| "y_true, y_pred, average, expected_gmean", | ||
| [ | ||
| ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], "macro", 0.471), | ||
| # Macro / weighted G-mean is the (weighted) mean of the per-class | ||
| # G-mean array — which for this input is [0.866, 0, 0]. Their | ||
| # value is therefore 0.866/3 = 0.2887, NOT sqrt(macro_sen * | ||
| # macro_spe) = 0.471 as the metric used to return before #1096. | ||
|
Comment on lines
+232
to
+235
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove this comment. |
||
| ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], "macro", 0.2887), | ||
| ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], "micro", 0.471), | ||
| ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], "weighted", 0.471), | ||
| ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], "weighted", 0.2887), | ||
| ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], None, [0.8660254, 0.0, 0.0]), | ||
| ], | ||
| ) | ||
|
|
@@ -251,12 +255,18 @@ def test_geometric_mean_average(y_true, y_pred, average, expected_gmean): | |
| "multiclass", | ||
| 0.707, | ||
| ), | ||
| # weighted G-mean with labels=[0, 1] and the given sample | ||
| # weights: per-class (sen, spe) restricted to [0, 1] are | ||
| # [(1.0, 0.5), (0.5, 0.0)] with supports [2, 4]; per-class | ||
| # G-means = [sqrt(0.5), 0] = [0.707, 0]; weighted average = | ||
| # (2*0.707 + 4*0)/6 = 0.236. Old expected 0.333 came from | ||
| # sqrt(weighted_sen * weighted_spe), the bug fixed in #1096. | ||
|
Comment on lines
+258
to
+263
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove this comment |
||
| ( | ||
| [0, 1, 2, 0, 1, 2], | ||
| [0, 1, 1, 0, 0, 1], | ||
| [1, 2, 1, 1, 2, 1], | ||
| "weighted", | ||
| 0.333, | ||
| 0.236, | ||
| ), | ||
| ], | ||
| ) | ||
|
|
@@ -278,8 +288,13 @@ def test_geometric_mean_sample_weight( | |
| [ | ||
| ("multiclass", 0.36), | ||
| (None, [0.82, 0.24, 0.72]), | ||
| ("macro", 0.67), | ||
| ("weighted", 0.64), | ||
| # macro / weighted updated in #1096: now mean(per-class gmean) | ||
| # rather than sqrt(macro_sen * macro_spe). Per-class G-means | ||
| # for this fixture are [0.82, 0.24, 0.72]; macro = mean = 0.59; | ||
| # weighted mean over class supports (which differ slightly due | ||
| # to the 50/50 train/test split) = 0.55. | ||
|
Comment on lines
+291
to
+295
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove this comment |
||
| ("macro", 0.59), | ||
| ("weighted", 0.55), | ||
| ], | ||
| ) | ||
| def test_geometric_mean_score_prediction(average, expected_gmean): | ||
|
|
@@ -289,6 +304,29 @@ def test_geometric_mean_score_prediction(average, expected_gmean): | |
| assert gmean == pytest.approx(expected_gmean, rel=R_TOL) | ||
|
|
||
|
|
||
| @pytest.mark.parametrize( | ||
| "y_true, y_pred", | ||
| [ | ||
| # binary case from #1096 | ||
| ([0, 0, 1, 0, 1, 1], [0, 0, 0, 0, 0, 1]), | ||
| # binary, fully balanced | ||
| ([0, 1, 0, 1], [0, 1, 1, 0]), | ||
| # 3-class, asymmetric errors | ||
| ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 1, 2]), | ||
| ], | ||
| ) | ||
| def test_geometric_mean_macro_equals_mean_of_per_class(y_true, y_pred): | ||
| # Non-regression test for #1096: the macro G-mean MUST equal the | ||
| # arithmetic mean of the per-class G-mean array. Before #1096 the | ||
| # macro path returned sqrt(macro_sen * macro_spe), which generally | ||
| # disagrees with mean(per_class_gmean) — most visibly on binary | ||
| # inputs, where the original report observed 0.745 (correct ~0.577) | ||
| # for y_true=[0,0,1,0,1,1], y_pred=[0,0,0,0,0,1]. | ||
|
Comment on lines
+319
to
+324
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You don't need all this text. You only need to mention that it is a non-regression test and link the PR (because we did not have an issue) |
||
| per_class = geometric_mean_score(y_true, y_pred, average=None) | ||
| macro = geometric_mean_score(y_true, y_pred, average="macro") | ||
| assert macro == pytest.approx(np.mean(per_class)) | ||
|
|
||
|
|
||
| def test_iba_geo_mean_binary(): | ||
| y_true, y_pred, _ = make_prediction(binary=True) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you remove this comment.