Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/whats_new/v0.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ Changelog
Bug fixes
.........

- Fix :func:`~imblearn.metrics.geometric_mean_score` with
``average='macro'`` (and ``'weighted'``): the function now returns the
(weighted) mean of the per-class G-means, matching ``np.mean`` of
``geometric_mean_score(..., average=None)`` as users expect. The
previous implementation returned ``sqrt(macro_sen * macro_spe)``,
which disagreed with the per-class average — most visibly on binary
inputs. :pr:`1173` by :user:`jbbqqf`.

Enhancements
............

Expand Down
36 changes: 34 additions & 2 deletions imblearn/metrics/_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,15 +662,47 @@ class is unrecognized by the classifier, G-mean resolves to zero. To
>>> geometric_mean_score(y_true, y_pred, correction=0.001)
0.010...
>>> geometric_mean_score(y_true, y_pred, average='macro')
0.471...
0.288...
>>> geometric_mean_score(y_true, y_pred, average='micro')
0.471...
>>> geometric_mean_score(y_true, y_pred, average='weighted')
0.471...
0.288...
>>> geometric_mean_score(y_true, y_pred, average=None)
array([0.866..., 0. , 0. ])
"""
if average is None or average != "multiclass":
# For macro/weighted averaging the previous implementation passed
# ``average`` directly to sensitivity_specificity_support, which
# returns the macro/weighted average of sensitivity and
# specificity, and then took ``sqrt(sen * spe)`` of those scalars.
# That is the geometric mean of mean-sensitivity and
# mean-specificity, NOT the mean of per-class G-means — and the two
# disagree on binary inputs (see #1096). Compute the per-class
# G-mean first and aggregate, so that
# ``geometric_mean_score(..., average='macro')`` equals
# ``np.mean(geometric_mean_score(..., average=None))`` as users
# rightly expect.
Comment on lines +674 to +684

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you remove this comment.

if average in ("macro", "weighted"):
sen, spe, sup = sensitivity_specificity_support(
y_true,
y_pred,
labels=labels,
pos_label=pos_label,
average=None,
warn_for=("specificity", "specificity"),
sample_weight=sample_weight,
)
per_class = np.sqrt(sen * spe)
if average == "macro":
return np.mean(per_class)
# weighted: mean weighted by support (true samples per class).
# Mirror sklearn's behaviour and return 0 when the total
# support is zero rather than producing a NaN from 0/0.
Comment on lines +698 to +700

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also no need for this.

total = sup.sum()
if total == 0:
return 0.0
return np.average(per_class, weights=sup)

sen, spe, _ = sensitivity_specificity_support(
y_true,
y_pred,
Expand Down
48 changes: 43 additions & 5 deletions imblearn/metrics/tests/test_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,13 @@ def test_geometric_mean_multiclass(y_true, y_pred, correction, expected_gmean):
@pytest.mark.parametrize(
"y_true, y_pred, average, expected_gmean",
[
([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], "macro", 0.471),
# Macro / weighted G-mean is the (weighted) mean of the per-class
# G-mean array — which for this input is [0.866, 0, 0]. Their
# value is therefore 0.866/3 = 0.2887, NOT sqrt(macro_sen *
# macro_spe) = 0.471 as the metric used to return before #1096.
Comment on lines +232 to +235

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove this comment.

([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], "macro", 0.2887),
([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], "micro", 0.471),
([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], "weighted", 0.471),
([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], "weighted", 0.2887),
([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], None, [0.8660254, 0.0, 0.0]),
],
)
Expand All @@ -251,12 +255,18 @@ def test_geometric_mean_average(y_true, y_pred, average, expected_gmean):
"multiclass",
0.707,
),
# weighted G-mean with labels=[0, 1] and the given sample
# weights: per-class (sen, spe) restricted to [0, 1] are
# [(1.0, 0.5), (0.5, 0.0)] with supports [2, 4]; per-class
# G-means = [sqrt(0.5), 0] = [0.707, 0]; weighted average =
# (2*0.707 + 4*0)/6 = 0.236. Old expected 0.333 came from
# sqrt(weighted_sen * weighted_spe), the bug fixed in #1096.
Comment on lines +258 to +263

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove this comment

(
[0, 1, 2, 0, 1, 2],
[0, 1, 1, 0, 0, 1],
[1, 2, 1, 1, 2, 1],
"weighted",
0.333,
0.236,
),
],
)
Expand All @@ -278,8 +288,13 @@ def test_geometric_mean_sample_weight(
[
("multiclass", 0.36),
(None, [0.82, 0.24, 0.72]),
("macro", 0.67),
("weighted", 0.64),
# macro / weighted updated in #1096: now mean(per-class gmean)
# rather than sqrt(macro_sen * macro_spe). Per-class G-means
# for this fixture are [0.82, 0.24, 0.72]; macro = mean = 0.59;
# weighted mean over class supports (which differ slightly due
# to the 50/50 train/test split) = 0.55.
Comment on lines +291 to +295

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove this comment

("macro", 0.59),
("weighted", 0.55),
],
)
def test_geometric_mean_score_prediction(average, expected_gmean):
Expand All @@ -289,6 +304,29 @@ def test_geometric_mean_score_prediction(average, expected_gmean):
assert gmean == pytest.approx(expected_gmean, rel=R_TOL)


@pytest.mark.parametrize(
"y_true, y_pred",
[
# binary case from #1096
([0, 0, 1, 0, 1, 1], [0, 0, 0, 0, 0, 1]),
# binary, fully balanced
([0, 1, 0, 1], [0, 1, 1, 0]),
# 3-class, asymmetric errors
([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 1, 2]),
],
)
def test_geometric_mean_macro_equals_mean_of_per_class(y_true, y_pred):
# Non-regression test for #1096: the macro G-mean MUST equal the
# arithmetic mean of the per-class G-mean array. Before #1096 the
# macro path returned sqrt(macro_sen * macro_spe), which generally
# disagrees with mean(per_class_gmean) — most visibly on binary
# inputs, where the original report observed 0.745 (correct ~0.577)
# for y_true=[0,0,1,0,1,1], y_pred=[0,0,0,0,0,1].
Comment on lines +319 to +324

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need all this text. You only need to mention that it is a non-regression test and link the PR (because we did not have an issue)

per_class = geometric_mean_score(y_true, y_pred, average=None)
macro = geometric_mean_score(y_true, y_pred, average="macro")
assert macro == pytest.approx(np.mean(per_class))


def test_iba_geo_mean_binary():
y_true, y_pred, _ = make_prediction(binary=True)

Expand Down