From 66755addf822a8013ca0e0a6292a3df58faaa7d7 Mon Sep 17 00:00:00 2001
From: BanzaiTokyo
Date: Thu, 24 Apr 2025 09:09:33 +0000
Subject: [PATCH 01/32] add available device to test_canberra_metric.py

---
 .../regression/test_canberra_metric.py | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/tests/ignite/metrics/regression/test_canberra_metric.py b/tests/ignite/metrics/regression/test_canberra_metric.py
index eaaee884768a..054f95529f5a 100644
--- a/tests/ignite/metrics/regression/test_canberra_metric.py
+++ b/tests/ignite/metrics/regression/test_canberra_metric.py
@@ -20,14 +20,15 @@ def test_wrong_input_shapes():
         m.update((torch.rand(4, 1), torch.rand(4)))
 
 
-def test_compute():
+def test_compute(available_device):
     a = np.random.randn(4)
     b = np.random.randn(4)
     c = np.random.randn(4)
     d = np.random.randn(4)
     ground_truth = np.random.randn(4)
 
-    m = CanberraMetric()
+    m = CanberraMetric(device=available_device)
+    assert m._device == torch.device(available_device)
 
     canberra = DistanceMetric.get_metric("canberra")
 
@@ -58,8 +59,8 @@ def test_compute():
     assert canberra.pairwise([v1, v2])[0][1] == pytest.approx(np_sum)
 
 
-def test_integration():
-    def _test(y_pred, y, batch_size):
+def test_integration(available_device):
+    def _test(y_pred, y, batch_size, device="cpu"):
         def update_fn(engine, batch):
             idx = (engine.state.iteration - 1) * batch_size
             y_true_batch = np_y[idx : idx + batch_size]
@@ -68,7 +69,9 @@ def update_fn(engine, batch):
 
         engine = Engine(update_fn)
 
-        m = CanberraMetric()
+        m = CanberraMetric(device=device)
+        assert m._device == torch.device(device)
+
         m.attach(engine, "cm")
 
         np_y = y.numpy().ravel()
@@ -92,11 +95,12 @@ def get_test_cases():
         # check multiple random inputs as random exact occurencies are rare
         test_cases = get_test_cases()
         for y_pred, y, batch_size in test_cases:
-            _test(y_pred, y, batch_size)
+            _test(y_pred, y, batch_size, device=available_device)
 
 
-def test_error_is_not_nan():
-    m = CanberraMetric()
+def test_error_is_not_nan(available_device):
+    m = CanberraMetric(device=available_device)
+    assert m._device == torch.device(available_device)
     m.update((torch.zeros(4), torch.zeros(4)))
     assert not (torch.isnan(m._sum_of_errors).any() or torch.isinf(m._sum_of_errors).any()), m._sum_of_errors
 

From 9229e3b2d8e085f957dbf2d06eb97a2fb5716e7b Mon Sep 17 00:00:00 2001
From: BanzaiTokyo
Date: Thu, 24 Apr 2025 09:30:22 +0000
Subject: [PATCH 02/32] add _double_dtype ad dtype when transfrring errors to device

---
 ignite/metrics/regression/canberra_metric.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ignite/metrics/regression/canberra_metric.py b/ignite/metrics/regression/canberra_metric.py
index f8bd2732a386..45abd4abb943 100644
--- a/ignite/metrics/regression/canberra_metric.py
+++ b/ignite/metrics/regression/canberra_metric.py
@@ -74,7 +74,7 @@ def reset(self) -> None:
     def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None:
         y_pred, y = output[0].detach(), output[1].detach()
         errors = torch.abs(y - y_pred) / (torch.abs(y_pred) + torch.abs(y) + 1e-15)
-        self._sum_of_errors += torch.sum(errors).to(self._device)
+        self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device)
 
     @sync_all_reduce("_sum_of_errors")
     def compute(self) -> float:

From 2f6320aa780e088017a5d1bea58a104aa4fe72fe Mon Sep 17 00:00:00 2001
From: BanzaiTokyo
Date: Thu, 24 Apr 2025 09:51:25 +0000
Subject: [PATCH 03/32] available devices in test_fractional_absolute_error.py, test_fractional_bias.py,
test_geometric_mean_absolute_error.py --- .../test_fractional_absolute_error.py | 14 +++++++------ .../regression/test_fractional_bias.py | 20 +++++++++++-------- .../test_geometric_mean_absolute_error.py | 13 +++++++----- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/tests/ignite/metrics/regression/test_fractional_absolute_error.py b/tests/ignite/metrics/regression/test_fractional_absolute_error.py index c1c3b080576f..6b07d8e8de49 100644 --- a/tests/ignite/metrics/regression/test_fractional_absolute_error.py +++ b/tests/ignite/metrics/regression/test_fractional_absolute_error.py @@ -28,14 +28,15 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_compute(): +def test_compute(available_device): a = np.random.randn(4) b = np.random.randn(4) c = np.random.randn(4) d = np.random.randn(4) ground_truth = np.random.randn(4) - m = FractionalAbsoluteError() + m = FractionalAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_sum = (2 * np.abs((a - ground_truth)) / (np.abs(a) + np.abs(ground_truth))).sum() @@ -62,8 +63,8 @@ def test_compute(): assert m.compute() == pytest.approx(np_ans) -def test_integration(): - def _test(y_pred, y, batch_size): +def test_integration(available_device): + def _test(y_pred, y, batch_size, device="cpu"): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -72,7 +73,8 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = FractionalAbsoluteError() + m = FractionalAbsoluteError(device=device) + assert m._device == torch.device(device) m.attach(engine, "fab") np_y = y.numpy().ravel() @@ -98,7 +100,7 @@ def get_test_cases(): # check multiple random inputs as random exact occurencies are rare test_cases = get_test_cases() for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size) + _test(y_pred, y, batch_size, device=available_device) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_fractional_bias.py b/tests/ignite/metrics/regression/test_fractional_bias.py index bf78d4870d5b..8ec9862ea6bb 100644 --- a/tests/ignite/metrics/regression/test_fractional_bias.py +++ b/tests/ignite/metrics/regression/test_fractional_bias.py @@ -28,14 +28,15 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_fractional_bias(): +def test_fractional_bias(available_device): a = np.random.randn(4) b = np.random.randn(4) c = np.random.randn(4) d = np.random.randn(4) ground_truth = np.random.randn(4) - m = FractionalBias() + m = FractionalBias(device=available_device) + assert m._device == torch.device(available_device) m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_sum = (2 * (ground_truth - a) / (a + ground_truth)).sum() @@ -62,8 +63,8 @@ def test_fractional_bias(): assert m.compute() == pytest.approx(np_ans) -def test_integration(): - def _test(y_pred, y, batch_size): +def test_integration(available_device): + def _test(y_pred, y, batch_size, device="cpu"): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -72,7 +73,9 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = FractionalBias() + m = FractionalBias(device=device) + assert m._device == torch.device(device) + m.attach(engine, "fb") np_y = y.double().numpy().ravel() @@ -98,11 +101,12 @@ def get_test_cases(): # 
check multiple random inputs as random exact occurencies are rare test_cases = get_test_cases() for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size) + _test(y_pred, y, batch_size, device=available_device) -def test_error_is_not_nan(): - m = FractionalBias() +def test_error_is_not_nan(available_device): + m = FractionalBias(device=available_device) + assert m._device == torch.device(available_device) m.update((torch.zeros(4), torch.zeros(4))) assert not (torch.isnan(m._sum_of_errors).any() or torch.isinf(m._sum_of_errors).any()), m._sum_of_errors diff --git a/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py b/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py index 05f023691a54..1e99e7eec0eb 100644 --- a/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py +++ b/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py @@ -28,7 +28,7 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_compute(): +def test_compute(available_device): a = np.random.randn(4) b = np.random.randn(4) c = np.random.randn(4) @@ -36,7 +36,8 @@ def test_compute(): ground_truth = np.random.randn(4) np_prod = 1.0 - m = GeometricMeanAbsoluteError() + m = GeometricMeanAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) errors = np.abs(ground_truth - a) @@ -67,8 +68,8 @@ def test_compute(): assert m.compute() == pytest.approx(np_ans) -def test_integration(): - def _test(y_pred, y, batch_size): +def test_integration(available_device): + def _test(y_pred, y, batch_size, device="cpu"): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -77,7 +78,9 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = GeometricMeanAbsoluteError() + m = GeometricMeanAbsoluteError(device=device) + assert m._device == torch.device(device) + m.attach(engine, "gmae") np_y = y.numpy().ravel() From 557f549b86eb53d556f2c1bcd05f17ddda19c2d2 Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Thu, 24 Apr 2025 10:20:31 +0000 Subject: [PATCH 04/32] when transferring to device use dtype --- ignite/metrics/regression/fractional_absolute_error.py | 2 +- ignite/metrics/regression/fractional_bias.py | 2 +- .../regression/geometric_mean_absolute_error.py | 2 +- .../geometric_mean_relative_absolute_error.py | 4 ++-- ignite/metrics/regression/manhattan_distance.py | 2 +- .../metrics/regression/mean_absolute_relative_error.py | 4 +++- ignite/metrics/regression/mean_normalized_bias.py | 2 +- ignite/metrics/regression/pearson_correlation.py | 10 +++++----- ignite/metrics/regression/r2_score.py | 6 +++--- ignite/metrics/regression/wave_hedges_distance.py | 2 +- 10 files changed, 19 insertions(+), 17 deletions(-) diff --git a/ignite/metrics/regression/fractional_absolute_error.py b/ignite/metrics/regression/fractional_absolute_error.py index c66e8e780f70..28c35596a329 100644 --- a/ignite/metrics/regression/fractional_absolute_error.py +++ b/ignite/metrics/regression/fractional_absolute_error.py @@ -70,7 +70,7 @@ def reset(self) -> None: def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() errors = 2 * torch.abs(y.view_as(y_pred) - y_pred) / (torch.abs(y_pred) + torch.abs(y.view_as(y_pred))) - self._sum_of_errors += torch.sum(errors).to(self._device) + self._sum_of_errors += 
torch.sum(errors).to(dtype=self._double_dtype, device=self._device) self._num_examples += y.shape[0] @sync_all_reduce("_num_examples", "_sum_of_errors") diff --git a/ignite/metrics/regression/fractional_bias.py b/ignite/metrics/regression/fractional_bias.py index 7164cd4f1667..bac6c67bcf5b 100644 --- a/ignite/metrics/regression/fractional_bias.py +++ b/ignite/metrics/regression/fractional_bias.py @@ -70,7 +70,7 @@ def reset(self) -> None: def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() errors = 2 * (y.view_as(y_pred) - y_pred) / (y_pred + y.view_as(y_pred) + 1e-30) - self._sum_of_errors += torch.sum(errors).to(self._device) + self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device) self._num_examples += y.shape[0] @sync_all_reduce("_sum_of_errors", "_num_examples") diff --git a/ignite/metrics/regression/geometric_mean_absolute_error.py b/ignite/metrics/regression/geometric_mean_absolute_error.py index 92f61192839c..3f6f35176fb3 100644 --- a/ignite/metrics/regression/geometric_mean_absolute_error.py +++ b/ignite/metrics/regression/geometric_mean_absolute_error.py @@ -70,7 +70,7 @@ def reset(self) -> None: def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() errors = torch.log(torch.abs(y.view_as(y_pred) - y_pred)) - self._sum_of_errors += torch.sum(errors).to(self._device) + self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device) self._num_examples += y.shape[0] @sync_all_reduce("_sum_of_errors", "_num_examples") diff --git a/ignite/metrics/regression/geometric_mean_relative_absolute_error.py b/ignite/metrics/regression/geometric_mean_relative_absolute_error.py index 8e2bfb9c0457..a9e639589729 100644 --- a/ignite/metrics/regression/geometric_mean_relative_absolute_error.py +++ b/ignite/metrics/regression/geometric_mean_relative_absolute_error.py @@ -80,8 +80,8 @@ def reset(self) -> None: def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() - y_pred = y_pred.clone().to(self._device) - y = y.clone().to(self._device) + y_pred = y_pred.clone().to(dtype=self._double_dtype, device=self._device) + y = y.clone().to(dtype=self._double_dtype, device=self._device) self._predictions.append(y_pred) self._targets.append(y) diff --git a/ignite/metrics/regression/manhattan_distance.py b/ignite/metrics/regression/manhattan_distance.py index 9746fece91ea..dd71ff792670 100644 --- a/ignite/metrics/regression/manhattan_distance.py +++ b/ignite/metrics/regression/manhattan_distance.py @@ -70,7 +70,7 @@ def reset(self) -> None: def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output errors = torch.abs(y - y_pred) - self._sum_of_errors += torch.sum(errors).to(self._device) + self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device) @sync_all_reduce("_sum_of_errors") def compute(self) -> float: diff --git a/ignite/metrics/regression/mean_absolute_relative_error.py b/ignite/metrics/regression/mean_absolute_relative_error.py index 1551ff9be720..a851aa4fb5a7 100644 --- a/ignite/metrics/regression/mean_absolute_relative_error.py +++ b/ignite/metrics/regression/mean_absolute_relative_error.py @@ -72,7 +72,9 @@ def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: if (y == 0).any(): raise NotComputableError("The ground truth has 0.") absolute_error = torch.abs(y_pred 
- y.view_as(y_pred)) / torch.abs(y.view_as(y_pred)) - self._sum_of_absolute_relative_errors += torch.sum(absolute_error).to(self._device) + self._sum_of_absolute_relative_errors += torch.sum(absolute_error).to( + dtype=self._double_dtype, device=self._device + ) self._num_samples += y.size()[0] @sync_all_reduce("_sum_of_absolute_relative_errors", "_num_samples") diff --git a/ignite/metrics/regression/mean_normalized_bias.py b/ignite/metrics/regression/mean_normalized_bias.py index 9ac2e244dd00..e289361db7d8 100644 --- a/ignite/metrics/regression/mean_normalized_bias.py +++ b/ignite/metrics/regression/mean_normalized_bias.py @@ -74,7 +74,7 @@ def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: raise NotComputableError("The ground truth has 0.") errors = (y.view_as(y_pred) - y_pred) / y - self._sum_of_errors += torch.sum(errors).to(self._device) + self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device) self._num_examples += y.shape[0] @sync_all_reduce("_sum_of_errors", "_num_examples") diff --git a/ignite/metrics/regression/pearson_correlation.py b/ignite/metrics/regression/pearson_correlation.py index 2abe8a40e171..7b90df23f3ab 100644 --- a/ignite/metrics/regression/pearson_correlation.py +++ b/ignite/metrics/regression/pearson_correlation.py @@ -87,11 +87,11 @@ def reset(self) -> None: def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() - self._sum_of_y_preds += y_pred.sum().to(self._device) - self._sum_of_ys += y.sum().to(self._device) - self._sum_of_y_pred_squares += y_pred.square().sum().to(self._device) - self._sum_of_y_squares += y.square().sum().to(self._device) - self._sum_of_products += (y_pred * y).sum().to(self._device) + self._sum_of_y_preds += y_pred.sum().to(dtype=self._double_dtype, device=self._device) + self._sum_of_ys += y.sum().to(dtype=self._double_dtype, device=self._device) + self._sum_of_y_pred_squares += y_pred.square().sum().to(dtype=self._double_dtype, device=self._device) + self._sum_of_y_squares += y.square().sum().to(dtype=self._double_dtype, device=self._device) + self._sum_of_products += (y_pred * y).sum().to(dtype=self._double_dtype, device=self._device) self._num_examples += y.shape[0] @sync_all_reduce( diff --git a/ignite/metrics/regression/r2_score.py b/ignite/metrics/regression/r2_score.py index f4089a3e2e1c..b4a4c4370d9f 100644 --- a/ignite/metrics/regression/r2_score.py +++ b/ignite/metrics/regression/r2_score.py @@ -70,10 +70,10 @@ def reset(self) -> None: def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output self._num_examples += y.shape[0] - self._sum_of_errors += torch.sum(torch.pow(y_pred - y, 2)).to(self._device) + self._sum_of_errors += torch.sum(torch.pow(y_pred - y, 2)).to(dtype=self._double_dtype, device=self._device) - self._y_sum += torch.sum(y).to(self._device) - self._y_sq_sum += torch.sum(torch.pow(y, 2)).to(self._device) + self._y_sum += torch.sum(y).to(dtype=self._double_dtype, device=self._device) + self._y_sq_sum += torch.sum(torch.pow(y, 2)).to(dtype=self._double_dtype, device=self._device) @sync_all_reduce("_num_examples", "_sum_of_errors", "_y_sq_sum", "_y_sum") def compute(self) -> float: diff --git a/ignite/metrics/regression/wave_hedges_distance.py b/ignite/metrics/regression/wave_hedges_distance.py index c226a1eb2a86..0549067e0440 100644 --- a/ignite/metrics/regression/wave_hedges_distance.py +++ b/ignite/metrics/regression/wave_hedges_distance.py @@ -68,7 +68,7 @@ def 
reset(self) -> None: def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() errors = torch.abs(y.view_as(y_pred) - y_pred) / (torch.max(y_pred, y.view_as(y_pred)) + 1e-30) - self._sum_of_errors += torch.sum(errors).to(self._device) + self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device) @sync_all_reduce("_sum_of_errors") def compute(self) -> float: From 01307730d5bbc88533beaf1650113842bc8311cd Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Thu, 24 Apr 2025 10:45:22 +0000 Subject: [PATCH 05/32] add available device to tests --- ..._geometric_mean_relative_absolute_error.py | 10 ++++++---- .../regression/test_kendall_correlation.py | 10 ++++++---- .../regression/test_manhattan_distance.py | 19 ++++++++++-------- .../regression/test_maximum_absolute_error.py | 14 +++++++------ .../test_mean_absolute_relative_error.py | 14 +++++++------ .../metrics/regression/test_mean_error.py | 14 +++++++------ .../regression/test_mean_normalized_bias.py | 12 ++++++----- .../regression/test_median_absolute_error.py | 15 ++++++++------ .../test_median_absolute_percentage_error.py | 15 ++++++++------ .../test_median_relative_absolute_error.py | 15 ++++++++------ .../regression/test_pearson_correlation.py | 20 +++++++++++-------- .../metrics/regression/test_r2_score.py | 15 ++++++++------ .../regression/test_spearman_correlation.py | 10 ++++++---- .../regression/test_wave_hedges_distance.py | 10 ++++++---- 14 files changed, 114 insertions(+), 79 deletions(-) diff --git a/tests/ignite/metrics/regression/test_geometric_mean_relative_absolute_error.py b/tests/ignite/metrics/regression/test_geometric_mean_relative_absolute_error.py index 9f7b14422e48..b78971e7f123 100644 --- a/tests/ignite/metrics/regression/test_geometric_mean_relative_absolute_error.py +++ b/tests/ignite/metrics/regression/test_geometric_mean_relative_absolute_error.py @@ -29,13 +29,14 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_compute(): +def test_compute(available_device): size = 51 np_y_pred = np.random.rand(size) np_y = np.random.rand(size) np_gmrae = np.exp(np.log(np.abs(np_y - np_y_pred) / np.abs(np_y - np_y.mean())).mean()) - m = GeometricMeanRelativeAbsoluteError() + m = GeometricMeanRelativeAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) y_pred = torch.from_numpy(np_y_pred) y = torch.from_numpy(np_y) @@ -45,7 +46,7 @@ def test_compute(): assert np_gmrae == pytest.approx(m.compute()) -def test_integration(): +def test_integration(available_device): y_pred = torch.rand(size=(100,)) y = torch.rand(size=(100,)) @@ -59,7 +60,8 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = GeometricMeanRelativeAbsoluteError() + m = GeometricMeanRelativeAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "gmrae") np_y = y.numpy().ravel() diff --git a/tests/ignite/metrics/regression/test_kendall_correlation.py b/tests/ignite/metrics/regression/test_kendall_correlation.py index 5dd55b0691b3..84e2f905c192 100644 --- a/tests/ignite/metrics/regression/test_kendall_correlation.py +++ b/tests/ignite/metrics/regression/test_kendall_correlation.py @@ -59,14 +59,15 @@ def test_wrong_variant(): @pytest.mark.parametrize("variant", ["b", "c"]) -def test_kendall_correlation(variant: str): +def test_kendall_correlation(variant: str, available_device): a = np.random.randn(4).astype(np.float32) b = 
np.random.randn(4).astype(np.float32) c = np.random.randn(4).astype(np.float32) d = np.random.randn(4).astype(np.float32) ground_truth = np.random.randn(4).astype(np.float32) - m = KendallRankCorrelation(variant=variant) + m = KendallRankCorrelation(variant=variant, device=available_device) + assert m._device == torch.device(available_device) m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_ans = kendalltau(a, ground_truth, variant=variant).statistic @@ -99,7 +100,7 @@ def test_case(request): @pytest.mark.parametrize("n_times", range(5)) @pytest.mark.parametrize("variant", ["b", "c"]) -def test_integration(n_times: int, variant: str, test_case: Tuple[Tensor, Tensor, int]): +def test_integration(n_times: int, variant: str, test_case: Tuple[Tensor, Tensor, int], available_device): y_pred, y, batch_size = test_case np_y = y.numpy().ravel() @@ -113,7 +114,8 @@ def update_fn(engine: Engine, batch): engine = Engine(update_fn) - m = KendallRankCorrelation(variant=variant) + m = KendallRankCorrelation(variant=variant, device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "kendall_tau") data = list(range(y_pred.shape[0] // batch_size)) diff --git a/tests/ignite/metrics/regression/test_manhattan_distance.py b/tests/ignite/metrics/regression/test_manhattan_distance.py index 50bdf321164b..406a66bee571 100644 --- a/tests/ignite/metrics/regression/test_manhattan_distance.py +++ b/tests/ignite/metrics/regression/test_manhattan_distance.py @@ -20,14 +20,15 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_mahattan_distance(): +def test_mahattan_distance(available_device): a = np.random.randn(4) b = np.random.randn(4) c = np.random.randn(4) d = np.random.randn(4) ground_truth = np.random.randn(4) - m = ManhattanDistance() + m = ManhattanDistance(device=available_device) + assert m._device == torch.device(available_device) manhattan = DistanceMetric.get_metric("manhattan") @@ -58,8 +59,8 @@ def test_mahattan_distance(): assert manhattan.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) -def test_integration(): - def _test(y_pred, y, batch_size): +def test_integration(available_device): + def _test(y_pred, y, batch_size, device="cpu"): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -68,7 +69,8 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = ManhattanDistance() + m = ManhattanDistance(device=device) + assert m._device == torch.device(device) m.attach(engine, "md") np_y = y.numpy().ravel() @@ -92,11 +94,12 @@ def get_test_cases(): # check multiple random inputs as random exact occurencies are rare test_cases = get_test_cases() for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size) + _test(y_pred, y, batch_size, device=available_device) -def test_error_is_not_nan(): - m = ManhattanDistance() +def test_error_is_not_nan(available_device): + m = ManhattanDistance(device=available_device) + assert m._device == torch.device(available_device) m.update((torch.zeros(4), torch.zeros(4))) assert not (torch.isnan(m._sum_of_errors).any() or torch.isinf(m._sum_of_errors).any()), m._sum_of_errors diff --git a/tests/ignite/metrics/regression/test_maximum_absolute_error.py b/tests/ignite/metrics/regression/test_maximum_absolute_error.py index e5e0fb4369ff..39558e943bc3 100644 --- a/tests/ignite/metrics/regression/test_maximum_absolute_error.py +++ b/tests/ignite/metrics/regression/test_maximum_absolute_error.py @@ -28,14 
+28,15 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_maximum_absolute_error(): +def test_maximum_absolute_error(available_device): a = np.random.randn(4) b = np.random.randn(4) c = np.random.randn(4) d = np.random.randn(4) ground_truth = np.random.randn(4) - m = MaximumAbsoluteError() + m = MaximumAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) np_ans = -1 @@ -60,8 +61,8 @@ def test_maximum_absolute_error(): assert m.compute() == pytest.approx(np_ans) -def test_integration(): - def _test(y_pred, y, batch_size): +def test_integration(available_device): + def _test(y_pred, y, batch_size, device="cpu"): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -70,7 +71,8 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = MaximumAbsoluteError() + m = MaximumAbsoluteError(device=device) + assert m._device == torch.device(device) m.attach(engine, "mae") np_y = y.numpy().ravel() @@ -94,7 +96,7 @@ def get_test_cases(): # check multiple random inputs as random exact occurencies are rare test_cases = get_test_cases() for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size) + _test(y_pred, y, batch_size, device=available_device) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_mean_absolute_relative_error.py b/tests/ignite/metrics/regression/test_mean_absolute_relative_error.py index 25b39860ed5b..98f1635aef5f 100644 --- a/tests/ignite/metrics/regression/test_mean_absolute_relative_error.py +++ b/tests/ignite/metrics/regression/test_mean_absolute_relative_error.py @@ -21,14 +21,15 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_mean_absolute_relative_error(): +def test_mean_absolute_relative_error(available_device): a = torch.rand(4) b = torch.rand(4) c = torch.rand(4) d = torch.rand(4) ground_truth = torch.rand(4) - m = MeanAbsoluteRelativeError() + m = MeanAbsoluteRelativeError(device=available_device) + assert m._device == torch.device(available_device) m.update((a, ground_truth)) abs_error_a = torch.sum(torch.abs(ground_truth - a) / torch.abs(ground_truth)) @@ -78,8 +79,8 @@ def test_zero_sample(): m.compute() -def test_integration(): - def _test(y_pred, y, batch_size): +def test_integration(available_device): + def _test(y_pred, y, batch_size, device="cpu"): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -88,7 +89,8 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = MeanAbsoluteRelativeError() + m = MeanAbsoluteRelativeError(device=device) + assert m._device == torch.device(device) m.attach(engine, "mare") np_y = y.numpy().ravel() @@ -114,7 +116,7 @@ def get_test_cases(): # check multiple random inputs as random exact occurencies are rare test_cases = get_test_cases() for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size) + _test(y_pred, y, batch_size, device=available_device) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_mean_error.py b/tests/ignite/metrics/regression/test_mean_error.py index e0e7fc975606..61c64e54e31e 100644 --- a/tests/ignite/metrics/regression/test_mean_error.py +++ b/tests/ignite/metrics/regression/test_mean_error.py @@ -26,14 +26,15 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_mean_error(): +def test_mean_error(available_device): 
a = np.random.randn(4) b = np.random.randn(4) c = np.random.randn(4) d = np.random.randn(4) ground_truth = np.random.randn(4) - m = MeanError() + m = MeanError(device=available_device) + assert m._device == torch.device(available_device) m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_sum = (ground_truth - a).sum() @@ -60,8 +61,8 @@ def test_mean_error(): assert m.compute() == pytest.approx(np_ans) -def test_integration(): - def _test(y_pred, y, batch_size): +def test_integration(available_device): + def _test(y_pred, y, batch_size, device="cpu"): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -70,7 +71,8 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = MeanError() + m = MeanError(device=device) + assert m._device == torch.device(device) m.attach(engine, "me") np_y = y.numpy().ravel() @@ -95,7 +97,7 @@ def get_test_cases(): for _ in range(5): test_cases = get_test_cases() for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size) + _test(y_pred, y, batch_size, device=available_device) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_mean_normalized_bias.py b/tests/ignite/metrics/regression/test_mean_normalized_bias.py index 66129ee7d595..ebded53405ce 100644 --- a/tests/ignite/metrics/regression/test_mean_normalized_bias.py +++ b/tests/ignite/metrics/regression/test_mean_normalized_bias.py @@ -38,14 +38,15 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_mean_error(): +def test_mean_error(available_device): a = np.random.randn(4) b = np.random.randn(4) c = np.random.randn(4) d = np.random.randn(4) ground_truth = np.random.randn(4) - m = MeanNormalizedBias() + m = MeanNormalizedBias(device=available_device) + assert m._device == torch.device(available_device) m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_sum = ((ground_truth - a) / ground_truth).sum() @@ -72,8 +73,8 @@ def test_mean_error(): assert m.compute() == pytest.approx(np_ans) -def test_integration(): - def _test(y_pred, y, batch_size): +def test_integration(available_device): + def _test(y_pred, y, batch_size, device="cpu"): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -82,7 +83,8 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = MeanNormalizedBias() + m = MeanNormalizedBias(device=device) + assert m._device == torch.device(device) m.attach(engine, "mnb") np_y = y.numpy().ravel() diff --git a/tests/ignite/metrics/regression/test_median_absolute_error.py b/tests/ignite/metrics/regression/test_median_absolute_error.py index 7ea373e46a7e..29b8c55f4c90 100644 --- a/tests/ignite/metrics/regression/test_median_absolute_error.py +++ b/tests/ignite/metrics/regression/test_median_absolute_error.py @@ -34,7 +34,7 @@ def test_wrong_input_shapes(): m.update((torch.rand(4), torch.rand(4, 1, 2))) -def test_median_absolute_error(): +def test_median_absolute_error(available_device): # See https://github.com/torch/torch7/pull/182 # For even number of elements, PyTorch returns middle element # NumPy returns average of middle elements @@ -45,7 +45,8 @@ def test_median_absolute_error(): np_y = np.random.rand(size) np_median_absolute_error = np.median(np.abs(np_y - np_y_pred)) - m = MedianAbsoluteError() + m = MedianAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) y_pred = 
torch.from_numpy(np_y_pred) y = torch.from_numpy(np_y) @@ -55,7 +56,7 @@ def test_median_absolute_error(): assert np_median_absolute_error == pytest.approx(m.compute()) -def test_median_absolute_error_2(): +def test_median_absolute_error_2(available_device): np.random.seed(1) size = 105 np_y_pred = np.random.rand(size, 1) @@ -63,7 +64,8 @@ def test_median_absolute_error_2(): np.random.shuffle(np_y) np_median_absolute_error = np.median(np.abs(np_y - np_y_pred)) - m = MedianAbsoluteError() + m = MedianAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) y_pred = torch.from_numpy(np_y_pred) y = torch.from_numpy(np_y) @@ -77,7 +79,7 @@ def test_median_absolute_error_2(): assert np_median_absolute_error == pytest.approx(m.compute()) -def test_integration_median_absolute_error(): +def test_integration_median_absolute_error(available_device): np.random.seed(1) size = 105 np_y_pred = np.random.rand(size, 1) @@ -95,7 +97,8 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = MedianAbsoluteError() + m = MedianAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "median_absolute_error") data = list(range(size // batch_size)) diff --git a/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py b/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py index fa8c549a5070..ec3858a9125b 100644 --- a/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py +++ b/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py @@ -34,7 +34,7 @@ def test_wrong_input_shapes(): m.update((torch.rand(4), torch.rand(4, 1, 2))) -def test_median_absolute_percentage_error(): +def test_median_absolute_percentage_error(available_device): # See https://github.com/torch/torch7/pull/182 # For even number of elements, PyTorch returns middle element # NumPy returns average of middle elements @@ -45,7 +45,8 @@ def test_median_absolute_percentage_error(): np_y = np.random.rand(size) np_median_absolute_percentage_error = 100.0 * np.median(np.abs(np_y - np_y_pred) / np.abs(np_y)) - m = MedianAbsolutePercentageError() + m = MedianAbsolutePercentageError(device=available_device) + assert m._device == torch.device(available_device) y_pred = torch.from_numpy(np_y_pred) y = torch.from_numpy(np_y) @@ -55,7 +56,7 @@ def test_median_absolute_percentage_error(): assert np_median_absolute_percentage_error == pytest.approx(m.compute()) -def test_median_absolute_percentage_error_2(): +def test_median_absolute_percentage_error_2(available_device): np.random.seed(1) size = 105 np_y_pred = np.random.rand(size, 1) @@ -63,7 +64,8 @@ def test_median_absolute_percentage_error_2(): np.random.shuffle(np_y) np_median_absolute_percentage_error = 100.0 * np.median(np.abs(np_y - np_y_pred) / np.abs(np_y)) - m = MedianAbsolutePercentageError() + m = MedianAbsolutePercentageError(device=available_device) + assert m._device == torch.device(available_device) y_pred = torch.from_numpy(np_y_pred) y = torch.from_numpy(np_y) @@ -77,7 +79,7 @@ def test_median_absolute_percentage_error_2(): assert np_median_absolute_percentage_error == pytest.approx(m.compute()) -def test_integration_median_absolute_percentage_error(): +def test_integration_median_absolute_percentage_error(available_device): np.random.seed(1) size = 105 np_y_pred = np.random.rand(size, 1) @@ -95,7 +97,8 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = MedianAbsolutePercentageError() + m = 
MedianAbsolutePercentageError(device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "median_absolute_percentage_error") data = list(range(size // batch_size)) diff --git a/tests/ignite/metrics/regression/test_median_relative_absolute_error.py b/tests/ignite/metrics/regression/test_median_relative_absolute_error.py index b2574632c15c..9b711336bd75 100644 --- a/tests/ignite/metrics/regression/test_median_relative_absolute_error.py +++ b/tests/ignite/metrics/regression/test_median_relative_absolute_error.py @@ -34,7 +34,7 @@ def test_wrong_input_shapes(): m.update((torch.rand(4), torch.rand(4, 1, 2))) -def test_median_relative_absolute_error(): +def test_median_relative_absolute_error(available_device): # See https://github.com/torch/torch7/pull/182 # For even number of elements, PyTorch returns middle element # NumPy returns average of middle elements @@ -45,7 +45,8 @@ def test_median_relative_absolute_error(): np_y = np.random.rand(size) np_median_absolute_relative_error = np.median(np.abs(np_y - np_y_pred) / np.abs(np_y - np_y.mean())) - m = MedianRelativeAbsoluteError() + m = MedianRelativeAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) y_pred = torch.from_numpy(np_y_pred) y = torch.from_numpy(np_y) @@ -55,7 +56,7 @@ def test_median_relative_absolute_error(): assert np_median_absolute_relative_error == pytest.approx(m.compute()) -def test_median_relative_absolute_error_2(): +def test_median_relative_absolute_error_2(available_device): np.random.seed(1) size = 105 np_y_pred = np.random.rand(size, 1) @@ -63,7 +64,8 @@ def test_median_relative_absolute_error_2(): np.random.shuffle(np_y) np_median_absolute_relative_error = np.median(np.abs(np_y - np_y_pred) / np.abs(np_y - np_y.mean())) - m = MedianRelativeAbsoluteError() + m = MedianRelativeAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) y_pred = torch.from_numpy(np_y_pred) y = torch.from_numpy(np_y) @@ -77,7 +79,7 @@ def test_median_relative_absolute_error_2(): assert np_median_absolute_relative_error == pytest.approx(m.compute()) -def test_integration_median_relative_absolute_error_with_output_transform(): +def test_integration_median_relative_absolute_error_with_output_transform(available_device): np.random.seed(1) size = 105 np_y_pred = np.random.rand(size, 1) @@ -95,7 +97,8 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = MedianRelativeAbsoluteError() + m = MedianRelativeAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "median_absolute_relative_error") data = list(range(size // batch_size)) diff --git a/tests/ignite/metrics/regression/test_pearson_correlation.py b/tests/ignite/metrics/regression/test_pearson_correlation.py index b16a54c246e8..3b3025f72eeb 100644 --- a/tests/ignite/metrics/regression/test_pearson_correlation.py +++ b/tests/ignite/metrics/regression/test_pearson_correlation.py @@ -43,9 +43,10 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_degenerated_sample(): +def test_degenerated_sample(available_device): # one sample - m = PearsonCorrelation() + m = PearsonCorrelation(device=available_device) + assert m._device == torch.device(available_device) y_pred = torch.tensor([1.0]) y = torch.tensor([1.0]) m.update((y_pred, y)) @@ -67,14 +68,15 @@ def test_degenerated_sample(): assert pytest.approx(np_res) == m.compute() -def test_pearson_correlation(): +def 
test_pearson_correlation(available_device): a = np.random.randn(4).astype(np.float32) b = np.random.randn(4).astype(np.float32) c = np.random.randn(4).astype(np.float32) d = np.random.randn(4).astype(np.float32) ground_truth = np.random.randn(4).astype(np.float32) - m = PearsonCorrelation() + m = PearsonCorrelation(device=available_device) + assert m._device == torch.device(available_device) m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_ans = scipy_corr(a, ground_truth) @@ -106,7 +108,7 @@ def test_case(request): @pytest.mark.parametrize("n_times", range(5)) -def test_integration(n_times, test_case: Tuple[Tensor, Tensor, int]): +def test_integration(n_times, test_case: Tuple[Tensor, Tensor, int], available_device): y_pred, y, batch_size = test_case def update_fn(engine: Engine, batch): @@ -117,7 +119,8 @@ def update_fn(engine: Engine, batch): engine = Engine(update_fn) - m = PearsonCorrelation() + m = PearsonCorrelation(device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "corr") np_y = y.numpy().ravel() @@ -131,8 +134,9 @@ def update_fn(engine: Engine, batch): assert pytest.approx(np_ans, rel=2e-4) == corr -def test_accumulator_detached(): - corr = PearsonCorrelation() +def test_accumulator_detached(available_device): + corr = PearsonCorrelation(device=available_device) + assert corr._device == torch.device(available_device) y_pred = torch.tensor([2.0, 3.0], requires_grad=True) y = torch.tensor([-2.0, -1.0]) diff --git a/tests/ignite/metrics/regression/test_r2_score.py b/tests/ignite/metrics/regression/test_r2_score.py index 62ad0e14938f..420f56f51a83 100644 --- a/tests/ignite/metrics/regression/test_r2_score.py +++ b/tests/ignite/metrics/regression/test_r2_score.py @@ -27,12 +27,13 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_r2_score(): +def test_r2_score(available_device): size = 51 np_y_pred = np.random.rand(size) np_y = np.random.rand(size) - m = R2Score() + m = R2Score(device=available_device) + assert m._device == torch.device(available_device) y_pred = torch.from_numpy(np_y_pred) y = torch.from_numpy(np_y) @@ -42,14 +43,15 @@ def test_r2_score(): assert r2_score(np_y, np_y_pred) == pytest.approx(m.compute()) -def test_r2_score_2(): +def test_r2_score_2(available_device): np.random.seed(1) size = 105 np_y_pred = np.random.rand(size, 1) np_y = np.random.rand(size, 1) np.random.shuffle(np_y) - m = R2Score() + m = R2Score(device=available_device) + assert m._device == torch.device(available_device) y_pred = torch.from_numpy(np_y_pred) y = torch.from_numpy(np_y) @@ -63,7 +65,7 @@ def test_r2_score_2(): assert r2_score(np_y, np_y_pred) == pytest.approx(m.compute()) -def test_integration_r2_score(): +def test_integration_r2_score(available_device): np.random.seed(1) size = 105 np_y_pred = np.random.rand(size, 1) @@ -80,7 +82,8 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = R2Score() + m = R2Score(device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "r2_score") data = list(range(size // batch_size)) diff --git a/tests/ignite/metrics/regression/test_spearman_correlation.py b/tests/ignite/metrics/regression/test_spearman_correlation.py index 4aac6221f629..d9c4e47fa359 100644 --- a/tests/ignite/metrics/regression/test_spearman_correlation.py +++ b/tests/ignite/metrics/regression/test_spearman_correlation.py @@ -53,14 +53,15 @@ def test_wrong_y_dtype(): metric.update((y_pred, y)) -def test_spearman_correlation(): +def 
test_spearman_correlation(available_device): a = np.random.randn(4).astype(np.float32) b = np.random.randn(4).astype(np.float32) c = np.random.randn(4).astype(np.float32) d = np.random.randn(4).astype(np.float32) ground_truth = np.random.randn(4).astype(np.float32) - m = SpearmanRankCorrelation() + m = SpearmanRankCorrelation(device=available_device) + assert m._device == torch.device(available_device) m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_ans = spearmanr(a, ground_truth).statistic @@ -92,7 +93,7 @@ def test_case(request): @pytest.mark.parametrize("n_times", range(5)) -def test_integration(n_times, test_case: Tuple[Tensor, Tensor, int]): +def test_integration(n_times, test_case: Tuple[Tensor, Tensor, int], available_device): y_pred, y, batch_size = test_case np_y = y.numpy().ravel() @@ -106,7 +107,8 @@ def update_fn(engine: Engine, batch): engine = Engine(update_fn) - m = SpearmanRankCorrelation() + m = SpearmanRankCorrelation(device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "spearman_corr") data = list(range(y_pred.shape[0] // batch_size)) diff --git a/tests/ignite/metrics/regression/test_wave_hedges_distance.py b/tests/ignite/metrics/regression/test_wave_hedges_distance.py index 376dd70b8c22..33e0e4da113f 100644 --- a/tests/ignite/metrics/regression/test_wave_hedges_distance.py +++ b/tests/ignite/metrics/regression/test_wave_hedges_distance.py @@ -19,14 +19,15 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_compute(): +def test_compute(available_device): a = np.random.randn(4) b = np.random.randn(4) c = np.random.randn(4) d = np.random.randn(4) ground_truth = np.random.randn(4) - m = WaveHedgesDistance() + m = WaveHedgesDistance(device=available_device) + assert m._device == torch.device(available_device) m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_sum = (np.abs(ground_truth - a) / np.maximum.reduce([a, ground_truth])).sum() @@ -45,7 +46,7 @@ def test_compute(): assert m.compute() == pytest.approx(np_sum) -def test_integration(): +def test_integration(available_device): def _test(y_pred, y, batch_size): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size @@ -55,7 +56,8 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = WaveHedgesDistance() + m = WaveHedgesDistance(device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "whd") np_y = y.numpy().ravel() From 94a002b1430296cf6ecc87d4fe2f15de0c738800 Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Thu, 24 Apr 2025 10:48:12 +0000 Subject: [PATCH 06/32] use self._double_dtype instead of torch.double --- ignite/metrics/regression/fractional_bias.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ignite/metrics/regression/fractional_bias.py b/ignite/metrics/regression/fractional_bias.py index bac6c67bcf5b..3e2f0719ca74 100644 --- a/ignite/metrics/regression/fractional_bias.py +++ b/ignite/metrics/regression/fractional_bias.py @@ -64,7 +64,7 @@ class FractionalBias(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, dtype=torch.double, device=self._device) + self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) self._num_examples = 0 def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: From 2631377e9cec1bb0c2cf56adc7c3e715e54f550a Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Thu, 24 Apr 2025 10:59:46 +0000 
Subject: [PATCH 07/32] use self._double_dtype when moving to device in epoch_metric.py --- ignite/metrics/epoch_metric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ignite/metrics/epoch_metric.py b/ignite/metrics/epoch_metric.py index 5918b6428120..e662b6640518 100644 --- a/ignite/metrics/epoch_metric.py +++ b/ignite/metrics/epoch_metric.py @@ -129,7 +129,7 @@ def update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: if y.ndimension() == 2 and y.shape[1] == 1: y = y.squeeze(dim=-1) - y_pred = y_pred.clone().to(self._device) + y_pred = y_pred.clone().to(dtype=self._double_dtype, device=self._device) y = y.clone().to(self._device) self._check_type((y_pred, y)) From d5b9e5a0082b00456c17a3368807c70db4f7ea00 Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Thu, 24 Apr 2025 11:27:10 +0000 Subject: [PATCH 08/32] removes unnecessary tests --- ignite/metrics/epoch_metric.py | 2 +- tests/ignite/metrics/test_epoch_metric.py | 12 ++---------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/ignite/metrics/epoch_metric.py b/ignite/metrics/epoch_metric.py index e662b6640518..d4f583cff165 100644 --- a/ignite/metrics/epoch_metric.py +++ b/ignite/metrics/epoch_metric.py @@ -130,7 +130,7 @@ def update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y = y.squeeze(dim=-1) y_pred = y_pred.clone().to(dtype=self._double_dtype, device=self._device) - y = y.clone().to(self._device) + y = y.clone().to(dtype=self._double_dtype, device=self._device) self._check_type((y_pred, y)) self._predictions.append(y_pred) diff --git a/tests/ignite/metrics/test_epoch_metric.py b/tests/ignite/metrics/test_epoch_metric.py index 5c42957cf57d..7354e869dbe4 100644 --- a/tests/ignite/metrics/test_epoch_metric.py +++ b/tests/ignite/metrics/test_epoch_metric.py @@ -36,14 +36,6 @@ def compute_fn(y_preds, y_targets): output1 = (torch.rand(4, 3), torch.randint(0, 2, size=(4, 3), dtype=torch.long)) em.update(output1) - with pytest.raises(ValueError, match=r"Incoherent types between input y_pred and stored predictions"): - output2 = (torch.randint(0, 5, size=(4, 3)), torch.randint(0, 2, size=(4, 3))) - em.update(output2) - - with pytest.raises(ValueError, match=r"Incoherent types between input y and stored targets"): - output2 = (torch.rand(4, 3), torch.randint(0, 2, size=(4, 3)).to(torch.int32)) - em.update(output2) - with pytest.raises( NotComputableError, match="EpochMetric must have at least one example before it can be computed" ): @@ -103,7 +95,7 @@ def compute_fn(y_preds, y_targets): targets = torch.cat([output1[1], output2[1], output3[1]], dim=0) result = em.compute() - assert result == compute_fn(preds, targets) + assert result == pytest.approx(compute_fn(preds, targets), rel=1e-6) em.reset() output1 = (torch.rand(4, 3), torch.randint(0, 2, size=(4, 3), dtype=torch.long)) @@ -117,7 +109,7 @@ def compute_fn(y_preds, y_targets): targets = torch.cat([output1[1], output2[1], output3[1]], dim=0) result = em.compute() - assert result == compute_fn(preds, targets) + assert result == pytest.approx(compute_fn(preds, targets), rel=1e-6) def test_bad_compute_fn(): From f99b643a973fa24d8553939f1cbc87e67aa62a5a Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Thu, 24 Apr 2025 14:52:06 +0000 Subject: [PATCH 09/32] rollbacks changes in epoch_metric.py --- ignite/metrics/epoch_metric.py | 4 ++-- tests/ignite/metrics/test_epoch_metric.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ignite/metrics/epoch_metric.py b/ignite/metrics/epoch_metric.py index 
d4f583cff165..5918b6428120 100644 --- a/ignite/metrics/epoch_metric.py +++ b/ignite/metrics/epoch_metric.py @@ -129,8 +129,8 @@ def update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: if y.ndimension() == 2 and y.shape[1] == 1: y = y.squeeze(dim=-1) - y_pred = y_pred.clone().to(dtype=self._double_dtype, device=self._device) - y = y.clone().to(dtype=self._double_dtype, device=self._device) + y_pred = y_pred.clone().to(self._device) + y = y.clone().to(self._device) self._check_type((y_pred, y)) self._predictions.append(y_pred) diff --git a/tests/ignite/metrics/test_epoch_metric.py b/tests/ignite/metrics/test_epoch_metric.py index 7354e869dbe4..dbc5f2c9237e 100644 --- a/tests/ignite/metrics/test_epoch_metric.py +++ b/tests/ignite/metrics/test_epoch_metric.py @@ -36,6 +36,14 @@ def compute_fn(y_preds, y_targets): output1 = (torch.rand(4, 3), torch.randint(0, 2, size=(4, 3), dtype=torch.long)) em.update(output1) + with pytest.raises(ValueError, match=r"Incoherent types between input y_pred and stored predictions"): + output2 = (torch.randint(0, 5, size=(4, 3)), torch.randint(0, 2, size=(4, 3))) + em.update(output2) + + with pytest.raises(ValueError, match=r"Incoherent types between input y and stored targets"): + output2 = (torch.rand(4, 3), torch.randint(0, 2, size=(4, 3)).to(torch.int32)) + em.update(output2) + with pytest.raises( NotComputableError, match="EpochMetric must have at least one example before it can be computed" ): From e24ce010e59b0c1e8909201e8b8128691e19d092 Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Thu, 24 Apr 2025 19:22:03 +0000 Subject: [PATCH 10/32] redo test_integration --- .../regression/test_canberra_metric.py | 59 +++++++++---------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/tests/ignite/metrics/regression/test_canberra_metric.py b/tests/ignite/metrics/regression/test_canberra_metric.py index 054f95529f5a..c80b1c37584e 100644 --- a/tests/ignite/metrics/regression/test_canberra_metric.py +++ b/tests/ignite/metrics/regression/test_canberra_metric.py @@ -59,43 +59,38 @@ def test_compute(available_device): assert canberra.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) -def test_integration(available_device): - def _test(y_pred, y, batch_size, device="cpu"): - def update_fn(engine, batch): - idx = (engine.state.iteration - 1) * batch_size - y_true_batch = np_y[idx : idx + batch_size] - y_pred_batch = np_y_pred[idx : idx + batch_size] - return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) +@pytest.mark.parametrize( + "test_cases", + [ + (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), + (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), + ], +) +def test_integration(test_cases, available_device): + y_pred, y, batch_size = test_cases + + def update_fn(engine, batch): + idx = (engine.state.iteration - 1) * batch_size + y_true_batch = y[idx : idx + batch_size] + y_pred_batch = y_pred[idx : idx + batch_size] + return y_pred_batch, y_true_batch + + engine = Engine(update_fn) - engine = Engine(update_fn) - - m = CanberraMetric(device=device) - assert m._device == torch.device(device) - - m.attach(engine, "cm") - - np_y = y.numpy().ravel() - np_y_pred = y_pred.numpy().ravel() - - canberra = DistanceMetric.get_metric("canberra") + m = CanberraMetric(device=available_device) + assert m._device == torch.device(available_device) - data = list(range(y_pred.shape[0] // batch_size)) - cm = engine.run(data, max_epochs=1).metrics["cm"] + m.attach(engine, "cm") - assert canberra.pairwise([np_y_pred, np_y])[0][1] == 
pytest.approx(cm) + canberra = DistanceMetric.get_metric("canberra") - def get_test_cases(): - test_cases = [ - (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), - (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), - ] - return test_cases + data = list(range(y_pred.shape[0] // batch_size)) + cm = engine.run(data, max_epochs=1).metrics["cm"] - for _ in range(5): - # check multiple random inputs as random exact occurencies are rare - test_cases = get_test_cases() - for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size, device=available_device) + X = y_pred.cpu().numpy().reshape(len(y_pred), -1) + Y = y.cpu().numpy().reshape(len(y), -1) + expected = np.sum(canberra.pairwise(X, Y).diagonal()) + assert expected == pytest.approx(cm) def test_error_is_not_nan(available_device): From 3dbbe1e0d7f1c53520b72e2dc65982f15dfb1bee Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Thu, 24 Apr 2025 20:09:52 +0000 Subject: [PATCH 11/32] redo test_integration --- tests/ignite/metrics/regression/test_canberra_metric.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ignite/metrics/regression/test_canberra_metric.py b/tests/ignite/metrics/regression/test_canberra_metric.py index c80b1c37584e..da6665772f28 100644 --- a/tests/ignite/metrics/regression/test_canberra_metric.py +++ b/tests/ignite/metrics/regression/test_canberra_metric.py @@ -59,6 +59,7 @@ def test_compute(available_device): assert canberra.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) +@pytest.mark.parametrize("n_times", range(5)) @pytest.mark.parametrize( "test_cases", [ @@ -66,13 +67,13 @@ def test_compute(available_device): (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), ], ) -def test_integration(test_cases, available_device): +def test_integration(n_times, test_cases, available_device): y_pred, y, batch_size = test_cases def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size - y_true_batch = y[idx : idx + batch_size] - y_pred_batch = y_pred[idx : idx + batch_size] + y_true_batch = y[idx : idx + batch_size].to(dtype=torch.float32) + y_pred_batch = y_pred[idx : idx + batch_size].to(dtype=torch.float32) return y_pred_batch, y_true_batch engine = Engine(update_fn) From 1cf59fa5f67f91a5b6c3a8d2c6b7dda14754874c Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Thu, 24 Apr 2025 20:31:58 +0000 Subject: [PATCH 12/32] casting of eps in _update --- ignite/metrics/regression/canberra_metric.py | 3 ++- tests/ignite/metrics/regression/test_canberra_metric.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ignite/metrics/regression/canberra_metric.py b/ignite/metrics/regression/canberra_metric.py index 45abd4abb943..8fcff16d030b 100644 --- a/ignite/metrics/regression/canberra_metric.py +++ b/ignite/metrics/regression/canberra_metric.py @@ -73,7 +73,8 @@ def reset(self) -> None: def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() - errors = torch.abs(y - y_pred) / (torch.abs(y_pred) + torch.abs(y) + 1e-15) + eps = torch.tensor(1e-15, dtype=self._double_dtype, device=y.device) + errors = torch.abs(y - y_pred) / (torch.abs(y_pred) + torch.abs(y) + eps) self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device) @sync_all_reduce("_sum_of_errors") diff --git a/tests/ignite/metrics/regression/test_canberra_metric.py b/tests/ignite/metrics/regression/test_canberra_metric.py index da6665772f28..17bd7d827d40 100644 --- 
a/tests/ignite/metrics/regression/test_canberra_metric.py +++ b/tests/ignite/metrics/regression/test_canberra_metric.py @@ -69,6 +69,8 @@ def test_compute(available_device): ) def test_integration(n_times, test_cases, available_device): y_pred, y, batch_size = test_cases + assert y_pred.dtype == torch.float32 + assert y.dtype == torch.float32 def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size @@ -79,9 +81,11 @@ def update_fn(engine, batch): engine = Engine(update_fn) m = CanberraMetric(device=available_device) + print(f"m's dtype: {m._double_dtype}") assert m._device == torch.device(available_device) m.attach(engine, "cm") + print(f"m's dtype again: {m._double_dtype}") canberra = DistanceMetric.get_metric("canberra") From 6f0599de477161b6dbb76a84c90ffe2dffd81d0a Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Thu, 24 Apr 2025 21:34:09 +0000 Subject: [PATCH 13/32] more conversions to torch --- .../regression/test_median_absolute_error.py | 36 ++++++-------- .../test_median_absolute_percentage_error.py | 49 +++++++++++-------- .../test_median_relative_absolute_error.py | 44 +++++++++-------- .../regression/test_pearson_correlation.py | 31 +++++++++--- 4 files changed, 89 insertions(+), 71 deletions(-) diff --git a/tests/ignite/metrics/regression/test_median_absolute_error.py b/tests/ignite/metrics/regression/test_median_absolute_error.py index 29b8c55f4c90..a58100b38f80 100644 --- a/tests/ignite/metrics/regression/test_median_absolute_error.py +++ b/tests/ignite/metrics/regression/test_median_absolute_error.py @@ -41,33 +41,29 @@ def test_median_absolute_error(available_device): # Size of dataset will be odd for these tests size = 51 - np_y_pred = np.random.rand(size) - np_y = np.random.rand(size) - np_median_absolute_error = np.median(np.abs(np_y - np_y_pred)) + y_pred = torch.rand(size) + y = torch.rand(size) + expected_median_absolute_error = torch.median(torch.abs((y - y_pred).cpu())).item() m = MedianAbsoluteError(device=available_device) assert m._device == torch.device(available_device) - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) m.reset() m.update((y_pred, y)) - assert np_median_absolute_error == pytest.approx(m.compute()) + assert expected_median_absolute_error == pytest.approx(m.compute()) def test_median_absolute_error_2(available_device): np.random.seed(1) size = 105 - np_y_pred = np.random.rand(size, 1) - np_y = np.random.rand(size, 1) - np.random.shuffle(np_y) - np_median_absolute_error = np.median(np.abs(np_y - np_y_pred)) + y_pred = torch.rand(size, 1) + y = torch.rand(size, 1) + y = y[torch.randperm(size)] + expected_median_absolute_error = torch.median(torch.abs(y.cpu() - y_pred.cpu())).item() m = MedianAbsoluteError(device=available_device) assert m._device == torch.device(available_device) - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) m.reset() batch_size = 16 @@ -76,24 +72,22 @@ def test_median_absolute_error_2(available_device): idx = i * batch_size m.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size])) - assert np_median_absolute_error == pytest.approx(m.compute()) + assert expected_median_absolute_error == pytest.approx(m.compute()) def test_integration_median_absolute_error(available_device): np.random.seed(1) size = 105 - np_y_pred = np.random.rand(size, 1) - np_y = np.random.rand(size, 1) - np.random.shuffle(np_y) - np_median_absolute_error = np.median(np.abs(np_y - np_y_pred)) + y_pred = torch.rand(size, 1) + y = torch.rand(size, 1) + y = y[torch.randperm(size)] + expected = 
torch.median(torch.abs(y.cpu() - y_pred.cpu())).item() batch_size = 15 def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size - y_true_batch = np_y[idx : idx + batch_size] - y_pred_batch = np_y_pred[idx : idx + batch_size] - return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) + return y_pred[idx : idx + batch_size], y[idx : idx + batch_size] engine = Engine(update_fn) @@ -104,7 +98,7 @@ def update_fn(engine, batch): data = list(range(size // batch_size)) median_absolute_error = engine.run(data, max_epochs=1).metrics["median_absolute_error"] - assert np_median_absolute_error == pytest.approx(median_absolute_error) + assert expected == pytest.approx(median_absolute_error) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py b/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py index ec3858a9125b..347d97e372d0 100644 --- a/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py +++ b/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py @@ -41,33 +41,37 @@ def test_median_absolute_percentage_error(available_device): # Size of dataset will be odd for these tests size = 51 - np_y_pred = np.random.rand(size) - np_y = np.random.rand(size) - np_median_absolute_percentage_error = 100.0 * np.median(np.abs(np_y - np_y_pred) / np.abs(np_y)) + y_pred = torch.rand(size) + y = torch.rand(size) + + epsilon = 1e-8 + safe_y = torch.where(y == 0, torch.full_like(y, epsilon), y) + expected = torch.median(torch.abs((y - y_pred) / safe_y).cpu()).item() * 100.0 m = MedianAbsolutePercentageError(device=available_device) assert m._device == torch.device(available_device) - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) m.reset() m.update((y_pred, y)) - assert np_median_absolute_percentage_error == pytest.approx(m.compute()) + assert expected == pytest.approx(m.compute()) def test_median_absolute_percentage_error_2(available_device): np.random.seed(1) size = 105 - np_y_pred = np.random.rand(size, 1) - np_y = np.random.rand(size, 1) - np.random.shuffle(np_y) - np_median_absolute_percentage_error = 100.0 * np.median(np.abs(np_y - np_y_pred) / np.abs(np_y)) + y_pred = torch.rand(size, 1) + y = torch.rand(size, 1) + + indices = torch.randperm(size) + y = y[indices] + + epsilon = 1e-8 + safe_y = torch.where(y == 0, torch.full_like(y, epsilon), y) + expected = torch.median(torch.abs((y - y_pred) / safe_y).cpu()).item() * 100.0 m = MedianAbsolutePercentageError(device=available_device) assert m._device == torch.device(available_device) - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) m.reset() batch_size = 16 @@ -76,24 +80,27 @@ def test_median_absolute_percentage_error_2(available_device): idx = i * batch_size m.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size])) - assert np_median_absolute_percentage_error == pytest.approx(m.compute()) + assert expected == pytest.approx(m.compute()) def test_integration_median_absolute_percentage_error(available_device): np.random.seed(1) size = 105 - np_y_pred = np.random.rand(size, 1) - np_y = np.random.rand(size, 1) - np.random.shuffle(np_y) - np_median_absolute_percentage_error = 100.0 * np.median(np.abs(np_y - np_y_pred) / np.abs(np_y)) + y_pred = torch.rand(size, 1) + y = torch.rand(size, 1) + + indices = torch.randperm(size) + y = y[indices] + + epsilon = 1e-8 + safe_y = torch.where(y == 0, torch.full_like(y, epsilon), y) + expected = torch.median(torch.abs((y - 
y_pred) / safe_y).cpu()).item() * 100.0 batch_size = 15 def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size - y_true_batch = np_y[idx : idx + batch_size] - y_pred_batch = np_y_pred[idx : idx + batch_size] - return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) + return y_pred[idx : idx + batch_size], y[idx : idx + batch_size] engine = Engine(update_fn) @@ -104,7 +111,7 @@ def update_fn(engine, batch): data = list(range(size // batch_size)) median_absolute_percentage_error = engine.run(data, max_epochs=1).metrics["median_absolute_percentage_error"] - assert np_median_absolute_percentage_error == pytest.approx(median_absolute_percentage_error) + assert expected == pytest.approx(median_absolute_percentage_error) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_median_relative_absolute_error.py b/tests/ignite/metrics/regression/test_median_relative_absolute_error.py index 9b711336bd75..526d2f4ad136 100644 --- a/tests/ignite/metrics/regression/test_median_relative_absolute_error.py +++ b/tests/ignite/metrics/regression/test_median_relative_absolute_error.py @@ -41,33 +41,33 @@ def test_median_relative_absolute_error(available_device): # Size of dataset will be odd for these tests size = 51 - np_y_pred = np.random.rand(size) - np_y = np.random.rand(size) - np_median_absolute_relative_error = np.median(np.abs(np_y - np_y_pred) / np.abs(np_y - np_y.mean())) + y_pred = torch.rand(size) + y = torch.rand(size) + + baseline = torch.abs(y - y.mean()) + expected = torch.median((torch.abs(y - y_pred) / baseline).cpu()).item() m = MedianRelativeAbsoluteError(device=available_device) assert m._device == torch.device(available_device) - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) m.reset() m.update((y_pred, y)) - assert np_median_absolute_relative_error == pytest.approx(m.compute()) + assert expected == pytest.approx(m.compute()) def test_median_relative_absolute_error_2(available_device): np.random.seed(1) size = 105 - np_y_pred = np.random.rand(size, 1) - np_y = np.random.rand(size, 1) - np.random.shuffle(np_y) - np_median_absolute_relative_error = np.median(np.abs(np_y - np_y_pred) / np.abs(np_y - np_y.mean())) + y_pred = torch.rand(size, 1) + y = torch.rand(size, 1) + y = y[torch.randperm(size)] + + baseline = torch.abs(y - y.mean()) + expected = torch.median((torch.abs(y - y_pred) / baseline).cpu()).item() m = MedianRelativeAbsoluteError(device=available_device) assert m._device == torch.device(available_device) - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) m.reset() batch_size = 16 @@ -76,24 +76,26 @@ def test_median_relative_absolute_error_2(available_device): idx = i * batch_size m.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size])) - assert np_median_absolute_relative_error == pytest.approx(m.compute()) + assert expected == pytest.approx(m.compute()) def test_integration_median_relative_absolute_error_with_output_transform(available_device): np.random.seed(1) size = 105 - np_y_pred = np.random.rand(size, 1) - np_y = np.random.rand(size, 1) - np.random.shuffle(np_y) - np_median_absolute_relative_error = np.median(np.abs(np_y - np_y_pred) / np.abs(np_y - np_y.mean())) + y_pred = torch.rand(size, 1) + y = torch.rand(size, 1) + y = y[torch.randperm(size)] # shuffle y + + baseline = torch.abs(y - y.mean()) + expected = torch.median((torch.abs(y - y_pred) / baseline.cpu()).cpu()).item() batch_size = 15 def update_fn(engine, batch): idx = (engine.state.iteration - 1) * 
batch_size - y_true_batch = np_y[idx : idx + batch_size] - y_pred_batch = np_y_pred[idx : idx + batch_size] - return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) + y_true_batch = y[idx : idx + batch_size] + y_pred_batch = y_pred[idx : idx + batch_size] + return y_pred_batch, y_true_batch engine = Engine(update_fn) @@ -104,7 +106,7 @@ def update_fn(engine, batch): data = list(range(size // batch_size)) median_absolute_relative_error = engine.run(data, max_epochs=1).metrics["median_absolute_relative_error"] - assert np_median_absolute_relative_error == pytest.approx(median_absolute_relative_error) + assert expected == pytest.approx(median_absolute_relative_error) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_pearson_correlation.py b/tests/ignite/metrics/regression/test_pearson_correlation.py index 3b3025f72eeb..e1ff4c995216 100644 --- a/tests/ignite/metrics/regression/test_pearson_correlation.py +++ b/tests/ignite/metrics/regression/test_pearson_correlation.py @@ -20,6 +20,25 @@ def np_corr_eps(np_y_pred: np.ndarray, np_y: np.ndarray, eps: float = 1e-8): return corr +def torch_corr_eps(y_pred: torch.Tensor, y: torch.Tensor, eps: float = 1e-8) -> torch.Tensor: + y_pred = y_pred.to(dtype=torch.float32) + y = y.to(dtype=torch.float32) + + y_pred_mean = y_pred.mean() + y_mean = y.mean() + + pred_centered = y_pred - y_pred_mean + y_centered = y - y_mean + + cov = torch.mean(pred_centered * y_centered) + std_pred = pred_centered.std(unbiased=False) + std_y = y_centered.std(unbiased=False) + + denom = torch.clamp(std_pred * std_y, min=eps) + corr = cov / denom + return corr + + def scipy_corr(np_y_pred: np.ndarray, np_y: np.ndarray): corr = pearsonr(np_y_pred, np_y) return corr.statistic @@ -51,10 +70,8 @@ def test_degenerated_sample(available_device): y = torch.tensor([1.0]) m.update((y_pred, y)) - np_y_pred = y_pred.numpy() - np_y = y_pred.numpy() - np_res = np_corr_eps(np_y_pred, np_y) - assert pytest.approx(np_res) == m.compute() + res = torch_corr_eps(y_pred, y) + assert pytest.approx(res) == m.compute() # constant samples m.reset() @@ -62,10 +79,8 @@ def test_degenerated_sample(available_device): y = torch.zeros(10).float() m.update((y_pred, y)) - np_y_pred = y_pred.numpy() - np_y = y_pred.numpy() - np_res = np_corr_eps(np_y_pred, np_y) - assert pytest.approx(np_res) == m.compute() + res = torch_corr_eps(y_pred, y) + assert pytest.approx(res) == m.compute() def test_pearson_correlation(available_device): From 35527d5ce9854d22c3b5ed4cbc690d26e2d1a1a7 Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Fri, 25 Apr 2025 08:24:33 +0000 Subject: [PATCH 14/32] in _torch_median move output to cpu if mps (torch.kthvalue is not supported on MPS) --- ignite/metrics/regression/_base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ignite/metrics/regression/_base.py b/ignite/metrics/regression/_base.py index b4c08b58a4ea..fd12c53ac92a 100644 --- a/ignite/metrics/regression/_base.py +++ b/ignite/metrics/regression/_base.py @@ -30,6 +30,9 @@ def _check_output_types(output: Tuple[torch.Tensor, torch.Tensor]) -> None: def _torch_median(output: torch.Tensor) -> float: + if output.device.type == "mps": + output = output.cpu() + output = output.view(-1) len_ = len(output) From c13837e1e26051fd8868b7c1e90169f6dcd73b3a Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Fri, 25 Apr 2025 09:02:04 +0000 Subject: [PATCH 15/32] fixing test_degenerated_sample --- .../metrics/regression/pearson_correlation.py | 3 +- .../regression/test_pearson_correlation.py | 
35 +++++++------------ 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/ignite/metrics/regression/pearson_correlation.py b/ignite/metrics/regression/pearson_correlation.py index 7b90df23f3ab..1a81c6fbf9dc 100644 --- a/ignite/metrics/regression/pearson_correlation.py +++ b/ignite/metrics/regression/pearson_correlation.py @@ -119,5 +119,6 @@ def compute(self) -> float: y_var = self._sum_of_y_squares / n - y_mean * y_mean y_var = torch.clamp(y_var, min=0.0) - r = cov / torch.clamp(torch.sqrt(y_pred_var * y_var), min=self.eps) + denom = torch.sqrt(torch.clamp(y_pred_var * y_var, min=self.eps)) + r = cov / denom return float(r.item()) diff --git a/tests/ignite/metrics/regression/test_pearson_correlation.py b/tests/ignite/metrics/regression/test_pearson_correlation.py index e1ff4c995216..351b992a135e 100644 --- a/tests/ignite/metrics/regression/test_pearson_correlation.py +++ b/tests/ignite/metrics/regression/test_pearson_correlation.py @@ -20,25 +20,6 @@ def np_corr_eps(np_y_pred: np.ndarray, np_y: np.ndarray, eps: float = 1e-8): return corr -def torch_corr_eps(y_pred: torch.Tensor, y: torch.Tensor, eps: float = 1e-8) -> torch.Tensor: - y_pred = y_pred.to(dtype=torch.float32) - y = y.to(dtype=torch.float32) - - y_pred_mean = y_pred.mean() - y_mean = y.mean() - - pred_centered = y_pred - y_pred_mean - y_centered = y - y_mean - - cov = torch.mean(pred_centered * y_centered) - std_pred = pred_centered.std(unbiased=False) - std_y = y_centered.std(unbiased=False) - - denom = torch.clamp(std_pred * std_y, min=eps) - corr = cov / denom - return corr - - def scipy_corr(np_y_pred: np.ndarray, np_y: np.ndarray): corr = pearsonr(np_y_pred, np_y) return corr.statistic @@ -70,8 +51,12 @@ def test_degenerated_sample(available_device): y = torch.tensor([1.0]) m.update((y_pred, y)) - res = torch_corr_eps(y_pred, y) - assert pytest.approx(res) == m.compute() + np_y_pred = y_pred.cpu().numpy() + np_y = y_pred.cpu().numpy() + expected = np_corr_eps(np_y_pred, np_y) + actual = m.compute() + + assert pytest.approx(expected) == actual # constant samples m.reset() @@ -79,8 +64,12 @@ def test_degenerated_sample(available_device): y = torch.zeros(10).float() m.update((y_pred, y)) - res = torch_corr_eps(y_pred, y) - assert pytest.approx(res) == m.compute() + np_y_pred = y_pred.cpu().numpy() + np_y = y_pred.cpu().numpy() + expected = np_corr_eps(np_y_pred, np_y) + actual = m.compute() + + assert pytest.approx(expected) == actual def test_pearson_correlation(available_device): From c85dab1ec3490cc9cb826d7a11e69310f41a5f7a Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Fri, 25 Apr 2025 09:28:10 +0000 Subject: [PATCH 16/32] fixing test_degenerated_sample --- ignite/metrics/regression/_base.py | 1 + ignite/metrics/regression/pearson_correlation.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ignite/metrics/regression/_base.py b/ignite/metrics/regression/_base.py index fd12c53ac92a..21094eb1b6fa 100644 --- a/ignite/metrics/regression/_base.py +++ b/ignite/metrics/regression/_base.py @@ -30,6 +30,7 @@ def _check_output_types(output: Tuple[torch.Tensor, torch.Tensor]) -> None: def _torch_median(output: torch.Tensor) -> float: + # torch.kthvalue used later is not supported on MPS if output.device.type == "mps": output = output.cpu() diff --git a/ignite/metrics/regression/pearson_correlation.py b/ignite/metrics/regression/pearson_correlation.py index 1a81c6fbf9dc..cfa7a4028358 100644 --- a/ignite/metrics/regression/pearson_correlation.py +++ 
b/ignite/metrics/regression/pearson_correlation.py @@ -119,6 +119,12 @@ def compute(self) -> float: y_var = self._sum_of_y_squares / n - y_mean * y_mean y_var = torch.clamp(y_var, min=0.0) - denom = torch.sqrt(torch.clamp(y_pred_var * y_var, min=self.eps)) + denom = y_pred_var * y_var + denom = torch.clamp(denom, min=self.eps) + denom = torch.sqrt(denom) r = cov / denom + + if torch.isnan(r): + return 0.0 + return float(r.item()) From c662c4442b7eb92098f8ff77a0957b4cf00af10b Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Fri, 25 Apr 2025 09:58:48 +0000 Subject: [PATCH 17/32] rename upper case variables --- tests/ignite/metrics/regression/test_canberra_metric.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ignite/metrics/regression/test_canberra_metric.py b/tests/ignite/metrics/regression/test_canberra_metric.py index 17bd7d827d40..57b2b715e3e0 100644 --- a/tests/ignite/metrics/regression/test_canberra_metric.py +++ b/tests/ignite/metrics/regression/test_canberra_metric.py @@ -92,9 +92,9 @@ def update_fn(engine, batch): data = list(range(y_pred.shape[0] // batch_size)) cm = engine.run(data, max_epochs=1).metrics["cm"] - X = y_pred.cpu().numpy().reshape(len(y_pred), -1) - Y = y.cpu().numpy().reshape(len(y), -1) - expected = np.sum(canberra.pairwise(X, Y).diagonal()) + pred_np = y_pred.cpu().numpy().reshape(len(y_pred), -1) + true_np = y.cpu().numpy().reshape(len(y), -1) + expected = np.sum(canberra.pairwise(pred_np, true_np).diagonal()) assert expected == pytest.approx(cm) From e4710645ffb4bcb9dadaebb692b9f58317b9d727 Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Fri, 25 Apr 2025 10:00:35 +0000 Subject: [PATCH 18/32] change range to 3 --- tests/ignite/metrics/regression/test_canberra_metric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ignite/metrics/regression/test_canberra_metric.py b/tests/ignite/metrics/regression/test_canberra_metric.py index 57b2b715e3e0..ee4976382a17 100644 --- a/tests/ignite/metrics/regression/test_canberra_metric.py +++ b/tests/ignite/metrics/regression/test_canberra_metric.py @@ -59,7 +59,7 @@ def test_compute(available_device): assert canberra.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) -@pytest.mark.parametrize("n_times", range(5)) +@pytest.mark.parametrize("n_times", range(3)) @pytest.mark.parametrize( "test_cases", [ From 37a046902fb0d11a9bc9578f995ca2700f713daa Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Fri, 25 Apr 2025 10:05:44 +0000 Subject: [PATCH 19/32] rewrite test_compute --- .../test_fractional_absolute_error.py | 46 ++++++++----------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/tests/ignite/metrics/regression/test_fractional_absolute_error.py b/tests/ignite/metrics/regression/test_fractional_absolute_error.py index 6b07d8e8de49..bb5932439eb1 100644 --- a/tests/ignite/metrics/regression/test_fractional_absolute_error.py +++ b/tests/ignite/metrics/regression/test_fractional_absolute_error.py @@ -29,38 +29,28 @@ def test_wrong_input_shapes(): def test_compute(available_device): - a = np.random.randn(4) - b = np.random.randn(4) - c = np.random.randn(4) - d = np.random.randn(4) - ground_truth = np.random.randn(4) + a = torch.randn(4) + b = torch.randn(4) + c = torch.randn(4) + d = torch.randn(4) + ground_truth = torch.randn(4) m = FractionalAbsoluteError(device=available_device) assert m._device == torch.device(available_device) - m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) - np_sum = (2 * np.abs((a - ground_truth)) / (np.abs(a) + 
np.abs(ground_truth))).sum() - np_len = len(a) - np_ans = np_sum / np_len - assert m.compute() == pytest.approx(np_ans) - - m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) - np_sum += (2 * np.abs((b - ground_truth)) / (np.abs(b) + np.abs(ground_truth))).sum() - np_len += len(b) - np_ans = np_sum / np_len - assert m.compute() == pytest.approx(np_ans) - - m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) - np_sum += (2 * np.abs((c - ground_truth)) / (np.abs(c) + np.abs(ground_truth))).sum() - np_len += len(c) - np_ans = np_sum / np_len - assert m.compute() == pytest.approx(np_ans) - - m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) - np_sum += (2 * np.abs((d - ground_truth)) / (np.abs(d) + np.abs(ground_truth))).sum() - np_len += len(d) - np_ans = np_sum / np_len - assert m.compute() == pytest.approx(np_ans) + total_error = 0.0 + total_len = 0 + + for pred in [a, b, c, d]: + m.update((pred, ground_truth)) + + # Compute fractional absolute error in PyTorch + error = 2 * torch.abs(pred - ground_truth) / (torch.abs(pred) + torch.abs(ground_truth)) + total_error += error.sum().item() + total_len += len(pred) + + expected = total_error / total_len + assert m.compute() == pytest.approx(expected) def test_integration(available_device): From 71af57e8451fbab37b5ff0f4402122b087a2dcb2 Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Fri, 25 Apr 2025 10:07:49 +0000 Subject: [PATCH 20/32] rewrite test_fractional_bias --- .../regression/test_fractional_bias.py | 46 +++++++------------ 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/tests/ignite/metrics/regression/test_fractional_bias.py b/tests/ignite/metrics/regression/test_fractional_bias.py index 8ec9862ea6bb..64e19f3f6feb 100644 --- a/tests/ignite/metrics/regression/test_fractional_bias.py +++ b/tests/ignite/metrics/regression/test_fractional_bias.py @@ -1,6 +1,5 @@ import os -import numpy as np import pytest import torch @@ -29,38 +28,27 @@ def test_wrong_input_shapes(): def test_fractional_bias(available_device): - a = np.random.randn(4) - b = np.random.randn(4) - c = np.random.randn(4) - d = np.random.randn(4) - ground_truth = np.random.randn(4) + a = torch.randn(4) + b = torch.randn(4) + c = torch.randn(4) + d = torch.randn(4) + ground_truth = torch.randn(4) m = FractionalBias(device=available_device) assert m._device == torch.device(available_device) - m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) - np_sum = (2 * (ground_truth - a) / (a + ground_truth)).sum() - np_len = len(a) - np_ans = np_sum / np_len - assert m.compute() == pytest.approx(np_ans) - - m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) - np_sum += (2 * (ground_truth - b) / (b + ground_truth)).sum() - np_len += len(b) - np_ans = np_sum / np_len - assert m.compute() == pytest.approx(np_ans) - - m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) - np_sum += (2 * (ground_truth - c) / (c + ground_truth)).sum() - np_len += len(c) - np_ans = np_sum / np_len - assert m.compute() == pytest.approx(np_ans) - - m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) - np_sum += (2 * (ground_truth - d) / (d + ground_truth)).sum() - np_len += len(d) - np_ans = np_sum / np_len - assert m.compute() == pytest.approx(np_ans) + total_error = 0.0 + total_len = 0 + + for pred in [a, b, c, d]: + m.update((pred, ground_truth)) + + error = 2 * (ground_truth - pred) / (pred + ground_truth) + total_error += error.sum().item() + total_len += len(pred) + + expected = total_error / total_len + 
assert m.compute() == pytest.approx(expected) def test_integration(available_device): From d59cb6f4a2f798f4ba8c3f8e64d81be3793fb1c9 Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Fri, 25 Apr 2025 12:57:48 +0000 Subject: [PATCH 21/32] remove prints --- tests/ignite/metrics/regression/test_canberra_metric.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/ignite/metrics/regression/test_canberra_metric.py b/tests/ignite/metrics/regression/test_canberra_metric.py index ee4976382a17..993a3bd1f97d 100644 --- a/tests/ignite/metrics/regression/test_canberra_metric.py +++ b/tests/ignite/metrics/regression/test_canberra_metric.py @@ -81,11 +81,9 @@ def update_fn(engine, batch): engine = Engine(update_fn) m = CanberraMetric(device=available_device) - print(f"m's dtype: {m._double_dtype}") assert m._device == torch.device(available_device) m.attach(engine, "cm") - print(f"m's dtype again: {m._double_dtype}") canberra = DistanceMetric.get_metric("canberra") From da2e75d60631bee92d558b05bff0438291097d29 Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Fri, 25 Apr 2025 13:03:18 +0000 Subject: [PATCH 22/32] rollback eps in canberra_metric.py --- ignite/metrics/regression/canberra_metric.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ignite/metrics/regression/canberra_metric.py b/ignite/metrics/regression/canberra_metric.py index 8fcff16d030b..45abd4abb943 100644 --- a/ignite/metrics/regression/canberra_metric.py +++ b/ignite/metrics/regression/canberra_metric.py @@ -73,8 +73,7 @@ def reset(self) -> None: def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() - eps = torch.tensor(1e-15, dtype=self._double_dtype, device=y.device) - errors = torch.abs(y - y_pred) / (torch.abs(y_pred) + torch.abs(y) + eps) + errors = torch.abs(y - y_pred) / (torch.abs(y_pred) + torch.abs(y) + 1e-15) self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device) @sync_all_reduce("_sum_of_errors") From 0a2f6d4504f16eb299587b18786a4cbe99514738 Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Fri, 25 Apr 2025 13:45:10 +0000 Subject: [PATCH 23/32] rollback test_epoch_metric.py because the changes are moved to a separate branch --- tests/ignite/metrics/test_epoch_metric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ignite/metrics/test_epoch_metric.py b/tests/ignite/metrics/test_epoch_metric.py index dbc5f2c9237e..5c42957cf57d 100644 --- a/tests/ignite/metrics/test_epoch_metric.py +++ b/tests/ignite/metrics/test_epoch_metric.py @@ -103,7 +103,7 @@ def compute_fn(y_preds, y_targets): targets = torch.cat([output1[1], output2[1], output3[1]], dim=0) result = em.compute() - assert result == pytest.approx(compute_fn(preds, targets), rel=1e-6) + assert result == compute_fn(preds, targets) em.reset() output1 = (torch.rand(4, 3), torch.randint(0, 2, size=(4, 3), dtype=torch.long)) @@ -117,7 +117,7 @@ def compute_fn(y_preds, y_targets): targets = torch.cat([output1[1], output2[1], output3[1]], dim=0) result = em.compute() - assert result == pytest.approx(compute_fn(preds, targets), rel=1e-6) + assert result == compute_fn(preds, targets) def test_bad_compute_fn(): From 667332dca16d7071aebcd704ac32dd4f42291574 Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Mon, 28 Apr 2025 07:07:29 +0000 Subject: [PATCH 24/32] set sum_of_errors as _double_dtype --- ignite/metrics/regression/canberra_metric.py | 2 +- ignite/metrics/regression/fractional_absolute_error.py | 2 +- 
ignite/metrics/regression/geometric_mean_absolute_error.py | 2 +- ignite/metrics/regression/manhattan_distance.py | 2 +- ignite/metrics/regression/mean_error.py | 2 +- ignite/metrics/regression/mean_normalized_bias.py | 2 +- ignite/metrics/regression/r2_score.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ignite/metrics/regression/canberra_metric.py b/ignite/metrics/regression/canberra_metric.py index 45abd4abb943..d061311998b8 100644 --- a/ignite/metrics/regression/canberra_metric.py +++ b/ignite/metrics/regression/canberra_metric.py @@ -69,7 +69,7 @@ class CanberraMetric(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, device=self._device) + self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() diff --git a/ignite/metrics/regression/fractional_absolute_error.py b/ignite/metrics/regression/fractional_absolute_error.py index 28c35596a329..a6e2841ff040 100644 --- a/ignite/metrics/regression/fractional_absolute_error.py +++ b/ignite/metrics/regression/fractional_absolute_error.py @@ -64,7 +64,7 @@ class FractionalAbsoluteError(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, device=self._device) + self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) self._num_examples = 0 def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: diff --git a/ignite/metrics/regression/geometric_mean_absolute_error.py b/ignite/metrics/regression/geometric_mean_absolute_error.py index 3f6f35176fb3..b1a1a3ede309 100644 --- a/ignite/metrics/regression/geometric_mean_absolute_error.py +++ b/ignite/metrics/regression/geometric_mean_absolute_error.py @@ -64,7 +64,7 @@ class GeometricMeanAbsoluteError(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, device=self._device) + self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) self._num_examples = 0 def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: diff --git a/ignite/metrics/regression/manhattan_distance.py b/ignite/metrics/regression/manhattan_distance.py index dd71ff792670..73acad26c229 100644 --- a/ignite/metrics/regression/manhattan_distance.py +++ b/ignite/metrics/regression/manhattan_distance.py @@ -65,7 +65,7 @@ class ManhattanDistance(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, device=self._device) + self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output diff --git a/ignite/metrics/regression/mean_error.py b/ignite/metrics/regression/mean_error.py index 91a8d90408f5..c28d35ad6133 100644 --- a/ignite/metrics/regression/mean_error.py +++ b/ignite/metrics/regression/mean_error.py @@ -61,7 +61,7 @@ class MeanError(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, device=self._device) + self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) self._num_examples = 0 def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: diff --git a/ignite/metrics/regression/mean_normalized_bias.py b/ignite/metrics/regression/mean_normalized_bias.py index 
e289361db7d8..148940867ba0 100644 --- a/ignite/metrics/regression/mean_normalized_bias.py +++ b/ignite/metrics/regression/mean_normalized_bias.py @@ -64,7 +64,7 @@ class MeanNormalizedBias(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, device=self._device) + self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) self._num_examples = 0 def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: diff --git a/ignite/metrics/regression/r2_score.py b/ignite/metrics/regression/r2_score.py index b4a4c4370d9f..931e92a0a994 100644 --- a/ignite/metrics/regression/r2_score.py +++ b/ignite/metrics/regression/r2_score.py @@ -63,7 +63,7 @@ class R2Score(_BaseRegression): @reinit__is_reduced def reset(self) -> None: self._num_examples = 0 - self._sum_of_errors = torch.tensor(0.0, device=self._device) + self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) self._y_sq_sum = torch.tensor(0.0, device=self._device) self._y_sum = torch.tensor(0.0, device=self._device) From 579d0359b314d4771218af2aa8625ac99782e0a5 Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Mon, 28 Apr 2025 09:36:41 +0000 Subject: [PATCH 25/32] use torch instead of numpy where possible in test_canberra_metric.py --- .../regression/test_canberra_metric.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/ignite/metrics/regression/test_canberra_metric.py b/tests/ignite/metrics/regression/test_canberra_metric.py index 993a3bd1f97d..fa67731f8664 100644 --- a/tests/ignite/metrics/regression/test_canberra_metric.py +++ b/tests/ignite/metrics/regression/test_canberra_metric.py @@ -21,38 +21,38 @@ def test_wrong_input_shapes(): def test_compute(available_device): - a = np.random.randn(4) - b = np.random.randn(4) - c = np.random.randn(4) - d = np.random.randn(4) - ground_truth = np.random.randn(4) + a = torch.randn(4) + b = torch.randn(4) + c = torch.randn(4) + d = torch.randn(4) + ground_truth = torch.randn(4) m = CanberraMetric(device=available_device) assert m._device == torch.device(available_device) canberra = DistanceMetric.get_metric("canberra") - m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) - np_sum = (np.abs(ground_truth - a) / (np.abs(a) + np.abs(ground_truth))).sum() + m.update((a, ground_truth)) + np_sum = (torch.abs(ground_truth - a) / (torch.abs(a) + torch.abs(ground_truth))).sum() assert m.compute() == pytest.approx(np_sum) - assert canberra.pairwise([a, ground_truth])[0][1] == pytest.approx(np_sum) + assert canberra.pairwise([a.cpu().numpy(), ground_truth.cpu().numpy()])[0][1] == pytest.approx(np_sum) - m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) - np_sum += ((np.abs(ground_truth - b)) / (np.abs(b) + np.abs(ground_truth))).sum() + m.update((b, ground_truth)) + np_sum += ((torch.abs(ground_truth - b)) / (torch.abs(b) + torch.abs(ground_truth))).sum() assert m.compute() == pytest.approx(np_sum) v1 = np.hstack([a, b]) v2 = np.hstack([ground_truth, ground_truth]) assert canberra.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) - m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) - np_sum += ((np.abs(ground_truth - c)) / (np.abs(c) + np.abs(ground_truth))).sum() + m.update((c, ground_truth)) + np_sum += ((torch.abs(ground_truth - c)) / (torch.abs(c) + torch.abs(ground_truth))).sum() assert m.compute() == pytest.approx(np_sum) v1 = np.hstack([v1, c]) v2 = np.hstack([v2, ground_truth]) assert 
canberra.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) - m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) - np_sum += (np.abs(ground_truth - d) / (np.abs(d) + np.abs(ground_truth))).sum() + m.update((d, ground_truth)) + np_sum += (torch.abs(ground_truth - d) / (torch.abs(d) + torch.abs(ground_truth))).sum() assert m.compute() == pytest.approx(np_sum) v1 = np.hstack([v1, d]) v2 = np.hstack([v2, ground_truth]) From e6c96de11785cdfd0f2c77941d9f0dcd83ee33bd Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Tue, 29 Apr 2025 21:29:57 +0000 Subject: [PATCH 26/32] remove double_dtype from metrics --- ignite/metrics/regression/canberra_metric.py | 4 +-- .../regression/fractional_absolute_error.py | 4 +-- ignite/metrics/regression/fractional_bias.py | 4 +-- .../geometric_mean_absolute_error.py | 4 +-- .../geometric_mean_relative_absolute_error.py | 4 +-- .../metrics/regression/manhattan_distance.py | 4 +-- .../mean_absolute_relative_error.py | 4 +-- ignite/metrics/regression/mean_error.py | 2 +- .../regression/mean_normalized_bias.py | 4 +-- .../metrics/regression/pearson_correlation.py | 10 +++---- ignite/metrics/regression/r2_score.py | 8 +++--- .../regression/wave_hedges_distance.py | 2 +- .../regression/test_fractional_bias.py | 17 +++++++++-- .../test_geometric_mean_absolute_error.py | 17 ++++++++--- ..._geometric_mean_relative_absolute_error.py | 8 ++++-- .../regression/test_manhattan_distance.py | 18 ++++++++---- .../metrics/regression/test_mean_error.py | 17 ++++++++--- .../regression/test_mean_normalized_bias.py | 17 ++++++++--- .../metrics/regression/test_r2_score.py | 28 +++++++++++++++---- .../regression/test_wave_hedges_distance.py | 17 ++++++++--- 20 files changed, 135 insertions(+), 58 deletions(-) diff --git a/ignite/metrics/regression/canberra_metric.py b/ignite/metrics/regression/canberra_metric.py index d061311998b8..f8bd2732a386 100644 --- a/ignite/metrics/regression/canberra_metric.py +++ b/ignite/metrics/regression/canberra_metric.py @@ -69,12 +69,12 @@ class CanberraMetric(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) + self._sum_of_errors = torch.tensor(0.0, device=self._device) def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() errors = torch.abs(y - y_pred) / (torch.abs(y_pred) + torch.abs(y) + 1e-15) - self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device) + self._sum_of_errors += torch.sum(errors).to(self._device) @sync_all_reduce("_sum_of_errors") def compute(self) -> float: diff --git a/ignite/metrics/regression/fractional_absolute_error.py b/ignite/metrics/regression/fractional_absolute_error.py index a6e2841ff040..c66e8e780f70 100644 --- a/ignite/metrics/regression/fractional_absolute_error.py +++ b/ignite/metrics/regression/fractional_absolute_error.py @@ -64,13 +64,13 @@ class FractionalAbsoluteError(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) + self._sum_of_errors = torch.tensor(0.0, device=self._device) self._num_examples = 0 def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() errors = 2 * torch.abs(y.view_as(y_pred) - y_pred) / (torch.abs(y_pred) + torch.abs(y.view_as(y_pred))) - self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, 
device=self._device) + self._sum_of_errors += torch.sum(errors).to(self._device) self._num_examples += y.shape[0] @sync_all_reduce("_num_examples", "_sum_of_errors") diff --git a/ignite/metrics/regression/fractional_bias.py b/ignite/metrics/regression/fractional_bias.py index 3e2f0719ca74..912e2a01997e 100644 --- a/ignite/metrics/regression/fractional_bias.py +++ b/ignite/metrics/regression/fractional_bias.py @@ -64,13 +64,13 @@ class FractionalBias(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) + self._sum_of_errors = torch.tensor(0.0, device=self._device) self._num_examples = 0 def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() errors = 2 * (y.view_as(y_pred) - y_pred) / (y_pred + y.view_as(y_pred) + 1e-30) - self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device) + self._sum_of_errors += torch.sum(errors).to(self._device) self._num_examples += y.shape[0] @sync_all_reduce("_sum_of_errors", "_num_examples") diff --git a/ignite/metrics/regression/geometric_mean_absolute_error.py b/ignite/metrics/regression/geometric_mean_absolute_error.py index b1a1a3ede309..92f61192839c 100644 --- a/ignite/metrics/regression/geometric_mean_absolute_error.py +++ b/ignite/metrics/regression/geometric_mean_absolute_error.py @@ -64,13 +64,13 @@ class GeometricMeanAbsoluteError(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) + self._sum_of_errors = torch.tensor(0.0, device=self._device) self._num_examples = 0 def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() errors = torch.log(torch.abs(y.view_as(y_pred) - y_pred)) - self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device) + self._sum_of_errors += torch.sum(errors).to(self._device) self._num_examples += y.shape[0] @sync_all_reduce("_sum_of_errors", "_num_examples") diff --git a/ignite/metrics/regression/geometric_mean_relative_absolute_error.py b/ignite/metrics/regression/geometric_mean_relative_absolute_error.py index a9e639589729..8e2bfb9c0457 100644 --- a/ignite/metrics/regression/geometric_mean_relative_absolute_error.py +++ b/ignite/metrics/regression/geometric_mean_relative_absolute_error.py @@ -80,8 +80,8 @@ def reset(self) -> None: def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() - y_pred = y_pred.clone().to(dtype=self._double_dtype, device=self._device) - y = y.clone().to(dtype=self._double_dtype, device=self._device) + y_pred = y_pred.clone().to(self._device) + y = y.clone().to(self._device) self._predictions.append(y_pred) self._targets.append(y) diff --git a/ignite/metrics/regression/manhattan_distance.py b/ignite/metrics/regression/manhattan_distance.py index 73acad26c229..9746fece91ea 100644 --- a/ignite/metrics/regression/manhattan_distance.py +++ b/ignite/metrics/regression/manhattan_distance.py @@ -65,12 +65,12 @@ class ManhattanDistance(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) + self._sum_of_errors = torch.tensor(0.0, device=self._device) def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output errors = torch.abs(y - 
y_pred) - self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device) + self._sum_of_errors += torch.sum(errors).to(self._device) @sync_all_reduce("_sum_of_errors") def compute(self) -> float: diff --git a/ignite/metrics/regression/mean_absolute_relative_error.py b/ignite/metrics/regression/mean_absolute_relative_error.py index a851aa4fb5a7..1551ff9be720 100644 --- a/ignite/metrics/regression/mean_absolute_relative_error.py +++ b/ignite/metrics/regression/mean_absolute_relative_error.py @@ -72,9 +72,7 @@ def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: if (y == 0).any(): raise NotComputableError("The ground truth has 0.") absolute_error = torch.abs(y_pred - y.view_as(y_pred)) / torch.abs(y.view_as(y_pred)) - self._sum_of_absolute_relative_errors += torch.sum(absolute_error).to( - dtype=self._double_dtype, device=self._device - ) + self._sum_of_absolute_relative_errors += torch.sum(absolute_error).to(self._device) self._num_samples += y.size()[0] @sync_all_reduce("_sum_of_absolute_relative_errors", "_num_samples") diff --git a/ignite/metrics/regression/mean_error.py b/ignite/metrics/regression/mean_error.py index c28d35ad6133..91a8d90408f5 100644 --- a/ignite/metrics/regression/mean_error.py +++ b/ignite/metrics/regression/mean_error.py @@ -61,7 +61,7 @@ class MeanError(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) + self._sum_of_errors = torch.tensor(0.0, device=self._device) self._num_examples = 0 def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: diff --git a/ignite/metrics/regression/mean_normalized_bias.py b/ignite/metrics/regression/mean_normalized_bias.py index 148940867ba0..9ac2e244dd00 100644 --- a/ignite/metrics/regression/mean_normalized_bias.py +++ b/ignite/metrics/regression/mean_normalized_bias.py @@ -64,7 +64,7 @@ class MeanNormalizedBias(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) + self._sum_of_errors = torch.tensor(0.0, device=self._device) self._num_examples = 0 def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: @@ -74,7 +74,7 @@ def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: raise NotComputableError("The ground truth has 0.") errors = (y.view_as(y_pred) - y_pred) / y - self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device) + self._sum_of_errors += torch.sum(errors).to(self._device) self._num_examples += y.shape[0] @sync_all_reduce("_sum_of_errors", "_num_examples") diff --git a/ignite/metrics/regression/pearson_correlation.py b/ignite/metrics/regression/pearson_correlation.py index cfa7a4028358..01ffbbbdf68c 100644 --- a/ignite/metrics/regression/pearson_correlation.py +++ b/ignite/metrics/regression/pearson_correlation.py @@ -87,11 +87,11 @@ def reset(self) -> None: def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() - self._sum_of_y_preds += y_pred.sum().to(dtype=self._double_dtype, device=self._device) - self._sum_of_ys += y.sum().to(dtype=self._double_dtype, device=self._device) - self._sum_of_y_pred_squares += y_pred.square().sum().to(dtype=self._double_dtype, device=self._device) - self._sum_of_y_squares += y.square().sum().to(dtype=self._double_dtype, device=self._device) - self._sum_of_products += (y_pred * 
y).sum().to(dtype=self._double_dtype, device=self._device) + self._sum_of_y_preds += y_pred.sum().to(self._device) + self._sum_of_ys += y.sum().to(self._device) + self._sum_of_y_pred_squares += y_pred.square().sum().to(self._device) + self._sum_of_y_squares += y.square().sum().to(self._device) + self._sum_of_products += (y_pred * y).sum().to(self._device) self._num_examples += y.shape[0] @sync_all_reduce( diff --git a/ignite/metrics/regression/r2_score.py b/ignite/metrics/regression/r2_score.py index 931e92a0a994..f4089a3e2e1c 100644 --- a/ignite/metrics/regression/r2_score.py +++ b/ignite/metrics/regression/r2_score.py @@ -63,17 +63,17 @@ class R2Score(_BaseRegression): @reinit__is_reduced def reset(self) -> None: self._num_examples = 0 - self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) + self._sum_of_errors = torch.tensor(0.0, device=self._device) self._y_sq_sum = torch.tensor(0.0, device=self._device) self._y_sum = torch.tensor(0.0, device=self._device) def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output self._num_examples += y.shape[0] - self._sum_of_errors += torch.sum(torch.pow(y_pred - y, 2)).to(dtype=self._double_dtype, device=self._device) + self._sum_of_errors += torch.sum(torch.pow(y_pred - y, 2)).to(self._device) - self._y_sum += torch.sum(y).to(dtype=self._double_dtype, device=self._device) - self._y_sq_sum += torch.sum(torch.pow(y, 2)).to(dtype=self._double_dtype, device=self._device) + self._y_sum += torch.sum(y).to(self._device) + self._y_sq_sum += torch.sum(torch.pow(y, 2)).to(self._device) @sync_all_reduce("_num_examples", "_sum_of_errors", "_y_sq_sum", "_y_sum") def compute(self) -> float: diff --git a/ignite/metrics/regression/wave_hedges_distance.py b/ignite/metrics/regression/wave_hedges_distance.py index 0549067e0440..c226a1eb2a86 100644 --- a/ignite/metrics/regression/wave_hedges_distance.py +++ b/ignite/metrics/regression/wave_hedges_distance.py @@ -68,7 +68,7 @@ def reset(self) -> None: def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: y_pred, y = output[0].detach(), output[1].detach() errors = torch.abs(y.view_as(y_pred) - y_pred) / (torch.max(y_pred, y.view_as(y_pred)) + 1e-30) - self._sum_of_errors += torch.sum(errors).to(dtype=self._double_dtype, device=self._device) + self._sum_of_errors += torch.sum(errors).to(self._device) @sync_all_reduce("_sum_of_errors") def compute(self) -> float: diff --git a/tests/ignite/metrics/regression/test_fractional_bias.py b/tests/ignite/metrics/regression/test_fractional_bias.py index 64e19f3f6feb..b4f9f8422794 100644 --- a/tests/ignite/metrics/regression/test_fractional_bias.py +++ b/tests/ignite/metrics/regression/test_fractional_bias.py @@ -57,7 +57,17 @@ def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] y_pred_batch = np_y_pred[idx : idx + batch_size] - return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) + torch_y_pred_batch = ( + torch.from_numpy(y_pred_batch).to(dtype=torch.float32) + if device == "mps" + else torch.from_numpy(y_pred_batch) + ) + torch_y_true_batch = ( + torch.from_numpy(y_true_batch).to(dtype=torch.float32) + if device == "mps" + else torch.from_numpy(y_true_batch) + ) + return torch_y_pred_batch, torch_y_true_batch engine = Engine(update_fn) @@ -76,7 +86,10 @@ def update_fn(engine, batch): np_len = len(y_pred) np_ans = np_sum / np_len - assert np_ans == pytest.approx(fb) + if available_device == "mps": + assert np_ans == 
pytest.approx(fb, rel=1e-5) + else: + assert np_ans == pytest.approx(fb) def get_test_cases(): test_cases = [ diff --git a/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py b/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py index 1e99e7eec0eb..936c35d2df8a 100644 --- a/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py +++ b/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py @@ -38,7 +38,13 @@ def test_compute(available_device): m = GeometricMeanAbsoluteError(device=available_device) assert m._device == torch.device(available_device) - m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) + torch_a = torch.from_numpy(a).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(a) + torch_ground_truth = ( + torch.from_numpy(ground_truth).to(dtype=torch.float32) + if available_device == "mps" + else torch.from_numpy(ground_truth) + ) + m.update((torch_a, torch_ground_truth)) errors = np.abs(ground_truth - a) np_prod = np.multiply.reduce(errors) * np_prod @@ -46,21 +52,24 @@ def test_compute(available_device): np_ans = np.power(np_prod, 1.0 / np_len) assert m.compute() == pytest.approx(np_ans) - m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) + torch_b = torch.from_numpy(b).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(b) + m.update((torch_b, torch_ground_truth)) errors = np.abs(ground_truth - b) np_prod = np.multiply.reduce(errors) * np_prod np_len += len(b) np_ans = np.power(np_prod, 1.0 / np_len) assert m.compute() == pytest.approx(np_ans) - m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) + torch_c = torch.from_numpy(c).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(c) + m.update((torch_c, torch_ground_truth)) errors = np.abs(ground_truth - c) np_prod = np.multiply.reduce(errors) * np_prod np_len += len(c) np_ans = np.power(np_prod, 1.0 / np_len) assert m.compute() == pytest.approx(np_ans) - m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) + torch_d = torch.from_numpy(d).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(d) + m.update((torch_d, torch_ground_truth)) errors = np.abs(ground_truth - d) np_prod = np.multiply.reduce(errors) * np_prod np_len += len(d) diff --git a/tests/ignite/metrics/regression/test_geometric_mean_relative_absolute_error.py b/tests/ignite/metrics/regression/test_geometric_mean_relative_absolute_error.py index b78971e7f123..570f45fffeee 100644 --- a/tests/ignite/metrics/regression/test_geometric_mean_relative_absolute_error.py +++ b/tests/ignite/metrics/regression/test_geometric_mean_relative_absolute_error.py @@ -37,8 +37,12 @@ def test_compute(available_device): m = GeometricMeanRelativeAbsoluteError(device=available_device) assert m._device == torch.device(available_device) - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) + y_pred = ( + torch.from_numpy(np_y_pred).to(dtype=torch.float32) + if available_device == "mps" + else torch.from_numpy(np_y_pred) + ) + y = torch.from_numpy(np_y).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(np_y) m.reset() m.update((y_pred, y)) diff --git a/tests/ignite/metrics/regression/test_manhattan_distance.py b/tests/ignite/metrics/regression/test_manhattan_distance.py index 406a66bee571..ce43c9162313 100644 --- a/tests/ignite/metrics/regression/test_manhattan_distance.py +++ b/tests/ignite/metrics/regression/test_manhattan_distance.py @@ -31,27 +31,35 @@ 
def test_mahattan_distance(available_device): assert m._device == torch.device(available_device) manhattan = DistanceMetric.get_metric("manhattan") - - m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) + torch_a = torch.from_numpy(a).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(a) + torch_ground_truth = ( + torch.from_numpy(ground_truth).to(dtype=torch.float32) + if available_device == "mps" + else torch.from_numpy(ground_truth) + ) + m.update((torch_a, torch_ground_truth)) np_sum = np.abs(ground_truth - a).sum() assert m.compute() == pytest.approx(np_sum) assert manhattan.pairwise([a, ground_truth])[0][1] == pytest.approx(np_sum) - m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) + torch_b = torch.from_numpy(b).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(b) + m.update((torch_b, torch_ground_truth)) np_sum += np.abs(ground_truth - b).sum() assert m.compute() == pytest.approx(np_sum) v1 = np.hstack([a, b]) v2 = np.hstack([ground_truth, ground_truth]) assert manhattan.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) - m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) + torch_c = torch.from_numpy(c).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(c) + m.update((torch_c, torch_ground_truth)) np_sum += np.abs(ground_truth - c).sum() assert m.compute() == pytest.approx(np_sum) v1 = np.hstack([v1, c]) v2 = np.hstack([v2, ground_truth]) assert manhattan.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) - m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) + torch_d = torch.from_numpy(d).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(d) + m.update((torch_d, torch_ground_truth)) np_sum += np.abs(ground_truth - d).sum() assert m.compute() == pytest.approx(np_sum) v1 = np.hstack([v1, d]) diff --git a/tests/ignite/metrics/regression/test_mean_error.py b/tests/ignite/metrics/regression/test_mean_error.py index 61c64e54e31e..e4c943dc3fb7 100644 --- a/tests/ignite/metrics/regression/test_mean_error.py +++ b/tests/ignite/metrics/regression/test_mean_error.py @@ -36,25 +36,34 @@ def test_mean_error(available_device): m = MeanError(device=available_device) assert m._device == torch.device(available_device) - m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) + torch_a = torch.from_numpy(a).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(a) + torch_ground_truth = ( + torch.from_numpy(ground_truth).to(dtype=torch.float32) + if available_device == "mps" + else torch.from_numpy(ground_truth) + ) + m.update((torch_a, torch_ground_truth)) np_sum = (ground_truth - a).sum() np_len = len(a) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) - m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) + torch_b = torch.from_numpy(b).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(b) + m.update((torch_b, torch_ground_truth)) np_sum += (ground_truth - b).sum() np_len += len(b) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) - m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) + torch_c = torch.from_numpy(c).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(c) + m.update((torch_c, torch_ground_truth)) np_sum += (ground_truth - c).sum() np_len += len(c) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) - m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) + torch_d = 
torch.from_numpy(d).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(d) + m.update((torch_d, torch_ground_truth)) np_sum += (ground_truth - d).sum() np_len += len(d) np_ans = np_sum / np_len diff --git a/tests/ignite/metrics/regression/test_mean_normalized_bias.py b/tests/ignite/metrics/regression/test_mean_normalized_bias.py index ebded53405ce..438c35d7043c 100644 --- a/tests/ignite/metrics/regression/test_mean_normalized_bias.py +++ b/tests/ignite/metrics/regression/test_mean_normalized_bias.py @@ -48,25 +48,34 @@ def test_mean_error(available_device): m = MeanNormalizedBias(device=available_device) assert m._device == torch.device(available_device) - m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) + torch_a = torch.from_numpy(a).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(a) + torch_ground_truth = ( + torch.from_numpy(ground_truth).to(dtype=torch.float32) + if available_device == "mps" + else torch.from_numpy(ground_truth) + ) + m.update((torch_a, torch_ground_truth)) np_sum = ((ground_truth - a) / ground_truth).sum() np_len = len(a) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) - m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) + torch_b = torch.from_numpy(b).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(b) + m.update((torch_b, torch_ground_truth)) np_sum += ((ground_truth - b) / ground_truth).sum() np_len += len(b) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) - m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) + torch_c = torch.from_numpy(c).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(c) + m.update((torch_c, torch_ground_truth)) np_sum += ((ground_truth - c) / ground_truth).sum() np_len += len(c) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) - m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) + torch_d = torch.from_numpy(d).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(d) + m.update((torch_d, torch_ground_truth)) np_sum += ((ground_truth - d) / ground_truth).sum() np_len += len(d) np_ans = np_sum / np_len diff --git a/tests/ignite/metrics/regression/test_r2_score.py b/tests/ignite/metrics/regression/test_r2_score.py index 420f56f51a83..423810737d0c 100644 --- a/tests/ignite/metrics/regression/test_r2_score.py +++ b/tests/ignite/metrics/regression/test_r2_score.py @@ -34,8 +34,12 @@ def test_r2_score(available_device): m = R2Score(device=available_device) assert m._device == torch.device(available_device) - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) + y_pred = ( + torch.from_numpy(np_y_pred).to(dtype=torch.float32) + if available_device == "mps" + else torch.from_numpy(np_y_pred) + ) + y = torch.from_numpy(np_y).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(np_y) m.reset() m.update((y_pred, y)) @@ -52,8 +56,12 @@ def test_r2_score_2(available_device): m = R2Score(device=available_device) assert m._device == torch.device(available_device) - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) + y_pred = ( + torch.from_numpy(np_y_pred).to(dtype=torch.float32) + if available_device == "mps" + else torch.from_numpy(np_y_pred) + ) + y = torch.from_numpy(np_y).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(np_y) m.reset() batch_size = 16 @@ -78,7 +86,17 @@ def update_fn(engine, batch): idx = (engine.state.iteration - 1) * 
batch_size y_true_batch = np_y[idx : idx + batch_size] y_pred_batch = np_y_pred[idx : idx + batch_size] - return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) + torch_y_pred_batch = ( + torch.from_numpy(y_pred_batch).to(dtype=torch.float32) + if available_device == "mps" + else torch.from_numpy(y_pred_batch) + ) + torch_y_true_batch = ( + torch.from_numpy(y_true_batch).to(dtype=torch.float32) + if available_device == "mps" + else torch.from_numpy(y_true_batch) + ) + return torch_y_pred_batch, torch_y_true_batch engine = Engine(update_fn) diff --git a/tests/ignite/metrics/regression/test_wave_hedges_distance.py b/tests/ignite/metrics/regression/test_wave_hedges_distance.py index 33e0e4da113f..b85dde8f55ab 100644 --- a/tests/ignite/metrics/regression/test_wave_hedges_distance.py +++ b/tests/ignite/metrics/regression/test_wave_hedges_distance.py @@ -29,19 +29,28 @@ def test_compute(available_device): m = WaveHedgesDistance(device=available_device) assert m._device == torch.device(available_device) - m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) + torch_a = torch.from_numpy(a).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(a) + torch_ground_truth = ( + torch.from_numpy(ground_truth).to(dtype=torch.float32) + if available_device == "mps" + else torch.from_numpy(ground_truth) + ) + m.update((torch_a, torch_ground_truth)) np_sum = (np.abs(ground_truth - a) / np.maximum.reduce([a, ground_truth])).sum() assert m.compute() == pytest.approx(np_sum) - m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) + torch_b = torch.from_numpy(b).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(b) + m.update((torch_b, torch_ground_truth)) np_sum += (np.abs(ground_truth - b) / np.maximum.reduce([b, ground_truth])).sum() assert m.compute() == pytest.approx(np_sum) - m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) + torch_c = torch.from_numpy(c).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(c) + m.update((torch_c, torch_ground_truth)) np_sum += (np.abs(ground_truth - c) / np.maximum.reduce([c, ground_truth])).sum() assert m.compute() == pytest.approx(np_sum) - m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) + torch_d = torch.from_numpy(d).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(d) + m.update((torch_d, torch_ground_truth)) np_sum += (np.abs(ground_truth - d) / np.maximum.reduce([d, ground_truth])).sum() assert m.compute() == pytest.approx(np_sum) From 346e0e18a95170d3033f79ce9e2c28455f24917c Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Fri, 2 May 2025 09:30:40 +0000 Subject: [PATCH 27/32] takes into account PR comments --- ignite/metrics/regression/fractional_bias.py | 2 +- .../regression/test_canberra_metric.py | 6 +-- .../test_geometric_mean_absolute_error.py | 53 ++++++++----------- 3 files changed, 25 insertions(+), 36 deletions(-) diff --git a/ignite/metrics/regression/fractional_bias.py b/ignite/metrics/regression/fractional_bias.py index 912e2a01997e..6dde4c70bca8 100644 --- a/ignite/metrics/regression/fractional_bias.py +++ b/ignite/metrics/regression/fractional_bias.py @@ -64,7 +64,7 @@ class FractionalBias(_BaseRegression): @reinit__is_reduced def reset(self) -> None: - self._sum_of_errors = torch.tensor(0.0, device=self._device) + self._sum_of_errors = torch.tensor(0.0, dtype=self._double_dtype, device=self._device) self._num_examples = 0 def _update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None: 
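The float32 casts sprinkled through the test changes above all address the same constraint: NumPy's default float arrays are float64, torch.from_numpy preserves that dtype, and the MPS backend cannot hold float64 tensors. A minimal sketch of that conversion, assuming a hypothetical helper named to_tensor_for_device that is not part of these patches:

import numpy as np
import torch

def to_tensor_for_device(arr: np.ndarray, device: str) -> torch.Tensor:
    # torch.from_numpy keeps NumPy's dtype, so a default float array arrives as float64.
    t = torch.from_numpy(arr)
    # MPS has no float64 support, so downcast before the tensor is moved to that backend.
    if torch.device(device).type == "mps" and t.dtype == torch.float64:
        t = t.to(dtype=torch.float32)
    return t

Each inline expression of the form `torch.from_numpy(x).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(x)` used in the tests could be written this way; the patches keep the inline form.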
diff --git a/tests/ignite/metrics/regression/test_canberra_metric.py b/tests/ignite/metrics/regression/test_canberra_metric.py index fa67731f8664..ee77e6f008df 100644 --- a/tests/ignite/metrics/regression/test_canberra_metric.py +++ b/tests/ignite/metrics/regression/test_canberra_metric.py @@ -69,13 +69,11 @@ def test_compute(available_device): ) def test_integration(n_times, test_cases, available_device): y_pred, y, batch_size = test_cases - assert y_pred.dtype == torch.float32 - assert y.dtype == torch.float32 def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size - y_true_batch = y[idx : idx + batch_size].to(dtype=torch.float32) - y_pred_batch = y_pred[idx : idx + batch_size].to(dtype=torch.float32) + y_true_batch = y[idx : idx + batch_size] + y_pred_batch = y_pred[idx : idx + batch_size] return y_pred_batch, y_true_batch engine = Engine(update_fn) diff --git a/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py b/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py index 936c35d2df8a..861d31702ee8 100644 --- a/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py +++ b/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py @@ -29,51 +29,42 @@ def test_wrong_input_shapes(): def test_compute(available_device): - a = np.random.randn(4) - b = np.random.randn(4) - c = np.random.randn(4) - d = np.random.randn(4) - ground_truth = np.random.randn(4) + a = torch.randn(4) + b = torch.randn(4) + c = torch.randn(4) + d = torch.randn(4) + ground_truth = torch.randn(4) np_prod = 1.0 m = GeometricMeanAbsoluteError(device=available_device) assert m._device == torch.device(available_device) - torch_a = torch.from_numpy(a).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(a) - torch_ground_truth = ( - torch.from_numpy(ground_truth).to(dtype=torch.float32) - if available_device == "mps" - else torch.from_numpy(ground_truth) - ) - m.update((torch_a, torch_ground_truth)) - - errors = np.abs(ground_truth - a) - np_prod = np.multiply.reduce(errors) * np_prod + m.update((a, ground_truth)) + + errors = torch.abs(ground_truth - a) + np_prod = torch.prod(errors) * np_prod np_len = len(a) - np_ans = np.power(np_prod, 1.0 / np_len) + np_ans = torch.pow(np_prod, 1.0 / np_len) assert m.compute() == pytest.approx(np_ans) - torch_b = torch.from_numpy(b).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(b) - m.update((torch_b, torch_ground_truth)) - errors = np.abs(ground_truth - b) - np_prod = np.multiply.reduce(errors) * np_prod + m.update((b, ground_truth)) + errors = torch.abs(ground_truth - b) + np_prod = torch.prod(errors) * np_prod np_len += len(b) - np_ans = np.power(np_prod, 1.0 / np_len) + np_ans = torch.pow(np_prod, 1.0 / np_len) assert m.compute() == pytest.approx(np_ans) - torch_c = torch.from_numpy(c).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(c) - m.update((torch_c, torch_ground_truth)) - errors = np.abs(ground_truth - c) - np_prod = np.multiply.reduce(errors) * np_prod + m.update((c, ground_truth)) + errors = torch.abs(ground_truth - c) + np_prod = torch.prod(errors) * np_prod np_len += len(c) - np_ans = np.power(np_prod, 1.0 / np_len) + np_ans = torch.pow(np_prod, 1.0 / np_len) assert m.compute() == pytest.approx(np_ans) - torch_d = torch.from_numpy(d).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(d) - m.update((torch_d, torch_ground_truth)) - errors = np.abs(ground_truth - d) - np_prod = 
np.multiply.reduce(errors) * np_prod + m.update((d, ground_truth)) + errors = torch.abs(ground_truth - d) + np_prod = torch.prod(errors) * np_prod np_len += len(d) - np_ans = np.power(np_prod, 1.0 / np_len) + np_ans = torch.pow(np_prod, 1.0 / np_len) assert m.compute() == pytest.approx(np_ans) From ded98cf96cb672dc403689ef9a6a0497a2e347cf Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Fri, 2 May 2025 13:22:40 +0000 Subject: [PATCH 28/32] refactor integration tests for fractional bias and fractional absolute error --- .../test_fractional_absolute_error.py | 57 +++++----- .../regression/test_fractional_bias.py | 100 +++++++++--------- 2 files changed, 75 insertions(+), 82 deletions(-) diff --git a/tests/ignite/metrics/regression/test_fractional_absolute_error.py b/tests/ignite/metrics/regression/test_fractional_absolute_error.py index bb5932439eb1..1316b5518a7e 100644 --- a/tests/ignite/metrics/regression/test_fractional_absolute_error.py +++ b/tests/ignite/metrics/regression/test_fractional_absolute_error.py @@ -53,44 +53,41 @@ def test_compute(available_device): assert m.compute() == pytest.approx(expected) -def test_integration(available_device): - def _test(y_pred, y, batch_size, device="cpu"): - def update_fn(engine, batch): - idx = (engine.state.iteration - 1) * batch_size - y_true_batch = np_y[idx : idx + batch_size] - y_pred_batch = np_y_pred[idx : idx + batch_size] - return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) +@pytest.mark.parametrize("n_times", range(5)) +@pytest.mark.parametrize( + "test_cases", + [ + (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), + (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), + ], +) +def test_integration_fractional_absolute_error(n_times, test_cases, available_device): + y_pred, y, batch_size = test_cases - engine = Engine(update_fn) + np_y = y.numpy().ravel() + np_y_pred = y_pred.numpy().ravel() - m = FractionalAbsoluteError(device=device) - assert m._device == torch.device(device) - m.attach(engine, "fab") + def update_fn(engine, batch): + idx = (engine.state.iteration - 1) * batch_size + y_true_batch = y[idx : idx + batch_size] + y_pred_batch = y_pred[idx : idx + batch_size] + return y_pred_batch, y_true_batch - np_y = y.numpy().ravel() - np_y_pred = y_pred.numpy().ravel() + engine = Engine(update_fn) - data = list(range(y_pred.shape[0] // batch_size)) - fab = engine.run(data, max_epochs=1).metrics["fab"] + metric = FractionalAbsoluteError(device=available_device) + assert metric._device == torch.device(available_device) - np_sum = (2 * np.abs((np_y_pred - np_y)) / (np.abs(np_y_pred) + np.abs(np_y))).sum() - np_len = len(y_pred) - np_ans = np_sum / np_len + metric.attach(engine, "fab") - assert np_ans == pytest.approx(fab) + data = list(range(y_pred.shape[0] // batch_size)) + fab = engine.run(data, max_epochs=1).metrics["fab"] - def get_test_cases(): - test_cases = [ - (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), - (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), - ] - return test_cases + # Expected result using NumPy + np_sum = (2 * np.abs(np_y_pred - np_y) / (np.abs(np_y_pred) + np.abs(np_y))).sum() + expected = np_sum / len(np_y) - for _ in range(5): - # check multiple random inputs as random exact occurencies are rare - test_cases = get_test_cases() - for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size, device=available_device) + assert expected == pytest.approx(fab) def _test_distrib_compute(device): diff --git 
a/tests/ignite/metrics/regression/test_fractional_bias.py b/tests/ignite/metrics/regression/test_fractional_bias.py index b4f9f8422794..20bbf27f2186 100644 --- a/tests/ignite/metrics/regression/test_fractional_bias.py +++ b/tests/ignite/metrics/regression/test_fractional_bias.py @@ -51,58 +51,54 @@ def test_fractional_bias(available_device): assert m.compute() == pytest.approx(expected) -def test_integration(available_device): - def _test(y_pred, y, batch_size, device="cpu"): - def update_fn(engine, batch): - idx = (engine.state.iteration - 1) * batch_size - y_true_batch = np_y[idx : idx + batch_size] - y_pred_batch = np_y_pred[idx : idx + batch_size] - torch_y_pred_batch = ( - torch.from_numpy(y_pred_batch).to(dtype=torch.float32) - if device == "mps" - else torch.from_numpy(y_pred_batch) - ) - torch_y_true_batch = ( - torch.from_numpy(y_true_batch).to(dtype=torch.float32) - if device == "mps" - else torch.from_numpy(y_true_batch) - ) - return torch_y_pred_batch, torch_y_true_batch - - engine = Engine(update_fn) - - m = FractionalBias(device=device) - assert m._device == torch.device(device) - - m.attach(engine, "fb") - - np_y = y.double().numpy().ravel() - np_y_pred = y_pred.double().numpy().ravel() - - data = list(range(y_pred.shape[0] // batch_size)) - fb = engine.run(data, max_epochs=1).metrics["fb"] - - np_sum = (2 * (np_y - np_y_pred) / (np_y_pred + np_y)).sum() - np_len = len(y_pred) - np_ans = np_sum / np_len - - if available_device == "mps": - assert np_ans == pytest.approx(fb, rel=1e-5) - else: - assert np_ans == pytest.approx(fb) - - def get_test_cases(): - test_cases = [ - (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), - (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), - ] - return test_cases - - for _ in range(5): - # check multiple random inputs as random exact occurencies are rare - test_cases = get_test_cases() - for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size, device=available_device) +@pytest.mark.parametrize("n_times", range(5)) +@pytest.mark.parametrize( + "test_case", + [ + (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), + (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), + ], +) +def test_integration_fractional_bias(n_times, test_case, available_device): + y_pred, y, batch_size = test_case + + np_y = y.double().numpy().ravel() + np_y_pred = y_pred.double().numpy().ravel() + + def update_fn(engine, batch): + idx = (engine.state.iteration - 1) * batch_size + y_true_batch = np_y[idx : idx + batch_size] + y_pred_batch = np_y_pred[idx : idx + batch_size] + + torch_y_pred_batch = ( + torch.from_numpy(y_pred_batch).to(dtype=torch.float32) + if available_device == "mps" + else torch.from_numpy(y_pred_batch) + ) + torch_y_true_batch = ( + torch.from_numpy(y_true_batch).to(dtype=torch.float32) + if available_device == "mps" + else torch.from_numpy(y_true_batch) + ) + + return torch_y_pred_batch, torch_y_true_batch + + engine = Engine(update_fn) + + metric = FractionalBias(device=available_device) + assert metric._device == torch.device(available_device) + + metric.attach(engine, "fb") + + data = list(range(y_pred.shape[0] // batch_size)) + fb = engine.run(data, max_epochs=1).metrics["fb"] + + expected = (2 * (np_y - np_y_pred) / (np_y_pred + np_y)).sum() / len(np_y) + + if available_device == "mps": + assert expected == pytest.approx(fb, rel=1e-5) + else: + assert expected == pytest.approx(fb) def test_error_is_not_nan(available_device): From 63baad667a5a23b2632db193962b6ca0fadc02e4 Mon Sep 17 00:00:00 2001 From: 
BanzaiTokyo Date: Sat, 3 May 2025 14:20:42 +0000 Subject: [PATCH 29/32] remove modifications in test --- .../regression/test_canberra_metric.py | 92 ++++++------ .../test_fractional_absolute_error.py | 101 +++++++------ .../regression/test_fractional_bias.py | 133 +++++++++--------- .../test_geometric_mean_absolute_error.py | 55 ++++---- ..._geometric_mean_relative_absolute_error.py | 18 +-- .../regression/test_kendall_correlation.py | 10 +- .../regression/test_manhattan_distance.py | 37 ++--- .../regression/test_maximum_absolute_error.py | 14 +- .../test_mean_absolute_relative_error.py | 14 +- .../metrics/regression/test_mean_error.py | 31 ++-- .../regression/test_mean_normalized_bias.py | 29 ++-- .../regression/test_median_absolute_error.py | 51 +++---- .../test_median_absolute_percentage_error.py | 64 ++++----- .../test_median_relative_absolute_error.py | 59 ++++---- .../regression/test_pearson_correlation.py | 40 +++--- .../metrics/regression/test_r2_score.py | 43 ++---- .../regression/test_spearman_correlation.py | 10 +- .../regression/test_wave_hedges_distance.py | 27 ++-- 18 files changed, 368 insertions(+), 460 deletions(-) diff --git a/tests/ignite/metrics/regression/test_canberra_metric.py b/tests/ignite/metrics/regression/test_canberra_metric.py index ee77e6f008df..eaaee884768a 100644 --- a/tests/ignite/metrics/regression/test_canberra_metric.py +++ b/tests/ignite/metrics/regression/test_canberra_metric.py @@ -20,83 +20,83 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_compute(available_device): - a = torch.randn(4) - b = torch.randn(4) - c = torch.randn(4) - d = torch.randn(4) - ground_truth = torch.randn(4) +def test_compute(): + a = np.random.randn(4) + b = np.random.randn(4) + c = np.random.randn(4) + d = np.random.randn(4) + ground_truth = np.random.randn(4) - m = CanberraMetric(device=available_device) - assert m._device == torch.device(available_device) + m = CanberraMetric() canberra = DistanceMetric.get_metric("canberra") - m.update((a, ground_truth)) - np_sum = (torch.abs(ground_truth - a) / (torch.abs(a) + torch.abs(ground_truth))).sum() + m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) + np_sum = (np.abs(ground_truth - a) / (np.abs(a) + np.abs(ground_truth))).sum() assert m.compute() == pytest.approx(np_sum) - assert canberra.pairwise([a.cpu().numpy(), ground_truth.cpu().numpy()])[0][1] == pytest.approx(np_sum) + assert canberra.pairwise([a, ground_truth])[0][1] == pytest.approx(np_sum) - m.update((b, ground_truth)) - np_sum += ((torch.abs(ground_truth - b)) / (torch.abs(b) + torch.abs(ground_truth))).sum() + m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) + np_sum += ((np.abs(ground_truth - b)) / (np.abs(b) + np.abs(ground_truth))).sum() assert m.compute() == pytest.approx(np_sum) v1 = np.hstack([a, b]) v2 = np.hstack([ground_truth, ground_truth]) assert canberra.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) - m.update((c, ground_truth)) - np_sum += ((torch.abs(ground_truth - c)) / (torch.abs(c) + torch.abs(ground_truth))).sum() + m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) + np_sum += ((np.abs(ground_truth - c)) / (np.abs(c) + np.abs(ground_truth))).sum() assert m.compute() == pytest.approx(np_sum) v1 = np.hstack([v1, c]) v2 = np.hstack([v2, ground_truth]) assert canberra.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) - m.update((d, ground_truth)) - np_sum += (torch.abs(ground_truth - d) / (torch.abs(d) + torch.abs(ground_truth))).sum() + 
m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) + np_sum += (np.abs(ground_truth - d) / (np.abs(d) + np.abs(ground_truth))).sum() assert m.compute() == pytest.approx(np_sum) v1 = np.hstack([v1, d]) v2 = np.hstack([v2, ground_truth]) assert canberra.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) -@pytest.mark.parametrize("n_times", range(3)) -@pytest.mark.parametrize( - "test_cases", - [ - (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), - (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), - ], -) -def test_integration(n_times, test_cases, available_device): - y_pred, y, batch_size = test_cases +def test_integration(): + def _test(y_pred, y, batch_size): + def update_fn(engine, batch): + idx = (engine.state.iteration - 1) * batch_size + y_true_batch = np_y[idx : idx + batch_size] + y_pred_batch = np_y_pred[idx : idx + batch_size] + return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) - def update_fn(engine, batch): - idx = (engine.state.iteration - 1) * batch_size - y_true_batch = y[idx : idx + batch_size] - y_pred_batch = y_pred[idx : idx + batch_size] - return y_pred_batch, y_true_batch + engine = Engine(update_fn) - engine = Engine(update_fn) + m = CanberraMetric() + m.attach(engine, "cm") - m = CanberraMetric(device=available_device) - assert m._device == torch.device(available_device) + np_y = y.numpy().ravel() + np_y_pred = y_pred.numpy().ravel() - m.attach(engine, "cm") + canberra = DistanceMetric.get_metric("canberra") - canberra = DistanceMetric.get_metric("canberra") + data = list(range(y_pred.shape[0] // batch_size)) + cm = engine.run(data, max_epochs=1).metrics["cm"] - data = list(range(y_pred.shape[0] // batch_size)) - cm = engine.run(data, max_epochs=1).metrics["cm"] + assert canberra.pairwise([np_y_pred, np_y])[0][1] == pytest.approx(cm) - pred_np = y_pred.cpu().numpy().reshape(len(y_pred), -1) - true_np = y.cpu().numpy().reshape(len(y), -1) - expected = np.sum(canberra.pairwise(pred_np, true_np).diagonal()) - assert expected == pytest.approx(cm) + def get_test_cases(): + test_cases = [ + (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), + (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), + ] + return test_cases + for _ in range(5): + # check multiple random inputs as random exact occurencies are rare + test_cases = get_test_cases() + for y_pred, y, batch_size in test_cases: + _test(y_pred, y, batch_size) -def test_error_is_not_nan(available_device): - m = CanberraMetric(device=available_device) - assert m._device == torch.device(available_device) + +def test_error_is_not_nan(): + m = CanberraMetric() m.update((torch.zeros(4), torch.zeros(4))) assert not (torch.isnan(m._sum_of_errors).any() or torch.isinf(m._sum_of_errors).any()), m._sum_of_errors diff --git a/tests/ignite/metrics/regression/test_fractional_absolute_error.py b/tests/ignite/metrics/regression/test_fractional_absolute_error.py index 1316b5518a7e..c1c3b080576f 100644 --- a/tests/ignite/metrics/regression/test_fractional_absolute_error.py +++ b/tests/ignite/metrics/regression/test_fractional_absolute_error.py @@ -28,66 +28,77 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_compute(available_device): - a = torch.randn(4) - b = torch.randn(4) - c = torch.randn(4) - d = torch.randn(4) - ground_truth = torch.randn(4) +def test_compute(): + a = np.random.randn(4) + b = np.random.randn(4) + c = np.random.randn(4) + d = np.random.randn(4) + ground_truth = np.random.randn(4) - m = 
FractionalAbsoluteError(device=available_device) - assert m._device == torch.device(available_device) + m = FractionalAbsoluteError() - total_error = 0.0 - total_len = 0 + m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) + np_sum = (2 * np.abs((a - ground_truth)) / (np.abs(a) + np.abs(ground_truth))).sum() + np_len = len(a) + np_ans = np_sum / np_len + assert m.compute() == pytest.approx(np_ans) - for pred in [a, b, c, d]: - m.update((pred, ground_truth)) + m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) + np_sum += (2 * np.abs((b - ground_truth)) / (np.abs(b) + np.abs(ground_truth))).sum() + np_len += len(b) + np_ans = np_sum / np_len + assert m.compute() == pytest.approx(np_ans) - # Compute fractional absolute error in PyTorch - error = 2 * torch.abs(pred - ground_truth) / (torch.abs(pred) + torch.abs(ground_truth)) - total_error += error.sum().item() - total_len += len(pred) + m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) + np_sum += (2 * np.abs((c - ground_truth)) / (np.abs(c) + np.abs(ground_truth))).sum() + np_len += len(c) + np_ans = np_sum / np_len + assert m.compute() == pytest.approx(np_ans) - expected = total_error / total_len - assert m.compute() == pytest.approx(expected) + m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) + np_sum += (2 * np.abs((d - ground_truth)) / (np.abs(d) + np.abs(ground_truth))).sum() + np_len += len(d) + np_ans = np_sum / np_len + assert m.compute() == pytest.approx(np_ans) -@pytest.mark.parametrize("n_times", range(5)) -@pytest.mark.parametrize( - "test_cases", - [ - (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), - (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), - ], -) -def test_integration_fractional_absolute_error(n_times, test_cases, available_device): - y_pred, y, batch_size = test_cases +def test_integration(): + def _test(y_pred, y, batch_size): + def update_fn(engine, batch): + idx = (engine.state.iteration - 1) * batch_size + y_true_batch = np_y[idx : idx + batch_size] + y_pred_batch = np_y_pred[idx : idx + batch_size] + return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) - np_y = y.numpy().ravel() - np_y_pred = y_pred.numpy().ravel() + engine = Engine(update_fn) - def update_fn(engine, batch): - idx = (engine.state.iteration - 1) * batch_size - y_true_batch = y[idx : idx + batch_size] - y_pred_batch = y_pred[idx : idx + batch_size] - return y_pred_batch, y_true_batch + m = FractionalAbsoluteError() + m.attach(engine, "fab") - engine = Engine(update_fn) + np_y = y.numpy().ravel() + np_y_pred = y_pred.numpy().ravel() - metric = FractionalAbsoluteError(device=available_device) - assert metric._device == torch.device(available_device) + data = list(range(y_pred.shape[0] // batch_size)) + fab = engine.run(data, max_epochs=1).metrics["fab"] - metric.attach(engine, "fab") + np_sum = (2 * np.abs((np_y_pred - np_y)) / (np.abs(np_y_pred) + np.abs(np_y))).sum() + np_len = len(y_pred) + np_ans = np_sum / np_len - data = list(range(y_pred.shape[0] // batch_size)) - fab = engine.run(data, max_epochs=1).metrics["fab"] + assert np_ans == pytest.approx(fab) - # Expected result using NumPy - np_sum = (2 * np.abs(np_y_pred - np_y) / (np.abs(np_y_pred) + np.abs(np_y))).sum() - expected = np_sum / len(np_y) + def get_test_cases(): + test_cases = [ + (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), + (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), + ] + return test_cases - assert expected == pytest.approx(fab) + for _ in range(5): + # check 
multiple random inputs as random exact occurencies are rare + test_cases = get_test_cases() + for y_pred, y, batch_size in test_cases: + _test(y_pred, y, batch_size) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_fractional_bias.py b/tests/ignite/metrics/regression/test_fractional_bias.py index 20bbf27f2186..bf78d4870d5b 100644 --- a/tests/ignite/metrics/regression/test_fractional_bias.py +++ b/tests/ignite/metrics/regression/test_fractional_bias.py @@ -1,5 +1,6 @@ import os +import numpy as np import pytest import torch @@ -27,83 +28,81 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_fractional_bias(available_device): - a = torch.randn(4) - b = torch.randn(4) - c = torch.randn(4) - d = torch.randn(4) - ground_truth = torch.randn(4) +def test_fractional_bias(): + a = np.random.randn(4) + b = np.random.randn(4) + c = np.random.randn(4) + d = np.random.randn(4) + ground_truth = np.random.randn(4) - m = FractionalBias(device=available_device) - assert m._device == torch.device(available_device) - - total_error = 0.0 - total_len = 0 - - for pred in [a, b, c, d]: - m.update((pred, ground_truth)) - - error = 2 * (ground_truth - pred) / (pred + ground_truth) - total_error += error.sum().item() - total_len += len(pred) - - expected = total_error / total_len - assert m.compute() == pytest.approx(expected) - - -@pytest.mark.parametrize("n_times", range(5)) -@pytest.mark.parametrize( - "test_case", - [ - (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), - (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), - ], -) -def test_integration_fractional_bias(n_times, test_case, available_device): - y_pred, y, batch_size = test_case - - np_y = y.double().numpy().ravel() - np_y_pred = y_pred.double().numpy().ravel() - - def update_fn(engine, batch): - idx = (engine.state.iteration - 1) * batch_size - y_true_batch = np_y[idx : idx + batch_size] - y_pred_batch = np_y_pred[idx : idx + batch_size] - - torch_y_pred_batch = ( - torch.from_numpy(y_pred_batch).to(dtype=torch.float32) - if available_device == "mps" - else torch.from_numpy(y_pred_batch) - ) - torch_y_true_batch = ( - torch.from_numpy(y_true_batch).to(dtype=torch.float32) - if available_device == "mps" - else torch.from_numpy(y_true_batch) - ) + m = FractionalBias() - return torch_y_pred_batch, torch_y_true_batch + m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) + np_sum = (2 * (ground_truth - a) / (a + ground_truth)).sum() + np_len = len(a) + np_ans = np_sum / np_len + assert m.compute() == pytest.approx(np_ans) + + m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) + np_sum += (2 * (ground_truth - b) / (b + ground_truth)).sum() + np_len += len(b) + np_ans = np_sum / np_len + assert m.compute() == pytest.approx(np_ans) + + m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) + np_sum += (2 * (ground_truth - c) / (c + ground_truth)).sum() + np_len += len(c) + np_ans = np_sum / np_len + assert m.compute() == pytest.approx(np_ans) + + m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) + np_sum += (2 * (ground_truth - d) / (d + ground_truth)).sum() + np_len += len(d) + np_ans = np_sum / np_len + assert m.compute() == pytest.approx(np_ans) + + +def test_integration(): + def _test(y_pred, y, batch_size): + def update_fn(engine, batch): + idx = (engine.state.iteration - 1) * batch_size + y_true_batch = np_y[idx : idx + batch_size] + y_pred_batch = np_y_pred[idx : idx + batch_size] + return 
torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) + + engine = Engine(update_fn) + + m = FractionalBias() + m.attach(engine, "fb") - engine = Engine(update_fn) + np_y = y.double().numpy().ravel() + np_y_pred = y_pred.double().numpy().ravel() - metric = FractionalBias(device=available_device) - assert metric._device == torch.device(available_device) + data = list(range(y_pred.shape[0] // batch_size)) + fb = engine.run(data, max_epochs=1).metrics["fb"] - metric.attach(engine, "fb") + np_sum = (2 * (np_y - np_y_pred) / (np_y_pred + np_y)).sum() + np_len = len(y_pred) + np_ans = np_sum / np_len - data = list(range(y_pred.shape[0] // batch_size)) - fb = engine.run(data, max_epochs=1).metrics["fb"] + assert np_ans == pytest.approx(fb) - expected = (2 * (np_y - np_y_pred) / (np_y_pred + np_y)).sum() / len(np_y) + def get_test_cases(): + test_cases = [ + (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), + (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), + ] + return test_cases - if available_device == "mps": - assert expected == pytest.approx(fb, rel=1e-5) - else: - assert expected == pytest.approx(fb) + for _ in range(5): + # check multiple random inputs as random exact occurencies are rare + test_cases = get_test_cases() + for y_pred, y, batch_size in test_cases: + _test(y_pred, y, batch_size) -def test_error_is_not_nan(available_device): - m = FractionalBias(device=available_device) - assert m._device == torch.device(available_device) +def test_error_is_not_nan(): + m = FractionalBias() m.update((torch.zeros(4), torch.zeros(4))) assert not (torch.isnan(m._sum_of_errors).any() or torch.isinf(m._sum_of_errors).any()), m._sum_of_errors diff --git a/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py b/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py index 861d31702ee8..05f023691a54 100644 --- a/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py +++ b/tests/ignite/metrics/regression/test_geometric_mean_absolute_error.py @@ -28,48 +28,47 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_compute(available_device): - a = torch.randn(4) - b = torch.randn(4) - c = torch.randn(4) - d = torch.randn(4) - ground_truth = torch.randn(4) +def test_compute(): + a = np.random.randn(4) + b = np.random.randn(4) + c = np.random.randn(4) + d = np.random.randn(4) + ground_truth = np.random.randn(4) np_prod = 1.0 - m = GeometricMeanAbsoluteError(device=available_device) - assert m._device == torch.device(available_device) - m.update((a, ground_truth)) + m = GeometricMeanAbsoluteError() + m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) - errors = torch.abs(ground_truth - a) - np_prod = torch.prod(errors) * np_prod + errors = np.abs(ground_truth - a) + np_prod = np.multiply.reduce(errors) * np_prod np_len = len(a) - np_ans = torch.pow(np_prod, 1.0 / np_len) + np_ans = np.power(np_prod, 1.0 / np_len) assert m.compute() == pytest.approx(np_ans) - m.update((b, ground_truth)) - errors = torch.abs(ground_truth - b) - np_prod = torch.prod(errors) * np_prod + m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) + errors = np.abs(ground_truth - b) + np_prod = np.multiply.reduce(errors) * np_prod np_len += len(b) - np_ans = torch.pow(np_prod, 1.0 / np_len) + np_ans = np.power(np_prod, 1.0 / np_len) assert m.compute() == pytest.approx(np_ans) - m.update((c, ground_truth)) - errors = torch.abs(ground_truth - c) - np_prod = torch.prod(errors) * np_prod + 
m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) + errors = np.abs(ground_truth - c) + np_prod = np.multiply.reduce(errors) * np_prod np_len += len(c) - np_ans = torch.pow(np_prod, 1.0 / np_len) + np_ans = np.power(np_prod, 1.0 / np_len) assert m.compute() == pytest.approx(np_ans) - m.update((d, ground_truth)) - errors = torch.abs(ground_truth - d) - np_prod = torch.prod(errors) * np_prod + m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) + errors = np.abs(ground_truth - d) + np_prod = np.multiply.reduce(errors) * np_prod np_len += len(d) - np_ans = torch.pow(np_prod, 1.0 / np_len) + np_ans = np.power(np_prod, 1.0 / np_len) assert m.compute() == pytest.approx(np_ans) -def test_integration(available_device): - def _test(y_pred, y, batch_size, device="cpu"): +def test_integration(): + def _test(y_pred, y, batch_size): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -78,9 +77,7 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = GeometricMeanAbsoluteError(device=device) - assert m._device == torch.device(device) - + m = GeometricMeanAbsoluteError() m.attach(engine, "gmae") np_y = y.numpy().ravel() diff --git a/tests/ignite/metrics/regression/test_geometric_mean_relative_absolute_error.py b/tests/ignite/metrics/regression/test_geometric_mean_relative_absolute_error.py index 570f45fffeee..9f7b14422e48 100644 --- a/tests/ignite/metrics/regression/test_geometric_mean_relative_absolute_error.py +++ b/tests/ignite/metrics/regression/test_geometric_mean_relative_absolute_error.py @@ -29,20 +29,15 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_compute(available_device): +def test_compute(): size = 51 np_y_pred = np.random.rand(size) np_y = np.random.rand(size) np_gmrae = np.exp(np.log(np.abs(np_y - np_y_pred) / np.abs(np_y - np_y.mean())).mean()) - m = GeometricMeanRelativeAbsoluteError(device=available_device) - assert m._device == torch.device(available_device) - y_pred = ( - torch.from_numpy(np_y_pred).to(dtype=torch.float32) - if available_device == "mps" - else torch.from_numpy(np_y_pred) - ) - y = torch.from_numpy(np_y).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(np_y) + m = GeometricMeanRelativeAbsoluteError() + y_pred = torch.from_numpy(np_y_pred) + y = torch.from_numpy(np_y) m.reset() m.update((y_pred, y)) @@ -50,7 +45,7 @@ def test_compute(available_device): assert np_gmrae == pytest.approx(m.compute()) -def test_integration(available_device): +def test_integration(): y_pred = torch.rand(size=(100,)) y = torch.rand(size=(100,)) @@ -64,8 +59,7 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = GeometricMeanRelativeAbsoluteError(device=available_device) - assert m._device == torch.device(available_device) + m = GeometricMeanRelativeAbsoluteError() m.attach(engine, "gmrae") np_y = y.numpy().ravel() diff --git a/tests/ignite/metrics/regression/test_kendall_correlation.py b/tests/ignite/metrics/regression/test_kendall_correlation.py index 84e2f905c192..5dd55b0691b3 100644 --- a/tests/ignite/metrics/regression/test_kendall_correlation.py +++ b/tests/ignite/metrics/regression/test_kendall_correlation.py @@ -59,15 +59,14 @@ def test_wrong_variant(): @pytest.mark.parametrize("variant", ["b", "c"]) -def test_kendall_correlation(variant: str, available_device): +def test_kendall_correlation(variant: str): a = np.random.randn(4).astype(np.float32) b = np.random.randn(4).astype(np.float32) c = 
np.random.randn(4).astype(np.float32) d = np.random.randn(4).astype(np.float32) ground_truth = np.random.randn(4).astype(np.float32) - m = KendallRankCorrelation(variant=variant, device=available_device) - assert m._device == torch.device(available_device) + m = KendallRankCorrelation(variant=variant) m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_ans = kendalltau(a, ground_truth, variant=variant).statistic @@ -100,7 +99,7 @@ def test_case(request): @pytest.mark.parametrize("n_times", range(5)) @pytest.mark.parametrize("variant", ["b", "c"]) -def test_integration(n_times: int, variant: str, test_case: Tuple[Tensor, Tensor, int], available_device): +def test_integration(n_times: int, variant: str, test_case: Tuple[Tensor, Tensor, int]): y_pred, y, batch_size = test_case np_y = y.numpy().ravel() @@ -114,8 +113,7 @@ def update_fn(engine: Engine, batch): engine = Engine(update_fn) - m = KendallRankCorrelation(variant=variant, device=available_device) - assert m._device == torch.device(available_device) + m = KendallRankCorrelation(variant=variant) m.attach(engine, "kendall_tau") data = list(range(y_pred.shape[0] // batch_size)) diff --git a/tests/ignite/metrics/regression/test_manhattan_distance.py b/tests/ignite/metrics/regression/test_manhattan_distance.py index ce43c9162313..50bdf321164b 100644 --- a/tests/ignite/metrics/regression/test_manhattan_distance.py +++ b/tests/ignite/metrics/regression/test_manhattan_distance.py @@ -20,46 +20,37 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_mahattan_distance(available_device): +def test_mahattan_distance(): a = np.random.randn(4) b = np.random.randn(4) c = np.random.randn(4) d = np.random.randn(4) ground_truth = np.random.randn(4) - m = ManhattanDistance(device=available_device) - assert m._device == torch.device(available_device) + m = ManhattanDistance() manhattan = DistanceMetric.get_metric("manhattan") - torch_a = torch.from_numpy(a).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(a) - torch_ground_truth = ( - torch.from_numpy(ground_truth).to(dtype=torch.float32) - if available_device == "mps" - else torch.from_numpy(ground_truth) - ) - m.update((torch_a, torch_ground_truth)) + + m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_sum = np.abs(ground_truth - a).sum() assert m.compute() == pytest.approx(np_sum) assert manhattan.pairwise([a, ground_truth])[0][1] == pytest.approx(np_sum) - torch_b = torch.from_numpy(b).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(b) - m.update((torch_b, torch_ground_truth)) + m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) np_sum += np.abs(ground_truth - b).sum() assert m.compute() == pytest.approx(np_sum) v1 = np.hstack([a, b]) v2 = np.hstack([ground_truth, ground_truth]) assert manhattan.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) - torch_c = torch.from_numpy(c).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(c) - m.update((torch_c, torch_ground_truth)) + m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) np_sum += np.abs(ground_truth - c).sum() assert m.compute() == pytest.approx(np_sum) v1 = np.hstack([v1, c]) v2 = np.hstack([v2, ground_truth]) assert manhattan.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) - torch_d = torch.from_numpy(d).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(d) - m.update((torch_d, torch_ground_truth)) + m.update((torch.from_numpy(d), 
torch.from_numpy(ground_truth))) np_sum += np.abs(ground_truth - d).sum() assert m.compute() == pytest.approx(np_sum) v1 = np.hstack([v1, d]) @@ -67,8 +58,8 @@ def test_mahattan_distance(available_device): assert manhattan.pairwise([v1, v2])[0][1] == pytest.approx(np_sum) -def test_integration(available_device): - def _test(y_pred, y, batch_size, device="cpu"): +def test_integration(): + def _test(y_pred, y, batch_size): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -77,8 +68,7 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = ManhattanDistance(device=device) - assert m._device == torch.device(device) + m = ManhattanDistance() m.attach(engine, "md") np_y = y.numpy().ravel() @@ -102,12 +92,11 @@ def get_test_cases(): # check multiple random inputs as random exact occurencies are rare test_cases = get_test_cases() for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size, device=available_device) + _test(y_pred, y, batch_size) -def test_error_is_not_nan(available_device): - m = ManhattanDistance(device=available_device) - assert m._device == torch.device(available_device) +def test_error_is_not_nan(): + m = ManhattanDistance() m.update((torch.zeros(4), torch.zeros(4))) assert not (torch.isnan(m._sum_of_errors).any() or torch.isinf(m._sum_of_errors).any()), m._sum_of_errors diff --git a/tests/ignite/metrics/regression/test_maximum_absolute_error.py b/tests/ignite/metrics/regression/test_maximum_absolute_error.py index 39558e943bc3..e5e0fb4369ff 100644 --- a/tests/ignite/metrics/regression/test_maximum_absolute_error.py +++ b/tests/ignite/metrics/regression/test_maximum_absolute_error.py @@ -28,15 +28,14 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_maximum_absolute_error(available_device): +def test_maximum_absolute_error(): a = np.random.randn(4) b = np.random.randn(4) c = np.random.randn(4) d = np.random.randn(4) ground_truth = np.random.randn(4) - m = MaximumAbsoluteError(device=available_device) - assert m._device == torch.device(available_device) + m = MaximumAbsoluteError() np_ans = -1 @@ -61,8 +60,8 @@ def test_maximum_absolute_error(available_device): assert m.compute() == pytest.approx(np_ans) -def test_integration(available_device): - def _test(y_pred, y, batch_size, device="cpu"): +def test_integration(): + def _test(y_pred, y, batch_size): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -71,8 +70,7 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = MaximumAbsoluteError(device=device) - assert m._device == torch.device(device) + m = MaximumAbsoluteError() m.attach(engine, "mae") np_y = y.numpy().ravel() @@ -96,7 +94,7 @@ def get_test_cases(): # check multiple random inputs as random exact occurencies are rare test_cases = get_test_cases() for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size, device=available_device) + _test(y_pred, y, batch_size) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_mean_absolute_relative_error.py b/tests/ignite/metrics/regression/test_mean_absolute_relative_error.py index 98f1635aef5f..25b39860ed5b 100644 --- a/tests/ignite/metrics/regression/test_mean_absolute_relative_error.py +++ b/tests/ignite/metrics/regression/test_mean_absolute_relative_error.py @@ -21,15 +21,14 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def 
test_mean_absolute_relative_error(available_device): +def test_mean_absolute_relative_error(): a = torch.rand(4) b = torch.rand(4) c = torch.rand(4) d = torch.rand(4) ground_truth = torch.rand(4) - m = MeanAbsoluteRelativeError(device=available_device) - assert m._device == torch.device(available_device) + m = MeanAbsoluteRelativeError() m.update((a, ground_truth)) abs_error_a = torch.sum(torch.abs(ground_truth - a) / torch.abs(ground_truth)) @@ -79,8 +78,8 @@ def test_zero_sample(): m.compute() -def test_integration(available_device): - def _test(y_pred, y, batch_size, device="cpu"): +def test_integration(): + def _test(y_pred, y, batch_size): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -89,8 +88,7 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = MeanAbsoluteRelativeError(device=device) - assert m._device == torch.device(device) + m = MeanAbsoluteRelativeError() m.attach(engine, "mare") np_y = y.numpy().ravel() @@ -116,7 +114,7 @@ def get_test_cases(): # check multiple random inputs as random exact occurencies are rare test_cases = get_test_cases() for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size, device=available_device) + _test(y_pred, y, batch_size) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_mean_error.py b/tests/ignite/metrics/regression/test_mean_error.py index e4c943dc3fb7..e0e7fc975606 100644 --- a/tests/ignite/metrics/regression/test_mean_error.py +++ b/tests/ignite/metrics/regression/test_mean_error.py @@ -26,52 +26,42 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_mean_error(available_device): +def test_mean_error(): a = np.random.randn(4) b = np.random.randn(4) c = np.random.randn(4) d = np.random.randn(4) ground_truth = np.random.randn(4) - m = MeanError(device=available_device) - assert m._device == torch.device(available_device) + m = MeanError() - torch_a = torch.from_numpy(a).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(a) - torch_ground_truth = ( - torch.from_numpy(ground_truth).to(dtype=torch.float32) - if available_device == "mps" - else torch.from_numpy(ground_truth) - ) - m.update((torch_a, torch_ground_truth)) + m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_sum = (ground_truth - a).sum() np_len = len(a) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) - torch_b = torch.from_numpy(b).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(b) - m.update((torch_b, torch_ground_truth)) + m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) np_sum += (ground_truth - b).sum() np_len += len(b) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) - torch_c = torch.from_numpy(c).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(c) - m.update((torch_c, torch_ground_truth)) + m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) np_sum += (ground_truth - c).sum() np_len += len(c) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) - torch_d = torch.from_numpy(d).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(d) - m.update((torch_d, torch_ground_truth)) + m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) np_sum += (ground_truth - d).sum() np_len += len(d) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) -def test_integration(available_device): - def 
_test(y_pred, y, batch_size, device="cpu"): +def test_integration(): + def _test(y_pred, y, batch_size): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -80,8 +70,7 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = MeanError(device=device) - assert m._device == torch.device(device) + m = MeanError() m.attach(engine, "me") np_y = y.numpy().ravel() @@ -106,7 +95,7 @@ def get_test_cases(): for _ in range(5): test_cases = get_test_cases() for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size, device=available_device) + _test(y_pred, y, batch_size) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_mean_normalized_bias.py b/tests/ignite/metrics/regression/test_mean_normalized_bias.py index 438c35d7043c..66129ee7d595 100644 --- a/tests/ignite/metrics/regression/test_mean_normalized_bias.py +++ b/tests/ignite/metrics/regression/test_mean_normalized_bias.py @@ -38,52 +38,42 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_mean_error(available_device): +def test_mean_error(): a = np.random.randn(4) b = np.random.randn(4) c = np.random.randn(4) d = np.random.randn(4) ground_truth = np.random.randn(4) - m = MeanNormalizedBias(device=available_device) - assert m._device == torch.device(available_device) + m = MeanNormalizedBias() - torch_a = torch.from_numpy(a).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(a) - torch_ground_truth = ( - torch.from_numpy(ground_truth).to(dtype=torch.float32) - if available_device == "mps" - else torch.from_numpy(ground_truth) - ) - m.update((torch_a, torch_ground_truth)) + m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_sum = ((ground_truth - a) / ground_truth).sum() np_len = len(a) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) - torch_b = torch.from_numpy(b).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(b) - m.update((torch_b, torch_ground_truth)) + m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) np_sum += ((ground_truth - b) / ground_truth).sum() np_len += len(b) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) - torch_c = torch.from_numpy(c).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(c) - m.update((torch_c, torch_ground_truth)) + m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) np_sum += ((ground_truth - c) / ground_truth).sum() np_len += len(c) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) - torch_d = torch.from_numpy(d).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(d) - m.update((torch_d, torch_ground_truth)) + m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) np_sum += ((ground_truth - d) / ground_truth).sum() np_len += len(d) np_ans = np_sum / np_len assert m.compute() == pytest.approx(np_ans) -def test_integration(available_device): - def _test(y_pred, y, batch_size, device="cpu"): +def test_integration(): + def _test(y_pred, y, batch_size): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] @@ -92,8 +82,7 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = MeanNormalizedBias(device=device) - assert m._device == torch.device(device) + m = MeanNormalizedBias() m.attach(engine, "mnb") np_y = y.numpy().ravel() diff --git 
a/tests/ignite/metrics/regression/test_median_absolute_error.py b/tests/ignite/metrics/regression/test_median_absolute_error.py index a58100b38f80..7ea373e46a7e 100644 --- a/tests/ignite/metrics/regression/test_median_absolute_error.py +++ b/tests/ignite/metrics/regression/test_median_absolute_error.py @@ -34,36 +34,38 @@ def test_wrong_input_shapes(): m.update((torch.rand(4), torch.rand(4, 1, 2))) -def test_median_absolute_error(available_device): +def test_median_absolute_error(): # See https://github.com/torch/torch7/pull/182 # For even number of elements, PyTorch returns middle element # NumPy returns average of middle elements # Size of dataset will be odd for these tests size = 51 - y_pred = torch.rand(size) - y = torch.rand(size) - expected_median_absolute_error = torch.median(torch.abs((y - y_pred).cpu())).item() + np_y_pred = np.random.rand(size) + np_y = np.random.rand(size) + np_median_absolute_error = np.median(np.abs(np_y - np_y_pred)) - m = MedianAbsoluteError(device=available_device) - assert m._device == torch.device(available_device) + m = MedianAbsoluteError() + y_pred = torch.from_numpy(np_y_pred) + y = torch.from_numpy(np_y) m.reset() m.update((y_pred, y)) - assert expected_median_absolute_error == pytest.approx(m.compute()) + assert np_median_absolute_error == pytest.approx(m.compute()) -def test_median_absolute_error_2(available_device): +def test_median_absolute_error_2(): np.random.seed(1) size = 105 - y_pred = torch.rand(size, 1) - y = torch.rand(size, 1) - y = y[torch.randperm(size)] - expected_median_absolute_error = torch.median(torch.abs(y.cpu() - y_pred.cpu())).item() + np_y_pred = np.random.rand(size, 1) + np_y = np.random.rand(size, 1) + np.random.shuffle(np_y) + np_median_absolute_error = np.median(np.abs(np_y - np_y_pred)) - m = MedianAbsoluteError(device=available_device) - assert m._device == torch.device(available_device) + m = MedianAbsoluteError() + y_pred = torch.from_numpy(np_y_pred) + y = torch.from_numpy(np_y) m.reset() batch_size = 16 @@ -72,33 +74,34 @@ def test_median_absolute_error_2(available_device): idx = i * batch_size m.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size])) - assert expected_median_absolute_error == pytest.approx(m.compute()) + assert np_median_absolute_error == pytest.approx(m.compute()) -def test_integration_median_absolute_error(available_device): +def test_integration_median_absolute_error(): np.random.seed(1) size = 105 - y_pred = torch.rand(size, 1) - y = torch.rand(size, 1) - y = y[torch.randperm(size)] + np_y_pred = np.random.rand(size, 1) + np_y = np.random.rand(size, 1) + np.random.shuffle(np_y) + np_median_absolute_error = np.median(np.abs(np_y - np_y_pred)) - expected = torch.median(torch.abs(y.cpu() - y_pred.cpu())).item() batch_size = 15 def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size - return y_pred[idx : idx + batch_size], y[idx : idx + batch_size] + y_true_batch = np_y[idx : idx + batch_size] + y_pred_batch = np_y_pred[idx : idx + batch_size] + return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) engine = Engine(update_fn) - m = MedianAbsoluteError(device=available_device) - assert m._device == torch.device(available_device) + m = MedianAbsoluteError() m.attach(engine, "median_absolute_error") data = list(range(size // batch_size)) median_absolute_error = engine.run(data, max_epochs=1).metrics["median_absolute_error"] - assert expected == pytest.approx(median_absolute_error) + assert np_median_absolute_error == 
pytest.approx(median_absolute_error) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py b/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py index 347d97e372d0..fa8c549a5070 100644 --- a/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py +++ b/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py @@ -34,44 +34,38 @@ def test_wrong_input_shapes(): m.update((torch.rand(4), torch.rand(4, 1, 2))) -def test_median_absolute_percentage_error(available_device): +def test_median_absolute_percentage_error(): # See https://github.com/torch/torch7/pull/182 # For even number of elements, PyTorch returns middle element # NumPy returns average of middle elements # Size of dataset will be odd for these tests size = 51 - y_pred = torch.rand(size) - y = torch.rand(size) + np_y_pred = np.random.rand(size) + np_y = np.random.rand(size) + np_median_absolute_percentage_error = 100.0 * np.median(np.abs(np_y - np_y_pred) / np.abs(np_y)) - epsilon = 1e-8 - safe_y = torch.where(y == 0, torch.full_like(y, epsilon), y) - expected = torch.median(torch.abs((y - y_pred) / safe_y).cpu()).item() * 100.0 - - m = MedianAbsolutePercentageError(device=available_device) - assert m._device == torch.device(available_device) + m = MedianAbsolutePercentageError() + y_pred = torch.from_numpy(np_y_pred) + y = torch.from_numpy(np_y) m.reset() m.update((y_pred, y)) - assert expected == pytest.approx(m.compute()) + assert np_median_absolute_percentage_error == pytest.approx(m.compute()) -def test_median_absolute_percentage_error_2(available_device): +def test_median_absolute_percentage_error_2(): np.random.seed(1) size = 105 - y_pred = torch.rand(size, 1) - y = torch.rand(size, 1) - - indices = torch.randperm(size) - y = y[indices] - - epsilon = 1e-8 - safe_y = torch.where(y == 0, torch.full_like(y, epsilon), y) - expected = torch.median(torch.abs((y - y_pred) / safe_y).cpu()).item() * 100.0 + np_y_pred = np.random.rand(size, 1) + np_y = np.random.rand(size, 1) + np.random.shuffle(np_y) + np_median_absolute_percentage_error = 100.0 * np.median(np.abs(np_y - np_y_pred) / np.abs(np_y)) - m = MedianAbsolutePercentageError(device=available_device) - assert m._device == torch.device(available_device) + m = MedianAbsolutePercentageError() + y_pred = torch.from_numpy(np_y_pred) + y = torch.from_numpy(np_y) m.reset() batch_size = 16 @@ -80,38 +74,34 @@ def test_median_absolute_percentage_error_2(available_device): idx = i * batch_size m.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size])) - assert expected == pytest.approx(m.compute()) + assert np_median_absolute_percentage_error == pytest.approx(m.compute()) -def test_integration_median_absolute_percentage_error(available_device): +def test_integration_median_absolute_percentage_error(): np.random.seed(1) size = 105 - y_pred = torch.rand(size, 1) - y = torch.rand(size, 1) - - indices = torch.randperm(size) - y = y[indices] - - epsilon = 1e-8 - safe_y = torch.where(y == 0, torch.full_like(y, epsilon), y) - expected = torch.median(torch.abs((y - y_pred) / safe_y).cpu()).item() * 100.0 + np_y_pred = np.random.rand(size, 1) + np_y = np.random.rand(size, 1) + np.random.shuffle(np_y) + np_median_absolute_percentage_error = 100.0 * np.median(np.abs(np_y - np_y_pred) / np.abs(np_y)) batch_size = 15 def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size - return y_pred[idx : idx + batch_size], y[idx : idx + batch_size] + y_true_batch 
= np_y[idx : idx + batch_size] + y_pred_batch = np_y_pred[idx : idx + batch_size] + return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) engine = Engine(update_fn) - m = MedianAbsolutePercentageError(device=available_device) - assert m._device == torch.device(available_device) + m = MedianAbsolutePercentageError() m.attach(engine, "median_absolute_percentage_error") data = list(range(size // batch_size)) median_absolute_percentage_error = engine.run(data, max_epochs=1).metrics["median_absolute_percentage_error"] - assert expected == pytest.approx(median_absolute_percentage_error) + assert np_median_absolute_percentage_error == pytest.approx(median_absolute_percentage_error) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_median_relative_absolute_error.py b/tests/ignite/metrics/regression/test_median_relative_absolute_error.py index 526d2f4ad136..b2574632c15c 100644 --- a/tests/ignite/metrics/regression/test_median_relative_absolute_error.py +++ b/tests/ignite/metrics/regression/test_median_relative_absolute_error.py @@ -34,40 +34,38 @@ def test_wrong_input_shapes(): m.update((torch.rand(4), torch.rand(4, 1, 2))) -def test_median_relative_absolute_error(available_device): +def test_median_relative_absolute_error(): # See https://github.com/torch/torch7/pull/182 # For even number of elements, PyTorch returns middle element # NumPy returns average of middle elements # Size of dataset will be odd for these tests size = 51 - y_pred = torch.rand(size) - y = torch.rand(size) + np_y_pred = np.random.rand(size) + np_y = np.random.rand(size) + np_median_absolute_relative_error = np.median(np.abs(np_y - np_y_pred) / np.abs(np_y - np_y.mean())) - baseline = torch.abs(y - y.mean()) - expected = torch.median((torch.abs(y - y_pred) / baseline).cpu()).item() - - m = MedianRelativeAbsoluteError(device=available_device) - assert m._device == torch.device(available_device) + m = MedianRelativeAbsoluteError() + y_pred = torch.from_numpy(np_y_pred) + y = torch.from_numpy(np_y) m.reset() m.update((y_pred, y)) - assert expected == pytest.approx(m.compute()) + assert np_median_absolute_relative_error == pytest.approx(m.compute()) -def test_median_relative_absolute_error_2(available_device): +def test_median_relative_absolute_error_2(): np.random.seed(1) size = 105 - y_pred = torch.rand(size, 1) - y = torch.rand(size, 1) - y = y[torch.randperm(size)] - - baseline = torch.abs(y - y.mean()) - expected = torch.median((torch.abs(y - y_pred) / baseline).cpu()).item() + np_y_pred = np.random.rand(size, 1) + np_y = np.random.rand(size, 1) + np.random.shuffle(np_y) + np_median_absolute_relative_error = np.median(np.abs(np_y - np_y_pred) / np.abs(np_y - np_y.mean())) - m = MedianRelativeAbsoluteError(device=available_device) - assert m._device == torch.device(available_device) + m = MedianRelativeAbsoluteError() + y_pred = torch.from_numpy(np_y_pred) + y = torch.from_numpy(np_y) m.reset() batch_size = 16 @@ -76,37 +74,34 @@ def test_median_relative_absolute_error_2(available_device): idx = i * batch_size m.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size])) - assert expected == pytest.approx(m.compute()) + assert np_median_absolute_relative_error == pytest.approx(m.compute()) -def test_integration_median_relative_absolute_error_with_output_transform(available_device): +def test_integration_median_relative_absolute_error_with_output_transform(): np.random.seed(1) size = 105 - y_pred = torch.rand(size, 1) - y = torch.rand(size, 1) - y = 
y[torch.randperm(size)] # shuffle y - - baseline = torch.abs(y - y.mean()) - expected = torch.median((torch.abs(y - y_pred) / baseline.cpu()).cpu()).item() + np_y_pred = np.random.rand(size, 1) + np_y = np.random.rand(size, 1) + np.random.shuffle(np_y) + np_median_absolute_relative_error = np.median(np.abs(np_y - np_y_pred) / np.abs(np_y - np_y.mean())) batch_size = 15 def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size - y_true_batch = y[idx : idx + batch_size] - y_pred_batch = y_pred[idx : idx + batch_size] - return y_pred_batch, y_true_batch + y_true_batch = np_y[idx : idx + batch_size] + y_pred_batch = np_y_pred[idx : idx + batch_size] + return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) engine = Engine(update_fn) - m = MedianRelativeAbsoluteError(device=available_device) - assert m._device == torch.device(available_device) + m = MedianRelativeAbsoluteError() m.attach(engine, "median_absolute_relative_error") data = list(range(size // batch_size)) median_absolute_relative_error = engine.run(data, max_epochs=1).metrics["median_absolute_relative_error"] - assert expected == pytest.approx(median_absolute_relative_error) + assert np_median_absolute_relative_error == pytest.approx(median_absolute_relative_error) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_pearson_correlation.py b/tests/ignite/metrics/regression/test_pearson_correlation.py index 351b992a135e..b16a54c246e8 100644 --- a/tests/ignite/metrics/regression/test_pearson_correlation.py +++ b/tests/ignite/metrics/regression/test_pearson_correlation.py @@ -43,20 +43,17 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_degenerated_sample(available_device): +def test_degenerated_sample(): # one sample - m = PearsonCorrelation(device=available_device) - assert m._device == torch.device(available_device) + m = PearsonCorrelation() y_pred = torch.tensor([1.0]) y = torch.tensor([1.0]) m.update((y_pred, y)) - np_y_pred = y_pred.cpu().numpy() - np_y = y_pred.cpu().numpy() - expected = np_corr_eps(np_y_pred, np_y) - actual = m.compute() - - assert pytest.approx(expected) == actual + np_y_pred = y_pred.numpy() + np_y = y_pred.numpy() + np_res = np_corr_eps(np_y_pred, np_y) + assert pytest.approx(np_res) == m.compute() # constant samples m.reset() @@ -64,23 +61,20 @@ def test_degenerated_sample(available_device): y = torch.zeros(10).float() m.update((y_pred, y)) - np_y_pred = y_pred.cpu().numpy() - np_y = y_pred.cpu().numpy() - expected = np_corr_eps(np_y_pred, np_y) - actual = m.compute() - - assert pytest.approx(expected) == actual + np_y_pred = y_pred.numpy() + np_y = y_pred.numpy() + np_res = np_corr_eps(np_y_pred, np_y) + assert pytest.approx(np_res) == m.compute() -def test_pearson_correlation(available_device): +def test_pearson_correlation(): a = np.random.randn(4).astype(np.float32) b = np.random.randn(4).astype(np.float32) c = np.random.randn(4).astype(np.float32) d = np.random.randn(4).astype(np.float32) ground_truth = np.random.randn(4).astype(np.float32) - m = PearsonCorrelation(device=available_device) - assert m._device == torch.device(available_device) + m = PearsonCorrelation() m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_ans = scipy_corr(a, ground_truth) @@ -112,7 +106,7 @@ def test_case(request): @pytest.mark.parametrize("n_times", range(5)) -def test_integration(n_times, test_case: Tuple[Tensor, Tensor, int], available_device): +def test_integration(n_times, test_case: Tuple[Tensor, 
Tensor, int]): y_pred, y, batch_size = test_case def update_fn(engine: Engine, batch): @@ -123,8 +117,7 @@ def update_fn(engine: Engine, batch): engine = Engine(update_fn) - m = PearsonCorrelation(device=available_device) - assert m._device == torch.device(available_device) + m = PearsonCorrelation() m.attach(engine, "corr") np_y = y.numpy().ravel() @@ -138,9 +131,8 @@ def update_fn(engine: Engine, batch): assert pytest.approx(np_ans, rel=2e-4) == corr -def test_accumulator_detached(available_device): - corr = PearsonCorrelation(device=available_device) - assert corr._device == torch.device(available_device) +def test_accumulator_detached(): + corr = PearsonCorrelation() y_pred = torch.tensor([2.0, 3.0], requires_grad=True) y = torch.tensor([-2.0, -1.0]) diff --git a/tests/ignite/metrics/regression/test_r2_score.py b/tests/ignite/metrics/regression/test_r2_score.py index 423810737d0c..62ad0e14938f 100644 --- a/tests/ignite/metrics/regression/test_r2_score.py +++ b/tests/ignite/metrics/regression/test_r2_score.py @@ -27,19 +27,14 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_r2_score(available_device): +def test_r2_score(): size = 51 np_y_pred = np.random.rand(size) np_y = np.random.rand(size) - m = R2Score(device=available_device) - assert m._device == torch.device(available_device) - y_pred = ( - torch.from_numpy(np_y_pred).to(dtype=torch.float32) - if available_device == "mps" - else torch.from_numpy(np_y_pred) - ) - y = torch.from_numpy(np_y).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(np_y) + m = R2Score() + y_pred = torch.from_numpy(np_y_pred) + y = torch.from_numpy(np_y) m.reset() m.update((y_pred, y)) @@ -47,21 +42,16 @@ def test_r2_score(available_device): assert r2_score(np_y, np_y_pred) == pytest.approx(m.compute()) -def test_r2_score_2(available_device): +def test_r2_score_2(): np.random.seed(1) size = 105 np_y_pred = np.random.rand(size, 1) np_y = np.random.rand(size, 1) np.random.shuffle(np_y) - m = R2Score(device=available_device) - assert m._device == torch.device(available_device) - y_pred = ( - torch.from_numpy(np_y_pred).to(dtype=torch.float32) - if available_device == "mps" - else torch.from_numpy(np_y_pred) - ) - y = torch.from_numpy(np_y).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(np_y) + m = R2Score() + y_pred = torch.from_numpy(np_y_pred) + y = torch.from_numpy(np_y) m.reset() batch_size = 16 @@ -73,7 +63,7 @@ def test_r2_score_2(available_device): assert r2_score(np_y, np_y_pred) == pytest.approx(m.compute()) -def test_integration_r2_score(available_device): +def test_integration_r2_score(): np.random.seed(1) size = 105 np_y_pred = np.random.rand(size, 1) @@ -86,22 +76,11 @@ def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size y_true_batch = np_y[idx : idx + batch_size] y_pred_batch = np_y_pred[idx : idx + batch_size] - torch_y_pred_batch = ( - torch.from_numpy(y_pred_batch).to(dtype=torch.float32) - if available_device == "mps" - else torch.from_numpy(y_pred_batch) - ) - torch_y_true_batch = ( - torch.from_numpy(y_true_batch).to(dtype=torch.float32) - if available_device == "mps" - else torch.from_numpy(y_true_batch) - ) - return torch_y_pred_batch, torch_y_true_batch + return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) engine = Engine(update_fn) - m = R2Score(device=available_device) - assert m._device == torch.device(available_device) + m = R2Score() m.attach(engine, "r2_score") data = list(range(size // batch_size)) 
diff --git a/tests/ignite/metrics/regression/test_spearman_correlation.py b/tests/ignite/metrics/regression/test_spearman_correlation.py index d9c4e47fa359..4aac6221f629 100644 --- a/tests/ignite/metrics/regression/test_spearman_correlation.py +++ b/tests/ignite/metrics/regression/test_spearman_correlation.py @@ -53,15 +53,14 @@ def test_wrong_y_dtype(): metric.update((y_pred, y)) -def test_spearman_correlation(available_device): +def test_spearman_correlation(): a = np.random.randn(4).astype(np.float32) b = np.random.randn(4).astype(np.float32) c = np.random.randn(4).astype(np.float32) d = np.random.randn(4).astype(np.float32) ground_truth = np.random.randn(4).astype(np.float32) - m = SpearmanRankCorrelation(device=available_device) - assert m._device == torch.device(available_device) + m = SpearmanRankCorrelation() m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_ans = spearmanr(a, ground_truth).statistic @@ -93,7 +92,7 @@ def test_case(request): @pytest.mark.parametrize("n_times", range(5)) -def test_integration(n_times, test_case: Tuple[Tensor, Tensor, int], available_device): +def test_integration(n_times, test_case: Tuple[Tensor, Tensor, int]): y_pred, y, batch_size = test_case np_y = y.numpy().ravel() @@ -107,8 +106,7 @@ def update_fn(engine: Engine, batch): engine = Engine(update_fn) - m = SpearmanRankCorrelation(device=available_device) - assert m._device == torch.device(available_device) + m = SpearmanRankCorrelation() m.attach(engine, "spearman_corr") data = list(range(y_pred.shape[0] // batch_size)) diff --git a/tests/ignite/metrics/regression/test_wave_hedges_distance.py b/tests/ignite/metrics/regression/test_wave_hedges_distance.py index b85dde8f55ab..376dd70b8c22 100644 --- a/tests/ignite/metrics/regression/test_wave_hedges_distance.py +++ b/tests/ignite/metrics/regression/test_wave_hedges_distance.py @@ -19,43 +19,33 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_compute(available_device): +def test_compute(): a = np.random.randn(4) b = np.random.randn(4) c = np.random.randn(4) d = np.random.randn(4) ground_truth = np.random.randn(4) - m = WaveHedgesDistance(device=available_device) - assert m._device == torch.device(available_device) + m = WaveHedgesDistance() - torch_a = torch.from_numpy(a).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(a) - torch_ground_truth = ( - torch.from_numpy(ground_truth).to(dtype=torch.float32) - if available_device == "mps" - else torch.from_numpy(ground_truth) - ) - m.update((torch_a, torch_ground_truth)) + m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) np_sum = (np.abs(ground_truth - a) / np.maximum.reduce([a, ground_truth])).sum() assert m.compute() == pytest.approx(np_sum) - torch_b = torch.from_numpy(b).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(b) - m.update((torch_b, torch_ground_truth)) + m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) np_sum += (np.abs(ground_truth - b) / np.maximum.reduce([b, ground_truth])).sum() assert m.compute() == pytest.approx(np_sum) - torch_c = torch.from_numpy(c).to(dtype=torch.float32) if available_device == "mps" else torch.from_numpy(c) - m.update((torch_c, torch_ground_truth)) + m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) np_sum += (np.abs(ground_truth - c) / np.maximum.reduce([c, ground_truth])).sum() assert m.compute() == pytest.approx(np_sum) - torch_d = torch.from_numpy(d).to(dtype=torch.float32) if available_device == "mps" else 
torch.from_numpy(d) - m.update((torch_d, torch_ground_truth)) + m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) np_sum += (np.abs(ground_truth - d) / np.maximum.reduce([d, ground_truth])).sum() assert m.compute() == pytest.approx(np_sum) -def test_integration(available_device): +def test_integration(): def _test(y_pred, y, batch_size): def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size @@ -65,8 +55,7 @@ def update_fn(engine, batch): engine = Engine(update_fn) - m = WaveHedgesDistance(device=available_device) - assert m._device == torch.device(available_device) + m = WaveHedgesDistance() m.attach(engine, "whd") np_y = y.numpy().ravel() From 45af2f9e87e6affcb87d2fc210786ad83fdfa535 Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Sat, 3 May 2025 17:14:32 +0000 Subject: [PATCH 30/32] test_median_absolute_percentage_error.py test_median_relative_absolute_error.py test_pearson_correlation.py test_r2_score.py test_spearman_correlation.py test_wave_hedges_distance.py --- .../test_median_absolute_percentage_error.py | 66 +++++----- .../test_median_relative_absolute_error.py | 61 ++++----- .../regression/test_pearson_correlation.py | 71 +++++----- .../metrics/regression/test_r2_score.py | 70 ++++++---- .../regression/test_spearman_correlation.py | 70 +++++----- .../regression/test_wave_hedges_distance.py | 122 +++++++++--------- 6 files changed, 251 insertions(+), 209 deletions(-) diff --git a/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py b/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py index fa8c549a5070..2c0340779187 100644 --- a/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py +++ b/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py @@ -34,38 +34,43 @@ def test_wrong_input_shapes(): m.update((torch.rand(4), torch.rand(4, 1, 2))) -def test_median_absolute_percentage_error(): +def test_median_absolute_percentage_error(available_device): # See https://github.com/torch/torch7/pull/182 # For even number of elements, PyTorch returns middle element # NumPy returns average of middle elements # Size of dataset will be odd for these tests size = 51 - np_y_pred = np.random.rand(size) - np_y = np.random.rand(size) - np_median_absolute_percentage_error = 100.0 * np.median(np.abs(np_y - np_y_pred) / np.abs(np_y)) + y_pred = torch.rand(size) + y = torch.rand(size) - m = MedianAbsolutePercentageError() - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) + epsilon = 1e-8 + safe_y = torch.where(y == 0, torch.full_like(y, epsilon), y) + expected = torch.median(torch.abs((y - y_pred) / safe_y).cpu()).item() * 100.0 + + m = MedianAbsolutePercentageError(device=available_device) + assert m._device == torch.device(available_device) m.reset() m.update((y_pred, y)) - assert np_median_absolute_percentage_error == pytest.approx(m.compute()) + assert expected == pytest.approx(m.compute()) -def test_median_absolute_percentage_error_2(): - np.random.seed(1) +def test_median_absolute_percentage_error_2(available_device): size = 105 - np_y_pred = np.random.rand(size, 1) - np_y = np.random.rand(size, 1) - np.random.shuffle(np_y) - np_median_absolute_percentage_error = 100.0 * np.median(np.abs(np_y - np_y_pred) / np.abs(np_y)) + y_pred = torch.rand(size, 1) + y = torch.rand(size, 1) - m = MedianAbsolutePercentageError() - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) + indices = torch.randperm(size) + y = y[indices] + + epsilon = 1e-8 + safe_y = 
torch.where(y == 0, torch.full_like(y, epsilon), y) + expected = torch.median(torch.abs((y - y_pred) / safe_y).cpu()).item() * 100.0 + + m = MedianAbsolutePercentageError(device=available_device) + assert m._device == torch.device(available_device) m.reset() batch_size = 16 @@ -74,34 +79,37 @@ def test_median_absolute_percentage_error_2(): idx = i * batch_size m.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size])) - assert np_median_absolute_percentage_error == pytest.approx(m.compute()) + assert expected == pytest.approx(m.compute()) -def test_integration_median_absolute_percentage_error(): - np.random.seed(1) +def test_integration_median_absolute_percentage_error(available_device): size = 105 - np_y_pred = np.random.rand(size, 1) - np_y = np.random.rand(size, 1) - np.random.shuffle(np_y) - np_median_absolute_percentage_error = 100.0 * np.median(np.abs(np_y - np_y_pred) / np.abs(np_y)) + y_pred = torch.rand(size, 1) + y = torch.rand(size, 1) + + indices = torch.randperm(size) + y = y[indices] + + epsilon = 1e-8 + safe_y = torch.where(y == 0, torch.full_like(y, epsilon), y) + expected = torch.median(torch.abs((y - y_pred) / safe_y).cpu()).item() * 100.0 batch_size = 15 def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size - y_true_batch = np_y[idx : idx + batch_size] - y_pred_batch = np_y_pred[idx : idx + batch_size] - return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) + return y_pred[idx : idx + batch_size], y[idx : idx + batch_size] engine = Engine(update_fn) - m = MedianAbsolutePercentageError() + m = MedianAbsolutePercentageError(device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "median_absolute_percentage_error") data = list(range(size // batch_size)) median_absolute_percentage_error = engine.run(data, max_epochs=1).metrics["median_absolute_percentage_error"] - assert np_median_absolute_percentage_error == pytest.approx(median_absolute_percentage_error) + assert expected == pytest.approx(median_absolute_percentage_error) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_median_relative_absolute_error.py b/tests/ignite/metrics/regression/test_median_relative_absolute_error.py index b2574632c15c..fde7e0bd0d11 100644 --- a/tests/ignite/metrics/regression/test_median_relative_absolute_error.py +++ b/tests/ignite/metrics/regression/test_median_relative_absolute_error.py @@ -34,38 +34,39 @@ def test_wrong_input_shapes(): m.update((torch.rand(4), torch.rand(4, 1, 2))) -def test_median_relative_absolute_error(): +def test_median_relative_absolute_error(available_device): # See https://github.com/torch/torch7/pull/182 # For even number of elements, PyTorch returns middle element # NumPy returns average of middle elements # Size of dataset will be odd for these tests size = 51 - np_y_pred = np.random.rand(size) - np_y = np.random.rand(size) - np_median_absolute_relative_error = np.median(np.abs(np_y - np_y_pred) / np.abs(np_y - np_y.mean())) + y_pred = torch.rand(size) + y = torch.rand(size) - m = MedianRelativeAbsoluteError() - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) + baseline = torch.abs(y - y.mean()) + expected = torch.median((torch.abs(y - y_pred) / baseline).cpu()).item() + + m = MedianRelativeAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) m.reset() m.update((y_pred, y)) - assert np_median_absolute_relative_error == pytest.approx(m.compute()) + assert expected == 
pytest.approx(m.compute()) -def test_median_relative_absolute_error_2(): - np.random.seed(1) +def test_median_relative_absolute_error_2(available_device): size = 105 - np_y_pred = np.random.rand(size, 1) - np_y = np.random.rand(size, 1) - np.random.shuffle(np_y) - np_median_absolute_relative_error = np.median(np.abs(np_y - np_y_pred) / np.abs(np_y - np_y.mean())) + y_pred = torch.rand(size, 1) + y = torch.rand(size, 1) + y = y[torch.randperm(size)] - m = MedianRelativeAbsoluteError() - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) + baseline = torch.abs(y - y.mean()) + expected = torch.median((torch.abs(y - y_pred) / baseline).cpu()).item() + + m = MedianRelativeAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) m.reset() batch_size = 16 @@ -74,34 +75,36 @@ def test_median_relative_absolute_error_2(): idx = i * batch_size m.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size])) - assert np_median_absolute_relative_error == pytest.approx(m.compute()) + assert expected == pytest.approx(m.compute()) -def test_integration_median_relative_absolute_error_with_output_transform(): - np.random.seed(1) +def test_integration_median_relative_absolute_error_with_output_transform(available_device): size = 105 - np_y_pred = np.random.rand(size, 1) - np_y = np.random.rand(size, 1) - np.random.shuffle(np_y) - np_median_absolute_relative_error = np.median(np.abs(np_y - np_y_pred) / np.abs(np_y - np_y.mean())) + y_pred = torch.rand(size, 1) + y = torch.rand(size, 1) + y = y[torch.randperm(size)] # shuffle y + + baseline = torch.abs(y - y.mean()) + expected = torch.median((torch.abs(y - y_pred) / baseline.cpu()).cpu()).item() batch_size = 15 def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size - y_true_batch = np_y[idx : idx + batch_size] - y_pred_batch = np_y_pred[idx : idx + batch_size] - return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) + y_true_batch = y[idx : idx + batch_size] + y_pred_batch = y_pred[idx : idx + batch_size] + return y_pred_batch, y_true_batch engine = Engine(update_fn) - m = MedianRelativeAbsoluteError() + m = MedianRelativeAbsoluteError(device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "median_absolute_relative_error") data = list(range(size // batch_size)) median_absolute_relative_error = engine.run(data, max_epochs=1).metrics["median_absolute_relative_error"] - assert np_median_absolute_relative_error == pytest.approx(median_absolute_relative_error) + assert expected == pytest.approx(median_absolute_relative_error) def _test_distrib_compute(device): diff --git a/tests/ignite/metrics/regression/test_pearson_correlation.py b/tests/ignite/metrics/regression/test_pearson_correlation.py index b16a54c246e8..cc8c8a348e86 100644 --- a/tests/ignite/metrics/regression/test_pearson_correlation.py +++ b/tests/ignite/metrics/regression/test_pearson_correlation.py @@ -43,17 +43,20 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_degenerated_sample(): +def test_degenerated_sample(available_device): # one sample - m = PearsonCorrelation() + m = PearsonCorrelation(device=available_device) + assert m._device == torch.device(available_device) y_pred = torch.tensor([1.0]) y = torch.tensor([1.0]) m.update((y_pred, y)) - np_y_pred = y_pred.numpy() - np_y = y_pred.numpy() - np_res = np_corr_eps(np_y_pred, np_y) - assert pytest.approx(np_res) == m.compute() + np_y_pred = y_pred.cpu().numpy() + np_y = 
y_pred.cpu().numpy() + expected = np_corr_eps(np_y_pred, np_y) + actual = m.compute() + + assert pytest.approx(expected) == actual # constant samples m.reset() @@ -61,36 +64,36 @@ def test_degenerated_sample(): y = torch.zeros(10).float() m.update((y_pred, y)) - np_y_pred = y_pred.numpy() - np_y = y_pred.numpy() - np_res = np_corr_eps(np_y_pred, np_y) - assert pytest.approx(np_res) == m.compute() + np_y_pred = y_pred.cpu().numpy() + np_y = y_pred.cpu().numpy() + expected = np_corr_eps(np_y_pred, np_y) + actual = m.compute() + assert pytest.approx(expected) == actual -def test_pearson_correlation(): - a = np.random.randn(4).astype(np.float32) - b = np.random.randn(4).astype(np.float32) - c = np.random.randn(4).astype(np.float32) - d = np.random.randn(4).astype(np.float32) - ground_truth = np.random.randn(4).astype(np.float32) - m = PearsonCorrelation() +def test_pearson_correlation(available_device): + torch.manual_seed(1) - m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) - np_ans = scipy_corr(a, ground_truth) - assert m.compute() == pytest.approx(np_ans, rel=1e-4) + inputs = [torch.randn(4) for _ in range(4)] + ground_truth = torch.randn(4) - m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) - np_ans = scipy_corr(np.concatenate([a, b]), np.concatenate([ground_truth] * 2)) - assert m.compute() == pytest.approx(np_ans, rel=1e-4) + m = PearsonCorrelation(device=available_device) + assert m._device == torch.device(available_device) - m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) - np_ans = scipy_corr(np.concatenate([a, b, c]), np.concatenate([ground_truth] * 3)) - assert m.compute() == pytest.approx(np_ans, rel=1e-4) + all_preds = [] + all_targets = [] - m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) - np_ans = scipy_corr(np.concatenate([a, b, c, d]), np.concatenate([ground_truth] * 4)) - assert m.compute() == pytest.approx(np_ans, rel=1e-4) + for i, pred in enumerate(inputs, 1): + m.update((pred, ground_truth)) + all_preds.append(pred) + all_targets.append(ground_truth) + + pred_concat = torch.cat(all_preds).cpu().numpy() + target_concat = torch.cat(all_targets).cpu().numpy() + expected = pearsonr(pred_concat, target_concat)[0] + + assert m.compute() == pytest.approx(expected, rel=1e-4) @pytest.fixture(params=list(range(2))) @@ -106,7 +109,7 @@ def test_case(request): @pytest.mark.parametrize("n_times", range(5)) -def test_integration(n_times, test_case: Tuple[Tensor, Tensor, int]): +def test_integration_pearson_correlation(n_times, test_case: Tuple[Tensor, Tensor, int], available_device): y_pred, y, batch_size = test_case def update_fn(engine: Engine, batch): @@ -117,7 +120,8 @@ def update_fn(engine: Engine, batch): engine = Engine(update_fn) - m = PearsonCorrelation() + m = PearsonCorrelation(device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "corr") np_y = y.numpy().ravel() @@ -131,8 +135,9 @@ def update_fn(engine: Engine, batch): assert pytest.approx(np_ans, rel=2e-4) == corr -def test_accumulator_detached(): - corr = PearsonCorrelation() +def test_accumulator_detached(available_device): + corr = PearsonCorrelation(device=available_device) + assert corr._device == torch.device(available_device) y_pred = torch.tensor([2.0, 3.0], requires_grad=True) y = torch.tensor([-2.0, -1.0]) diff --git a/tests/ignite/metrics/regression/test_r2_score.py b/tests/ignite/metrics/regression/test_r2_score.py index 62ad0e14938f..c417183b4b98 100644 --- a/tests/ignite/metrics/regression/test_r2_score.py 
+++ b/tests/ignite/metrics/regression/test_r2_score.py @@ -1,6 +1,5 @@ import os -import numpy as np import pytest import torch from sklearn.metrics import r2_score @@ -27,31 +26,41 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_r2_score(): +def test_r2_score(available_device): + torch.manual_seed(42) size = 51 - np_y_pred = np.random.rand(size) - np_y = np.random.rand(size) - m = R2Score() - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) + y_pred = torch.rand(size, dtype=torch.float32) + y = torch.rand(size, dtype=torch.float32) + + if available_device == "mps": + y_pred = y_pred.to("mps") + y = y.to("mps") + + m = R2Score(device=available_device) + assert m._device == torch.device(available_device) m.reset() m.update((y_pred, y)) - assert r2_score(np_y, np_y_pred) == pytest.approx(m.compute()) + expected = r2_score(y.cpu().numpy(), y_pred.cpu().numpy()) + assert m.compute() == pytest.approx(expected) -def test_r2_score_2(): - np.random.seed(1) +def test_r2_score_2(available_device): + torch.manual_seed(1) size = 105 - np_y_pred = np.random.rand(size, 1) - np_y = np.random.rand(size, 1) - np.random.shuffle(np_y) + y_pred = torch.rand(size, 1, dtype=torch.float32) + y = torch.rand(size, 1, dtype=torch.float32) - m = R2Score() - y_pred = torch.from_numpy(np_y_pred) - y = torch.from_numpy(np_y) + y = y[torch.randperm(size)] + + if available_device == "mps": + y_pred = y_pred.to("mps") + y = y.to("mps") + + m = R2Score(device=available_device) + assert m._device == torch.device(available_device) m.reset() batch_size = 16 @@ -60,33 +69,40 @@ def test_r2_score_2(): idx = i * batch_size m.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size])) - assert r2_score(np_y, np_y_pred) == pytest.approx(m.compute()) + expected = r2_score(y.cpu().numpy(), y_pred.cpu().numpy()) + assert m.compute() == pytest.approx(expected) -def test_integration_r2_score(): - np.random.seed(1) +def test_integration_r2_score(available_device): + torch.manual_seed(1) size = 105 - np_y_pred = np.random.rand(size, 1) - np_y = np.random.rand(size, 1) - np.random.shuffle(np_y) + y_pred = torch.rand(size, 1, dtype=torch.float32) + y = torch.rand(size, 1, dtype=torch.float32) + + # Shuffle targets + y = y[torch.randperm(size)] + + if available_device == "mps": + y_pred = y_pred.to("mps") + y = y.to("mps") batch_size = 15 def update_fn(engine, batch): idx = (engine.state.iteration - 1) * batch_size - y_true_batch = np_y[idx : idx + batch_size] - y_pred_batch = np_y_pred[idx : idx + batch_size] - return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) + return y_pred[idx : idx + batch_size], y[idx : idx + batch_size] engine = Engine(update_fn) - m = R2Score() + m = R2Score(device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "r2_score") data = list(range(size // batch_size)) r_squared = engine.run(data, max_epochs=1).metrics["r2_score"] - assert r2_score(np_y, np_y_pred) == pytest.approx(r_squared) + expected = r2_score(y.cpu().numpy(), y_pred.cpu().numpy()) + assert r_squared == pytest.approx(expected) def _test_distrib_compute(device, tol=1e-6): diff --git a/tests/ignite/metrics/regression/test_spearman_correlation.py b/tests/ignite/metrics/regression/test_spearman_correlation.py index 4aac6221f629..9848aa8645b5 100644 --- a/tests/ignite/metrics/regression/test_spearman_correlation.py +++ b/tests/ignite/metrics/regression/test_spearman_correlation.py @@ -1,6 +1,5 @@ from typing import Tuple -import numpy 
as np import pytest import torch @@ -53,30 +52,42 @@ def test_wrong_y_dtype(): metric.update((y_pred, y)) -def test_spearman_correlation(): - a = np.random.randn(4).astype(np.float32) - b = np.random.randn(4).astype(np.float32) - c = np.random.randn(4).astype(np.float32) - d = np.random.randn(4).astype(np.float32) - ground_truth = np.random.randn(4).astype(np.float32) +def test_spearman_correlation(available_device): + torch.manual_seed(0) - m = SpearmanRankCorrelation() + a = torch.randn(4) + b = torch.randn(4) + c = torch.randn(4) + d = torch.randn(4) + ground_truth = torch.randn(4) - m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) - np_ans = spearmanr(a, ground_truth).statistic - assert m.compute() == pytest.approx(np_ans, rel=1e-4) + m = SpearmanRankCorrelation(device=available_device) + assert m._device == torch.device(available_device) - m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) - np_ans = spearmanr(np.concatenate([a, b]), np.concatenate([ground_truth] * 2)).statistic - assert m.compute() == pytest.approx(np_ans, rel=1e-4) + m.update((a, ground_truth)) + expected = spearmanr(a.numpy(), ground_truth.numpy()).statistic + assert m.compute() == pytest.approx(expected, rel=1e-4) - m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) - np_ans = spearmanr(np.concatenate([a, b, c]), np.concatenate([ground_truth] * 3)).statistic - assert m.compute() == pytest.approx(np_ans, rel=1e-4) + m.update((b, ground_truth)) + expected = spearmanr( + torch.cat([a, b]).numpy(), + torch.cat([ground_truth, ground_truth]).numpy(), + ).statistic + assert m.compute() == pytest.approx(expected, rel=1e-4) - m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) - np_ans = spearmanr(np.concatenate([a, b, c, d]), np.concatenate([ground_truth] * 4)).statistic - assert m.compute() == pytest.approx(np_ans, rel=1e-4) + m.update((c, ground_truth)) + expected = spearmanr( + torch.cat([a, b, c]).numpy(), + torch.cat([ground_truth] * 3).numpy(), + ).statistic + assert m.compute() == pytest.approx(expected, rel=1e-4) + + m.update((d, ground_truth)) + expected = spearmanr( + torch.cat([a, b, c, d]).numpy(), + torch.cat([ground_truth] * 4).numpy(), + ).statistic + assert m.compute() == pytest.approx(expected, rel=1e-4) @pytest.fixture(params=list(range(2))) @@ -92,29 +103,28 @@ def test_case(request): @pytest.mark.parametrize("n_times", range(5)) -def test_integration(n_times, test_case: Tuple[Tensor, Tensor, int]): +def test_integration_spearman_correlation(n_times, test_case: Tuple[Tensor, Tensor, int], available_device): y_pred, y, batch_size = test_case - np_y = y.numpy().ravel() - np_y_pred = y_pred.numpy().ravel() - def update_fn(engine: Engine, batch): idx = (engine.state.iteration - 1) * batch_size - y_true_batch = np_y[idx : idx + batch_size] - y_pred_batch = np_y_pred[idx : idx + batch_size] - return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) + y_true_batch = y[idx : idx + batch_size] + y_pred_batch = y_pred[idx : idx + batch_size] + return y_pred_batch, y_true_batch engine = Engine(update_fn) - m = SpearmanRankCorrelation() + m = SpearmanRankCorrelation(device=available_device) + assert m._device == torch.device(available_device) m.attach(engine, "spearman_corr") data = list(range(y_pred.shape[0] // batch_size)) corr = engine.run(data, max_epochs=1).metrics["spearman_corr"] - np_ans = spearmanr(np_y_pred, np_y).statistic + # Convert only for computing the expected value + expected = spearmanr(y_pred.numpy().ravel(), 
y.numpy().ravel()).statistic - assert pytest.approx(np_ans, rel=2e-4) == corr + assert pytest.approx(expected, rel=2e-4) == corr @pytest.mark.usefixtures("distributed") diff --git a/tests/ignite/metrics/regression/test_wave_hedges_distance.py b/tests/ignite/metrics/regression/test_wave_hedges_distance.py index 376dd70b8c22..818c49e19d8a 100644 --- a/tests/ignite/metrics/regression/test_wave_hedges_distance.py +++ b/tests/ignite/metrics/regression/test_wave_hedges_distance.py @@ -19,67 +19,67 @@ def test_wrong_input_shapes(): m.update((torch.rand(4, 1), torch.rand(4))) -def test_compute(): - a = np.random.randn(4) - b = np.random.randn(4) - c = np.random.randn(4) - d = np.random.randn(4) - ground_truth = np.random.randn(4) - - m = WaveHedgesDistance() - - m.update((torch.from_numpy(a), torch.from_numpy(ground_truth))) - np_sum = (np.abs(ground_truth - a) / np.maximum.reduce([a, ground_truth])).sum() - assert m.compute() == pytest.approx(np_sum) - - m.update((torch.from_numpy(b), torch.from_numpy(ground_truth))) - np_sum += (np.abs(ground_truth - b) / np.maximum.reduce([b, ground_truth])).sum() - assert m.compute() == pytest.approx(np_sum) - - m.update((torch.from_numpy(c), torch.from_numpy(ground_truth))) - np_sum += (np.abs(ground_truth - c) / np.maximum.reduce([c, ground_truth])).sum() - assert m.compute() == pytest.approx(np_sum) - - m.update((torch.from_numpy(d), torch.from_numpy(ground_truth))) - np_sum += (np.abs(ground_truth - d) / np.maximum.reduce([d, ground_truth])).sum() - assert m.compute() == pytest.approx(np_sum) - - -def test_integration(): - def _test(y_pred, y, batch_size): - def update_fn(engine, batch): - idx = (engine.state.iteration - 1) * batch_size - y_true_batch = np_y[idx : idx + batch_size] - y_pred_batch = np_y_pred[idx : idx + batch_size] - return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch) - - engine = Engine(update_fn) - - m = WaveHedgesDistance() - m.attach(engine, "whd") - - np_y = y.numpy().ravel() - np_y_pred = y_pred.numpy().ravel() - - data = list(range(y_pred.shape[0] // batch_size)) - whd = engine.run(data, max_epochs=1).metrics["whd"] - - np_sum = (np.abs(np_y - np_y_pred) / np.maximum.reduce([np_y_pred, np_y])).sum() - - assert np_sum == pytest.approx(whd) - - def get_test_cases(): - test_cases = [ - (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), - (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), - ] - return test_cases - - for _ in range(5): - # check multiple random inputs as random exact occurencies are rare - test_cases = get_test_cases() - for y_pred, y, batch_size in test_cases: - _test(y_pred, y, batch_size) +def test_compute(available_device): + a = torch.randn(4) + b = torch.randn(4) + c = torch.randn(4) + d = torch.randn(4) + ground_truth = torch.randn(4) + + m = WaveHedgesDistance(device=available_device) + assert m._device == torch.device(available_device) + + def compute_sum(x): + return torch.sum(torch.abs(ground_truth - x) / torch.maximum(ground_truth, x)) + + m.update((a, ground_truth)) + torch_sum = compute_sum(a) + assert m.compute() == pytest.approx(torch_sum.item()) + + m.update((b, ground_truth)) + torch_sum += compute_sum(b) + assert m.compute() == pytest.approx(torch_sum.item()) + + m.update((c, ground_truth)) + torch_sum += compute_sum(c) + assert m.compute() == pytest.approx(torch_sum.item()) + + m.update((d, ground_truth)) + torch_sum += compute_sum(d) + assert m.compute() == pytest.approx(torch_sum.item()) + + +@pytest.mark.parametrize("n_times", range(5)) +@pytest.mark.parametrize( + 
"y_pred, y, batch_size", + [ + (torch.rand(size=(100,)), torch.rand(size=(100,)), 10), + (torch.rand(size=(100, 1)), torch.rand(size=(100, 1)), 20), + ], +) +def test_integration_wave_hedges_distance(n_times, y_pred, y, batch_size, available_device): + y_pred = y_pred.to(available_device) + y = y.to(available_device) + + def update_fn(engine, batch): + idx = (engine.state.iteration - 1) * batch_size + return y_pred[idx : idx + batch_size], y[idx : idx + batch_size] + + engine = Engine(update_fn) + + m = WaveHedgesDistance(device=available_device) + assert m._device == torch.device(available_device) + m.attach(engine, "whd") + + data = list(range(y_pred.shape[0] // batch_size)) + whd = engine.run(data, max_epochs=1).metrics["whd"] + + # Flatten for comparison if needed + flat_pred = y_pred.view(-1).cpu() + flat_true = y.view(-1).cpu() + expected = torch.sum(torch.abs(flat_true - flat_pred) / torch.maximum(flat_true, flat_pred)) + + assert whd == pytest.approx(expected.item()) def _test_distrib_compute(device): From 5d0f1c1537e415699472ba82b246f5dcecad183d Mon Sep 17 00:00:00 2001 From: BanzaiTokyo Date: Sun, 4 May 2025 17:08:15 +0000 Subject: [PATCH 31/32] revert "if torch.isnan(r)" check in pearson_correlation.py --- ignite/metrics/regression/pearson_correlation.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/ignite/metrics/regression/pearson_correlation.py b/ignite/metrics/regression/pearson_correlation.py index 01ffbbbdf68c..2abe8a40e171 100644 --- a/ignite/metrics/regression/pearson_correlation.py +++ b/ignite/metrics/regression/pearson_correlation.py @@ -119,12 +119,5 @@ def compute(self) -> float: y_var = self._sum_of_y_squares / n - y_mean * y_mean y_var = torch.clamp(y_var, min=0.0) - denom = y_pred_var * y_var - denom = torch.clamp(denom, min=self.eps) - denom = torch.sqrt(denom) - r = cov / denom - - if torch.isnan(r): - return 0.0 - + r = cov / torch.clamp(torch.sqrt(y_pred_var * y_var), min=self.eps) return float(r.item()) From 731c223b8fdcce642d0ca90a0d7a6742260374d9 Mon Sep 17 00:00:00 2001 From: Sergey Epifanov Date: Mon, 5 May 2025 15:24:48 +0200 Subject: [PATCH 32/32] Update tests/ignite/metrics/regression/test_median_absolute_percentage_error.py Co-authored-by: vfdev --- .../regression/test_median_absolute_percentage_error.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py b/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py index 2c0340779187..307c713f3655 100644 --- a/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py +++ b/tests/ignite/metrics/regression/test_median_absolute_percentage_error.py @@ -45,8 +45,7 @@ def test_median_absolute_percentage_error(available_device): y = torch.rand(size) epsilon = 1e-8 - safe_y = torch.where(y == 0, torch.full_like(y, epsilon), y) - expected = torch.median(torch.abs((y - y_pred) / safe_y).cpu()).item() * 100.0 + expected = torch.median(torch.abs((y - y_pred) / (y + epsilon)).cpu()).item() * 100.0 m = MedianAbsolutePercentageError(device=available_device) assert m._device == torch.device(available_device)