From 301b8e50cfceb84dd99f675938e76f4fd98cd07d Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Tue, 1 Jun 2021 23:15:41 +0800
Subject: [PATCH 01/22] [DLMED] add metric base class

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/config/__init__.py         |  2 +-
 monai/config/type_definitions.py | 12 +++++++++--
 monai/metrics/metric.py          | 35 ++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 3 deletions(-)
 create mode 100644 monai/metrics/metric.py

diff --git a/monai/config/__init__.py b/monai/config/__init__.py
index f1c7707d1f..f874951b88 100644
--- a/monai/config/__init__.py
+++ b/monai/config/__init__.py
@@ -18,4 +18,4 @@
     print_gpu_info,
     print_system_info,
 )
-from .type_definitions import DtypeLike, IndexSelection, KeysCollection, NdarrayTensor
+from .type_definitions import DtypeLike, IndexSelection, KeysCollection, NdarrayTensor, TensorList
diff --git a/monai/config/type_definitions.py b/monai/config/type_definitions.py
index daa9b10052..52dc06e6a9 100644
--- a/monai/config/type_definitions.py
+++ b/monai/config/type_definitions.py
@@ -9,12 +9,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Collection, Hashable, Iterable, TypeVar, Union
+from typing import Collection, Hashable, Iterable, Sequence, TypeVar, Union
 
 import numpy as np
 import torch
 
-__all__ = ["KeysCollection", "IndexSelection", "DtypeLike", "NdarrayTensor"]
+__all__ = ["KeysCollection", "IndexSelection", "DtypeLike", "NdarrayTensor", "TensorList"]
 
 """Commonly used concepts
 This module provides naming and type specifications for commonly used concepts
@@ -55,6 +55,7 @@
 container must be iterable.
 """
 
+
 DtypeLike = Union[
     np.dtype,
     type,
@@ -67,3 +68,10 @@
 # Generic type which can represent either a numpy.ndarray or a torch.Tensor
 # Unlike Union can create a dependence between parameter(s) / return(s)
 NdarrayTensor = TypeVar("NdarrayTensor", np.ndarray, torch.Tensor)
+
+
+TensorList = Union[torch.Tensor, Sequence[torch.Tensor]]
+"""TensorList
+
+The TensorList type describes either a single batch-first Tensor or a sequence of channel-first Tensors.
+"""
diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
new file mode 100644
index 0000000000..d0b4ef53e6
--- /dev/null
+++ b/monai/metrics/metric.py
@@ -0,0 +1,35 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+import torch
+
+from monai.config import TensorList
+
+
+class Metric:
+    def __call__(self, y_pred: TensorList, y: Optional[TensorList] = None):
+        if isinstance(y_pred, (list, tuple)) or isinstance(y, (list, tuple)):
+            # if y_pred or y is a list of channel-first data, add batch dim and compute metric
+            ret = [self._apply(p_.unsqueeze(0), y_.unsqueeze(0)) for p_, y_ in zip(y_pred, y)]
+        else:
+            ret = self._apply(y_pred, y)
+        return self._reduce(ret)
+    
+    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
+        raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
+
+    def _reduce(self, data: Any):
+        raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
+
+    def compute(self):
+        raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")

From 4f63d2f3be240f381b4924330399f63ddfa2ed3b Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Wed, 2 Jun 2021 12:13:42 +0800
Subject: [PATCH 02/22] [DLMED] update meandice and auc

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/metrics/__init__.py      |  1 +
 monai/metrics/meandice.py      | 14 +++++++++-----
 monai/metrics/metric.py        | 21 +++++++++++++--------
 monai/metrics/rocauc.py        | 25 +++++++++++++++++++++----
 tests/test_compute_meandice.py | 10 +++++++++-
 5 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/monai/metrics/__init__.py b/monai/metrics/__init__.py
index 3113090c62..8dbee1fa7e 100644
--- a/monai/metrics/__init__.py
+++ b/monai/metrics/__init__.py
@@ -13,6 +13,7 @@
 from .froc import compute_fp_tp_probs, compute_froc_curve_data, compute_froc_score
 from .hausdorff_distance import HausdorffDistanceMetric, compute_hausdorff_distance, compute_percent_hausdorff_distance
 from .meandice import DiceMetric, compute_meandice
+from .metric import Metric
 from .regression import MAEMetric, MSEMetric, PSNRMetric, RMSEMetric
 from .rocauc import compute_roc_auc
 from .surface_distance import SurfaceDistanceMetric, compute_average_surface_distance
diff --git a/monai/metrics/meandice.py b/monai/metrics/meandice.py
index 9d27fff56f..b57ec30de0 100644
--- a/monai/metrics/meandice.py
+++ b/monai/metrics/meandice.py
@@ -16,9 +16,10 @@
 
 from monai.metrics.utils import do_metric_reduction, ignore_background
 from monai.utils import MetricReduction
+from .metric import Metric
 
 
-class DiceMetric:
+class DiceMetric(Metric):
     """
     Compute average Dice loss between two tensors. It can support both multi-classes and multi-labels tasks.
     Input `y_pred` (BNHW[D] where N is number of classes) is compared with ground truth `y` (BNHW[D]).
@@ -42,12 +43,13 @@ def __init__(
         self,
         include_background: bool = True,
         reduction: Union[MetricReduction, str] = MetricReduction.MEAN,
+        batch_reduce: bool = True,
     ) -> None:
-        super().__init__()
+        super().__init__(batch_reduce=batch_reduce)
         self.include_background = include_background
         self.reduction = reduction
 
-    def __call__(self, y_pred: torch.Tensor, y: torch.Tensor):
+    def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
         """
         Args:
             y_pred: input data to compute, typical segmentation model output.
@@ -68,14 +70,16 @@ def __call__(self, y_pred: torch.Tensor, y: torch.Tensor):
         if dims < 3:
             raise ValueError("y_pred should have at least three dimensions.")
         # compute dice (BxC) for each channel for each batch
-        f = compute_meandice(
+        return compute_meandice(
             y_pred=y_pred,
             y=y,
             include_background=self.include_background,
         )
 
+    def reduce(self, data):
+        data = torch.cat(data, dim=0) if isinstance(data, list) else data
         # do metric reduction
-        f, not_nans = do_metric_reduction(f, self.reduction)
+        f, not_nans = do_metric_reduction(data, self.reduction)
         return f, not_nans
 
 
diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index d0b4ef53e6..819e755e2f 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any
+from typing import Any, Optional
 
 import torch
 
@@ -17,19 +17,24 @@
 
 
 class Metric:
+    def __init__(self, batch_reduce: bool = False) -> None:
+        self.batch_reduce = batch_reduce
+
     def __call__(self, y_pred: TensorList, y: Optional[TensorList] = None):
         if isinstance(y_pred, (list, tuple)) or isinstance(y, (list, tuple)):
             # if y_pred or y is a list of channel-first data, add batch dim and compute metric
-            ret = [self._apply(p_.unsqueeze(0), y_.unsqueeze(0)) for p_, y_ in zip(y_pred, y)]
+            if y is not None:
+                ret = [self._apply(p_.unsqueeze(0), y_.unsqueeze(0)) for p_, y_ in zip(y_pred, y)]
+            else:
+                ret = [self._apply(p_.unsqueeze(0), None) for p_ in y_pred]
         else:
             ret = self._apply(y_pred, y)
-        return self._reduce(ret)
-    
-    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
-        raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
+        if self.batch_reduce:
+            ret = self.reduce(ret)
+        return ret
 
-    def _reduce(self, data: Any):
+    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
 
-    def compute(self):
+    def reduce(self, data: Any):
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
diff --git a/monai/metrics/rocauc.py b/monai/metrics/rocauc.py
index a6d70b6dd8..ddab9a82a0 100644
--- a/monai/metrics/rocauc.py
+++ b/monai/metrics/rocauc.py
@@ -15,9 +15,26 @@
 import torch
 
 from monai.utils import Average
+from .metric import Metric
 
 
-def _calculate(y: torch.Tensor, y_pred: torch.Tensor) -> float:
+class ROCAUCMetric(Metric):
+    def __init__(self, average: Union[Average, str] = Average.MACRO, batch_reduce: bool = False) -> None:
+        super().__init__(batch_reduce=batch_reduce)
+        self.average = average
+
+    def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
+        return y_pred, y
+
+    def reduce(self, data):
+        y_pred, y = data
+        y_pred = torch.cat(y_pred, dim=0) if isinstance(y_pred, list) else y_pred
+        y = torch.cat(y, dim=0) if isinstance(y, list) else y
+        # compute final value and do metric reduction
+        return compute_roc_auc(y_pred=y_pred, y=y, average=self.average)
+
+
+def _calculate(y_pred: torch.Tensor, y: torch.Tensor) -> float:
     if not (y.ndimension() == y_pred.ndimension() == 1 and len(y) == len(y_pred)):
         raise AssertionError("y and y_pred must be 1 dimension data with same length.")
     if not y.unique().equal(torch.tensor([0, 1], dtype=y.dtype, device=y.device)):
@@ -96,16 +113,16 @@ def compute_roc_auc(
         y = y.squeeze(dim=-1)
 
     if y_pred_ndim == 1:
-        return _calculate(y, y_pred)
+        return _calculate(y_pred, y)
 
     if y.shape != y_pred.shape:
         raise AssertionError("data shapes of y_pred and y do not match.")
 
     average = Average(average)
     if average == Average.MICRO:
-        return _calculate(y.flatten(), y_pred.flatten())
+        return _calculate(y_pred.flatten(), y.flatten())
     y, y_pred = y.transpose(0, 1), y_pred.transpose(0, 1)
-    auc_values = [_calculate(y_, y_pred_) for y_, y_pred_ in zip(y, y_pred)]
+    auc_values = [_calculate(y_pred_, y_) for y_pred_, y_ in zip(y_pred, y)]
     if average == Average.NONE:
         return auc_values
     if average == Average.MACRO:
diff --git a/tests/test_compute_meandice.py b/tests/test_compute_meandice.py
index 64f38dcdb8..3cc9faa44c 100644
--- a/tests/test_compute_meandice.py
+++ b/tests/test_compute_meandice.py
@@ -167,6 +167,14 @@
     [[1.0000, 1.0000], [1.0000, 1.0000]],
 ]
 
+TEST_CASE_10 = [
+    {
+        "y": [torch.ones((2, 3, 3)), torch.ones((2, 3, 3))],
+        "y_pred": [torch.ones((2, 3, 3)), torch.ones((2, 3, 3))],
+    },
+    [[1.0000, 1.0000], [1.0000, 1.0000]],
+]
+
 
 class TestComputeMeanDice(unittest.TestCase):
     @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_9])
@@ -180,7 +188,7 @@ def test_nans(self, input_data, expected_value):
         self.assertTrue(np.allclose(np.isnan(result.cpu().numpy()), expected_value))
 
     # DiceMetric class tests
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2])
+    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_10])
     def test_value_class(self, input_data, expected_value):
 
         # same test as for compute_meandice

From 9571f536fe58cf51c2e794b972ef0a119e7baa3a Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Wed, 2 Jun 2021 13:00:43 +0800
Subject: [PATCH 03/22] [DLMED] extract reduce API

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/metrics/meandice.py      | 3 +--
 monai/metrics/metric.py        | 5 -----
 monai/metrics/rocauc.py        | 4 ++--
 tests/test_compute_meandice.py | 6 ++++--
 4 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/monai/metrics/meandice.py b/monai/metrics/meandice.py
index b57ec30de0..9ed1f688bd 100644
--- a/monai/metrics/meandice.py
+++ b/monai/metrics/meandice.py
@@ -43,9 +43,8 @@ def __init__(
         self,
         include_background: bool = True,
         reduction: Union[MetricReduction, str] = MetricReduction.MEAN,
-        batch_reduce: bool = True,
     ) -> None:
-        super().__init__(batch_reduce=batch_reduce)
+        super().__init__()
         self.include_background = include_background
         self.reduction = reduction
 
diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index 819e755e2f..e8d84f5445 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -17,9 +17,6 @@
 
 
 class Metric:
-    def __init__(self, batch_reduce: bool = False) -> None:
-        self.batch_reduce = batch_reduce
-
     def __call__(self, y_pred: TensorList, y: Optional[TensorList] = None):
         if isinstance(y_pred, (list, tuple)) or isinstance(y, (list, tuple)):
             # if y_pred or y is a list of channel-first data, add batch dim and compute metric
@@ -29,8 +26,6 @@ def __call__(self, y_pred: TensorList, y: Optional[TensorList] = None):
                 ret = [self._apply(p_.unsqueeze(0), None) for p_ in y_pred]
         else:
             ret = self._apply(y_pred, y)
-        if self.batch_reduce:
-            ret = self.reduce(ret)
         return ret
 
     def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
diff --git a/monai/metrics/rocauc.py b/monai/metrics/rocauc.py
index ddab9a82a0..1e096419e3 100644
--- a/monai/metrics/rocauc.py
+++ b/monai/metrics/rocauc.py
@@ -19,8 +19,8 @@
 
 
 class ROCAUCMetric(Metric):
-    def __init__(self, average: Union[Average, str] = Average.MACRO, batch_reduce: bool = False) -> None:
-        super().__init__(batch_reduce=batch_reduce)
+    def __init__(self, average: Union[Average, str] = Average.MACRO) -> None:
+        super().__init__()
         self.average = average
 
     def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
diff --git a/tests/test_compute_meandice.py b/tests/test_compute_meandice.py
index 3cc9faa44c..a32d7ef894 100644
--- a/tests/test_compute_meandice.py
+++ b/tests/test_compute_meandice.py
@@ -196,14 +196,16 @@ def test_value_class(self, input_data, expected_value):
         vals["y_pred"] = input_data.pop("y_pred")
         vals["y"] = input_data.pop("y")
         dice_metric = DiceMetric(**input_data, reduction="none")
-        result, _ = dice_metric(**vals)
+        result = dice_metric(**vals)
+        result, _ = dice_metric.reduce(result)
         np.testing.assert_allclose(result.cpu().numpy(), expected_value, atol=1e-4)
 
     @parameterized.expand([TEST_CASE_4, TEST_CASE_5, TEST_CASE_6, TEST_CASE_7, TEST_CASE_8])
     def test_nans_class(self, params, input_data, expected_value):
 
         dice_metric = DiceMetric(**params)
-        result, _ = dice_metric(**input_data)
+        result = dice_metric(**input_data)
+        result, _ = dice_metric.reduce(result)
         np.testing.assert_allclose(result.cpu().numpy(), expected_value, atol=1e-4)
 
 

From c31d1ae7f0d6aad9d2257d37efe06756f9722e19 Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Wed, 2 Jun 2021 13:17:01 +0800
Subject: [PATCH 04/22] [DLMED] update regression metrics

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/metrics/meandice.py                |  3 ++-
 monai/metrics/metric.py                  |  5 ++++-
 monai/metrics/regression.py              | 18 ++++++++---------
 tests/test_compute_regression_metrics.py | 25 +++++++++++++++---------
 4 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/monai/metrics/meandice.py b/monai/metrics/meandice.py
index 9ed1f688bd..0b6f67f3dd 100644
--- a/monai/metrics/meandice.py
+++ b/monai/metrics/meandice.py
@@ -14,6 +14,7 @@
 
 import torch
 
+from monai.config import TensorList
 from monai.metrics.utils import do_metric_reduction, ignore_background
 from monai.utils import MetricReduction
 from .metric import Metric
@@ -75,7 +76,7 @@ def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
             include_background=self.include_background,
         )
 
-    def reduce(self, data):
+    def reduce(self, data: TensorList):
         data = torch.cat(data, dim=0) if isinstance(data, list) else data
         # do metric reduction
         f, not_nans = do_metric_reduction(data, self.reduction)
diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index e8d84f5445..38dfd63de5 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -10,13 +10,14 @@
 # limitations under the License.
 
 from typing import Any, Optional
+from abc import ABC, abstractmethod
 
 import torch
 
 from monai.config import TensorList
 
 
-class Metric:
+class Metric(ABC):
     def __call__(self, y_pred: TensorList, y: Optional[TensorList] = None):
         if isinstance(y_pred, (list, tuple)) or isinstance(y, (list, tuple)):
             # if y_pred or y is a list of channel-first data, add batch dim and compute metric
@@ -28,8 +29,10 @@ def __call__(self, y_pred: TensorList, y: Optional[TensorList] = None):
             ret = self._apply(y_pred, y)
         return ret
 
+    @abstractmethod
     def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
 
+    @abstractmethod
     def reduce(self, data: Any):
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
diff --git a/monai/metrics/regression.py b/monai/metrics/regression.py
index 78c256f9e8..cd6564a960 100644
--- a/monai/metrics/regression.py
+++ b/monai/metrics/regression.py
@@ -10,23 +10,26 @@
 # limitations under the License.
 
 import math
-from abc import ABC, abstractmethod
+from abc import abstractmethod
 from functools import partial
 from typing import Any, Union
 
 import torch
 
+from monai.config import TensorList
 from monai.metrics.utils import do_metric_reduction
 from monai.utils import MetricReduction
+from .metric import Metric
 
 
-class RegressionMetric(ABC):
+class RegressionMetric(Metric):
     def __init__(self, reduction: Union[MetricReduction, str] = MetricReduction.MEAN) -> None:
         super().__init__()
         self.reduction = reduction
 
-    def _reduce(self, f: torch.Tensor):
-        return do_metric_reduction(f, self.reduction)
+    def reduce(self, data: TensorList):
+        data = torch.cat(data, dim=0) if isinstance(data, list) else data
+        return do_metric_reduction(data, self.reduction)
 
     def _check_shape(self, y_pred: torch.Tensor, y: torch.Tensor) -> None:
         if y_pred.shape != y.shape:
@@ -42,12 +45,9 @@ def _check_shape(self, y_pred: torch.Tensor, y: torch.Tensor) -> None:
     def _compute_metric(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
 
-    def __call__(self, y_pred: torch.Tensor, y: torch.Tensor):
+    def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
         self._check_shape(y_pred, y)
-        out = self._compute_metric(y_pred, y)
-        y, not_nans = self._reduce(out)
-        return y, not_nans
-
+        return self._compute_metric(y_pred, y)
 
 class MSEMetric(RegressionMetric):
     r"""Compute Mean Squared Error between two tensors using function:
diff --git a/tests/test_compute_regression_metrics.py b/tests/test_compute_regression_metrics.py
index 20d37a1d70..b10633ac17 100644
--- a/tests/test_compute_regression_metrics.py
+++ b/tests/test_compute_regression_metrics.py
@@ -65,16 +65,20 @@ def test_shape_reduction(self):
 
                     # iterate over regression metrics, check shape for diff. reduction func
                     for mt_fn in metrics:
-                        out_tensor, _ = mt_fn(reduction="mean")(in_tensor, in_tensor)
+                        mt = mt_fn(reduction="mean")
+                        out_tensor, _ = mt.reduce(mt(in_tensor, in_tensor))
                         self.assertTrue(len(out_tensor.shape) == 1)
 
-                        out_tensor, _ = mt_fn(reduction="sum")(in_tensor, in_tensor)
+                        mt = mt_fn(reduction="sum")
+                        out_tensor, _ = mt.reduce(mt(in_tensor, in_tensor))
                         self.assertTrue(len(out_tensor.shape) == 0)
 
-                        out_tensor, _ = mt_fn(reduction="mean_channel")(in_tensor, in_tensor)
+                        mt = mt_fn(reduction="mean_channel")
+                        out_tensor, _ = mt.reduce(mt(in_tensor, in_tensor))
                         self.assertTrue(len(out_tensor.shape) == 1 and out_tensor.shape[0] == batch)
 
-                        out_tensor, _ = mt_fn(reduction="sum_channel")(in_tensor, in_tensor)
+                        mt = mt_fn(reduction="sum_channel")
+                        out_tensor, _ = mt.reduce(mt(in_tensor, in_tensor))
                         self.assertTrue(len(out_tensor.shape) == 1 and out_tensor.shape[0] == batch)
 
     def test_compare_numpy(self):
@@ -101,7 +105,8 @@ def test_compare_numpy(self):
 
                     # check metrics
                     for mt_fn, mt_fn_np in zip(metrics, metrics_np):
-                        out_tensor, _ = mt_fn(reduction="mean")(y_pred=in_tensor_a, y=in_tensor_b)
+                        mt = mt_fn(reduction="mean")
+                        out_tensor, _ = mt.reduce(mt(y_pred=in_tensor_a, y=in_tensor_b))
                         out_np = mt_fn_np(y_pred=in_tensor_a.cpu().numpy(), y=in_tensor_b.cpu().numpy())
 
                         np.testing.assert_allclose(out_tensor.cpu().numpy(), out_np, atol=1e-4)
@@ -118,14 +123,14 @@ def test_ill_shape(self):
         with self.assertRaises(ValueError):
             in_tensor = torch.rand((basedim,)).to(device)
             for mt_fn in metrics:
-                out_tensor, _ = mt_fn()(in_tensor, in_tensor)
+                out_tensor = mt_fn()(in_tensor, in_tensor)
 
         # different shape for pred/target
         with self.assertRaises(ValueError):
             in_tensor_a = torch.rand((basedim,)).to(device)
             in_tensor_b = torch.rand((basedim, basedim)).to(device)
             for mt_fn in metrics:
-                out_tensor, _ = mt_fn()(y_pred=in_tensor_a, y=in_tensor_b)
+                out_tensor = mt_fn()(y_pred=in_tensor_a, y=in_tensor_b)
 
     def test_same_input(self):
         set_determinism(seed=123)
@@ -148,7 +153,8 @@ def test_same_input(self):
 
                     # check metrics
                     for mt_fn, rs in zip(metrics, results):
-                        out_tensor, _ = mt_fn(reduction="mean")(in_tensor, in_tensor)
+                        mt = mt_fn(reduction="mean")
+                        out_tensor, _ = mt.reduce(mt(in_tensor, in_tensor))
                         np.testing.assert_allclose(out_tensor.cpu(), rs, atol=1e-4)
 
     def test_diff_input(self):
@@ -173,7 +179,8 @@ def test_diff_input(self):
 
                     # check metrics
                     for mt_fn, rs in zip(metrics, results):
-                        out_tensor, _ = mt_fn(reduction="mean")(in_tensor_a, in_tensor_b)
+                        mt = mt_fn(reduction="mean")
+                        out_tensor, _ = mt.reduce(mt(in_tensor_a, in_tensor_b))
                         np.testing.assert_allclose(out_tensor.cpu(), rs, atol=1e-4)
 
 

From 587195773d9ab5f0c79c53d4d9c785e45b12b800 Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Wed, 2 Jun 2021 17:02:22 +0800
Subject: [PATCH 05/22] [DLMED] update all the other metrics and enhance unit
 tests

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/metrics/__init__.py              |  2 +-
 monai/metrics/confusion_matrix.py      | 44 ++++++++++++--------------
 monai/metrics/hausdorff_distance.py    | 12 ++++---
 monai/metrics/rocauc.py                |  5 +--
 monai/metrics/surface_distance.py      | 12 ++++---
 tests/test_compute_confusion_matrix.py | 14 +++++---
 tests/test_compute_roc_auc.py          | 10 +++++-
 tests/test_hausdorff_distance.py       |  4 +--
 tests/test_surface_distance.py         |  9 +++---
 9 files changed, 67 insertions(+), 45 deletions(-)

diff --git a/monai/metrics/__init__.py b/monai/metrics/__init__.py
index 8dbee1fa7e..51d0d03b0c 100644
--- a/monai/metrics/__init__.py
+++ b/monai/metrics/__init__.py
@@ -15,6 +15,6 @@
 from .meandice import DiceMetric, compute_meandice
 from .metric import Metric
 from .regression import MAEMetric, MSEMetric, PSNRMetric, RMSEMetric
-from .rocauc import compute_roc_auc
+from .rocauc import ROCAUCMetric, compute_roc_auc
 from .surface_distance import SurfaceDistanceMetric, compute_average_surface_distance
 from .utils import do_metric_reduction, get_mask_edges, get_surface_distance, ignore_background
diff --git a/monai/metrics/confusion_matrix.py b/monai/metrics/confusion_matrix.py
index 9c15b320eb..d7769cc739 100644
--- a/monai/metrics/confusion_matrix.py
+++ b/monai/metrics/confusion_matrix.py
@@ -14,11 +14,13 @@
 
 import torch
 
+from monai.config import TensorList
 from monai.metrics.utils import do_metric_reduction, ignore_background
-from monai.utils import MetricReduction
+from monai.utils import ensure_tuple, MetricReduction
+from .metric import Metric
 
 
-class ConfusionMatrixMetric:
+class ConfusionMatrixMetric(Metric):
     """
     Compute confusion matrix related metrics. This function supports to calculate all metrics mentioned in:
     `Confusion matrix <https://en.wikipedia.org/wiki/Confusion_matrix>`_.
@@ -43,10 +45,8 @@ class ConfusionMatrixMetric:
             Except for input only one metric, multiple metrics are also supported via input a sequence of metric names, such as
             ("sensitivity", "precision", "recall"), if ``compute_sample`` is ``True``, multiple ``f`` and ``not_nans`` will be
             returned with the same order as input names when calling the class.
-        compute_sample: if ``True``, each sample's metric will be computed first. If ``False``, the confusion matrix for each image
-            (the output of function ``get_confusion_matrix``) will be returned. In this way, users should achieve the confusion
-            matrixes for all images during an epoch and then use ``compute_confusion_matrix_metric`` to calculate the metric.
-            Defaults to ``False``.
+        compute_sample: when reducing, if ``True``, each sample's metric is computed first and then reduced.
+            If ``False``, the confusion matrix is reduced first, then the metric is computed. Defaults to ``False``.
         reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
             ``"mean_channel"``, ``"sum_channel"``}
             Define the mode to reduce computation result of 1 batch data. Reduction will only be employed when
@@ -63,11 +63,11 @@ def __init__(
     ) -> None:
         super().__init__()
         self.include_background = include_background
-        self.metric_name = metric_name
+        self.metric_name = ensure_tuple(metric_name)
         self.compute_sample = compute_sample
         self.reduction = reduction
 
-    def __call__(self, y_pred: torch.Tensor, y: torch.Tensor):
+    def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
         """
         Args:
             y_pred: input data to compute. It must be one-hot format and first dim is batch.
@@ -92,27 +92,25 @@ def __call__(self, y_pred: torch.Tensor, y: torch.Tensor):
                 warnings.warn("As for classification task, compute_sample should be False.")
                 self.compute_sample = False
 
-        confusion_matrix = get_confusion_matrix(
+        return get_confusion_matrix(
             y_pred=y_pred,
             y=y,
             include_background=self.include_background,
         )
 
-        if self.compute_sample:
-            if isinstance(self.metric_name, str):
-                confusion_matrix = compute_confusion_matrix_metric(self.metric_name, confusion_matrix)
-                f, not_nans = do_metric_reduction(confusion_matrix, self.reduction)
-                return f, not_nans
-            if len(self.metric_name) < 1:
-                raise ValueError("the sequence should at least has on metric name.")
-            results = []
-            for metric_name in self.metric_name:
-                sub_confusion_matrix = compute_confusion_matrix_metric(metric_name, confusion_matrix)
+    def reduce(self, data: TensorList):
+        data = torch.cat(data, dim=0) if isinstance(data, list) else data
+        results = []
+        for metric_name in self.metric_name:
+            if self.compute_sample:           
+                sub_confusion_matrix = compute_confusion_matrix_metric(metric_name, data)
                 f, not_nans = do_metric_reduction(sub_confusion_matrix, self.reduction)
-                results.append(f)
-                results.append(not_nans)
-            return results
-        return confusion_matrix
+            else:
+                f, not_nans = do_metric_reduction(data, self.reduction)
+                f = compute_confusion_matrix_metric(metric_name, f)
+            results.append(f)
+            results.append(not_nans)
+        return results
 
 
 def get_confusion_matrix(
diff --git a/monai/metrics/hausdorff_distance.py b/monai/metrics/hausdorff_distance.py
index 9617c0365a..c80aa8db19 100644
--- a/monai/metrics/hausdorff_distance.py
+++ b/monai/metrics/hausdorff_distance.py
@@ -15,13 +15,15 @@
 import numpy as np
 import torch
 
+from monai.config import TensorList
 from monai.metrics.utils import do_metric_reduction, get_mask_edges, get_surface_distance, ignore_background
 from monai.utils import MetricReduction
+from .metric import Metric
 
 __all__ = ["HausdorffDistanceMetric", "compute_hausdorff_distance", "compute_percent_hausdorff_distance"]
 
 
-class HausdorffDistanceMetric:
+class HausdorffDistanceMetric(Metric):
     """
     Compute Hausdorff Distance between two tensors. It can support both multi-classes and multi-labels tasks.
     It supports both directed and non-directed Hausdorff distance calculation. In addition, specify the `percentile`
@@ -62,7 +64,7 @@ def __init__(
         self.directed = directed
         self.reduction = reduction
 
-    def __call__(self, y_pred: torch.Tensor, y: torch.Tensor):
+    def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
         """
         Args:
             y_pred: input data to compute, typical segmentation model output.
@@ -83,7 +85,7 @@ def __call__(self, y_pred: torch.Tensor, y: torch.Tensor):
         if dims < 3:
             raise ValueError("y_pred should have at least three dimensions.")
         # compute (BxC) for each channel for each batch
-        f = compute_hausdorff_distance(
+        return compute_hausdorff_distance(
             y_pred=y_pred,
             y=y,
             include_background=self.include_background,
@@ -92,8 +94,10 @@ def __call__(self, y_pred: torch.Tensor, y: torch.Tensor):
             directed=self.directed,
         )
 
+    def reduce(self, data: TensorList):
+        data = torch.cat(data, dim=0) if isinstance(data, list) else data
         # do metric reduction
-        f, not_nans = do_metric_reduction(f, self.reduction)
+        f, not_nans = do_metric_reduction(data, self.reduction)
         return f, not_nans
 
 
diff --git a/monai/metrics/rocauc.py b/monai/metrics/rocauc.py
index 1e096419e3..76fffbd9dd 100644
--- a/monai/metrics/rocauc.py
+++ b/monai/metrics/rocauc.py
@@ -9,11 +9,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Union, cast
+from typing import Tuple, Union, cast
 
 import numpy as np
 import torch
 
+from monai.config import TensorList
 from monai.utils import Average
 from .metric import Metric
 
@@ -26,7 +27,7 @@ def __init__(self, average: Union[Average, str] = Average.MACRO) -> None:
     def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
         return y_pred, y
 
-    def reduce(self, data):
+    def reduce(self, data: Tuple[TensorList, TensorList]):
         y_pred, y = data
         y_pred = torch.cat(y_pred, dim=0) if isinstance(y_pred, list) else y_pred
         y = torch.cat(y, dim=0) if isinstance(y, list) else y
diff --git a/monai/metrics/surface_distance.py b/monai/metrics/surface_distance.py
index d4b2a84572..d9d37681b5 100644
--- a/monai/metrics/surface_distance.py
+++ b/monai/metrics/surface_distance.py
@@ -15,11 +15,13 @@
 import numpy as np
 import torch
 
+from monai.config import TensorList
 from monai.metrics.utils import do_metric_reduction, get_mask_edges, get_surface_distance, ignore_background
 from monai.utils import MetricReduction
+from .metric import Metric
 
 
-class SurfaceDistanceMetric:
+class SurfaceDistanceMetric(Metric):
     """
     Compute Surface Distance between two tensors. It can support both multi-classes and multi-labels tasks.
     It supports both symmetric and asymmetric surface distance calculation.
@@ -53,7 +55,7 @@ def __init__(
         self.symmetric = symmetric
         self.reduction = reduction
 
-    def __call__(self, y_pred: torch.Tensor, y: torch.Tensor):
+    def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
         """
         Args:
             y_pred: input data to compute, typical segmentation model output.
@@ -74,7 +76,7 @@ def __call__(self, y_pred: torch.Tensor, y: torch.Tensor):
         if dims < 3:
             raise ValueError("y_pred should have at least three dimensions.")
         # compute (BxC) for each channel for each batch
-        f = compute_average_surface_distance(
+        return compute_average_surface_distance(
             y_pred=y_pred,
             y=y,
             include_background=self.include_background,
@@ -82,8 +84,10 @@ def __call__(self, y_pred: torch.Tensor, y: torch.Tensor):
             distance_metric=self.distance_metric,
         )
 
+    def reduce(self, data: TensorList):
+        data = torch.cat(data, dim=0) if isinstance(data, list) else data
         # do metric reduction
-        f, not_nans = do_metric_reduction(f, self.reduction)
+        f, not_nans = do_metric_reduction(data, self.reduction)
         return f, not_nans
 
 
diff --git a/tests/test_compute_confusion_matrix.py b/tests/test_compute_confusion_matrix.py
index 56ca5371ab..174c48baea 100644
--- a/tests/test_compute_confusion_matrix.py
+++ b/tests/test_compute_confusion_matrix.py
@@ -16,7 +16,7 @@
 import torch
 from parameterized import parameterized
 
-from monai.metrics import ConfusionMatrixMetric, get_confusion_matrix
+from monai.metrics import ConfusionMatrixMetric, get_confusion_matrix, do_metric_reduction, compute_confusion_matrix_metric
 
 # input data
 data: Dict[Any, Any] = {
@@ -59,6 +59,8 @@
     "y": torch.tensor([[1, 0, 0], [0, 1, 0]]),
     "compute_sample": False,
     "include_background": True,
+    "metric_name": "tpr",
+    "reduction": "mean_channel",
 }
 
 # 1. test confusion matrix
@@ -224,7 +226,7 @@ def test_compute_sample(self, input_data, expected_value):
         vals["y_pred"] = params.pop("y_pred")
         vals["y"] = params.pop("y")
         metric = ConfusionMatrixMetric(**params)
-        result, _ = metric(**vals)
+        result, _ = metric.reduce(metric(**vals))
         np.testing.assert_allclose(result, expected_value, atol=1e-4, rtol=1e-4)
 
     @parameterized.expand(TEST_CASES_COMPUTE_SAMPLE_MULTI_METRICS)
@@ -234,7 +236,7 @@ def test_compute_sample_multiple_metrics(self, input_data, expected_values):
         vals["y_pred"] = params.pop("y_pred")
         vals["y"] = params.pop("y")
         metric = ConfusionMatrixMetric(**params)
-        results = metric(**vals)
+        results = metric.reduce(metric(**vals))
         for idx in range(0, len(results), 2):
             result = results[idx]
             expected_value = expected_values[int(idx / 2)]
@@ -247,7 +249,7 @@ def test_compute_sample_with_nan(self, input_data, expected_value, expected_not_
         vals["y_pred"] = params.pop("y_pred")
         vals["y"] = params.pop("y")
         metric = ConfusionMatrixMetric(**params)
-        result, not_nans = metric(**vals)
+        result, not_nans = metric.reduce(metric(**vals))
         np.testing.assert_allclose(result, expected_value, atol=1e-4, rtol=1e-4)
         np.testing.assert_allclose(not_nans, expected_not_nans, atol=1e-4, rtol=1e-4)
 
@@ -260,6 +262,10 @@ def test_clf_with_nan(self, input_data, expected_value):
         metric = ConfusionMatrixMetric(**params)
         result = metric(**vals)
         np.testing.assert_allclose(result, expected_value, atol=1e-4, rtol=1e-4)
+        result, _ = metric.reduce(result)
+        expected_value, _ = do_metric_reduction(expected_value, "mean_channel")
+        expected_value = compute_confusion_matrix_metric("tpr", expected_value)
+        np.testing.assert_allclose(result, expected_value, atol=1e-4, rtol=1e-4)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_compute_roc_auc.py b/tests/test_compute_roc_auc.py
index 10141ce0a7..3a91648e2a 100644
--- a/tests/test_compute_roc_auc.py
+++ b/tests/test_compute_roc_auc.py
@@ -15,7 +15,7 @@
 import torch
 from parameterized import parameterized
 
-from monai.metrics import compute_roc_auc
+from monai.metrics import ROCAUCMetric, compute_roc_auc
 from monai.transforms import Activations, AsDiscrete
 
 TEST_CASE_1 = [
@@ -90,6 +90,14 @@ def test_value(self, y_pred, y, softmax, to_onehot, average, expected_value):
         result = compute_roc_auc(y_pred=y_pred, y=y, average=average)
         np.testing.assert_allclose(expected_value, result, rtol=1e-5)
 
+    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6, TEST_CASE_7])
+    def test_class_value(self, y_pred, y, softmax, to_onehot, average, expected_value):
+        y_pred = Activations(softmax=softmax)(y_pred)
+        y = AsDiscrete(to_onehot=to_onehot, n_classes=2)(y)
+        metric = ROCAUCMetric(average=average)
+        result = metric.reduce(metric(y_pred=y_pred, y=y))
+        np.testing.assert_allclose(expected_value, result, rtol=1e-5)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_hausdorff_distance.py b/tests/test_hausdorff_distance.py
index 465900c12a..a19e928e7d 100644
--- a/tests/test_hausdorff_distance.py
+++ b/tests/test_hausdorff_distance.py
@@ -131,7 +131,7 @@ def test_value(self, input_data, expected_value):
                 batch, n_class = 2, 3
                 batch_seg_1 = seg_1.unsqueeze(0).unsqueeze(0).repeat([batch, n_class, 1, 1, 1])
                 batch_seg_2 = seg_2.unsqueeze(0).unsqueeze(0).repeat([batch, n_class, 1, 1, 1])
-                result, _ = hd_metric(batch_seg_1, batch_seg_2)
+                result, _ = hd_metric.reduce(hd_metric(batch_seg_1, batch_seg_2))
                 expected_value_curr = expected_value[ct]
                 np.testing.assert_allclose(expected_value_curr, result, rtol=1e-7)
                 ct += 1
@@ -144,7 +144,7 @@ def test_nans(self, input_data):
         hd_metric = HausdorffDistanceMetric(include_background=False)
         batch_seg_1 = seg_1.unsqueeze(0).unsqueeze(0)
         batch_seg_2 = seg_2.unsqueeze(0).unsqueeze(0)
-        result, not_nans = hd_metric(batch_seg_1, batch_seg_2)
+        result, not_nans = hd_metric.reduce(hd_metric(batch_seg_1, batch_seg_2))
         np.testing.assert_allclose(0, result, rtol=1e-7)
         np.testing.assert_allclose(0, not_nans, rtol=1e-7)
 
diff --git a/tests/test_surface_distance.py b/tests/test_surface_distance.py
index db90c87938..53c56531a8 100644
--- a/tests/test_surface_distance.py
+++ b/tests/test_surface_distance.py
@@ -136,7 +136,7 @@ def test_value(self, input_data, expected_value):
             batch, n_class = 2, 3
             batch_seg_1 = seg_1.unsqueeze(0).unsqueeze(0).repeat([batch, n_class, 1, 1, 1])
             batch_seg_2 = seg_2.unsqueeze(0).unsqueeze(0).repeat([batch, n_class, 1, 1, 1])
-            result, _ = sur_metric(batch_seg_1, batch_seg_2)
+            result, _ = sur_metric.reduce(sur_metric(batch_seg_1, batch_seg_2))
             expected_value_curr = expected_value[ct]
             np.testing.assert_allclose(expected_value_curr, result, rtol=1e-7)
             ct += 1
@@ -147,9 +147,10 @@ def test_nans(self, input_data):
         seg_1 = torch.tensor(seg_1)
         seg_2 = torch.tensor(seg_2)
         sur_metric = SurfaceDistanceMetric(include_background=False)
-        batch_seg_1 = seg_1.unsqueeze(0).unsqueeze(0)
-        batch_seg_2 = seg_2.unsqueeze(0).unsqueeze(0)
-        result, not_nans = sur_metric(batch_seg_1, batch_seg_2)
+        # test list of channel-first Tensor
+        batch_seg_1 = [seg_1.unsqueeze(0)]
+        batch_seg_2 = [seg_2.unsqueeze(0)]
+        result, not_nans = sur_metric.reduce(sur_metric(batch_seg_1, batch_seg_2))
         np.testing.assert_allclose(0, result, rtol=1e-7)
         np.testing.assert_allclose(0, not_nans, rtol=1e-7)
 

From 566a3cb044f6cfb2e956b62348b84b1a5232f0e8 Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Wed, 2 Jun 2021 19:30:31 +0800
Subject: [PATCH 06/22] [DLMED] add doc-strings and update unit tests

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/handlers/confusion_matrix.py       | 16 ++++-----
 monai/handlers/hausdorff_distance.py     |  6 +++-
 monai/handlers/iteration_metric.py       | 23 +++----------
 monai/handlers/mean_dice.py              |  9 ++---
 monai/handlers/regression_metrics.py     | 33 +++++++++++-------
 monai/handlers/roc_auc.py                | 19 +++++-----
 monai/handlers/surface_distance.py       |  6 +++-
 monai/metrics/confusion_matrix.py        | 13 ++++---
 monai/metrics/hausdorff_distance.py      | 12 ++++---
 monai/metrics/meandice.py                | 10 ++++--
 monai/metrics/metric.py                  | 42 ++++++++++++++++++++--
 monai/metrics/regression.py              | 15 ++++++--
 monai/metrics/rocauc.py                  | 44 +++++++++++++++++++++---
 monai/metrics/surface_distance.py        | 10 +++---
 tests/test_handler_confusion_matrix.py   |  6 ++--
 tests/test_handler_hausdorff_distance.py |  3 +-
 tests/test_handler_mean_dice.py          |  4 +--
 tests/test_handler_rocauc.py             |  2 ++
 tests/test_handler_surface_distance.py   |  3 +-
 19 files changed, 187 insertions(+), 89 deletions(-)

diff --git a/monai/handlers/confusion_matrix.py b/monai/handlers/confusion_matrix.py
index 551fd29199..f33151f832 100644
--- a/monai/handlers/confusion_matrix.py
+++ b/monai/handlers/confusion_matrix.py
@@ -9,13 +9,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Callable, Union
+from typing import Callable, Union
 
 import torch
 
 from monai.handlers.iteration_metric import IterationMetric
-from monai.metrics import ConfusionMatrixMetric, compute_confusion_matrix_metric
-from monai.metrics.utils import MetricReduction, do_metric_reduction
+from monai.metrics import ConfusionMatrixMetric
+from monai.metrics.utils import MetricReduction
 
 
 class ConfusionMatrix(IterationMetric):
@@ -30,6 +30,7 @@ def __init__(
         output_transform: Callable = lambda x: x,
         device: Union[str, torch.device] = "cpu",
         save_details: bool = True,
+        reduction: Union[MetricReduction, str] = MetricReduction.MEAN,
     ) -> None:
         """
 
@@ -47,6 +48,9 @@ def __init__(
             device: device specification in case of distributed computation usage.
             save_details: whether to save metric computation details per image, for example: TP/TN/FP/FN of every image.
                 default to True, will save to `engine.state.metric_details` dict with the metric name as key.
+            reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
+                ``"mean_channel"``, ``"sum_channel"``}
+                Define the mode to reduce computation result. Defaults to ``"mean"``.
 
         See also:
             :py:meth:`monai.metrics.confusion_matrix`
@@ -55,7 +59,7 @@ def __init__(
             include_background=include_background,
             metric_name=metric_name,
             compute_sample=False,
-            reduction=MetricReduction.NONE,
+            reduction=reduction,
         )
         self.metric_name = metric_name
         super().__init__(
@@ -64,7 +68,3 @@ def __init__(
             device=device,
             save_details=save_details,
         )
-
-    def _reduce(self, scores) -> Any:
-        confusion_matrix, _ = do_metric_reduction(scores, MetricReduction.MEAN)
-        return compute_confusion_matrix_metric(self.metric_name, confusion_matrix)
diff --git a/monai/handlers/hausdorff_distance.py b/monai/handlers/hausdorff_distance.py
index 042a587852..c806081ab5 100644
--- a/monai/handlers/hausdorff_distance.py
+++ b/monai/handlers/hausdorff_distance.py
@@ -32,6 +32,7 @@ def __init__(
         output_transform: Callable = lambda x: x,
         device: Union[str, torch.device] = "cpu",
         save_details: bool = True,
+        reduction: Union[MetricReduction, str] = MetricReduction.MEAN,
     ) -> None:
         """
 
@@ -48,6 +49,9 @@ def __init__(
             device: device specification in case of distributed computation usage.
             save_details: whether to save metric computation details per image, for example: hausdorff distance
                 of every image. default to True, will save to `engine.state.metric_details` dict with the metric name as key.
+            reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
+                ``"mean_channel"``, ``"sum_channel"``}
+                Define the mode to reduce computation result. Defaults to ``"mean"``.
 
         """
         super().__init__(output_transform, device=device)
@@ -56,7 +60,7 @@ def __init__(
             distance_metric=distance_metric,
             percentile=percentile,
             directed=directed,
-            reduction=MetricReduction.NONE,
+            reduction=reduction,
         )
         super().__init__(
             metric_fn=metric_fn,
diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py
index 434dd483ed..bb8396022c 100644
--- a/monai/handlers/iteration_metric.py
+++ b/monai/handlers/iteration_metric.py
@@ -14,8 +14,7 @@
 import torch
 
 from monai.handlers.utils import evenly_divisible_all_gather
-from monai.metrics import do_metric_reduction
-from monai.utils import MetricReduction, exact_version, optional_import
+from monai.utils import exact_version, optional_import
 
 idist, _ = optional_import("ignite", "0.4.4", exact_version, "distributed")
 Metric, _ = optional_import("ignite.metrics", "0.4.4", exact_version, "Metric")
@@ -78,19 +77,7 @@ def update(self, output: Sequence[torch.Tensor]) -> None:
 
         y_pred, y = output
 
-        def _compute(y_pred, y):
-            if isinstance(y_pred, torch.Tensor):
-                y_pred = y_pred.detach()
-            if isinstance(y, torch.Tensor):
-                y = y.detach()
-            score = self.metric_fn(y_pred, y)
-            return score[0] if isinstance(score, (tuple, list)) else score
-
-        if isinstance(y_pred, (list, tuple)) or isinstance(y, (list, tuple)):
-            # if y_pred or y is a list of channel-first data, add batch dim and compute metric, then concat the scores
-            score = torch.cat([_compute(p_.unsqueeze(0), y_.unsqueeze(0)) for p_, y_ in zip(y_pred, y)], dim=0)
-        else:
-            score = _compute(y_pred, y)
+        score = self.metric_fn(y_pred, y)
         self._scores.append(score.to(self._device))
 
     def compute(self) -> Any:
@@ -116,7 +103,8 @@ def compute(self) -> Any:
         result: torch.Tensor = torch.zeros(1)
         if idist.get_rank() == 0:
             # run compute_fn on zero rank only
-            result = self._reduce(_scores)
+            result = self.metric_fn.reduce(_scores)
+            result = result[0] if isinstance(result, (list, tuple)) else result
 
         if ws > 1:
             # broadcast result to all processes
@@ -124,9 +112,6 @@ def compute(self) -> Any:
 
         return result.item() if isinstance(result, torch.Tensor) else result
 
-    def _reduce(self, scores) -> Any:
-        return do_metric_reduction(scores, MetricReduction.MEAN)[0]
-
     def attach(self, engine: Engine, name: str) -> None:
         """
         Attaches current metric to provided engine. On the end of engine's run,
diff --git a/monai/handlers/mean_dice.py b/monai/handlers/mean_dice.py
index 6d51c534cf..f11f0b729f 100644
--- a/monai/handlers/mean_dice.py
+++ b/monai/handlers/mean_dice.py
@@ -29,6 +29,7 @@ def __init__(
         output_transform: Callable = lambda x: x,
         device: Union[str, torch.device] = "cpu",
         save_details: bool = True,
+        reduction: Union[MetricReduction, str] = MetricReduction.MEAN,
     ) -> None:
         """
 
@@ -39,14 +40,14 @@ def __init__(
             device: device specification in case of distributed computation usage.
             save_details: whether to save metric computation details per image, for example: mean dice of every image.
                 default to True, will save to `engine.state.metric_details` dict with the metric name as key.
+            reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
+                ``"mean_channel"``, ``"sum_channel"``}
+                Define the mode to reduce computation result. Defaults to ``"mean"``.
 
         See also:
             :py:meth:`monai.metrics.meandice.compute_meandice`
         """
-        metric_fn = DiceMetric(
-            include_background=include_background,
-            reduction=MetricReduction.NONE,
-        )
+        metric_fn = DiceMetric(include_background=include_background, reduction=reduction)
         super().__init__(
             metric_fn=metric_fn,
             output_transform=output_transform,
diff --git a/monai/handlers/regression_metrics.py b/monai/handlers/regression_metrics.py
index 2320203ff6..3129f2eb59 100644
--- a/monai/handlers/regression_metrics.py
+++ b/monai/handlers/regression_metrics.py
@@ -28,6 +28,7 @@ def __init__(
         output_transform: Callable = lambda x: x,
         device: Union[str, torch.device] = "cpu",
         save_details: bool = True,
+        reduction: Union[MetricReduction, str] = MetricReduction.MEAN,
     ) -> None:
         """
 
@@ -36,13 +37,14 @@ def __init__(
             device: device specification in case of distributed computation usage.
             save_details: whether to save metric computation details per image, for example: mean squared error of every image.
                 default to True, will save to `engine.state.metric_details` dict with the metric name as key.
+            reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
+                ``"mean_channel"``, ``"sum_channel"``}
+                Define the mode to reduce computation result. Defaults to ``"mean"``.
 
         See also:
             :py:class:`monai.metrics.MSEMetric`
         """
-        metric_fn = MSEMetric(
-            reduction=MetricReduction.NONE,
-        )
+        metric_fn = MSEMetric(reduction=reduction)
         super().__init__(
             metric_fn=metric_fn,
             output_transform=output_transform,
@@ -61,6 +63,7 @@ def __init__(
         output_transform: Callable = lambda x: x,
         device: Union[str, torch.device] = "cpu",
         save_details: bool = True,
+        reduction: Union[MetricReduction, str] = MetricReduction.MEAN,
     ) -> None:
         """
 
@@ -69,13 +72,14 @@ def __init__(
             device: device specification in case of distributed computation usage.
             save_details: whether to save metric computation details per image, for example: mean absolute error of every image.
                 default to True, will save to `engine.state.metric_details` dict with the metric name as key.
+            reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
+                ``"mean_channel"``, ``"sum_channel"``}
+                Define the mode to reduce computation result. Defaults to ``"mean"``.
 
         See also:
             :py:class:`monai.metrics.MAEMetric`
         """
-        metric_fn = MAEMetric(
-            reduction=MetricReduction.NONE,
-        )
+        metric_fn = MAEMetric(reduction=reduction)
         super().__init__(
             metric_fn=metric_fn,
             output_transform=output_transform,
@@ -94,6 +98,7 @@ def __init__(
         output_transform: Callable = lambda x: x,
         device: Union[str, torch.device] = "cpu",
         save_details: bool = True,
+        reduction: Union[MetricReduction, str] = MetricReduction.MEAN,
     ) -> None:
         """
 
@@ -102,13 +107,14 @@ def __init__(
             device: device specification in case of distributed computation usage.
             save_details: whether to save metric computation details per image, for example: root mean squared error of every image.
                 default to True, will save to `engine.state.metric_details` dict with the metric name as key.
+            reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
+                ``"mean_channel"``, ``"sum_channel"``}
+                Define the mode to reduce computation result. Defaults to ``"mean"``.
 
         See also:
             :py:class:`monai.metrics.RMSEMetric`
         """
-        metric_fn = RMSEMetric(
-            reduction=MetricReduction.NONE,
-        )
+        metric_fn = RMSEMetric(reduction=reduction)
         super().__init__(
             metric_fn=metric_fn,
             output_transform=output_transform,
@@ -128,6 +134,7 @@ def __init__(
         output_transform: Callable = lambda x: x,
         device: Union[str, torch.device] = "cpu",
         save_details: bool = True,
+        reduction: Union[MetricReduction, str] = MetricReduction.MEAN,
     ) -> None:
         """
 
@@ -138,14 +145,14 @@ def __init__(
             device: device specification in case of distributed computation usage.
             save_details: whether to save metric computation details per image, for example: PSNR of every image.
                 default to True, will save to `engine.state.metric_details` dict with the metric name as key.
+            reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
+                ``"mean_channel"``, ``"sum_channel"``}
+                Define the mode to reduce computation result. Defaults to ``"mean"``.
 
         See also:
             :py:class:`monai.metrics.PSNRMetric`
         """
-        metric_fn = PSNRMetric(
-            max_val=max_val,
-            reduction=MetricReduction.NONE,
-        )
+        metric_fn = PSNRMetric(max_val=max_val, reduction=reduction)
         super().__init__(
             metric_fn=metric_fn,
             output_transform=output_transform,
diff --git a/monai/handlers/roc_auc.py b/monai/handlers/roc_auc.py
index 8011dab8db..86eb2fe213 100644
--- a/monai/handlers/roc_auc.py
+++ b/monai/handlers/roc_auc.py
@@ -9,16 +9,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Callable, Union
+from typing import Any, Callable, Tuple, Union
 
 import torch
 
 from monai.handlers.utils import evenly_divisible_all_gather
-from monai.metrics import compute_roc_auc
+from monai.metrics import ROCAUCMetric
 from monai.utils import Average, exact_version, optional_import
 
 idist, _ = optional_import("ignite", "0.4.4", exact_version, "distributed")
 EpochMetric, _ = optional_import("ignite.metrics", "0.4.4", exact_version, "EpochMetric")
+reinit__is_reduced, _ = optional_import("ignite.metrics.metric", "0.4.4", exact_version, "reinit__is_reduced")
 
 
 class ROCAUC(EpochMetric):  # type: ignore[valid-type, misc]  # due to optional_import
@@ -56,21 +57,19 @@ def __init__(
         output_transform: Callable = lambda x: x,
         device: Union[str, torch.device] = "cpu",
     ) -> None:
-        def _compute_fn(pred, label):
-            return compute_roc_auc(
-                y_pred=pred,
-                y=label,
-                average=Average(average),
-            )
-
+        self.metric = ROCAUCMetric(average=Average(average))
         self._is_reduced: bool = False
         super().__init__(
-            compute_fn=_compute_fn,
+            compute_fn=lambda p, y: self.metric.reduce(data=(p, y)),
             output_transform=output_transform,
             check_compute_fn=False,
             device=device,
         )
 
+    @reinit__is_reduced
+    def update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None:
+        super().update(output=self.metric(output[0], output[1]))
+
     def compute(self) -> Any:
         _prediction_tensor = torch.cat(self._predictions, dim=0)
         _target_tensor = torch.cat(self._targets, dim=0)
diff --git a/monai/handlers/surface_distance.py b/monai/handlers/surface_distance.py
index 7c2322354a..730507adc8 100644
--- a/monai/handlers/surface_distance.py
+++ b/monai/handlers/surface_distance.py
@@ -31,6 +31,7 @@ def __init__(
         output_transform: Callable = lambda x: x,
         device: Union[str, torch.device] = "cpu",
         save_details: bool = True,
+        reduction: Union[MetricReduction, str] = MetricReduction.MEAN,
     ) -> None:
         """
 
@@ -45,13 +46,16 @@ def __init__(
             device: device specification in case of distributed computation usage.
             save_details: whether to save metric computation details per image, for example: surface dice
                 of every image. default to True, will save to `engine.state.metric_details` dict with the metric name as key.
+            reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
+                ``"mean_channel"``, ``"sum_channel"``}
+                Define the mode to reduce computation result. Defaults to ``"mean"``.
 
         """
         metric_fn = SurfaceDistanceMetric(
             include_background=include_background,
             symmetric=symmetric,
             distance_metric=distance_metric,
-            reduction=MetricReduction.NONE,
+            reduction=reduction,
         )
         super().__init__(
             metric_fn=metric_fn,
diff --git a/monai/metrics/confusion_matrix.py b/monai/metrics/confusion_matrix.py
index d7769cc739..f8f11fbd49 100644
--- a/monai/metrics/confusion_matrix.py
+++ b/monai/metrics/confusion_matrix.py
@@ -14,7 +14,6 @@
 
 import torch
 
-from monai.config import TensorList
 from monai.metrics.utils import do_metric_reduction, ignore_background
 from monai.utils import ensure_tuple, MetricReduction
 from .metric import Metric
@@ -49,8 +48,6 @@ class ConfusionMatrixMetric(Metric):
             compute reduction first, defaults to ``False``.
         reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
             ``"mean_channel"``, ``"sum_channel"``}
-            Define the mode to reduce computation result of 1 batch data. Reduction will only be employed when
-            ``compute_sample`` is ``True``. Defaults to ``"mean"``.
 
     """
 
@@ -98,8 +95,14 @@ def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
             include_background=self.include_background,
         )
 
-    def reduce(self, data: TensorList):
-        data = torch.cat(data, dim=0) if isinstance(data, list) else data
+    def reduce(self, data: torch.Tensor):
+        """
+        Execute reduction for the confusion matrix values. The `data` is usually a Tensor of shape [BC4],
+        where B is the batch size and C is the number of classes that need to be computed.
+        The third dimension holds the number of true positive, false positive, true negative
+        and false negative values for each channel of each sample within
+        the input batch.
+        """
         results = []
         for metric_name in self.metric_name:
             if self.compute_sample:           
diff --git a/monai/metrics/hausdorff_distance.py b/monai/metrics/hausdorff_distance.py
index c80aa8db19..46c67a6fda 100644
--- a/monai/metrics/hausdorff_distance.py
+++ b/monai/metrics/hausdorff_distance.py
@@ -15,7 +15,6 @@
 import numpy as np
 import torch
 
-from monai.config import TensorList
 from monai.metrics.utils import do_metric_reduction, get_mask_edges, get_surface_distance, ignore_background
 from monai.utils import MetricReduction
 from .metric import Metric
@@ -31,7 +30,7 @@ class HausdorffDistanceMetric(Metric):
     Input `y_pred` (BNHW[D] where N is number of classes) is compared with ground truth `y` (BNHW[D]).
     `y_preds` is expected to have binarized predictions and `y` should be in one-hot format.
     You can use suitable transforms in ``monai.transforms.post`` first to achieve binarized values.
-
+    `y_preds` and `y` can also be a list of Tensor with shape: [CHW[D]].
     The implementation refers to `DeepMind's implementation <https://github.com/deepmind/surface-distance>`_.
 
     Args:
@@ -45,7 +44,7 @@ class HausdorffDistanceMetric(Metric):
         directed: whether to calculate directed Hausdorff distance. Defaults to ``False``.
         reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
             ``"mean_channel"``, ``"sum_channel"``}
-            Define the mode to reduce computation result of 1 batch data. Defaults to ``"mean"``.
+            Define the mode to reduce computation result. Defaults to ``"mean"``.
 
     """
 
@@ -94,8 +93,11 @@ def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
             directed=self.directed,
         )
 
-    def reduce(self, data: TensorList):
-        data = torch.cat(data, dim=0) if isinstance(data, list) else data
+    def reduce(self, data: torch.Tensor):
+        """
+        Execute reduction logic for the output of `compute_hausdorff_distance`.
+
+        """
         # do metric reduction
         f, not_nans = do_metric_reduction(data, self.reduction)
         return f, not_nans
diff --git a/monai/metrics/meandice.py b/monai/metrics/meandice.py
index 0b6f67f3dd..8ccfd3b490 100644
--- a/monai/metrics/meandice.py
+++ b/monai/metrics/meandice.py
@@ -14,7 +14,6 @@
 
 import torch
 
-from monai.config import TensorList
 from monai.metrics.utils import do_metric_reduction, ignore_background
 from monai.utils import MetricReduction
 from .metric import Metric
@@ -30,13 +29,14 @@ class DiceMetric(Metric):
     the first category (channel index 0) which is by convention assumed to be background. If the non-background
     segmentations are small compared to the total image size they can get overwhelmed by the signal from the
     background so excluding it in such cases helps convergence.
+    `y_preds` and `y` can also be a list of Tensor with shape: [CHW[D]].
 
     Args:
         include_background: whether to skip Dice computation on the first channel of
             the predicted output. Defaults to ``True``.
         reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
             ``"mean_channel"``, ``"sum_channel"``}
-            Define the mode to reduce computation result of 1 batch data. Defaults to ``"mean"``.
+            Define the mode to reduce computation result. Defaults to ``"mean"``.
 
     """
 
@@ -76,7 +76,11 @@ def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
             include_background=self.include_background,
         )
 
-    def reduce(self, data: TensorList):
+    def reduce(self, data: torch.Tensor):
+        """
+        Execute reduction logic for the output of `compute_meandice`.
+
+        """
         data = torch.cat(data, dim=0) if isinstance(data, list) else data
         # do metric reduction
         f, not_nans = do_metric_reduction(data, self.reduction)
diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index 38dfd63de5..31c19bb0bb 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -18,21 +18,57 @@
 
 
 class Metric(ABC):
+    """
+    Base class of Metrics interface.
+    `__call__` is supposed to compute independent logic for several samples of `y_pred` and `y`(optional).
+    Usually, a subclass only needs to implement the `_apply` function for the computation process.
+    And `reduce` is supposed to execute reduction for the final result, it can be used for 1 batch data
+    or for the accumulated overall data.
+
+    """
     def __call__(self, y_pred: TensorList, y: Optional[TensorList] = None):
+        """
+        Execute basic computation for model prediction and ground truth.
+        It can support both `list of channel-first Tensor` and `batch-first Tensor`.
+        And users can execute on every batch of data, then accumulate the results, or
+        accumulate the original `y_pred` and `y`, then execute on the accumulated data.
+
+        Args:
+            y_pred: the model prediction data to compute, must be a list of `channel-first` Tensor
+                or a `batch-first` Tensor.
+            y: the ground truth to compute, must be a list of `channel-first` Tensor
+                or a `batch-first` Tensor.
+
+        """
         if isinstance(y_pred, (list, tuple)) or isinstance(y, (list, tuple)):
             # if y_pred or y is a list of channel-first data, add batch dim and compute metric
             if y is not None:
-                ret = [self._apply(p_.unsqueeze(0), y_.unsqueeze(0)) for p_, y_ in zip(y_pred, y)]
+                ret = [self._apply(p_.detach().unsqueeze(0), y_.detach().unsqueeze(0)) for p_, y_ in zip(y_pred, y)]
             else:
-                ret = [self._apply(p_.unsqueeze(0), None) for p_ in y_pred]
+                ret = [self._apply(p_.detach().unsqueeze(0), None) for p_ in y_pred]
+            # concat the list of results
+            if isinstance(ret[0], torch.Tensor):
+                ret = torch.cat(ret, dim=0)
+            elif isinstance(ret[0], (list, tuple)) and all([isinstance(i, torch.Tensor) for i in ret[0]]):
+                # if _apply() returned not only 1 Tensor, concat them separately
+                ret = [torch.cat([k[i] for k in ret], dim=0) for i in range(len(ret[0]))]
         else:
-            ret = self._apply(y_pred, y)
+            ret = self._apply(y_pred.detach(), y.detach())
         return ret
 
     @abstractmethod
     def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
+        """
+        Actual computation logic of the metric, input data should be `batch-first` Tensor.
+
+        """
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
 
     @abstractmethod
     def reduce(self, data: Any):
+        """
+        Execute reduction operation for the metric results. Users can call it for the batch data of every iteration
+        or accumulate the results of every iteration and call it for the final output.
+
+        """
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
diff --git a/monai/metrics/regression.py b/monai/metrics/regression.py
index cd6564a960..2bd3271699 100644
--- a/monai/metrics/regression.py
+++ b/monai/metrics/regression.py
@@ -16,18 +16,29 @@
 
 import torch
 
-from monai.config import TensorList
 from monai.metrics.utils import do_metric_reduction
 from monai.utils import MetricReduction
 from .metric import Metric
 
 
 class RegressionMetric(Metric):
+    """
+    Base class for regression metrics.
+    Input `y_pred` (BCHW[D] where C is number of channels) is compared with ground truth `y` (BCHW[D]).
+    Both `y_pred` and `y` are expected to be real-valued, where `y_pred` is output from a regression model.
+    `y_pred` and `y` can also be a list of Tensor with shape: [CHW[D]].
+
+    Args:
+        reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
+            ``"mean_channel"``, ``"sum_channel"``}
+            Define the mode to reduce computation result. Defaults to ``"mean"``.
+
+    """
     def __init__(self, reduction: Union[MetricReduction, str] = MetricReduction.MEAN) -> None:
         super().__init__()
         self.reduction = reduction
 
-    def reduce(self, data: TensorList):
+    def reduce(self, data: torch.Tensor):
         data = torch.cat(data, dim=0) if isinstance(data, list) else data
         return do_metric_reduction(data, self.reduction)
 
diff --git a/monai/metrics/rocauc.py b/monai/metrics/rocauc.py
index 76fffbd9dd..493227bef4 100644
--- a/monai/metrics/rocauc.py
+++ b/monai/metrics/rocauc.py
@@ -14,12 +14,31 @@
 import numpy as np
 import torch
 
-from monai.config import TensorList
 from monai.utils import Average
 from .metric import Metric
 
 
 class ROCAUCMetric(Metric):
+    """
+    Computes Area Under the Receiver Operating Characteristic Curve (ROC AUC). Referring to:
+    `sklearn.metrics.roc_auc_score <https://scikit-learn.org/stable/modules/generated/
+    sklearn.metrics.roc_auc_score.html#sklearn.metrics.roc_auc_score>`_.
+    The input `y_pred` and `y` can be a list of `channel-first` Tensor or a `batch-first` Tensor.
+
+    Args:
+        average: {``"macro"``, ``"weighted"``, ``"micro"``, ``"none"``}
+            Type of averaging performed if not binary classification.
+            Defaults to ``"macro"``.
+
+            - ``"macro"``: calculate metrics for each label, and find their unweighted mean.
+                This does not take label imbalance into account.
+            - ``"weighted"``: calculate metrics for each label, and find their average,
+                weighted by support (the number of true instances for each label).
+            - ``"micro"``: calculate metrics globally by considering each element of the label
+                indicator matrix as a label.
+            - ``"none"``: the scores for each class are returned.
+
+    """
     def __init__(self, average: Union[Average, str] = Average.MACRO) -> None:
         super().__init__()
         self.average = average
@@ -27,10 +46,27 @@ def __init__(self, average: Union[Average, str] = Average.MACRO) -> None:
     def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
         return y_pred, y
 
-    def reduce(self, data: Tuple[TensorList, TensorList]):
+    def reduce(self, data: Tuple[torch.Tensor, torch.Tensor]):
+        """
+        As the AUC metric needs to execute on the overall data, users usually accumulate `y_pred` and `y`
+        of every iteration, then execute the real computation and reduction on the accumulated data.
+        For example::
+
+            y_pred = []
+            y = []
+            metric = ROCAUCMetric(average=Average.MACRO)
+
+            for batch in dataloader:
+                image, label = batch
+                pred = model(image)
+                pred_, y_ = metric(pred, label)
+                y.append(y_)
+                y_pred.append(pred_)
+
+            result = metric.reduce((torch.cat(y_pred, dim=0), torch.cat(y, dim=0)))
+
+        """
         y_pred, y = data
-        y_pred = torch.cat(y_pred, dim=0) if isinstance(y_pred, list) else y_pred
-        y = torch.cat(y, dim=0) if isinstance(y, list) else y
         # compute final value and do metric reduction
         return compute_roc_auc(y_pred=y_pred, y=y, average=self.average)
 
diff --git a/monai/metrics/surface_distance.py b/monai/metrics/surface_distance.py
index d9d37681b5..aed50191f7 100644
--- a/monai/metrics/surface_distance.py
+++ b/monai/metrics/surface_distance.py
@@ -15,7 +15,6 @@
 import numpy as np
 import torch
 
-from monai.config import TensorList
 from monai.metrics.utils import do_metric_reduction, get_mask_edges, get_surface_distance, ignore_background
 from monai.utils import MetricReduction
 from .metric import Metric
@@ -38,7 +37,7 @@ class SurfaceDistanceMetric(Metric):
             the metric used to compute surface distance. Defaults to ``"euclidean"``.
         reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
             ``"mean_channel"``, ``"sum_channel"``}
-            Define the mode to reduce computation result of 1 batch data. Defaults to ``"mean"``.
+            Define the mode to reduce computation result. Defaults to ``"mean"``.
 
     """
 
@@ -84,8 +83,11 @@ def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
             distance_metric=self.distance_metric,
         )
 
-    def reduce(self, data: TensorList):
-        data = torch.cat(data, dim=0) if isinstance(data, list) else data
+    def reduce(self, data: torch.Tensor):
+        """
+        Execute reduction logic for the output of `compute_average_surface_distance`.
+
+        """
         # do metric reduction
         f, not_nans = do_metric_reduction(data, self.reduction)
         return f, not_nans
diff --git a/tests/test_handler_confusion_matrix.py b/tests/test_handler_confusion_matrix.py
index 0524676763..0c6e36066b 100644
--- a/tests/test_handler_confusion_matrix.py
+++ b/tests/test_handler_confusion_matrix.py
@@ -58,9 +58,9 @@ class TestHandlerConfusionMatrix(unittest.TestCase):
     @parameterized.expand([TEST_CASE_1, TEST_CASE_2])
     def test_compute(self, input_params, expected_avg):
         metric = ConfusionMatrix(**input_params)
-
-        y_pred = torch.Tensor([[[0], [1]], [[1], [0]]])
-        y = torch.Tensor([[[0], [1]], [[0], [1]]])
+        # test input a list of channel-first tensor
+        y_pred = [torch.Tensor([[0], [1]]), torch.Tensor([[1], [0]])]
+        y = [torch.Tensor([[0], [1]]), torch.Tensor([[0], [1]])]
         metric.update([y_pred, y])
 
         y_pred = torch.Tensor([[[0], [1]], [[1], [0]]])
diff --git a/tests/test_handler_hausdorff_distance.py b/tests/test_handler_hausdorff_distance.py
index c0d2e723ca..bbc36cc2b5 100644
--- a/tests/test_handler_hausdorff_distance.py
+++ b/tests/test_handler_hausdorff_distance.py
@@ -49,7 +49,8 @@ def create_spherical_seg_3d(
 
 
 sampler_sphere = torch.Tensor(create_spherical_seg_3d(radius=20, centre=(20, 20, 20))).unsqueeze(0).unsqueeze(0)
-sampler_sphere_gt = torch.Tensor(create_spherical_seg_3d(radius=20, centre=(10, 20, 20))).unsqueeze(0).unsqueeze(0)
+# test input a list of channel-first tensor
+sampler_sphere_gt = [torch.Tensor(create_spherical_seg_3d(radius=20, centre=(10, 20, 20))).unsqueeze(0)]
 sampler_sphere_zeros = torch.zeros_like(sampler_sphere)
 
 TEST_SAMPLE_1 = [sampler_sphere, sampler_sphere_gt]
diff --git a/tests/test_handler_mean_dice.py b/tests/test_handler_mean_dice.py
index 6b4bea594e..57c8cf4722 100644
--- a/tests/test_handler_mean_dice.py
+++ b/tests/test_handler_mean_dice.py
@@ -34,8 +34,8 @@ def _val_func(engine, batch):
 
         engine = Engine(_val_func)
         dice_metric.attach(engine=engine, name="mean_dice")
-
-        y_pred = torch.Tensor([[[0], [1]], [[1], [0]]])
+        # test input a list of channel-first tensor
+        y_pred = [torch.Tensor([[0], [1]]), torch.Tensor([[1], [0]])]
         y = torch.Tensor([[[0], [1]], [[0], [1]]])
         dice_metric.update([y_pred, y])
 
diff --git a/tests/test_handler_rocauc.py b/tests/test_handler_rocauc.py
index 04e4d3edb3..196079d032 100644
--- a/tests/test_handler_rocauc.py
+++ b/tests/test_handler_rocauc.py
@@ -34,6 +34,8 @@ def test_compute(self):
         y = torch.Tensor([[0], [1]])
         y_pred = act(y_pred)
         y = to_onehot(y)
+        # test a list of channel-first tensors
+        y_pred, y = [i for i in y_pred], [i for i in y]
         auc_metric.update([y_pred, y])
 
         auc = auc_metric.compute()
diff --git a/tests/test_handler_surface_distance.py b/tests/test_handler_surface_distance.py
index fbd86edb03..82cdb50d90 100644
--- a/tests/test_handler_surface_distance.py
+++ b/tests/test_handler_surface_distance.py
@@ -49,7 +49,8 @@ def create_spherical_seg_3d(
 
 
 sampler_sphere = torch.Tensor(create_spherical_seg_3d(radius=20, centre=(20, 20, 20))).unsqueeze(0).unsqueeze(0)
-sampler_sphere_gt = torch.Tensor(create_spherical_seg_3d(radius=20, centre=(10, 20, 20))).unsqueeze(0).unsqueeze(0)
+# test input a list of channel-first tensor
+sampler_sphere_gt = [torch.Tensor(create_spherical_seg_3d(radius=20, centre=(10, 20, 20))).unsqueeze(0)]
 sampler_sphere_zeros = torch.zeros_like(sampler_sphere)
 
 TEST_SAMPLE_1 = [sampler_sphere, sampler_sphere_gt]

From 0b35531a31ee6349aad6759dc07f903e1cbc84c4 Mon Sep 17 00:00:00 2001
From: monai-bot <monai.miccai2019@gmail.com>
Date: Wed, 2 Jun 2021 11:34:16 +0000
Subject: [PATCH 07/22] [MONAI] python code formatting

Signed-off-by: monai-bot <monai.miccai2019@gmail.com>
---
 monai/metrics/confusion_matrix.py      | 5 +++--
 monai/metrics/hausdorff_distance.py    | 1 +
 monai/metrics/meandice.py              | 1 +
 monai/metrics/metric.py                | 3 ++-
 monai/metrics/regression.py            | 3 +++
 monai/metrics/rocauc.py                | 2 ++
 monai/metrics/surface_distance.py      | 1 +
 tests/test_compute_confusion_matrix.py | 7 ++++++-
 8 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/monai/metrics/confusion_matrix.py b/monai/metrics/confusion_matrix.py
index f8f11fbd49..084879b324 100644
--- a/monai/metrics/confusion_matrix.py
+++ b/monai/metrics/confusion_matrix.py
@@ -15,7 +15,8 @@
 import torch
 
 from monai.metrics.utils import do_metric_reduction, ignore_background
-from monai.utils import ensure_tuple, MetricReduction
+from monai.utils import MetricReduction, ensure_tuple
+
 from .metric import Metric
 
 
@@ -105,7 +106,7 @@ def reduce(self, data: torch.Tensor):
         """
         results = []
         for metric_name in self.metric_name:
-            if self.compute_sample:           
+            if self.compute_sample:
                 sub_confusion_matrix = compute_confusion_matrix_metric(metric_name, data)
                 f, not_nans = do_metric_reduction(sub_confusion_matrix, self.reduction)
             else:
diff --git a/monai/metrics/hausdorff_distance.py b/monai/metrics/hausdorff_distance.py
index 46c67a6fda..643f13b9d8 100644
--- a/monai/metrics/hausdorff_distance.py
+++ b/monai/metrics/hausdorff_distance.py
@@ -17,6 +17,7 @@
 
 from monai.metrics.utils import do_metric_reduction, get_mask_edges, get_surface_distance, ignore_background
 from monai.utils import MetricReduction
+
 from .metric import Metric
 
 __all__ = ["HausdorffDistanceMetric", "compute_hausdorff_distance", "compute_percent_hausdorff_distance"]
diff --git a/monai/metrics/meandice.py b/monai/metrics/meandice.py
index 8ccfd3b490..7e73b3d332 100644
--- a/monai/metrics/meandice.py
+++ b/monai/metrics/meandice.py
@@ -16,6 +16,7 @@
 
 from monai.metrics.utils import do_metric_reduction, ignore_background
 from monai.utils import MetricReduction
+
 from .metric import Metric
 
 
diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index 31c19bb0bb..a09c8827ad 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -9,8 +9,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Optional
 from abc import ABC, abstractmethod
+from typing import Any, Optional
 
 import torch
 
@@ -26,6 +26,7 @@ class Metric(ABC):
     or for the accumulated overall data.
 
     """
+
     def __call__(self, y_pred: TensorList, y: Optional[TensorList] = None):
         """
         Execute basic computation for model prediction and ground truth.
diff --git a/monai/metrics/regression.py b/monai/metrics/regression.py
index 2bd3271699..f7c22c4573 100644
--- a/monai/metrics/regression.py
+++ b/monai/metrics/regression.py
@@ -18,6 +18,7 @@
 
 from monai.metrics.utils import do_metric_reduction
 from monai.utils import MetricReduction
+
 from .metric import Metric
 
 
@@ -34,6 +35,7 @@ class RegressionMetric(Metric):
             Define the mode to reduce computation result. Defaults to ``"mean"``.
 
     """
+
     def __init__(self, reduction: Union[MetricReduction, str] = MetricReduction.MEAN) -> None:
         super().__init__()
         self.reduction = reduction
@@ -60,6 +62,7 @@ def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
         self._check_shape(y_pred, y)
         return self._compute_metric(y_pred, y)
 
+
 class MSEMetric(RegressionMetric):
     r"""Compute Mean Squared Error between two tensors using function:
 
diff --git a/monai/metrics/rocauc.py b/monai/metrics/rocauc.py
index 493227bef4..3c81a0b21b 100644
--- a/monai/metrics/rocauc.py
+++ b/monai/metrics/rocauc.py
@@ -15,6 +15,7 @@
 import torch
 
 from monai.utils import Average
+
 from .metric import Metric
 
 
@@ -39,6 +40,7 @@ class ROCAUCMetric(Metric):
             - ``"none"``: the scores for each class are returned.
 
     """
+
     def __init__(self, average: Union[Average, str] = Average.MACRO) -> None:
         super().__init__()
         self.average = average
diff --git a/monai/metrics/surface_distance.py b/monai/metrics/surface_distance.py
index aed50191f7..51a82a29c7 100644
--- a/monai/metrics/surface_distance.py
+++ b/monai/metrics/surface_distance.py
@@ -17,6 +17,7 @@
 
 from monai.metrics.utils import do_metric_reduction, get_mask_edges, get_surface_distance, ignore_background
 from monai.utils import MetricReduction
+
 from .metric import Metric
 
 
diff --git a/tests/test_compute_confusion_matrix.py b/tests/test_compute_confusion_matrix.py
index 174c48baea..ca07685d8d 100644
--- a/tests/test_compute_confusion_matrix.py
+++ b/tests/test_compute_confusion_matrix.py
@@ -16,7 +16,12 @@
 import torch
 from parameterized import parameterized
 
-from monai.metrics import ConfusionMatrixMetric, get_confusion_matrix, do_metric_reduction, compute_confusion_matrix_metric
+from monai.metrics import (
+    ConfusionMatrixMetric,
+    compute_confusion_matrix_metric,
+    do_metric_reduction,
+    get_confusion_matrix,
+)
 
 # input data
 data: Dict[Any, Any] = {

From 29b87e0576210fb7799ad2d249bece7891cc779a Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Wed, 2 Jun 2021 21:00:01 +0800
Subject: [PATCH 08/22] [DLMED] fix flake8 issue

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 tests/test_handler_rocauc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_handler_rocauc.py b/tests/test_handler_rocauc.py
index 196079d032..36bb499cba 100644
--- a/tests/test_handler_rocauc.py
+++ b/tests/test_handler_rocauc.py
@@ -35,7 +35,7 @@ def test_compute(self):
         y_pred = act(y_pred)
         y = to_onehot(y)
         # test a list of channel-first tensors
-        y_pred, y = [i for i in y_pred], [i for i in y]
+        y_pred, y = list(y_pred), list(y)
         auc_metric.update([y_pred, y])
 
         auc = auc_metric.compute()

From 9f9139cf0e6b52de191b301b4d350cb2af2bf522 Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Wed, 2 Jun 2021 22:04:02 +0800
Subject: [PATCH 09/22] [DLMED] fix pytype issue

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/metrics/metric.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index a09c8827ad..3cdb2c2f7a 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -54,7 +54,8 @@ def __call__(self, y_pred: TensorList, y: Optional[TensorList] = None):
                 # if _apply() returned not only 1 Tensor, concat them separately
                 ret = [torch.cat([k[i] for k in ret], dim=0) for i in range(len(ret[0]))]
         else:
-            ret = self._apply(y_pred.detach(), y.detach())
+            y = y.detach() if y is not None else y
+            ret = self._apply(y_pred.detach(), y)
         return ret
 
     @abstractmethod

From f07413f461fa35e891c288bc1007d295ad1ccb9e Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Wed, 2 Jun 2021 23:46:50 +0800
Subject: [PATCH 10/22] [DLMED] fix all the mypy issues

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/handlers/hausdorff_distance.py |  1 -
 monai/handlers/iteration_metric.py   |  7 ++++---
 monai/metrics/confusion_matrix.py    |  8 +++++---
 monai/metrics/hausdorff_distance.py  |  6 ++++--
 monai/metrics/meandice.py            |  9 +++++----
 monai/metrics/metric.py              | 26 ++++++++++++++++----------
 monai/metrics/regression.py          |  7 ++++---
 monai/metrics/rocauc.py              |  4 ++--
 monai/metrics/surface_distance.py    |  8 +++++---
 9 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/monai/handlers/hausdorff_distance.py b/monai/handlers/hausdorff_distance.py
index c806081ab5..713e1c8d3a 100644
--- a/monai/handlers/hausdorff_distance.py
+++ b/monai/handlers/hausdorff_distance.py
@@ -54,7 +54,6 @@ def __init__(
                 Define the mode to reduce computation result. Defaults to ``"mean"``.
 
         """
-        super().__init__(output_transform, device=device)
         metric_fn = HausdorffDistanceMetric(
             include_background=include_background,
             distance_metric=distance_metric,
diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py
index bb8396022c..6b9106f48a 100644
--- a/monai/handlers/iteration_metric.py
+++ b/monai/handlers/iteration_metric.py
@@ -14,10 +14,11 @@
 import torch
 
 from monai.handlers.utils import evenly_divisible_all_gather
+from monai.metrics import Metric
 from monai.utils import exact_version, optional_import
 
 idist, _ = optional_import("ignite", "0.4.4", exact_version, "distributed")
-Metric, _ = optional_import("ignite.metrics", "0.4.4", exact_version, "Metric")
+igniteMetric, _ = optional_import("ignite.metrics", "0.4.4", exact_version, "Metric")
 reinit__is_reduced, _ = optional_import("ignite.metrics.metric", "0.4.4", exact_version, "reinit__is_reduced")
 if TYPE_CHECKING:
     from ignite.engine import Engine
@@ -25,7 +26,7 @@
     Engine, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Engine")
 
 
-class IterationMetric(Metric):  # type: ignore[valid-type, misc] # due to optional_import
+class IterationMetric(igniteMetric):  # type: ignore[valid-type, misc] # due to optional_import
     """
     Class for metrics that should be computed on every iteration and compute final results when epoch completed.
     Similar to the `EpochMetric` in ignite:
@@ -45,7 +46,7 @@ class IterationMetric(Metric):  # type: ignore[valid-type, misc] # due to option
 
     def __init__(
         self,
-        metric_fn: Callable,
+        metric_fn: Metric,
         output_transform: Callable = lambda x: x,
         device: Union[str, torch.device] = "cpu",
         save_details: bool = True,
diff --git a/monai/metrics/confusion_matrix.py b/monai/metrics/confusion_matrix.py
index 084879b324..aa127a49ac 100644
--- a/monai/metrics/confusion_matrix.py
+++ b/monai/metrics/confusion_matrix.py
@@ -10,7 +10,7 @@
 # limitations under the License.
 
 import warnings
-from typing import Sequence, Union
+from typing import Optional, Sequence, Union
 
 import torch
 
@@ -65,7 +65,7 @@ def __init__(
         self.compute_sample = compute_sample
         self.reduction = reduction
 
-    def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
+    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         """
         Args:
             y_pred: input data to compute. It must be one-hot format and first dim is batch.
@@ -76,9 +76,11 @@ def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
             ValueError: when `y` is not a binarized tensor.
             ValueError: when `y_pred` has less than two dimensions.
         """
+        if not isinstance(y_pred, torch.Tensor) or not isinstance(y, torch.Tensor):
+            raise ValueError("y_pred and y must be PyTorch Tensor.")
         # check binarized input
         if not torch.all(y_pred.byte() == y_pred):
-            warnings.warn("y_pred is not a binarized tensor here!")
+            warnings.warn("y_pred should be a binarized tensor.")
         if not torch.all(y.byte() == y):
             raise ValueError("y should be a binarized tensor.")
         # check dimension
diff --git a/monai/metrics/hausdorff_distance.py b/monai/metrics/hausdorff_distance.py
index 643f13b9d8..ce6f71109f 100644
--- a/monai/metrics/hausdorff_distance.py
+++ b/monai/metrics/hausdorff_distance.py
@@ -64,7 +64,7 @@ def __init__(
         self.directed = directed
         self.reduction = reduction
 
-    def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
+    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         """
         Args:
             y_pred: input data to compute, typical segmentation model output.
@@ -77,8 +77,10 @@ def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
             ValueError: when `y` is not a binarized tensor.
             ValueError: when `y_pred` has less than three dimensions.
         """
+        if not isinstance(y_pred, torch.Tensor) or not isinstance(y, torch.Tensor):
+            raise ValueError("y_pred and y must be PyTorch Tensor.")
         if not torch.all(y_pred.byte() == y_pred):
-            warnings.warn("y_pred is not a binarized tensor here!")
+            warnings.warn("y_pred should be a binarized tensor.")
         if not torch.all(y.byte() == y):
             raise ValueError("y should be a binarized tensor.")
         dims = y_pred.ndimension()
diff --git a/monai/metrics/meandice.py b/monai/metrics/meandice.py
index 7e73b3d332..f1ff6639d0 100644
--- a/monai/metrics/meandice.py
+++ b/monai/metrics/meandice.py
@@ -10,7 +10,7 @@
 # limitations under the License.
 
 import warnings
-from typing import Union
+from typing import Optional, Union
 
 import torch
 
@@ -50,7 +50,7 @@ def __init__(
         self.include_background = include_background
         self.reduction = reduction
 
-    def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
+    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         """
         Args:
             y_pred: input data to compute, typical segmentation model output.
@@ -63,8 +63,10 @@ def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
             ValueError: when `y` is not a binarized tensor.
             ValueError: when `y_pred` has less than three dimensions.
         """
+        if not isinstance(y_pred, torch.Tensor) or not isinstance(y, torch.Tensor):
+            raise ValueError("y_pred and y must be PyTorch Tensor.")
         if not torch.all(y_pred.byte() == y_pred):
-            warnings.warn("y_pred is not a binarized tensor here!")
+            warnings.warn("y_pred should be a binarized tensor.")
         if not torch.all(y.byte() == y):
             raise ValueError("y should be a binarized tensor.")
         dims = y_pred.ndimension()
@@ -82,7 +84,6 @@ def reduce(self, data: torch.Tensor):
         Execute reduction logic for the output of `compute_meandice`.
 
         """
-        data = torch.cat(data, dim=0) if isinstance(data, list) else data
         # do metric reduction
         f, not_nans = do_metric_reduction(data, self.reduction)
         return f, not_nans
diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index 3cdb2c2f7a..a3130a005d 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -10,7 +10,7 @@
 # limitations under the License.
 
 from abc import ABC, abstractmethod
-from typing import Any, Optional
+from typing import Any, List, Optional, Sequence, Union
 
 import torch
 
@@ -41,21 +41,27 @@ def __call__(self, y_pred: TensorList, y: Optional[TensorList] = None):
                 or a `batch-first` Tensor.
 
         """
+        ret: Union[torch.Tensor, Sequence[torch.Tensor]]
         if isinstance(y_pred, (list, tuple)) or isinstance(y, (list, tuple)):
             # if y_pred or y is a list of channel-first data, add batch dim and compute metric
+            ret_: List[torch.Tensor]
             if y is not None:
-                ret = [self._apply(p_.detach().unsqueeze(0), y_.detach().unsqueeze(0)) for p_, y_ in zip(y_pred, y)]
+                ret_ = [self._apply(p_.detach().unsqueeze(0), y_.detach().unsqueeze(0)) for p_, y_ in zip(y_pred, y)]
             else:
-                ret = [self._apply(p_.detach().unsqueeze(0), None) for p_ in y_pred]
+                ret_ = [self._apply(p_.detach().unsqueeze(0), None) for p_ in y_pred]
             # concat the list of results
-            if isinstance(ret[0], torch.Tensor):
-                ret = torch.cat(ret, dim=0)
-            elif isinstance(ret[0], (list, tuple)) and all([isinstance(i, torch.Tensor) for i in ret[0]]):
+            if isinstance(ret_[0], torch.Tensor):
+                ret = torch.cat(ret_, dim=0)
+            elif isinstance(ret_[0], (list, tuple)) and all([isinstance(i, torch.Tensor) for i in ret_[0]]):
                 # if _apply() returned not only 1 Tensor, concat them separately
-                ret = [torch.cat([k[i] for k in ret], dim=0) for i in range(len(ret[0]))]
-        else:
-            y = y.detach() if y is not None else y
-            ret = self._apply(y_pred.detach(), y)
+                ret = [torch.cat([k[i] for k in ret_], dim=0) for i in range(len(ret_[0]))]
+            else:
+                # if not expected data type, return raw results directly
+                ret = ret_
+        elif isinstance(y_pred, torch.Tensor):
+            y_ = y.detach() if isinstance(y, torch.Tensor) else None
+            ret = self._apply(y_pred.detach(), y_)
+
         return ret
 
     @abstractmethod
diff --git a/monai/metrics/regression.py b/monai/metrics/regression.py
index f7c22c4573..49e73f7fb8 100644
--- a/monai/metrics/regression.py
+++ b/monai/metrics/regression.py
@@ -12,7 +12,7 @@
 import math
 from abc import abstractmethod
 from functools import partial
-from typing import Any, Union
+from typing import Any, Optional, Union
 
 import torch
 
@@ -41,7 +41,6 @@ def __init__(self, reduction: Union[MetricReduction, str] = MetricReduction.MEAN
         self.reduction = reduction
 
     def reduce(self, data: torch.Tensor):
-        data = torch.cat(data, dim=0) if isinstance(data, list) else data
         return do_metric_reduction(data, self.reduction)
 
     def _check_shape(self, y_pred: torch.Tensor, y: torch.Tensor) -> None:
@@ -58,7 +57,9 @@ def _check_shape(self, y_pred: torch.Tensor, y: torch.Tensor) -> None:
     def _compute_metric(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
 
-    def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
+    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
+        if not isinstance(y_pred, torch.Tensor) or not isinstance(y, torch.Tensor):
+            raise ValueError("y_pred and y must be PyTorch Tensor.")
         self._check_shape(y_pred, y)
         return self._compute_metric(y_pred, y)
 
diff --git a/monai/metrics/rocauc.py b/monai/metrics/rocauc.py
index 3c81a0b21b..44e6847ea0 100644
--- a/monai/metrics/rocauc.py
+++ b/monai/metrics/rocauc.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Tuple, Union, cast
+from typing import Tuple, Optional, Union, cast
 
 import numpy as np
 import torch
@@ -45,7 +45,7 @@ def __init__(self, average: Union[Average, str] = Average.MACRO) -> None:
         super().__init__()
         self.average = average
 
-    def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
+    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         return y_pred, y
 
     def reduce(self, data: Tuple[torch.Tensor, torch.Tensor]):
diff --git a/monai/metrics/surface_distance.py b/monai/metrics/surface_distance.py
index 51a82a29c7..5855a64cf5 100644
--- a/monai/metrics/surface_distance.py
+++ b/monai/metrics/surface_distance.py
@@ -10,7 +10,7 @@
 # limitations under the License.
 
 import warnings
-from typing import Union
+from typing import Optional, Union
 
 import numpy as np
 import torch
@@ -55,7 +55,7 @@ def __init__(
         self.symmetric = symmetric
         self.reduction = reduction
 
-    def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
+    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         """
         Args:
             y_pred: input data to compute, typical segmentation model output.
@@ -68,8 +68,10 @@ def _apply(self, y_pred: torch.Tensor, y: torch.Tensor):
             ValueError: when `y` is not a binarized tensor.
             ValueError: when `y_pred` has less than three dimensions.
         """
+        if not isinstance(y_pred, torch.Tensor) or not isinstance(y, torch.Tensor):
+            raise ValueError("y_pred and y must be PyTorch Tensor.")
         if not torch.all(y_pred.byte() == y_pred):
-            warnings.warn("y_pred is not a binarized tensor here!")
+            warnings.warn("y_pred should be a binarized tensor.")
         if not torch.all(y.byte() == y):
             raise ValueError("y should be a binarized tensor.")
         dims = y_pred.ndimension()

From 13721338f5d3912b61a4a912e68df9afddaf78c3 Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Wed, 2 Jun 2021 23:59:03 +0800
Subject: [PATCH 11/22] [DLMED] fix integration test

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 tests/test_integration_segmentation_3d.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_integration_segmentation_3d.py b/tests/test_integration_segmentation_3d.py
index af97236eda..67f1930678 100644
--- a/tests/test_integration_segmentation_3d.py
+++ b/tests/test_integration_segmentation_3d.py
@@ -149,7 +149,7 @@ def run_training_test(root_dir, device="cuda:0", cachedataset=0):
                     val_images, val_labels = val_data["img"].to(device), val_data["seg"].to(device)
                     sw_batch_size, roi_size = 4, (96, 96, 96)
                     val_outputs = val_post_tran(sliding_window_inference(val_images, roi_size, sw_batch_size, model))
-                    value, not_nans = dice_metric(y_pred=val_outputs, y=val_labels)
+                    value, not_nans = dice_metric.reduce(dice_metric(y_pred=val_outputs, y=val_labels))
                     metric_count += not_nans.item()
                     metric_sum += value.item() * not_nans.item()
                 metric = metric_sum / metric_count
@@ -218,7 +218,7 @@ def run_inference_test(root_dir, device="cuda:0"):
             # define sliding window size and batch size for windows inference
             sw_batch_size, roi_size = 4, (96, 96, 96)
             val_outputs = val_post_tran(sliding_window_inference(val_images, roi_size, sw_batch_size, model))
-            value, not_nans = dice_metric(y_pred=val_outputs, y=val_labels)
+            value, not_nans = dice_metric.reduce(dice_metric(y_pred=val_outputs, y=val_labels))
             metric_count += not_nans.item()
             metric_sum += value.item() * not_nans.item()
             saver.save_batch(val_outputs, val_data["img_meta_dict"])

From 1ed7a4f041e971322d06d98839a5659da1c2e54f Mon Sep 17 00:00:00 2001
From: monai-bot <monai.miccai2019@gmail.com>
Date: Wed, 2 Jun 2021 16:03:09 +0000
Subject: [PATCH 12/22] [MONAI] python code formatting

Signed-off-by: monai-bot <monai.miccai2019@gmail.com>
---
 monai/metrics/rocauc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/metrics/rocauc.py b/monai/metrics/rocauc.py
index 44e6847ea0..69e03dc553 100644
--- a/monai/metrics/rocauc.py
+++ b/monai/metrics/rocauc.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Tuple, Optional, Union, cast
+from typing import Optional, Tuple, Union, cast
 
 import numpy as np
 import torch

From 0d5d2785c3e4ac7c422bb7dad9e3b514938442fb Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Thu, 3 Jun 2021 00:12:45 +0800
Subject: [PATCH 13/22] [DLMED] fix flake8 issue

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/handlers/iteration_metric.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py
index 6b9106f48a..19c11765d4 100644
--- a/monai/handlers/iteration_metric.py
+++ b/monai/handlers/iteration_metric.py
@@ -18,7 +18,7 @@
 from monai.utils import exact_version, optional_import
 
 idist, _ = optional_import("ignite", "0.4.4", exact_version, "distributed")
-igniteMetric, _ = optional_import("ignite.metrics", "0.4.4", exact_version, "Metric")
+IgniteMetric, _ = optional_import("ignite.metrics", "0.4.4", exact_version, "Metric")
 reinit__is_reduced, _ = optional_import("ignite.metrics.metric", "0.4.4", exact_version, "reinit__is_reduced")
 if TYPE_CHECKING:
     from ignite.engine import Engine
@@ -26,7 +26,7 @@
     Engine, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Engine")
 
 
-class IterationMetric(igniteMetric):  # type: ignore[valid-type, misc] # due to optional_import
+class IterationMetric(IgniteMetric):  # type: ignore[valid-type, misc] # due to optional_import
     """
     Class for metrics that should be computed on every iteration and compute final results when epoch completed.
     Similar to the `EpochMetric` in ignite:

From 1a173fbfbc0ab1769c583bc615f0cc0b760d5dfd Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Thu, 3 Jun 2021 00:35:23 +0800
Subject: [PATCH 14/22] [DLMED] fix pytype issue

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/metrics/metric.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index a3130a005d..93a5b50e07 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -59,7 +59,7 @@ def __call__(self, y_pred: TensorList, y: Optional[TensorList] = None):
                 # if not expected data type, return raw results directly
                 ret = ret_
         elif isinstance(y_pred, torch.Tensor):
-            y_ = y.detach() if isinstance(y, torch.Tensor) else None
+            y_ = y.detach() if y is not None and isinstance(y, torch.Tensor) else None
             ret = self._apply(y_pred.detach(), y_)
 
         return ret

From af858e4ee99ed8076b8a70550e6a3d0d960b4d30 Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Thu, 3 Jun 2021 00:37:56 +0800
Subject: [PATCH 15/22] [DLMED] add more sanity check

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/metrics/metric.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index 93a5b50e07..4882a21342 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -61,6 +61,8 @@ def __call__(self, y_pred: TensorList, y: Optional[TensorList] = None):
         elif isinstance(y_pred, torch.Tensor):
             y_ = y.detach() if y is not None and isinstance(y, torch.Tensor) else None
             ret = self._apply(y_pred.detach(), y_)
+        else:
+            raise ValueError("y_pred or y must be a list of `channel-first` Tensors or a `batch-first` Tensor.")
 
         return ret
 

From e4d5f7b4bdf9fe9a4e22570ba1a83c84e4c1c398 Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Thu, 3 Jun 2021 06:24:29 +0800
Subject: [PATCH 16/22] [DLMED] update according to comments

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 docs/source/metrics.rst          | 8 ++++++++
 monai/config/__init__.py         | 2 +-
 monai/config/type_definitions.py | 8 ++++----
 monai/metrics/metric.py          | 4 ++--
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst
index b543ab3f40..6e48aeabe6 100644
--- a/docs/source/metrics.rst
+++ b/docs/source/metrics.rst
@@ -10,6 +10,11 @@ Metrics
 ------
 .. autofunction:: compute_froc_score
 
+`Metric`
+--------
+.. autoclass:: Metric
+    :members:
+
 `Mean Dice`
 -----------
 .. autofunction:: compute_meandice
@@ -21,6 +26,9 @@ Metrics
 --------------------------
 .. autofunction:: compute_roc_auc
 
+.. autoclass:: ROCAUCMetric
+    :members:
+
 `Confusion matrix`
 ------------------
 .. autofunction:: get_confusion_matrix
diff --git a/monai/config/__init__.py b/monai/config/__init__.py
index f874951b88..fed8e49771 100644
--- a/monai/config/__init__.py
+++ b/monai/config/__init__.py
@@ -18,4 +18,4 @@
     print_gpu_info,
     print_system_info,
 )
-from .type_definitions import DtypeLike, IndexSelection, KeysCollection, NdarrayTensor, TensorList
+from .type_definitions import DtypeLike, IndexSelection, KeysCollection, NdarrayTensor, TensorOrList
diff --git a/monai/config/type_definitions.py b/monai/config/type_definitions.py
index 52dc06e6a9..375ae460b2 100644
--- a/monai/config/type_definitions.py
+++ b/monai/config/type_definitions.py
@@ -14,7 +14,7 @@
 import numpy as np
 import torch
 
-__all__ = ["KeysCollection", "IndexSelection", "DtypeLike", "NdarrayTensor", "TensorList"]
+__all__ = ["KeysCollection", "IndexSelection", "DtypeLike", "NdarrayTensor", "TensorOrList"]
 
 """Commonly used concepts
 This module provides naming and type specifications for commonly used concepts
@@ -70,8 +70,8 @@
 NdarrayTensor = TypeVar("NdarrayTensor", np.ndarray, torch.Tensor)
 
 
-TensorList = Union[torch.Tensor, Sequence[torch.Tensor]]
-"""TensorList
+TensorOrList = Union[torch.Tensor, Sequence[torch.Tensor]]
+"""TensorOrList
 
-The TensorList type is used for defining `batch-first Tensor` or `list of channel-first Tensor`.
+The TensorOrList type is used for defining `batch-first Tensor` or `list of channel-first Tensor`.
 """
diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index 4882a21342..41c0258d51 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -14,7 +14,7 @@
 
 import torch
 
-from monai.config import TensorList
+from monai.config import TensorOrList
 
 
 class Metric(ABC):
@@ -27,7 +27,7 @@ class Metric(ABC):
 
     """
 
-    def __call__(self, y_pred: TensorList, y: Optional[TensorList] = None):
+    def __call__(self, y_pred: TensorOrList, y: Optional[TensorOrList] = None):
         """
         Execute basic computation for model prediction and ground truth.
         It can support  both `list of channel-first Tensor` and `batch-first Tensor`.

From 74a271ab87ab729747776964c14b23ed82e1a987 Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Thu, 3 Jun 2021 22:07:49 +0800
Subject: [PATCH 17/22] [DLMED] update according to Yiheng's comments

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/metrics/hausdorff_distance.py |  5 ++---
 monai/metrics/meandice.py           |  4 ++--
 monai/metrics/regression.py         | 12 ++++++------
 monai/metrics/surface_distance.py   |  3 ++-
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/monai/metrics/hausdorff_distance.py b/monai/metrics/hausdorff_distance.py
index ce6f71109f..e85cae61d8 100644
--- a/monai/metrics/hausdorff_distance.py
+++ b/monai/metrics/hausdorff_distance.py
@@ -27,11 +27,10 @@ class HausdorffDistanceMetric(Metric):
     """
     Compute Hausdorff Distance between two tensors. It can support both multi-classes and multi-labels tasks.
     It supports both directed and non-directed Hausdorff distance calculation. In addition, specify the `percentile`
-    parameter can get the percentile of the distance.
-    Input `y_pred` (BNHW[D] where N is number of classes) is compared with ground truth `y` (BNHW[D]).
+    parameter can get the percentile of the distance. Input `y_pred` is compared with ground truth `y`.
     `y_preds` is expected to have binarized predictions and `y` should be in one-hot format.
     You can use suitable transforms in ``monai.transforms.post`` first to achieve binarized values.
-    `y_preds` and `y` can also be a list of Tensor with shape: [CHW[D]].
+    `y_preds` and `y` can be a list of channel-first Tensor (CHW[D]) or a batch-first Tensor (BCHW[D]).
     The implementation refers to `DeepMind's implementation <https://github.com/deepmind/surface-distance>`_.
 
     Args:
diff --git a/monai/metrics/meandice.py b/monai/metrics/meandice.py
index f1ff6639d0..cf8d0464d5 100644
--- a/monai/metrics/meandice.py
+++ b/monai/metrics/meandice.py
@@ -23,14 +23,14 @@
 class DiceMetric(Metric):
     """
     Compute average Dice loss between two tensors. It can support both multi-classes and multi-labels tasks.
-    Input `y_pred` (BNHW[D] where N is number of classes) is compared with ground truth `y` (BNHW[D]).
+    Input `y_pred` is compared with ground truth `y`.
     `y_preds` is expected to have binarized predictions and `y` should be in one-hot format. You can use suitable transforms
     in ``monai.transforms.post`` first to achieve binarized values.
     The `include_background` parameter can be set to ``False`` for an instance of DiceLoss to exclude
     the first category (channel index 0) which is by convention assumed to be background. If the non-background
     segmentations are small compared to the total image size they can get overwhelmed by the signal from the
     background so excluding it in such cases helps convergence.
-    `y_preds` and `y` can also be a list of Tensor with shape: [CHW[D]].
+    `y_preds` and `y` can be a list of channel-first Tensor (CHW[D]) or a batch-first Tensor (BCHW[D]).
 
     Args:
         include_background: whether to skip Dice computation on the first channel of
diff --git a/monai/metrics/regression.py b/monai/metrics/regression.py
index 49e73f7fb8..3e13072fa1 100644
--- a/monai/metrics/regression.py
+++ b/monai/metrics/regression.py
@@ -25,9 +25,9 @@
 class RegressionMetric(Metric):
     """
     Base class for regression metrics.
-    Input `y_pred` (BCHW[D] where C is number of channels) is compared with ground truth `y` (BCHW[D]).
+    Input `y_pred` is compared with ground truth `y`.
     Both `y_pred` and `y` are expected to be real-valued, where `y_pred` is output from a regression model.
-    `y_preds` and `y` can also be a list of Tensor with shape: [CHW[D]].
+    `y_preds` and `y` can be a list of channel-first Tensor (CHW[D]) or a batch-first Tensor (BCHW[D]).
 
     Args:
         reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
@@ -72,7 +72,7 @@ class MSEMetric(RegressionMetric):
 
     More info: https://en.wikipedia.org/wiki/Mean_squared_error
 
-    Input `y_pred` (BCHW[D] where C is number of channels) is compared with ground truth `y` (BCHW[D]).
+    Input `y_pred` is compared with ground truth `y`.
     Both `y_pred` and `y` are expected to be real-valued, where `y_pred` is output from a regression model.
 
     Args:
@@ -102,7 +102,7 @@ class MAEMetric(RegressionMetric):
 
     More info: https://en.wikipedia.org/wiki/Mean_absolute_error
 
-    Input `y_pred` (BCHW[D] where C is number of channels) is compared with ground truth `y` (BCHW[D]).
+    Input `y_pred` is compared with ground truth `y`.
     Both `y_pred` and `y` are expected to be real-valued, where `y_pred` is output from a regression model.
 
     Args:
@@ -133,7 +133,7 @@ class RMSEMetric(RegressionMetric):
 
     More info: https://en.wikipedia.org/wiki/Root-mean-square_deviation
 
-    Input `y_pred` (BCHW[D] where C is number of channels) is compared with ground truth `y` (BCHW[D]).
+    Input `y_pred` is compared with ground truth `y`.
     Both `y_pred` and `y` are expected to be real-valued, where `y_pred` is output from a regression model.
 
     Args:
@@ -168,7 +168,7 @@ class PSNRMetric(RegressionMetric):
     Help taken from:
     https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/image_ops_impl.py line 4139
 
-    Input `y_pred` (BCHW[D] where C is number of channels) is compared with ground truth `y` (BCHW[D]).
+    Input `y_pred` is compared with ground truth `y`.
     Both `y_pred` and `y` are expected to be real-valued, where `y_pred` is output from a regression model.
 
     Args:
diff --git a/monai/metrics/surface_distance.py b/monai/metrics/surface_distance.py
index 5855a64cf5..908da13215 100644
--- a/monai/metrics/surface_distance.py
+++ b/monai/metrics/surface_distance.py
@@ -25,9 +25,10 @@ class SurfaceDistanceMetric(Metric):
     """
     Compute Surface Distance between two tensors. It can support both multi-classes and multi-labels tasks.
     It supports both symmetric and asymmetric surface distance calculation.
-    Input `y_pred` (BNHW[D] where N is number of classes) is compared with ground truth `y` (BNHW[D]).
+    Input `y_pred` is compared with ground truth `y`.
     `y_preds` is expected to have binarized predictions and `y` should be in one-hot format.
     You can use suitable transforms in ``monai.transforms.post`` first to achieve binarized values.
+    `y_preds` and `y` can be a list of channel-first Tensor (CHW[D]) or a batch-first Tensor (BCHW[D]).
 
     Args:
         include_background: whether to skip distance computation on the first channel of

From 0d2f32c00f8ba5311d29e79d33b1afa3d323d80b Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Thu, 3 Jun 2021 22:15:01 +0800
Subject: [PATCH 18/22] [DLMED] update according to Wenqi's comments

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/handlers/iteration_metric.py | 5 ++++-
 monai/metrics/confusion_matrix.py  | 4 ++--
 monai/metrics/metric.py            | 2 +-
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py
index 19c11765d4..cd90363f59 100644
--- a/monai/handlers/iteration_metric.py
+++ b/monai/handlers/iteration_metric.py
@@ -104,7 +104,7 @@ def compute(self) -> Any:
         result: torch.Tensor = torch.zeros(1)
         if idist.get_rank() == 0:
             # run compute_fn on zero rank only
-            result = self.metric_fn.reduce(_scores)
+            result = self._reduce(_scores)
             result = result[0] if isinstance(result, (list, tuple)) else result
 
         if ws > 1:
@@ -113,6 +113,9 @@ def compute(self) -> Any:
 
         return result.item() if isinstance(result, torch.Tensor) else result
 
+    def _reduce(self, scores) -> Any:
+        return self.metric_fn.reduce(_scores)
+
     def attach(self, engine: Engine, name: str) -> None:
         """
         Attaches current metric to provided engine. On the end of engine's run,
diff --git a/monai/metrics/confusion_matrix.py b/monai/metrics/confusion_matrix.py
index aa127a49ac..0e457db5b7 100644
--- a/monai/metrics/confusion_matrix.py
+++ b/monai/metrics/confusion_matrix.py
@@ -45,8 +45,8 @@ class ConfusionMatrixMetric(Metric):
             Except for input only one metric, multiple metrics are also supported via input a sequence of metric names, such as
             ("sensitivity", "precision", "recall"), if ``compute_sample`` is ``True``, multiple ``f`` and ``not_nans`` will be
             returned with the same order as input names when calling the class.
-        compute_sample: when reducing, if ``True``, each sample's metric will be computed first. If ``False``,
-            compute reduction first, defaults to ``False``.
+        compute_sample: when reducing, if ``True``, each sample's metric will be computed based on each confusion matrix first.
+            If ``False``, compute reduction on the confusion matrices first. Defaults to ``False``.
         reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
             ``"mean_channel"``, ``"sum_channel"``}
 
diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index 41c0258d51..783ebf40f0 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -41,7 +41,7 @@ def __call__(self, y_pred: TensorOrList, y: Optional[TensorOrList] = None):
                 or a `batch-first` Tensor.
 
         """
-        ret: Union[torch.Tensor, Sequence[torch.Tensor]]
+        ret: TensorOrList
         if isinstance(y_pred, (list, tuple)) or isinstance(y, (list, tuple)):
             # if y_pred or y is a list of channel-first data, add batch dim and compute metric
             ret_: List[torch.Tensor]

From 07ec911fcfb2eab0d97ac695bf4559b9282f3fab Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Thu, 3 Jun 2021 22:37:22 +0800
Subject: [PATCH 19/22] [DLMED] change to "_compute()" and "aggregate()"

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/handlers/iteration_metric.py        |  2 +-
 monai/handlers/roc_auc.py                 |  2 +-
 monai/metrics/confusion_matrix.py         |  4 ++--
 monai/metrics/hausdorff_distance.py       |  4 ++--
 monai/metrics/meandice.py                 |  4 ++--
 monai/metrics/metric.py                   | 14 +++++++-------
 monai/metrics/regression.py               |  4 ++--
 monai/metrics/rocauc.py                   |  6 +++---
 monai/metrics/surface_distance.py         |  4 ++--
 tests/test_compute_confusion_matrix.py    |  8 ++++----
 tests/test_compute_meandice.py            |  4 ++--
 tests/test_compute_regression_metrics.py  | 14 +++++++-------
 tests/test_compute_roc_auc.py             |  2 +-
 tests/test_hausdorff_distance.py          |  4 ++--
 tests/test_integration_segmentation_3d.py |  4 ++--
 tests/test_surface_distance.py            |  4 ++--
 16 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py
index cd90363f59..7b8619a4db 100644
--- a/monai/handlers/iteration_metric.py
+++ b/monai/handlers/iteration_metric.py
@@ -114,7 +114,7 @@ def compute(self) -> Any:
         return result.item() if isinstance(result, torch.Tensor) else result
 
     def _reduce(self, scores) -> Any:
-        return self.metric_fn.reduce(_scores)
+        return self.metric_fn.aggregate(scores)
 
     def attach(self, engine: Engine, name: str) -> None:
         """
diff --git a/monai/handlers/roc_auc.py b/monai/handlers/roc_auc.py
index 86eb2fe213..1b12dc0e96 100644
--- a/monai/handlers/roc_auc.py
+++ b/monai/handlers/roc_auc.py
@@ -60,7 +60,7 @@ def __init__(
         self.metric = ROCAUCMetric(average=Average(average))
         self._is_reduced: bool = False
         super().__init__(
-            compute_fn=lambda p, y: self.metric.reduce(data=(p, y)),
+            compute_fn=lambda p, y: self.metric.aggregate(data=(p, y)),
             output_transform=output_transform,
             check_compute_fn=False,
             device=device,
diff --git a/monai/metrics/confusion_matrix.py b/monai/metrics/confusion_matrix.py
index 0e457db5b7..554aeef146 100644
--- a/monai/metrics/confusion_matrix.py
+++ b/monai/metrics/confusion_matrix.py
@@ -65,7 +65,7 @@ def __init__(
         self.compute_sample = compute_sample
         self.reduction = reduction
 
-    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
+    def _compute(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         """
         Args:
             y_pred: input data to compute. It must be one-hot format and first dim is batch.
@@ -98,7 +98,7 @@ def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
             include_background=self.include_background,
         )
 
-    def reduce(self, data: torch.Tensor):
+    def aggregate(self, data: torch.Tensor):
         """
         Execute reduction for the confusion matrix values, the `data` usually is a Tensor of shape [BC4],
         Where, the third dimension represents the number of true positive, false positive, true negative
diff --git a/monai/metrics/hausdorff_distance.py b/monai/metrics/hausdorff_distance.py
index e85cae61d8..69f9189cc8 100644
--- a/monai/metrics/hausdorff_distance.py
+++ b/monai/metrics/hausdorff_distance.py
@@ -63,7 +63,7 @@ def __init__(
         self.directed = directed
         self.reduction = reduction
 
-    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
+    def _compute(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         """
         Args:
             y_pred: input data to compute, typical segmentation model output.
@@ -95,7 +95,7 @@ def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
             directed=self.directed,
         )
 
-    def reduce(self, data: torch.Tensor):
+    def aggregate(self, data: torch.Tensor):
         """
         Execute reduction logic for the output of `compute_hausdorff_distance`.
 
diff --git a/monai/metrics/meandice.py b/monai/metrics/meandice.py
index cf8d0464d5..bdb93f4794 100644
--- a/monai/metrics/meandice.py
+++ b/monai/metrics/meandice.py
@@ -50,7 +50,7 @@ def __init__(
         self.include_background = include_background
         self.reduction = reduction
 
-    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
+    def _compute(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         """
         Args:
             y_pred: input data to compute, typical segmentation model output.
@@ -79,7 +79,7 @@ def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
             include_background=self.include_background,
         )
 
-    def reduce(self, data: torch.Tensor):
+    def aggregate(self, data: torch.Tensor):
         """
         Execute reduction logic for the output of `compute_meandice`.
 
diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index 783ebf40f0..a8bcc0f3bb 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -46,28 +46,28 @@ def __call__(self, y_pred: TensorOrList, y: Optional[TensorOrList] = None):
             # if y_pred or y is a list of channel-first data, add batch dim and compute metric
             ret_: List[torch.Tensor]
             if y is not None:
-                ret_ = [self._apply(p_.detach().unsqueeze(0), y_.detach().unsqueeze(0)) for p_, y_ in zip(y_pred, y)]
+                ret_ = [self._compute(p_.detach().unsqueeze(0), y_.detach().unsqueeze(0)) for p_, y_ in zip(y_pred, y)]
             else:
-                ret_ = [self._apply(p_.detach().unsqueeze(0), None) for p_ in y_pred]
+                ret_ = [self._compute(p_.detach().unsqueeze(0), None) for p_ in y_pred]
             # concat the list of results
             if isinstance(ret_[0], torch.Tensor):
                 ret = torch.cat(ret_, dim=0)
             elif isinstance(ret_[0], (list, tuple)) and all([isinstance(i, torch.Tensor) for i in ret_[0]]):
-                # if _apply() returned not only 1 Tensor, concat them separately
+                # if _compute() returned not only 1 Tensor, concat them separately
                 ret = [torch.cat([k[i] for k in ret_], dim=0) for i in range(len(ret_[0]))]
             else:
                 # if not expected data type, return raw results directly
                 ret = ret_
         elif isinstance(y_pred, torch.Tensor):
             y_ = y.detach() if y is not None and isinstance(y, torch.Tensor) else None
-            ret = self._apply(y_pred.detach(), y_)
+            ret = self._compute(y_pred.detach(), y_)
         else:
             raise ValueError("y_pred or y must be a list of `channel-first` Tensors or a `batch-first` Tensor.")
 
         return ret
 
     @abstractmethod
-    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
+    def _compute(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         """
         Actual computation logic of the metric, input data should be `batch-first` Tensor.
 
@@ -75,9 +75,9 @@ def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
 
     @abstractmethod
-    def reduce(self, data: Any):
+    def aggregate(self, data: Any):
         """
-        Execute reduction operation for the metric results. Users can call it for the batch data of every iteration
+        Aggregate the metric results. Users can call it for the batch data of every iteration
         or accumulte the results of every iteration and call it for the final output.
 
         """
diff --git a/monai/metrics/regression.py b/monai/metrics/regression.py
index 3e13072fa1..4ea32cb276 100644
--- a/monai/metrics/regression.py
+++ b/monai/metrics/regression.py
@@ -40,7 +40,7 @@ def __init__(self, reduction: Union[MetricReduction, str] = MetricReduction.MEAN
         super().__init__()
         self.reduction = reduction
 
-    def reduce(self, data: torch.Tensor):
+    def aggregate(self, data: torch.Tensor):
         return do_metric_reduction(data, self.reduction)
 
     def _check_shape(self, y_pred: torch.Tensor, y: torch.Tensor) -> None:
@@ -57,7 +57,7 @@ def _check_shape(self, y_pred: torch.Tensor, y: torch.Tensor) -> None:
     def _compute_metric(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
 
-    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
+    def _compute(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         if not isinstance(y_pred, torch.Tensor) or not isinstance(y, torch.Tensor):
             raise ValueError("y_pred and y must be PyTorch Tensor.")
         self._check_shape(y_pred, y)
diff --git a/monai/metrics/rocauc.py b/monai/metrics/rocauc.py
index 69e03dc553..2f3e01a821 100644
--- a/monai/metrics/rocauc.py
+++ b/monai/metrics/rocauc.py
@@ -45,10 +45,10 @@ def __init__(self, average: Union[Average, str] = Average.MACRO) -> None:
         super().__init__()
         self.average = average
 
-    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
+    def _compute(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         return y_pred, y
 
-    def reduce(self, data: Tuple[torch.Tensor, torch.Tensor]):
+    def aggregate(self, data: Tuple[torch.Tensor, torch.Tensor]):
         """
         As AUC metric needs to execute on the overall data, so usually users accumulate `y_pred` and `y`
         of every iteration, then execute real computation and reduction on the accumulated data.
@@ -65,7 +65,7 @@ def reduce(self, data: Tuple[torch.Tensor, torch.Tensor]):
                 y.append(y_)
                 y_pred.append(pred_)
 
-            result = metric.reduce(torch.cat(y_pred, dim=0), torch.cat(y, dim=0))
+            result = metric.aggregate((torch.cat(y_pred, dim=0), torch.cat(y, dim=0)))
 
         """
         y_pred, y = data
diff --git a/monai/metrics/surface_distance.py b/monai/metrics/surface_distance.py
index 908da13215..7b4519e04a 100644
--- a/monai/metrics/surface_distance.py
+++ b/monai/metrics/surface_distance.py
@@ -56,7 +56,7 @@ def __init__(
         self.symmetric = symmetric
         self.reduction = reduction
 
-    def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
+    def _compute(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         """
         Args:
             y_pred: input data to compute, typical segmentation model output.
@@ -87,7 +87,7 @@ def _apply(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
             distance_metric=self.distance_metric,
         )
 
-    def reduce(self, data: torch.Tensor):
+    def aggregate(self, data: torch.Tensor):
         """
         Execute reduction logic for the output of `compute_average_surface_distance`.
 
diff --git a/tests/test_compute_confusion_matrix.py b/tests/test_compute_confusion_matrix.py
index ca07685d8d..7e9da9851b 100644
--- a/tests/test_compute_confusion_matrix.py
+++ b/tests/test_compute_confusion_matrix.py
@@ -231,7 +231,7 @@ def test_compute_sample(self, input_data, expected_value):
         vals["y_pred"] = params.pop("y_pred")
         vals["y"] = params.pop("y")
         metric = ConfusionMatrixMetric(**params)
-        result, _ = metric.reduce(metric(**vals))
+        result, _ = metric.aggregate(metric(**vals))
         np.testing.assert_allclose(result, expected_value, atol=1e-4, rtol=1e-4)
 
     @parameterized.expand(TEST_CASES_COMPUTE_SAMPLE_MULTI_METRICS)
@@ -241,7 +241,7 @@ def test_compute_sample_multiple_metrics(self, input_data, expected_values):
         vals["y_pred"] = params.pop("y_pred")
         vals["y"] = params.pop("y")
         metric = ConfusionMatrixMetric(**params)
-        results = metric.reduce(metric(**vals))
+        results = metric.aggregate(metric(**vals))
         for idx in range(0, len(results), 2):
             result = results[idx]
             expected_value = expected_values[int(idx / 2)]
@@ -254,7 +254,7 @@ def test_compute_sample_with_nan(self, input_data, expected_value, expected_not_
         vals["y_pred"] = params.pop("y_pred")
         vals["y"] = params.pop("y")
         metric = ConfusionMatrixMetric(**params)
-        result, not_nans = metric.reduce(metric(**vals))
+        result, not_nans = metric.aggregate(metric(**vals))
         np.testing.assert_allclose(result, expected_value, atol=1e-4, rtol=1e-4)
         np.testing.assert_allclose(not_nans, expected_not_nans, atol=1e-4, rtol=1e-4)
 
@@ -267,7 +267,7 @@ def test_clf_with_nan(self, input_data, expected_value):
         metric = ConfusionMatrixMetric(**params)
         result = metric(**vals)
         np.testing.assert_allclose(result, expected_value, atol=1e-4, rtol=1e-4)
-        result, _ = metric.reduce(result)
+        result, _ = metric.aggregate(result)
         expected_value, _ = do_metric_reduction(expected_value, "mean_channel")
         expected_value = compute_confusion_matrix_metric("tpr", expected_value)
         np.testing.assert_allclose(result, expected_value, atol=1e-4, rtol=1e-4)
diff --git a/tests/test_compute_meandice.py b/tests/test_compute_meandice.py
index a32d7ef894..c6763f59d5 100644
--- a/tests/test_compute_meandice.py
+++ b/tests/test_compute_meandice.py
@@ -197,7 +197,7 @@ def test_value_class(self, input_data, expected_value):
         vals["y"] = input_data.pop("y")
         dice_metric = DiceMetric(**input_data, reduction="none")
         result = dice_metric(**vals)
-        result, _ = dice_metric.reduce(result)
+        result, _ = dice_metric.aggregate(result)
         np.testing.assert_allclose(result.cpu().numpy(), expected_value, atol=1e-4)
 
     @parameterized.expand([TEST_CASE_4, TEST_CASE_5, TEST_CASE_6, TEST_CASE_7, TEST_CASE_8])
@@ -205,7 +205,7 @@ def test_nans_class(self, params, input_data, expected_value):
 
         dice_metric = DiceMetric(**params)
         result = dice_metric(**input_data)
-        result, _ = dice_metric.reduce(result)
+        result, _ = dice_metric.aggregate(result)
         np.testing.assert_allclose(result.cpu().numpy(), expected_value, atol=1e-4)
 
 
diff --git a/tests/test_compute_regression_metrics.py b/tests/test_compute_regression_metrics.py
index b10633ac17..2b9157444a 100644
--- a/tests/test_compute_regression_metrics.py
+++ b/tests/test_compute_regression_metrics.py
@@ -66,19 +66,19 @@ def test_shape_reduction(self):
                     # iterate over regression metrics, check shape for diff. reduction func
                     for mt_fn in metrics:
                         mt = mt_fn(reduction="mean")
-                        out_tensor, _ = mt.reduce(mt(in_tensor, in_tensor))
+                        out_tensor, _ = mt.aggregate(mt(in_tensor, in_tensor))
                         self.assertTrue(len(out_tensor.shape) == 1)
 
                         mt = mt_fn(reduction="sum")
-                        out_tensor, _ = mt.reduce(mt(in_tensor, in_tensor))
+                        out_tensor, _ = mt.aggregate(mt(in_tensor, in_tensor))
                         self.assertTrue(len(out_tensor.shape) == 0)
 
                         mt = mt_fn(reduction="mean_channel")
-                        out_tensor, _ = mt.reduce(mt(in_tensor, in_tensor))
+                        out_tensor, _ = mt.aggregate(mt(in_tensor, in_tensor))
                         self.assertTrue(len(out_tensor.shape) == 1 and out_tensor.shape[0] == batch)
 
                         mt = mt_fn(reduction="sum_channel")
-                        out_tensor, _ = mt.reduce(mt(in_tensor, in_tensor))
+                        out_tensor, _ = mt.aggregate(mt(in_tensor, in_tensor))
                         self.assertTrue(len(out_tensor.shape) == 1 and out_tensor.shape[0] == batch)
 
     def test_compare_numpy(self):
@@ -106,7 +106,7 @@ def test_compare_numpy(self):
                     # check metrics
                     for mt_fn, mt_fn_np in zip(metrics, metrics_np):
                         mt = mt_fn(reduction="mean")
-                        out_tensor, _ = mt.reduce(mt(y_pred=in_tensor_a, y=in_tensor_b))
+                        out_tensor, _ = mt.aggregate(mt(y_pred=in_tensor_a, y=in_tensor_b))
                         out_np = mt_fn_np(y_pred=in_tensor_a.cpu().numpy(), y=in_tensor_b.cpu().numpy())
 
                         np.testing.assert_allclose(out_tensor.cpu().numpy(), out_np, atol=1e-4)
@@ -154,7 +154,7 @@ def test_same_input(self):
                     # check metrics
                     for mt_fn, rs in zip(metrics, results):
                         mt = mt_fn(reduction="mean")
-                        out_tensor, _ = mt.reduce(mt(in_tensor, in_tensor))
+                        out_tensor, _ = mt.aggregate(mt(in_tensor, in_tensor))
                         np.testing.assert_allclose(out_tensor.cpu(), rs, atol=1e-4)
 
     def test_diff_input(self):
@@ -180,7 +180,7 @@ def test_diff_input(self):
                     # check metrics
                     for mt_fn, rs in zip(metrics, results):
                         mt = mt_fn(reduction="mean")
-                        out_tensor, _ = mt.reduce(mt(in_tensor_a, in_tensor_b))
+                        out_tensor, _ = mt.aggregate(mt(in_tensor_a, in_tensor_b))
                         np.testing.assert_allclose(out_tensor.cpu(), rs, atol=1e-4)
 
 
diff --git a/tests/test_compute_roc_auc.py b/tests/test_compute_roc_auc.py
index 3a91648e2a..acfcc022bb 100644
--- a/tests/test_compute_roc_auc.py
+++ b/tests/test_compute_roc_auc.py
@@ -95,7 +95,7 @@ def test_class_value(self, y_pred, y, softmax, to_onehot, average, expected_valu
         y_pred = Activations(softmax=softmax)(y_pred)
         y = AsDiscrete(to_onehot=to_onehot, n_classes=2)(y)
         metric = ROCAUCMetric(average=average)
-        result = metric.reduce(metric(y_pred=y_pred, y=y))
+        result = metric.aggregate(metric(y_pred=y_pred, y=y))
         np.testing.assert_allclose(expected_value, result, rtol=1e-5)
 
 
diff --git a/tests/test_hausdorff_distance.py b/tests/test_hausdorff_distance.py
index a19e928e7d..384ae82f1f 100644
--- a/tests/test_hausdorff_distance.py
+++ b/tests/test_hausdorff_distance.py
@@ -131,7 +131,7 @@ def test_value(self, input_data, expected_value):
                 batch, n_class = 2, 3
                 batch_seg_1 = seg_1.unsqueeze(0).unsqueeze(0).repeat([batch, n_class, 1, 1, 1])
                 batch_seg_2 = seg_2.unsqueeze(0).unsqueeze(0).repeat([batch, n_class, 1, 1, 1])
-                result, _ = hd_metric.reduce(hd_metric(batch_seg_1, batch_seg_2))
+                result, _ = hd_metric.aggregate(hd_metric(batch_seg_1, batch_seg_2))
                 expected_value_curr = expected_value[ct]
                 np.testing.assert_allclose(expected_value_curr, result, rtol=1e-7)
                 ct += 1
@@ -144,7 +144,7 @@ def test_nans(self, input_data):
         hd_metric = HausdorffDistanceMetric(include_background=False)
         batch_seg_1 = seg_1.unsqueeze(0).unsqueeze(0)
         batch_seg_2 = seg_2.unsqueeze(0).unsqueeze(0)
-        result, not_nans = hd_metric.reduce(hd_metric(batch_seg_1, batch_seg_2))
+        result, not_nans = hd_metric.aggregate(hd_metric(batch_seg_1, batch_seg_2))
         np.testing.assert_allclose(0, result, rtol=1e-7)
         np.testing.assert_allclose(0, not_nans, rtol=1e-7)
 
diff --git a/tests/test_integration_segmentation_3d.py b/tests/test_integration_segmentation_3d.py
index 67f1930678..8b15d3fc56 100644
--- a/tests/test_integration_segmentation_3d.py
+++ b/tests/test_integration_segmentation_3d.py
@@ -149,7 +149,7 @@ def run_training_test(root_dir, device="cuda:0", cachedataset=0):
                     val_images, val_labels = val_data["img"].to(device), val_data["seg"].to(device)
                     sw_batch_size, roi_size = 4, (96, 96, 96)
                     val_outputs = val_post_tran(sliding_window_inference(val_images, roi_size, sw_batch_size, model))
-                    value, not_nans = dice_metric.reduce(dice_metric(y_pred=val_outputs, y=val_labels))
+                    value, not_nans = dice_metric.aggregate(dice_metric(y_pred=val_outputs, y=val_labels))
                     metric_count += not_nans.item()
                     metric_sum += value.item() * not_nans.item()
                 metric = metric_sum / metric_count
@@ -218,7 +218,7 @@ def run_inference_test(root_dir, device="cuda:0"):
             # define sliding window size and batch size for windows inference
             sw_batch_size, roi_size = 4, (96, 96, 96)
             val_outputs = val_post_tran(sliding_window_inference(val_images, roi_size, sw_batch_size, model))
-            value, not_nans = dice_metric.reduce(dice_metric(y_pred=val_outputs, y=val_labels))
+            value, not_nans = dice_metric.aggregate(dice_metric(y_pred=val_outputs, y=val_labels))
             metric_count += not_nans.item()
             metric_sum += value.item() * not_nans.item()
             saver.save_batch(val_outputs, val_data["img_meta_dict"])
diff --git a/tests/test_surface_distance.py b/tests/test_surface_distance.py
index 53c56531a8..a80c06d463 100644
--- a/tests/test_surface_distance.py
+++ b/tests/test_surface_distance.py
@@ -136,7 +136,7 @@ def test_value(self, input_data, expected_value):
             batch, n_class = 2, 3
             batch_seg_1 = seg_1.unsqueeze(0).unsqueeze(0).repeat([batch, n_class, 1, 1, 1])
             batch_seg_2 = seg_2.unsqueeze(0).unsqueeze(0).repeat([batch, n_class, 1, 1, 1])
-            result, _ = sur_metric.reduce(sur_metric(batch_seg_1, batch_seg_2))
+            result, _ = sur_metric.aggregate(sur_metric(batch_seg_1, batch_seg_2))
             expected_value_curr = expected_value[ct]
             np.testing.assert_allclose(expected_value_curr, result, rtol=1e-7)
             ct += 1
@@ -150,7 +150,7 @@ def test_nans(self, input_data):
         # test list of channel-first Tensor
         batch_seg_1 = [seg_1.unsqueeze(0)]
         batch_seg_2 = [seg_2.unsqueeze(0)]
-        result, not_nans = sur_metric.reduce(sur_metric(batch_seg_1, batch_seg_2))
+        result, not_nans = sur_metric.aggregate(sur_metric(batch_seg_1, batch_seg_2))
         np.testing.assert_allclose(0, result, rtol=1e-7)
         np.testing.assert_allclose(0, not_nans, rtol=1e-7)
 

From 1bb00b06ea744178f04734b11f750e8815c200e3 Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Thu, 3 Jun 2021 22:57:23 +0800
Subject: [PATCH 20/22] [DLMED] add compute_list()

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/metrics/metric.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index a8bcc0f3bb..af9a538e57 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -44,11 +44,7 @@ def __call__(self, y_pred: TensorOrList, y: Optional[TensorOrList] = None):
         ret: TensorOrList
         if isinstance(y_pred, (list, tuple)) or isinstance(y, (list, tuple)):
             # if y_pred or y is a list of channel-first data, add batch dim and compute metric
-            ret_: List[torch.Tensor]
-            if y is not None:
-                ret_ = [self._compute(p_.detach().unsqueeze(0), y_.detach().unsqueeze(0)) for p_, y_ in zip(y_pred, y)]
-            else:
-                ret_ = [self._compute(p_.detach().unsqueeze(0), None) for p_ in y_pred]
+            ret_: List[torch.Tensor] = self._compute_list(y_pred, y)
             # concat the list of results
             if isinstance(ret_[0], torch.Tensor):
                 ret = torch.cat(ret_, dim=0)
@@ -66,6 +62,17 @@ def __call__(self, y_pred: TensorOrList, y: Optional[TensorOrList] = None):
 
         return ret
 
+    def _compute_list(self, y_pred: List[torch.Tensor], y: Optional[List[torch.Tensor]] = None):
+        """
+        Excute the computation for every item of a list.
+        Subclass may enhance the operation with multi-threads to accelerate.
+
+        """
+        if y is not None:
+            return [self._compute(p_.detach().unsqueeze(0), y_.detach().unsqueeze(0)) for p_, y_ in zip(y_pred, y)]
+        else:
+            return [self._compute(p_.detach().unsqueeze(0), None) for p_ in y_pred]
+
     @abstractmethod
     def _compute(self, y_pred: torch.Tensor, y: Optional[torch.Tensor] = None):
         """

From 1e10a5185777529ccb614f59fc5f71fd4cf21c97 Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Thu, 3 Jun 2021 23:12:13 +0800
Subject: [PATCH 21/22] [DLMED] fix flake8 issue

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/metrics/metric.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index af9a538e57..a9c832bfa8 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -10,7 +10,7 @@
 # limitations under the License.
 
 from abc import ABC, abstractmethod
-from typing import Any, List, Optional, Sequence, Union
+from typing import Any, List, Optional
 
 import torch
 

From 51cc9f118cb2c3300859e2b30485c7ffc2cc65be Mon Sep 17 00:00:00 2001
From: Nic Ma <nma@nvidia.com>
Date: Thu, 3 Jun 2021 23:59:46 +0800
Subject: [PATCH 22/22] [DLMED] fix flake8 issue

Signed-off-by: Nic Ma <nma@nvidia.com>
---
 monai/handlers/iteration_metric.py | 2 +-
 monai/metrics/metric.py            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py
index 7b8619a4db..a0428d80c4 100644
--- a/monai/handlers/iteration_metric.py
+++ b/monai/handlers/iteration_metric.py
@@ -114,7 +114,7 @@ def compute(self) -> Any:
         return result.item() if isinstance(result, torch.Tensor) else result
 
     def _reduce(self, scores) -> Any:
-        return self.metric_fn.aggregate(_scores)
+        return self.metric_fn.aggregate(scores)
 
     def attach(self, engine: Engine, name: str) -> None:
         """
diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py
index a9c832bfa8..00cd9f7b73 100644
--- a/monai/metrics/metric.py
+++ b/monai/metrics/metric.py
@@ -62,7 +62,7 @@ def __call__(self, y_pred: TensorOrList, y: Optional[TensorOrList] = None):
 
         return ret
 
-    def _compute_list(self, y_pred: List[torch.Tensor], y: Optional[List[torch.Tensor]] = None):
+    def _compute_list(self, y_pred: TensorOrList, y: Optional[TensorOrList] = None):
         """
         Excute the computation for every item of a list.
         Subclass may enhance the operation with multi-threads to accelerate.