diff --git a/monai/metrics/confusion_matrix.py b/monai/metrics/confusion_matrix.py index 320f657537..780377c0f8 100644 --- a/monai/metrics/confusion_matrix.py +++ b/monai/metrics/confusion_matrix.py @@ -47,7 +47,7 @@ class ConfusionMatrixMetric(CumulativeIterationMetric): returned with the same order as input names when calling the class. compute_sample: when reducing, if ``True``, each sample's metric will be computed based on each confusion matrix first. if ``False``, compute reduction on the confusion matrices first, defaults to ``False``. - reduction: define the mode to reduce metrics, will only execute reduction on `not-nan` values, + reduction: define mode of reduction to the metrics, will only apply reduction on `not-nan` values, available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, ``"mean_channel"``, ``"sum_channel"``}, default to ``"mean"``. if "none", will not do reduction. get_not_nans: whether to return the `not_nans` count, if True, aggregate() returns [(metric, not_nans), ...]. If False, @@ -102,10 +102,17 @@ def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor): # type: ignor return get_confusion_matrix(y_pred=y_pred, y=y, include_background=self.include_background) - def aggregate(self): + def aggregate(self, compute_sample: bool = False, reduction: Union[MetricReduction, str, None] = None): # type: ignore """ Execute reduction for the confusion matrix values. + Args: + compute_sample: when reducing, if ``True``, each sample's metric will be computed based on each confusion matrix first. + if ``False``, compute reduction on the confusion matrices first, defaults to ``False``. + reduction: define mode of reduction to the metrics, will only apply reduction on `not-nan` values, + available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, + ``"mean_channel"``, ``"sum_channel"``}, default to `self.reduction`. if "none", will not do reduction. + """ data = self.get_buffer() if not isinstance(data, torch.Tensor): @@ -113,11 +120,11 @@ def aggregate(self): results = [] for metric_name in self.metric_name: - if self.compute_sample: + if compute_sample or self.compute_sample: sub_confusion_matrix = compute_confusion_matrix_metric(metric_name, data) - f, not_nans = do_metric_reduction(sub_confusion_matrix, self.reduction) + f, not_nans = do_metric_reduction(sub_confusion_matrix, reduction or self.reduction) else: - f, not_nans = do_metric_reduction(data, self.reduction) + f, not_nans = do_metric_reduction(data, reduction or self.reduction) f = compute_confusion_matrix_metric(metric_name, f) if self.get_not_nans: results.append((f, not_nans)) diff --git a/monai/metrics/hausdorff_distance.py b/monai/metrics/hausdorff_distance.py index 5ce739d1f4..ab4ed0f821 100644 --- a/monai/metrics/hausdorff_distance.py +++ b/monai/metrics/hausdorff_distance.py @@ -42,7 +42,7 @@ class HausdorffDistanceMetric(CumulativeIterationMetric): percentile of the Hausdorff Distance rather than the maximum result will be achieved. Defaults to ``None``. directed: whether to calculate directed Hausdorff distance. Defaults to ``False``. - reduction: define the mode to reduce metrics, will only execute reduction on `not-nan` values, + reduction: define mode of reduction to the metrics, will only apply reduction on `not-nan` values, available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, ``"mean_channel"``, ``"sum_channel"``}, default to ``"mean"``. if "none", will not do reduction. get_not_nans: whether to return the `not_nans` count, if True, aggregate() returns (metric, not_nans). @@ -99,17 +99,22 @@ def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor): # type: ignor directed=self.directed, ) - def aggregate(self): + def aggregate(self, reduction: Union[MetricReduction, str, None] = None): # type: ignore """ Execute reduction logic for the output of `compute_hausdorff_distance`. + Args: + reduction: define mode of reduction to the metrics, will only apply reduction on `not-nan` values, + available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, + ``"mean_channel"``, ``"sum_channel"``}, default to `self.reduction`. if "none", will not do reduction. + """ data = self.get_buffer() if not isinstance(data, torch.Tensor): raise ValueError("the data to aggregate must be PyTorch Tensor.") # do metric reduction - f, not_nans = do_metric_reduction(data, self.reduction) + f, not_nans = do_metric_reduction(data, reduction or self.reduction) return (f, not_nans) if self.get_not_nans else f diff --git a/monai/metrics/meandice.py b/monai/metrics/meandice.py index 4179420804..aabb3b42a0 100644 --- a/monai/metrics/meandice.py +++ b/monai/metrics/meandice.py @@ -35,7 +35,7 @@ class DiceMetric(CumulativeIterationMetric): Args: include_background: whether to skip Dice computation on the first channel of the predicted output. Defaults to ``True``. - reduction: define the mode to reduce metrics, will only execute reduction on `not-nan` values, + reduction: define mode of reduction to the metrics, will only apply reduction on `not-nan` values, available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, ``"mean_channel"``, ``"sum_channel"``}, default to ``"mean"``. if "none", will not do reduction. get_not_nans: whether to return the `not_nans` count, if True, aggregate() returns (metric, not_nans). @@ -79,17 +79,22 @@ def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor): # type: ignor # compute dice (BxC) for each channel for each batch return compute_meandice(y_pred=y_pred, y=y, include_background=self.include_background) - def aggregate(self): + def aggregate(self, reduction: Union[MetricReduction, str, None] = None): # type: ignore """ Execute reduction logic for the output of `compute_meandice`. + Args: + reduction: define mode of reduction to the metrics, will only apply reduction on `not-nan` values, + available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, + ``"mean_channel"``, ``"sum_channel"``}, default to `self.reduction`. if "none", will not do reduction. + """ data = self.get_buffer() if not isinstance(data, torch.Tensor): raise ValueError("the data to aggregate must be PyTorch Tensor.") # do metric reduction - f, not_nans = do_metric_reduction(data, self.reduction) + f, not_nans = do_metric_reduction(data, reduction or self.reduction) return (f, not_nans) if self.get_not_nans else f diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py index 7782c4c468..fa8b3354de 100644 --- a/monai/metrics/metric.py +++ b/monai/metrics/metric.py @@ -274,7 +274,10 @@ def get_buffer(self): """ self._sync() - return self._synced_tensors[0] if len(self._synced_tensors) == 1 else self._synced_tensors + if self._synced_tensors is None: + return self._synced_tensors + buffers = [x.detach().clone() if isinstance(x, torch.Tensor) else x for x in self._synced_tensors] + return buffers[0] if len(buffers) == 1 else buffers class CumulativeIterationMetric(Cumulative, IterationMetric): diff --git a/monai/metrics/regression.py b/monai/metrics/regression.py index d5733eee97..62f5fa939e 100644 --- a/monai/metrics/regression.py +++ b/monai/metrics/regression.py @@ -30,7 +30,7 @@ class RegressionMetric(CumulativeIterationMetric): `y_preds` and `y` can be a list of channel-first Tensor (CHW[D]) or a batch-first Tensor (BCHW[D]). Args: - reduction: define the mode to reduce metrics, will only execute reduction on `not-nan` values, + reduction: define mode of reduction to the metrics, will only apply reduction on `not-nan` values, available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, ``"mean_channel"``, ``"sum_channel"``}, default to ``"mean"``. if "none", will not do reduction. get_not_nans: whether to return the `not_nans` count, if True, aggregate() returns (metric, not_nans). @@ -45,12 +45,18 @@ def __init__( self.reduction = reduction self.get_not_nans = get_not_nans - def aggregate(self): + def aggregate(self, reduction: Union[MetricReduction, str, None] = None): # type: ignore + """ + Args: + reduction: define mode of reduction to the metrics, will only apply reduction on `not-nan` values, + available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, + ``"mean_channel"``, ``"sum_channel"``}, default to `self.reduction`. if "none", will not do reduction. + """ data = self.get_buffer() if not isinstance(data, torch.Tensor): raise ValueError("the data to aggregate must be PyTorch Tensor.") - f, not_nans = do_metric_reduction(data, self.reduction) + f, not_nans = do_metric_reduction(data, reduction or self.reduction) return (f, not_nans) if self.get_not_nans else f def _check_shape(self, y_pred: torch.Tensor, y: torch.Tensor) -> None: diff --git a/monai/metrics/rocauc.py b/monai/metrics/rocauc.py index 221fc50272..bd3cdb1203 100644 --- a/monai/metrics/rocauc.py +++ b/monai/metrics/rocauc.py @@ -49,10 +49,14 @@ def __init__(self, average: Union[Average, str] = Average.MACRO) -> None: def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor): # type: ignore return y_pred, y - def aggregate(self): + def aggregate(self, average: Union[Average, str, None] = None): # type: ignore """ - As AUC metric needs to execute on the overall data, so usually users accumulate `y_pred` and `y` - of every iteration, then execute real computation and reduction on the accumulated data. + Typically `y_pred` and `y` are stored in the cumulative buffers at each iteration, + This function reads the buffers and computes the area under the ROC. + + Args: + average: {``"macro"``, ``"weighted"``, ``"micro"``, ``"none"``} + Type of averaging performed if not binary classification. Defaults to `self.average`. """ y_pred, y = self.get_buffer() @@ -60,7 +64,7 @@ def aggregate(self): if not isinstance(y_pred, torch.Tensor) or not isinstance(y, torch.Tensor): raise ValueError("y_pred and y must be PyTorch Tensor.") - return compute_roc_auc(y_pred=y_pred, y=y, average=self.average) + return compute_roc_auc(y_pred=y_pred, y=y, average=average or self.average) def _calculate(y_pred: torch.Tensor, y: torch.Tensor) -> float: diff --git a/monai/metrics/surface_dice.py b/monai/metrics/surface_dice.py index 5630af178d..f964b0472a 100644 --- a/monai/metrics/surface_dice.py +++ b/monai/metrics/surface_dice.py @@ -39,12 +39,9 @@ class SurfaceDiceMetric(CumulativeIterationMetric): distance_metric: The metric used to compute surface distances. One of [``"euclidean"``, ``"chessboard"``, ``"taxicab"``]. Defaults to ``"euclidean"``. - reduction: The mode to aggregate metrics. - One of [``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, ``"mean_channel"``, ``"sum_channel"``, - ``"none"``]. - Defaults to ``"mean"``. - If ``"none"`` is chosen, no aggregation will be performed. - The aggregation will ignore nan values. + reduction: define mode of reduction to the metrics, will only apply reduction on `not-nan` values, + available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, + ``"mean_channel"``, ``"sum_channel"``}, default to ``"mean"``. if "none", will not do reduction. get_not_nans: whether to return the `not_nans` count. Defaults to ``False``. `not_nans` is the number of batch samples for which not all class-specific NSD values were nan values. @@ -87,10 +84,15 @@ def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor): # type: ignor distance_metric=self.distance_metric, ) - def aggregate(self): + def aggregate(self, reduction: Union[MetricReduction, str, None] = None): # type: ignore r""" Aggregates the output of `_compute_tensor`. + Args: + reduction: define mode of reduction to the metrics, will only apply reduction on `not-nan` values, + available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, + ``"mean_channel"``, ``"sum_channel"``}, default to `self.reduction`. if "none", will not do reduction. + Returns: If `get_not_nans` is set to ``True``, this function returns the aggregated NSD and the `not_nans` count. If `get_not_nans` is set to ``False``, this function returns only the aggregated NSD. @@ -100,7 +102,7 @@ def aggregate(self): raise ValueError("the data to aggregate must be PyTorch Tensor.") # do metric reduction - f, not_nans = do_metric_reduction(data, self.reduction) + f, not_nans = do_metric_reduction(data, reduction or self.reduction) return (f, not_nans) if self.get_not_nans else f diff --git a/monai/metrics/surface_distance.py b/monai/metrics/surface_distance.py index 2c84bb9e7c..65651e3e51 100644 --- a/monai/metrics/surface_distance.py +++ b/monai/metrics/surface_distance.py @@ -37,7 +37,7 @@ class SurfaceDistanceMetric(CumulativeIterationMetric): `seg_pred` and `seg_gt`. Defaults to ``False``. distance_metric: : [``"euclidean"``, ``"chessboard"``, ``"taxicab"``] the metric used to compute surface distance. Defaults to ``"euclidean"``. - reduction: define the mode to reduce metrics, will only execute reduction on `not-nan` values, + reduction: define mode of reduction to the metrics, will only apply reduction on `not-nan` values, available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, ``"mean_channel"``, ``"sum_channel"``}, default to ``"mean"``. if "none", will not do reduction. get_not_nans: whether to return the `not_nans` count, if True, aggregate() returns (metric, not_nans). @@ -91,17 +91,22 @@ def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor): # type: ignor distance_metric=self.distance_metric, ) - def aggregate(self): + def aggregate(self, reduction: Union[MetricReduction, str, None] = None): # type: ignore """ Execute reduction logic for the output of `compute_average_surface_distance`. + Args: + reduction: define mode of reduction to the metrics, will only apply reduction on `not-nan` values, + available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, + ``"mean_channel"``, ``"sum_channel"``}, default to `self.reduction`. if "none", will not do reduction. + """ data = self.get_buffer() if not isinstance(data, torch.Tensor): raise ValueError("the data to aggregate must be PyTorch Tensor.") # do metric reduction - f, not_nans = do_metric_reduction(data, self.reduction) + f, not_nans = do_metric_reduction(data, reduction or self.reduction) return (f, not_nans) if self.get_not_nans else f diff --git a/monai/metrics/utils.py b/monai/metrics/utils.py index fc42100d6f..3e3a29d468 100644 --- a/monai/metrics/utils.py +++ b/monai/metrics/utils.py @@ -50,11 +50,10 @@ def do_metric_reduction(f: torch.Tensor, reduction: Union[MetricReduction, str] Args: f: a tensor that contains the calculated metric scores per batch and per class. The first two dims should be batch and class. - reduction: define the mode to reduce metrics, will only execute reduction on `not-nan` values, + reduction: define the mode to reduce metrics, will only apply reduction on `not-nan` values, available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, ``"mean_channel"``, ``"sum_channel"``}, default to ``"mean"``. if "none", return the input f tensor and not_nans. - Define the mode to reduce computation result of 1 batch data. Defaults to ``"mean"``. Raises: ValueError: When ``reduction`` is not one of diff --git a/monai/utils/enums.py b/monai/utils/enums.py index 1bfbdf824b..cbb2f053a5 100644 --- a/monai/utils/enums.py +++ b/monai/utils/enums.py @@ -141,7 +141,7 @@ class Average(Enum): class MetricReduction(Enum): """ - See also: :py:class:`monai.metrics.meandice.DiceMetric` + See also: :py:func:`monai.metrics.utils.do_metric_reduction` """ NONE = "none" diff --git a/tests/test_compute_confusion_matrix.py b/tests/test_compute_confusion_matrix.py index 1212715548..0e38357d12 100644 --- a/tests/test_compute_confusion_matrix.py +++ b/tests/test_compute_confusion_matrix.py @@ -262,7 +262,7 @@ def test_clf_with_nan(self, input_data, expected_value): metric = ConfusionMatrixMetric(**params) result = metric(**vals) np.testing.assert_allclose(result, expected_value, atol=1e-4, rtol=1e-4) - result, _ = metric.aggregate()[0] + result, _ = metric.aggregate(reduction="mean_channel")[0] expected_value, _ = do_metric_reduction(expected_value, "mean_channel") expected_value = compute_confusion_matrix_metric("tpr", expected_value) np.testing.assert_allclose(result, expected_value, atol=1e-4, rtol=1e-4) diff --git a/tests/test_compute_meandice.py b/tests/test_compute_meandice.py index ad66ed672a..c4daf7c5a9 100644 --- a/tests/test_compute_meandice.py +++ b/tests/test_compute_meandice.py @@ -192,9 +192,9 @@ def test_value_class(self, input_data, expected_value): vals = {} vals["y_pred"] = input_data.pop("y_pred") vals["y"] = input_data.pop("y") - dice_metric = DiceMetric(**input_data, reduction="none") + dice_metric = DiceMetric(**input_data) dice_metric(**vals) - result = dice_metric.aggregate() + result = dice_metric.aggregate(reduction="none") np.testing.assert_allclose(result.cpu().numpy(), expected_value, atol=1e-4) @parameterized.expand([TEST_CASE_4, TEST_CASE_5, TEST_CASE_6, TEST_CASE_7, TEST_CASE_8]) diff --git a/tests/test_compute_regression_metrics.py b/tests/test_compute_regression_metrics.py index 65ca73a4ec..cab1184812 100644 --- a/tests/test_compute_regression_metrics.py +++ b/tests/test_compute_regression_metrics.py @@ -75,9 +75,9 @@ def test_shape_reduction(self): out_tensor = mt.aggregate() self.assertTrue(len(out_tensor.shape) == 0) - mt = mt_fn(reduction="mean_channel") + mt = mt_fn(reduction="sum") # test reduction arg overriding mt(in_tensor, in_tensor) - out_tensor = mt.aggregate() + out_tensor = mt.aggregate(reduction="mean_channel") self.assertTrue(len(out_tensor.shape) == 1 and out_tensor.shape[0] == batch) mt = mt_fn(reduction="sum_channel") @@ -109,9 +109,9 @@ def test_compare_numpy(self): # check metrics for mt_fn, mt_fn_np in zip(metrics, metrics_np): - mt = mt_fn(reduction="mean") + mt = mt_fn() mt(y_pred=in_tensor_a, y=in_tensor_b) - out_tensor = mt.aggregate() + out_tensor = mt.aggregate(reduction="mean") out_np = mt_fn_np(y_pred=in_tensor_a.cpu().numpy(), y=in_tensor_b.cpu().numpy()) np.testing.assert_allclose(out_tensor.cpu().numpy(), out_np, atol=1e-4) diff --git a/tests/test_compute_roc_auc.py b/tests/test_compute_roc_auc.py index 887db08c7c..2c9135024f 100644 --- a/tests/test_compute_roc_auc.py +++ b/tests/test_compute_roc_auc.py @@ -141,6 +141,8 @@ def test_class_value(self, y_pred, y, softmax, to_onehot, average, expected_valu metric = ROCAUCMetric(average=average) metric(y_pred=y_pred, y=y) result = metric.aggregate() + np.testing.assert_allclose(expected_value, result, rtol=1e-5) + result = metric.aggregate(average=average) # test optional argument metric.reset() np.testing.assert_allclose(expected_value, result, rtol=1e-5) diff --git a/tests/test_cumulative.py b/tests/test_cumulative.py index 12a6a5e5e7..16f5c1d1f5 100644 --- a/tests/test_cumulative.py +++ b/tests/test_cumulative.py @@ -35,6 +35,8 @@ def test_multi(self): c.append() c.extend() self.assertEqual(c.get_buffer(), []) + c.get_buffer().append(1) + self.assertEqual(c.get_buffer(), []) # no in-place update for the buffer c.reset() diff --git a/tests/test_hausdorff_distance.py b/tests/test_hausdorff_distance.py index 79a2c84b37..44c011fe13 100644 --- a/tests/test_hausdorff_distance.py +++ b/tests/test_hausdorff_distance.py @@ -127,7 +127,7 @@ def test_value(self, input_data, expected_value): batch_seg_1 = seg_1.unsqueeze(0).unsqueeze(0).repeat([batch, n_class, 1, 1, 1]) batch_seg_2 = seg_2.unsqueeze(0).unsqueeze(0).repeat([batch, n_class, 1, 1, 1]) hd_metric(batch_seg_1, batch_seg_2) - result = hd_metric.aggregate() + result = hd_metric.aggregate(reduction="mean") expected_value_curr = expected_value[ct] np.testing.assert_allclose(expected_value_curr, result, rtol=1e-7) ct += 1 diff --git a/tests/test_surface_dice.py b/tests/test_surface_dice.py index 5252adafce..ccc6242e1e 100644 --- a/tests/test_surface_dice.py +++ b/tests/test_surface_dice.py @@ -274,7 +274,7 @@ def test_not_predicted_not_present(self): np.testing.assert_array_equal(res_classes, [[0, 0, np.nan]]) # test aggregation - res_bgr = sur_metric_bgr.aggregate() + res_bgr = sur_metric_bgr.aggregate(reduction="mean") np.testing.assert_equal(res_bgr, torch.tensor([1 / 3], dtype=torch.float64)) res = sur_metric.aggregate() np.testing.assert_equal(res, torch.tensor([0], dtype=torch.float64)) diff --git a/tests/test_surface_distance.py b/tests/test_surface_distance.py index edfe9e8663..4cd70b43aa 100644 --- a/tests/test_surface_distance.py +++ b/tests/test_surface_distance.py @@ -134,7 +134,7 @@ def test_nans(self, input_data): batch_seg_1 = [seg_1.unsqueeze(0)] batch_seg_2 = [seg_2.unsqueeze(0)] sur_metric(batch_seg_1, batch_seg_2) - result, not_nans = sur_metric.aggregate() + result, not_nans = sur_metric.aggregate(reduction="mean") np.testing.assert_allclose(0, result, rtol=1e-5) np.testing.assert_allclose(0, not_nans, rtol=1e-5)