From dd5dd5a0a740337e6444dfdbcc070516dc04659e Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Thu, 26 May 2022 21:18:22 -0400
Subject: [PATCH 01/18] add box -- mask converter

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/apps/detection/transforms/box_ops.py | 105 +++++++++++++++++++++
 1 file changed, 105 insertions(+)

diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py
index ef8d248c02..dcc38401ff 100644
--- a/monai/apps/detection/transforms/box_ops.py
+++ b/monai/apps/detection/transforms/box_ops.py
@@ -14,6 +14,7 @@
 
 import torch
 
+from monai.utils import look_up_option
 from monai.config.type_definitions import NdarrayOrTensor
 from monai.data.box_utils import COMPUTE_DTYPE, TO_REMOVE, get_spatial_dims
 from monai.transforms.utils import create_scale
@@ -186,3 +187,107 @@ def flip_boxes(
         flip_boxes[:, axis] = spatial_size[axis] - boxes[:, axis + spatial_dims] - TO_REMOVE
 
     return flip_boxes
+
+def convert_boxes_to_mask(
+    boxes: NdarrayOrTensor, 
+    spatial_size: Union[Sequence[int], int],
+    labels: Union[Sequence[int], torch.Tensor, np.ndarray],
+    bg_label: int = -1, 
+    ellipse_mask: bool = False
+    ) -> NdarrayOrTensor:
+    """
+    Convert box to int8 image, which has the same size with the input image,
+    Each channel represents one box. The box region will have intensity of labels, the background intensity is bg_label
+    Box mask may take a lot of memory, so we generate box mask as numpy array
+
+    Args:
+        bg_label: background labels for the output box image, just in case one of the fg labels is 0
+    """
+    spatial_dims: int = get_spatial_dims(boxes=boxes)
+    spatial_size = ensure_tuple_rep(spatial_size, spatial_dims)
+
+    labels = box_utils.convert_to_list(labels)
+    # if no box, return empty mask
+    if len(labels)==0:
+        return np.ones([1]+spatial_size,dtype=np.int8)*np.int8(bg_label)
+    
+    if bg_label >= min(labels):
+        raise ValueError(f"bg_label should be smaller than any foreground box labels. min(box_labels)={min(labels)}, while bg_label={bg_label}")
+
+    if len(labels) != boxes.shape[0]:
+        raise ValueError("Number of labels should equal to number of boxes.")
+    
+    boxes_mask = np.ones([len(labels)]+spatial_size,dtype=np.int8)*np.int8(bg_label)
+    boxes,_,_ = convert_data_type(boxes,dtype=np.int16)
+    for b in range(boxes.shape[0]):
+        # draw a circle/ball mask
+        box_size = [boxes[b,axis+spatial_dims].item()-boxes[b,axis].item() for axis in range(spatial_dims)]
+        if self.ellipse_mask:
+            max_box_size = max(box_size)
+            radius = max_box_size/2.0
+            center = (max_box_size-1)/2.0
+            boxes_only_mask = np.ones([max_box_size]*spatial_dims,dtype=np.int8)*np.int8(bg_label) # a square/cube mask
+            if spatial_dims == 2:
+                Y, X = np.ogrid[:max_box_size, :max_box_size]
+                dist_from_center = (X-center)**2 + (Y-center)**2
+            elif spatial_dims == 3:
+                Y, X, Z = np.ogrid[:max_box_size, :max_box_size, :max_box_size]
+                dist_from_center = (X-center)**2 + (Y-center)**2+ (Z-center)**2
+            boxes_only_mask[dist_from_center <= radius**2] = np.int8(labels[b])
+
+            # squeeze it to a ellipse/ellipsoid
+            zoom_factor = [box_size[axis]/float(max_box_size) for axis in range(spatial_dims)]
+            boxes_only_mask = scipy.ndimage.zoom(boxes_only_mask,zoom=zoom_factor,mode='nearest',prefilter=False)
+        else:
+            boxes_only_mask = np.ones(box_size,dtype=np.int8)*np.int8(labels[b]) 
+
+        # apply to global mask
+        if spatial_dims == 2:
+            boxes_mask[b, boxes[b,0]:boxes[b,spatial_dims], boxes[b,1]:boxes[b,1+spatial_dims] ] = boxes_only_mask
+        if spatial_dims == 3:
+            boxes_mask[b, boxes[b,0]:boxes[b,spatial_dims], boxes[b,1]:boxes[b,1+spatial_dims], boxes[b,2]:boxes[b,2+spatial_dims] ] = boxes_only_mask
+
+    # if isinstance(boxes, torch.Tensor):
+    #     boxes_mask = torch.from_numpy(boxes_mask)
+
+    return boxes_mask
+
+def convert_mask_to_boxes(boxes_mask: NdarrayOrTensor, bg_label: int = -1) -> NdarrayOrTensor:
+    """
+    Convert binary mask image to box, which has the same size with the input image
+
+    Args:
+        boxes_mask: sized (num_box, H, W) or (num_box, H, W, D)
+        bg_label: background labels for the boxes_mask
+
+    Return:
+        - boxes
+        - labelss
+    """
+    look_up_option(len(boxes_mask.shape), [3,4])
+    spatial_size = list(boxes_mask.shape[1:])
+    spatial_dims = box_utils.get_spatial_dims(spatial_size=spatial_size)
+
+    if isinstance(boxes_mask, torch.Tensor):
+        boxes_mask = boxes_mask.cpu().detach().numpy()
+    
+    boxes = []
+    labels = []
+    for b in range(boxes_mask.shape[0]):
+        fg_indices = np.nonzero(boxes_mask[b,...]-bg_label)
+        if fg_indices[0].shape[0] == 0:
+            continue
+        boxes_b = []
+        for fd_i in fg_indices:
+            boxes_b.append(min(fd_i)) # top left corner
+        for fd_i in fg_indices:
+            boxes_b.append(max(fd_i)+1) # bottom right corner
+        if spatial_dims == 2:
+            labels.append(boxes_mask[b,boxes_b[0],boxes_b[1]])
+        if spatial_dims == 3:
+            labels.append(boxes_mask[b,boxes_b[0],boxes_b[1],boxes_b[2]])
+        boxes.append(boxes_b)
+
+    if len(boxes) == 0:
+        return np.zeros([0,2*spatial_dims]), np.zeros([0])
+    return np.asarray(boxes),np.asarray(labels)

From c642be0b56350840666c1fadd2a6d9ece24b3a7c Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 00:05:27 -0400
Subject: [PATCH 02/18] add box mask transform

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/apps/detection/transforms/array.py      |  73 +++++++-
 monai/apps/detection/transforms/box_ops.py    | 167 +++++++++++-------
 monai/apps/detection/transforms/dictionary.py | 131 +++++++++++++-
 tests/test_box_transform.py                   |  44 ++++-
 4 files changed, 340 insertions(+), 75 deletions(-)

diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py
index b2587a213a..49ac70ea54 100644
--- a/monai/apps/detection/transforms/array.py
+++ b/monai/apps/detection/transforms/array.py
@@ -32,7 +32,14 @@
 from monai.utils.enums import TransformBackends
 from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
-from .box_ops import apply_affine_to_boxes, flip_boxes, resize_boxes, zoom_boxes
+from .box_ops import (
+    apply_affine_to_boxes,
+    convert_boxes_to_masks,
+    convert_masks_to_boxes,
+    flip_boxes,
+    resize_boxes,
+    zoom_boxes,
+)
 
 __all__ = [
     "ConvertBoxToStandardMode",
@@ -42,6 +49,7 @@
     "ResizeBox",
     "FlipBox",
     "ClipBoxToImage",
+    "BoxToBoxMask",
 ]
 
 
@@ -373,3 +381,66 @@ def __call__(  # type: ignore
             labels_t = deepcopy(labels_t[keep_t, ...])
             labels_clip_list.append(convert_to_dst_type(src=labels_t, dst=labels_tuple[i])[0])
         return boxes_clip, tuple(labels_clip_list)
+
+
+class BoxToBoxMask(Transform):
+    """
+    Convert box to int16 mask image, which has the same size with the input image.
+
+    Args:
+        bg_label: background labels for the output mask image, make sure it is smaller than any foreground(fg) labels.
+        ellipse_mask: whether to make the foreground mask an ellipse (2D) or ellipsoid (3D).
+    """
+    backend = [TransformBackends.NUMPY]
+
+    def __init__(self, bg_label: int = -1, ellipse_mask: bool = False) -> None:
+        self.bg_label = bg_label
+        self.ellipse_mask = ellipse_mask
+
+    def __call__(  # type: ignore
+        self, boxes: NdarrayOrTensor, labels: NdarrayOrTensor, spatial_size: Union[Sequence[int], int]
+    ) -> NdarrayOrTensor:
+        """
+        Args:
+            boxes: bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``.
+            labels: classification foreground(fg) labels corresponding to `boxes`, dtype should be int, sized (N,).
+            spatial_size: image spatial size.
+
+        Return:
+            - int16 array, sized (num_box, H, W). Each channel represents a box.
+                The foreground region in channel c has intensity of labels[c].
+                The background intensity is bg_label.
+        """
+        return convert_boxes_to_masks(boxes, labels, spatial_size, self.bg_label, self.ellipse_mask)
+
+
+class BoxMaskToBox(Transform):
+    """
+    Convert int16 mask image to box, which has the same size with the input image.
+    Pairs with :py:class:`monai.apps.detection.transforms.array.BoxToBoxMask`.
+    Please make sure the same ``bg_label`` is used when using the two transforms in pairs.
+
+    Args:
+        bg_label: background label of the input mask image, make sure it is smaller than any foreground(fg) labels.
+        box_dtype: output dtype for boxes
+        label_dtype: output dtype for labels
+    """
+    backend = [TransformBackends.NUMPY]
+
+    def __init__(self, bg_label: int = -1, box_dtype=torch.float32, label_dtype=torch.long) -> None:
+        self.bg_label = bg_label
+        self.box_dtype = box_dtype
+        self.label_dtype = label_dtype
+
+    def __call__(self, boxes_mask: NdarrayOrTensor) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]:
+        """
+        Args:
+            boxes_mask: int16 array, sized (num_box, H, W). Each channel represents a box.
+                The foreground region in channel c has intensity of labels[c].
+                The background intensity is bg_label.
+
+        Return:
+            - bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``.
+            - classification foreground(fg) labels, dtype should be int, sized (N,).
+        """
+        return convert_masks_to_boxes(boxes_mask, self.bg_label, self.box_dtype, self.label_dtype)
diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py
index dcc38401ff..37728ef07e 100644
--- a/monai/apps/detection/transforms/box_ops.py
+++ b/monai/apps/detection/transforms/box_ops.py
@@ -10,14 +10,16 @@
 # limitations under the License.
 
 from copy import deepcopy
-from typing import Optional, Sequence, Union
+from typing import Optional, Sequence, Tuple, Union
 
+import numpy as np
 import torch
+from scipy.ndimage import zoom as scipy_zoom
 
-from monai.utils import look_up_option
 from monai.config.type_definitions import NdarrayOrTensor
 from monai.data.box_utils import COMPUTE_DTYPE, TO_REMOVE, get_spatial_dims
 from monai.transforms.utils import create_scale
+from monai.utils import look_up_option
 from monai.utils.misc import ensure_tuple, ensure_tuple_rep
 from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
@@ -188,106 +190,139 @@ def flip_boxes(
 
     return flip_boxes
 
-def convert_boxes_to_mask(
-    boxes: NdarrayOrTensor, 
+
+def convert_boxes_to_masks(
+    boxes: NdarrayOrTensor,
+    labels: NdarrayOrTensor,
     spatial_size: Union[Sequence[int], int],
-    labels: Union[Sequence[int], torch.Tensor, np.ndarray],
-    bg_label: int = -1, 
-    ellipse_mask: bool = False
-    ) -> NdarrayOrTensor:
+    bg_label: int = -1,
+    ellipse_mask: bool = False,
+) -> NdarrayOrTensor:
     """
-    Convert box to int8 image, which has the same size with the input image,
-    Each channel represents one box. The box region will have intensity of labels, the background intensity is bg_label
-    Box mask may take a lot of memory, so we generate box mask as numpy array
+    Convert box to int16 mask image, which has the same size with the input image.
 
     Args:
-        bg_label: background labels for the output box image, just in case one of the fg labels is 0
+        boxes: bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``.
+        labels: classification foreground(fg) labels corresponding to `boxes`, dtype should be int, sized (N,).
+        spatial_size: image spatial size.
+        bg_label: background labels for the output mask image, make sure it is smaller than any fg labels.
+        ellipse_mask: whether to make the foreground mask an ellipse (2D) or ellipsoid (3D).
+
+    Return:
+        - int16 array, sized (num_box, H, W). Each channel represents a box.
+            The foreground region in channel c has intensity of labels[c].
+            The background intensity is bg_label.
     """
     spatial_dims: int = get_spatial_dims(boxes=boxes)
     spatial_size = ensure_tuple_rep(spatial_size, spatial_dims)
 
-    labels = box_utils.convert_to_list(labels)
     # if no box, return empty mask
-    if len(labels)==0:
-        return np.ones([1]+spatial_size,dtype=np.int8)*np.int8(bg_label)
-    
+    if len(labels) == 0:
+        boxes_mask_np = np.ones((1,) + spatial_size, dtype=np.int16) * np.int16(bg_label)
+        boxes_mask, *_ = convert_to_dst_type(src=boxes_mask_np, dst=boxes, dtype=torch.int16)
+        return boxes_mask
+
+    # bg_label should be smaller than labels
     if bg_label >= min(labels):
-        raise ValueError(f"bg_label should be smaller than any foreground box labels. min(box_labels)={min(labels)}, while bg_label={bg_label}")
+        raise ValueError(
+            f"bg_label should be smaller than any foreground box labels. \
+min(labels)={min(labels)}, while bg_label={bg_label}"
+        )
 
-    if len(labels) != boxes.shape[0]:
+    if labels.shape[0] != boxes.shape[0]:
         raise ValueError("Number of labels should equal to number of boxes.")
-    
-    boxes_mask = np.ones([len(labels)]+spatial_size,dtype=np.int8)*np.int8(bg_label)
-    boxes,_,_ = convert_data_type(boxes,dtype=np.int16)
-    for b in range(boxes.shape[0]):
-        # draw a circle/ball mask
-        box_size = [boxes[b,axis+spatial_dims].item()-boxes[b,axis].item() for axis in range(spatial_dims)]
-        if self.ellipse_mask:
+
+    # allocate memory for boxes_mask_np
+    boxes_mask_np = np.ones((labels.shape[0],) + spatial_size, dtype=np.int16) * np.int16(bg_label)
+
+    boxes_np: np.ndarray = convert_data_type(boxes, np.ndarray)[0]
+    boxes_np = np.round(boxes_np).astype(np.int32)
+    labels_np, *_ = convert_to_dst_type(src=labels, dst=boxes_np)
+    for b in range(boxes_np.shape[0]):
+        # generate a foreground mask
+        box_size = [boxes_np[b, axis + spatial_dims] - boxes_np[b, axis] for axis in range(spatial_dims)]
+        if ellipse_mask:
+            # initialize a square/cube mask
             max_box_size = max(box_size)
-            radius = max_box_size/2.0
-            center = (max_box_size-1)/2.0
-            boxes_only_mask = np.ones([max_box_size]*spatial_dims,dtype=np.int8)*np.int8(bg_label) # a square/cube mask
+            radius = max_box_size / 2.0
+            center = (max_box_size - 1) / 2.0
+            boxes_only_mask = np.ones([max_box_size] * spatial_dims, dtype=np.int16) * np.int16(bg_label)
+            # apply label intensity to circle/ball foreground
             if spatial_dims == 2:
-                Y, X = np.ogrid[:max_box_size, :max_box_size]
-                dist_from_center = (X-center)**2 + (Y-center)**2
+                grid_y, grid_x = np.ogrid[:max_box_size, :max_box_size]
+                dist_from_center = (grid_x - center) ** 2 + (grid_y - center) ** 2
             elif spatial_dims == 3:
-                Y, X, Z = np.ogrid[:max_box_size, :max_box_size, :max_box_size]
-                dist_from_center = (X-center)**2 + (Y-center)**2+ (Z-center)**2
-            boxes_only_mask[dist_from_center <= radius**2] = np.int8(labels[b])
-
-            # squeeze it to a ellipse/ellipsoid
-            zoom_factor = [box_size[axis]/float(max_box_size) for axis in range(spatial_dims)]
-            boxes_only_mask = scipy.ndimage.zoom(boxes_only_mask,zoom=zoom_factor,mode='nearest',prefilter=False)
+                grid_y, grid_x, grid_z = np.ogrid[:max_box_size, :max_box_size, :max_box_size]
+                dist_from_center = (grid_x - center) ** 2 + (grid_y - center) ** 2 + (grid_z - center) ** 2
+            boxes_only_mask[dist_from_center <= radius**2] = np.int16(labels_np[b])
+            # squeeze it to a ellipse/ellipsoid mask
+            zoom_factor = [box_size[axis] / float(max_box_size) for axis in range(spatial_dims)]
+            boxes_only_mask = scipy_zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False)
         else:
-            boxes_only_mask = np.ones(box_size,dtype=np.int8)*np.int8(labels[b]) 
+            # generate a rect mask
+            boxes_only_mask = np.ones(box_size, dtype=np.int16) * np.int16(labels_np[b])
 
         # apply to global mask
         if spatial_dims == 2:
-            boxes_mask[b, boxes[b,0]:boxes[b,spatial_dims], boxes[b,1]:boxes[b,1+spatial_dims] ] = boxes_only_mask
+            boxes_mask_np[
+                b, boxes_np[b, 0] : boxes_np[b, spatial_dims], boxes_np[b, 1] : boxes_np[b, 1 + spatial_dims]
+            ] = boxes_only_mask
         if spatial_dims == 3:
-            boxes_mask[b, boxes[b,0]:boxes[b,spatial_dims], boxes[b,1]:boxes[b,1+spatial_dims], boxes[b,2]:boxes[b,2+spatial_dims] ] = boxes_only_mask
+            boxes_mask_np[
+                b,
+                boxes_np[b, 0] : boxes_np[b, spatial_dims],
+                boxes_np[b, 1] : boxes_np[b, 1 + spatial_dims],
+                boxes_np[b, 2] : boxes_np[b, 2 + spatial_dims],
+            ] = boxes_only_mask
 
-    # if isinstance(boxes, torch.Tensor):
-    #     boxes_mask = torch.from_numpy(boxes_mask)
+    return convert_to_dst_type(src=boxes_mask_np, dst=boxes, dtype=torch.int16)[0]
 
-    return boxes_mask
 
-def convert_mask_to_boxes(boxes_mask: NdarrayOrTensor, bg_label: int = -1) -> NdarrayOrTensor:
+def convert_masks_to_boxes(
+    boxes_mask: NdarrayOrTensor, bg_label: int = -1, box_dtype=torch.float32, label_dtype=torch.long
+) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]:
     """
-    Convert binary mask image to box, which has the same size with the input image
+    Convert int16 mask image to box, which has the same size with the input image
 
     Args:
-        boxes_mask: sized (num_box, H, W) or (num_box, H, W, D)
+        boxes_mask: int16 array, sized (num_box, H, W). Each channel represents a box.
+            The foreground region in channel c has intensity of labels[c].
+            The background intensity is bg_label.
         bg_label: background labels for the boxes_mask
+        box_dtype: output dtype for boxes
+        label_dtype: output dtype for labels
 
     Return:
-        - boxes
-        - labelss
+        - bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``.
+        - classification foreground(fg) labels, dtype should be int, sized (N,).
     """
-    look_up_option(len(boxes_mask.shape), [3,4])
+    look_up_option(len(boxes_mask.shape), [3, 4])
     spatial_size = list(boxes_mask.shape[1:])
-    spatial_dims = box_utils.get_spatial_dims(spatial_size=spatial_size)
-
-    if isinstance(boxes_mask, torch.Tensor):
-        boxes_mask = boxes_mask.cpu().detach().numpy()
-    
-    boxes = []
-    labels = []
-    for b in range(boxes_mask.shape[0]):
-        fg_indices = np.nonzero(boxes_mask[b,...]-bg_label)
+    spatial_dims = get_spatial_dims(spatial_size=spatial_size)
+
+    boxes_mask_np, *_ = convert_data_type(boxes_mask, np.ndarray)
+
+    boxes_list = []
+    labels_list = []
+    for b in range(boxes_mask_np.shape[0]):
+        fg_indices = np.nonzero(boxes_mask_np[b, ...] - bg_label)
         if fg_indices[0].shape[0] == 0:
             continue
         boxes_b = []
         for fd_i in fg_indices:
-            boxes_b.append(min(fd_i)) # top left corner
+            boxes_b.append(min(fd_i))  # top left corner
         for fd_i in fg_indices:
-            boxes_b.append(max(fd_i)+1) # bottom right corner
+            boxes_b.append(max(fd_i) + 1 - TO_REMOVE)  # bottom right corner
         if spatial_dims == 2:
-            labels.append(boxes_mask[b,boxes_b[0],boxes_b[1]])
+            labels_list.append(boxes_mask_np[b, boxes_b[0], boxes_b[1]])
         if spatial_dims == 3:
-            labels.append(boxes_mask[b,boxes_b[0],boxes_b[1],boxes_b[2]])
-        boxes.append(boxes_b)
+            labels_list.append(boxes_mask_np[b, boxes_b[0], boxes_b[1], boxes_b[2]])
+        boxes_list.append(boxes_b)
 
-    if len(boxes) == 0:
-        return np.zeros([0,2*spatial_dims]), np.zeros([0])
-    return np.asarray(boxes),np.asarray(labels)
+    if len(boxes_list) == 0:
+        boxes_np, labels_np = np.zeros([0, 2 * spatial_dims]), np.zeros([0])
+    else:
+        boxes_np, labels_np = np.asarray(boxes_list), np.asarray(labels_list)
+    boxes, *_ = convert_to_dst_type(src=boxes_np, dst=boxes_mask, dtype=box_dtype)
+    labels, *_ = convert_to_dst_type(src=labels_np, dst=boxes_mask, dtype=label_dtype)
+    return boxes, labels
diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py
index b802ebcfe2..a5b50e1557 100644
--- a/monai/apps/detection/transforms/dictionary.py
+++ b/monai/apps/detection/transforms/dictionary.py
@@ -24,6 +24,8 @@
 
 from monai.apps.detection.transforms.array import (
     AffineBox,
+    BoxMaskToBox,
+    BoxToBoxMask,
     ClipBoxToImage,
     ConvertBoxMode,
     ConvertBoxToStandardMode,
@@ -32,7 +34,7 @@
 )
 from monai.config import KeysCollection
 from monai.config.type_definitions import NdarrayOrTensor
-from monai.data.box_utils import BoxMode
+from monai.data.box_utils import COMPUTE_DTYPE, BoxMode
 from monai.data.utils import orientation_ras_lps
 from monai.transforms import Flip, RandFlip, RandZoom, SpatialPad, Zoom
 from monai.transforms.inverse import InvertibleTransform
@@ -66,6 +68,12 @@
     "ClipBoxToImaged",
     "ClipBoxToImageD",
     "ClipBoxToImageDict",
+    "BoxToBoxMaskd",
+    "BoxToBoxMaskD",
+    "BoxToBoxMaskDict",
+    "BoxMaskToBoxd",
+    "BoxMaskToBoxD",
+    "BoxMaskToBoxDict"
 ]
 
 DEFAULT_POST_FIX = PostFix.meta()
@@ -246,7 +254,8 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N
         # when convert boxes from world coordinate to image coordinate,
         # we apply inverse affine transform
         affine_t, *_ = convert_data_type(affine, torch.Tensor)
-        inv_affine_t = torch.inverse(affine_t)
+        # torch.inverse should not run in half precision
+        inv_affine_t = torch.inverse(affine_t.to(COMPUTE_DTYPE))
 
         for key in self.key_iterator(d):
             self.push_transform(d, key, extra_info={"affine": affine})
@@ -758,6 +767,122 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N
         return d
 
 
+class BoxToBoxMaskd(MapTransform):
+    """
+    Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.BoxToBoxMask`.
+    Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxMaskToBoxd` .
+    Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs.
+    The output d[box_mask_key] will have background intensity 0, since the following operations may pad 0 on the border.
+
+    Args:
+        box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``.
+        box_mask_keys: Keys to store output box mask results for transformation. Same length with ``box_keys``.
+        label_keys: Keys that represent the labels corresponding to the ``box_keys``. Same length with ``box_keys``.
+        box_ref_image_keys: Keys that represents the reference images to which ``box_keys`` are attached.
+        min_fg_label: min foreground box label.
+        ellipse_mask: bool.
+            If True, it assumes the object shape is close to ellipse or ellipsoid.
+            If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box.
+            If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True
+            See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021.
+        allow_missing_keys: don't raise exception if key is missing.
+
+    Example:
+        .. code-block:: python
+
+            BoxToBoxMaskd(
+                box_keys="boxes", box_mask_keys="box_mask",
+                box_ref_image_keys="image", label_keys="labels",
+                min_fg_label=0, ellipse_mask=True
+            )
+    """
+
+    def __init__(
+        self,
+        box_keys: KeysCollection,
+        box_mask_keys: KeysCollection,
+        label_keys: KeysCollection,
+        box_ref_image_keys: KeysCollection,
+        min_fg_label: int,
+        ellipse_mask: bool = False,
+        allow_missing_keys: bool = False,
+    ) -> None:
+        super().__init__(box_keys, allow_missing_keys)
+        self.box_keys = ensure_tuple(box_keys)
+        self.label_keys = ensure_tuple(label_keys)
+        self.box_mask_keys = ensure_tuple(box_mask_keys)
+        if not len(self.label_keys) == len(self.box_keys) == len(self.box_mask_keys):
+            raise ValueError("Please make sure len(label_keys)==len(box_keys)==len(box_mask_keys)!")
+        self.box_ref_image_keys = ensure_tuple_rep(box_ref_image_keys, len(self.box_keys))
+        self.bg_label = min_fg_label - 1  # make sure background label is always smaller than fg labels.
+        self.converter = BoxToBoxMask(bg_label=self.bg_label, ellipse_mask=ellipse_mask)
+
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
+        d = dict(data)
+
+        for box_key, label_key, box_mask_key, box_ref_image_key in zip(
+            self.box_keys, self.label_keys, self.box_mask_keys, self.box_ref_image_keys
+        ):
+            spatial_size = d[box_ref_image_key].shape[1:]
+            d[box_mask_key] = self.converter(d[box_key], d[label_key], spatial_size)
+            # make box mask background intensity to be 0, since the following operations may pad 0 on the border.
+            d[box_mask_key] -= self.bg_label
+        return d
+
+
+class BoxMaskToBoxd(MapTransform):
+    """
+    Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.BoxMaskToBox`.
+    Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxToBoxMaskd` .
+    Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs.
+
+    Args:
+        box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``.
+        box_mask_keys: Keys to store output box mask results for transformation. Same length with ``box_keys``.
+        label_keys: Keys that represent the labels corresponding to the ``box_keys``. Same length with ``box_keys``.
+        min_fg_label: min foreground box label.
+        box_dtype: output dtype for box_keys
+        label_dtype: output dtype for label_keys
+        allow_missing_keys: don't raise exception if key is missing.
+
+    Example:
+        .. code-block:: python
+
+            BoxMaskToBoxd(
+                box_keys="boxes", box_mask_keys="box_mask",
+                label_keys="labels", min_fg_label=0
+            )
+    """
+
+    def __init__(
+        self,
+        box_keys: KeysCollection,
+        box_mask_keys: KeysCollection,
+        label_keys: KeysCollection,
+        min_fg_label: int,
+        box_dtype=torch.float32,
+        label_dtype=torch.long,
+        allow_missing_keys: bool = False,
+    ) -> None:
+        super().__init__(box_keys, allow_missing_keys)
+        self.box_keys = ensure_tuple(box_keys)
+        self.label_keys = ensure_tuple(label_keys)
+        self.box_mask_keys = ensure_tuple(box_mask_keys)
+        if not len(self.label_keys) == len(self.box_keys) == len(self.box_mask_keys):
+            raise ValueError("Please make sure len(label_keys)==len(box_keys)==len(box_mask_keys)!")
+        self.bg_label = min_fg_label - 1  # make sure background label is always smaller than fg labels.
+        self.converter = BoxMaskToBox(bg_label=self.bg_label, box_dtype=box_dtype, label_dtype=label_dtype)
+        self.box_dtype = box_dtype
+
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
+        d = dict(data)
+
+        for box_key, label_key, box_mask_key in zip(self.box_keys, self.label_keys, self.box_mask_keys):
+            d[box_mask_key] += self.bg_label  # pairs with the operation in BoxToBoxMaskd
+            d[box_key], d[label_key] = self.converter(d[box_mask_key])
+        return d
+
+
 ConvertBoxModeD = ConvertBoxModeDict = ConvertBoxModed
 ConvertBoxToStandardModeD = ConvertBoxToStandardModeDict = ConvertBoxToStandardModed
 ZoomBoxD = ZoomBoxDict = ZoomBoxd
@@ -766,3 +891,5 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N
 FlipBoxD = FlipBoxDict = FlipBoxd
 RandFlipBoxD = RandFlipBoxDict = RandFlipBoxd
 ClipBoxToImageD = ClipBoxToImageDict = ClipBoxToImaged
+BoxToBoxMaskD = BoxToBoxMaskDict = BoxToBoxMaskd
+BoxMaskToBoxD = BoxMaskToBoxDict = BoxMaskToBoxd
diff --git a/tests/test_box_transform.py b/tests/test_box_transform.py
index f290ce5726..86f666e5f9 100644
--- a/tests/test_box_transform.py
+++ b/tests/test_box_transform.py
@@ -17,6 +17,8 @@
 
 from monai.apps.detection.transforms.dictionary import (
     AffineBoxToImageCoordinated,
+    BoxMaskToBoxd,
+    BoxToBoxMaskd,
     ClipBoxToImaged,
     ConvertBoxModed,
     FlipBoxd,
@@ -27,8 +29,7 @@
 from monai.transforms import CastToTyped, Invertd
 from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TESTS = []
-
+TESTS_3D = []
 boxes = [[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 3, 3], [0, 1, 1, 2, 3, 4]]
 labels = [1, 1, 0]
 scores = [[0.2, 0.8], [0.3, 0.7], [0.6, 0.4]]
@@ -36,7 +37,7 @@
 image = np.zeros(image_size)
 
 for p in TEST_NDARRAYS:
-    TESTS.append(
+    TESTS_3D.append(
         [
             {"box_keys": "boxes", "dst_mode": "xyzwhd"},
             {"boxes": p(boxes), "image": p(image), "labels": p(labels), "scores": p(scores)},
@@ -48,10 +49,41 @@
         ]
     )
 
+TESTS_2D = []
+boxes = [[0, 1, 2, 2], [0, 0, 1, 1]]
+labels = [1, 0]
+image_size = [1, 2, 2]
+image = np.zeros(image_size)
+for p in TEST_NDARRAYS:
+    TESTS_2D.append(
+        [{"boxes": p(boxes), "image": p(image), "labels": p(labels)}, p([[[0, 2], [0, 2]], [[1, 0], [0, 0]]])]
+    )
+
 
 class TestBoxTransform(unittest.TestCase):
-    @parameterized.expand(TESTS)
-    def test_value(
+    @parameterized.expand(TESTS_2D)
+    def test_value_2d(self, data, expected_mask):
+        test_dtype = [torch.float32, torch.float16]
+        for dtype in test_dtype:
+            data = CastToTyped(keys=["image", "boxes"], dtype=dtype)(data)
+            transform_to_mask = BoxToBoxMaskd(
+                box_keys="boxes",
+                box_mask_keys="box_mask",
+                box_ref_image_keys="image",
+                label_keys="labels",
+                min_fg_label=0,
+                ellipse_mask=True,
+            )
+            transform_to_box = BoxMaskToBoxd(
+                box_keys="boxes", box_mask_keys="box_mask", label_keys="labels", min_fg_label=0
+            )
+            data_mask = transform_to_mask(data)
+            assert_allclose(data_mask["box_mask"], expected_mask, type_test=True, device_test=True, atol=1e-3)
+            data_back = transform_to_box(data_mask)
+            assert_allclose(data_back["boxes"], data["boxes"], type_test=False, device_test=False, atol=1e-3)
+
+    @parameterized.expand(TESTS_3D)
+    def test_value_3d(
         self,
         keys,
         data,
@@ -61,7 +93,7 @@ def test_value(
         expected_flip_result,
         expected_clip_result,
     ):
-        test_dtype = [torch.float32]
+        test_dtype = [torch.float16]
         for dtype in test_dtype:
             data = CastToTyped(keys=["image", "boxes"], dtype=dtype)(data)
             # test ConvertBoxToStandardModed

From 93f584479d6e132cc7bee4781c25ff16841c4626 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 00:21:21 -0400
Subject: [PATCH 03/18] add examples

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/apps/detection/transforms/dictionary.py | 60 ++++++++++++++++---
 1 file changed, 53 insertions(+), 7 deletions(-)

diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py
index a5b50e1557..056f4aed19 100644
--- a/monai/apps/detection/transforms/dictionary.py
+++ b/monai/apps/detection/transforms/dictionary.py
@@ -774,6 +774,12 @@ class BoxToBoxMaskd(MapTransform):
     Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs.
     The output d[box_mask_key] will have background intensity 0, since the following operations may pad 0 on the border.
 
+    This is the general solution for transforms that need to be applied on images and boxes simultaneously. 
+    It is performed with the following steps. 
+    1) use BoxToBoxMaskd to covert boxes and labels to box_masks; 
+    2) do transforms, e.g., rotation or cropping, on images and box_masks together;
+    3) use BoxMaskToBoxd to convert box_masks back to boxes and labels.
+
     Args:
         box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``.
         box_mask_keys: Keys to store output box mask results for transformation. Same length with ``box_keys``.
@@ -790,11 +796,28 @@ class BoxToBoxMaskd(MapTransform):
     Example:
         .. code-block:: python
 
-            BoxToBoxMaskd(
-                box_keys="boxes", box_mask_keys="box_mask",
-                box_ref_image_keys="image", label_keys="labels",
-                min_fg_label=0, ellipse_mask=True
+            # This code snippet creates transforms (random rotation and croppping) on boxes, labels, and images together.
+            import numpy as np
+            from monai.transforms import Compose, RandRotated, RandSpatialCropd
+            transforms = Compose(
+                [
+                    BoxToBoxMaskd(
+                        box_keys="boxes", label_keys="labels",
+                        box_mask_keys="box_mask", box_ref_image_keys="image", 
+                        min_fg_label=0, ellipse_mask=True
+                    ),
+                    RandRotated(keys=["image","box_mask"],mode=["nearest","nearest"],
+                        prob=0.2,range_x=np.pi/6,range_y=np.pi/6,range_z=np.pi/6,
+                        keep_size=True,padding_mode="zeros"
+                    ),
+                    RandSpatialCropd(keys=["image","box_mask"],roi_size=128, random_size=False),
+                    BoxMaskToBoxd(
+                        box_mask_keys="box_mask", box_keys="boxes", 
+                        label_keys="labels", min_fg_label=0
+                    )
+                ]
             )
+            
     """
 
     def __init__(
@@ -836,6 +859,12 @@ class BoxMaskToBoxd(MapTransform):
     Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxToBoxMaskd` .
     Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs.
 
+    This is the general solution for transforms that need to be applied on images and boxes simultaneously. 
+    It is performed with the following steps. 
+    1) use BoxToBoxMaskd to covert boxes and labels to box_masks; 
+    2) do transforms, e.g., rotation or cropping, on images and box_masks together;
+    3) use BoxMaskToBoxd to convert box_masks back to boxes and labels.
+
     Args:
         box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``.
         box_mask_keys: Keys to store output box mask results for transformation. Same length with ``box_keys``.
@@ -848,9 +877,26 @@ class BoxMaskToBoxd(MapTransform):
     Example:
         .. code-block:: python
 
-            BoxMaskToBoxd(
-                box_keys="boxes", box_mask_keys="box_mask",
-                label_keys="labels", min_fg_label=0
+            # This code snippet creates transforms (random rotation and croppping) on boxes, labels, and images together.
+            import numpy as np
+            from monai.transforms import Compose, RandRotated, RandSpatialCropd
+            transforms = Compose(
+                [
+                    BoxToBoxMaskd(
+                        box_keys="boxes", label_keys="labels",
+                        box_mask_keys="box_mask", box_ref_image_keys="image", 
+                        min_fg_label=0, ellipse_mask=True
+                    ),
+                    RandRotated(keys=["image","box_mask"],mode=["nearest","nearest"],
+                        prob=0.2,range_x=np.pi/6,range_y=np.pi/6,range_z=np.pi/6,
+                        keep_size=True,padding_mode="zeros"
+                    ),
+                    RandSpatialCropd(keys=["image","box_mask"],roi_size=128, random_size=False),
+                    BoxMaskToBoxd(
+                        box_mask_keys="box_mask", box_keys="boxes", 
+                        label_keys="labels", min_fg_label=0
+                    )
+                ]
             )
     """
 

From 0e97bf65b11ec72322c466647d9ed8f27ab50415 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 27 May 2022 04:21:50 +0000
Subject: [PATCH 04/18] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 monai/apps/detection/transforms/dictionary.py | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py
index 056f4aed19..454a1ec8d8 100644
--- a/monai/apps/detection/transforms/dictionary.py
+++ b/monai/apps/detection/transforms/dictionary.py
@@ -774,9 +774,9 @@ class BoxToBoxMaskd(MapTransform):
     Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs.
     The output d[box_mask_key] will have background intensity 0, since the following operations may pad 0 on the border.
 
-    This is the general solution for transforms that need to be applied on images and boxes simultaneously. 
-    It is performed with the following steps. 
-    1) use BoxToBoxMaskd to covert boxes and labels to box_masks; 
+    This is the general solution for transforms that need to be applied on images and boxes simultaneously.
+    It is performed with the following steps.
+    1) use BoxToBoxMaskd to covert boxes and labels to box_masks;
     2) do transforms, e.g., rotation or cropping, on images and box_masks together;
     3) use BoxMaskToBoxd to convert box_masks back to boxes and labels.
 
@@ -803,7 +803,7 @@ class BoxToBoxMaskd(MapTransform):
                 [
                     BoxToBoxMaskd(
                         box_keys="boxes", label_keys="labels",
-                        box_mask_keys="box_mask", box_ref_image_keys="image", 
+                        box_mask_keys="box_mask", box_ref_image_keys="image",
                         min_fg_label=0, ellipse_mask=True
                     ),
                     RandRotated(keys=["image","box_mask"],mode=["nearest","nearest"],
@@ -812,12 +812,12 @@ class BoxToBoxMaskd(MapTransform):
                     ),
                     RandSpatialCropd(keys=["image","box_mask"],roi_size=128, random_size=False),
                     BoxMaskToBoxd(
-                        box_mask_keys="box_mask", box_keys="boxes", 
+                        box_mask_keys="box_mask", box_keys="boxes",
                         label_keys="labels", min_fg_label=0
                     )
                 ]
             )
-            
+
     """
 
     def __init__(
@@ -859,9 +859,9 @@ class BoxMaskToBoxd(MapTransform):
     Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxToBoxMaskd` .
     Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs.
 
-    This is the general solution for transforms that need to be applied on images and boxes simultaneously. 
-    It is performed with the following steps. 
-    1) use BoxToBoxMaskd to covert boxes and labels to box_masks; 
+    This is the general solution for transforms that need to be applied on images and boxes simultaneously.
+    It is performed with the following steps.
+    1) use BoxToBoxMaskd to covert boxes and labels to box_masks;
     2) do transforms, e.g., rotation or cropping, on images and box_masks together;
     3) use BoxMaskToBoxd to convert box_masks back to boxes and labels.
 
@@ -884,7 +884,7 @@ class BoxMaskToBoxd(MapTransform):
                 [
                     BoxToBoxMaskd(
                         box_keys="boxes", label_keys="labels",
-                        box_mask_keys="box_mask", box_ref_image_keys="image", 
+                        box_mask_keys="box_mask", box_ref_image_keys="image",
                         min_fg_label=0, ellipse_mask=True
                     ),
                     RandRotated(keys=["image","box_mask"],mode=["nearest","nearest"],
@@ -893,7 +893,7 @@ class BoxMaskToBoxd(MapTransform):
                     ),
                     RandSpatialCropd(keys=["image","box_mask"],roi_size=128, random_size=False),
                     BoxMaskToBoxd(
-                        box_mask_keys="box_mask", box_keys="boxes", 
+                        box_mask_keys="box_mask", box_keys="boxes",
                         label_keys="labels", min_fg_label=0
                     )
                 ]

From cc6a6833fb9d0221e217aaf229a8baa5f3678b61 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 00:22:20 -0400
Subject: [PATCH 05/18] add examples

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/apps/detection/transforms/dictionary.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py
index 056f4aed19..bf1276b858 100644
--- a/monai/apps/detection/transforms/dictionary.py
+++ b/monai/apps/detection/transforms/dictionary.py
@@ -796,9 +796,9 @@ class BoxToBoxMaskd(MapTransform):
     Example:
         .. code-block:: python
 
-            # This code snippet creates transforms (random rotation and croppping) on boxes, labels, and images together.
+            # This code snippet creates transforms (random rotation and cropping) on boxes, labels, and image together.
             import numpy as np
-            from monai.transforms import Compose, RandRotated, RandSpatialCropd
+            from monai.transforms import Compose, RandRotated, RandSpatialCropd, DeleteItemsd
             transforms = Compose(
                 [
                     BoxToBoxMaskd(
@@ -815,6 +815,7 @@ class BoxToBoxMaskd(MapTransform):
                         box_mask_keys="box_mask", box_keys="boxes", 
                         label_keys="labels", min_fg_label=0
                     )
+                    DeleteItemsd(keys=["box_mask"]),
                 ]
             )
             
@@ -879,7 +880,7 @@ class BoxMaskToBoxd(MapTransform):
 
             # This code snippet creates transforms (random rotation and croppping) on boxes, labels, and images together.
             import numpy as np
-            from monai.transforms import Compose, RandRotated, RandSpatialCropd
+            from monai.transforms import Compose, RandRotated, RandSpatialCropd, DeleteItemsd
             transforms = Compose(
                 [
                     BoxToBoxMaskd(
@@ -896,6 +897,7 @@ class BoxMaskToBoxd(MapTransform):
                         box_mask_keys="box_mask", box_keys="boxes", 
                         label_keys="labels", min_fg_label=0
                     )
+                    DeleteItemsd(keys=["box_mask"]),
                 ]
             )
     """

From 091401e797909671d8f3f9a8e03c0a199086b429 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 00:33:54 -0400
Subject: [PATCH 06/18] add import

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/apps/detection/transforms/array.py      | 2 ++
 monai/apps/detection/transforms/box_ops.py    | 7 ++++---
 monai/apps/detection/transforms/dictionary.py | 2 +-
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py
index 49ac70ea54..a44b2f5d52 100644
--- a/monai/apps/detection/transforms/array.py
+++ b/monai/apps/detection/transforms/array.py
@@ -391,6 +391,7 @@ class BoxToBoxMask(Transform):
         bg_label: background labels for the output mask image, make sure it is smaller than any foreground(fg) labels.
         ellipse_mask: whether to make the foregound mask an ellipse (2D) or ellipsoid (3D).
     """
+
     backend = [TransformBackends.NUMPY]
 
     def __init__(self, bg_label: int = -1, ellipse_mask: bool = False) -> None:
@@ -425,6 +426,7 @@ class BoxMaskToBox(Transform):
         box_dtype: output dtype for boxes
         label_dtype: output dtype for labels
     """
+
     backend = [TransformBackends.NUMPY]
 
     def __init__(self, bg_label: int = -1, box_dtype=torch.float32, label_dtype=torch.long) -> None:
diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py
index 37728ef07e..e5606cd3af 100644
--- a/monai/apps/detection/transforms/box_ops.py
+++ b/monai/apps/detection/transforms/box_ops.py
@@ -14,15 +14,16 @@
 
 import numpy as np
 import torch
-from scipy.ndimage import zoom as scipy_zoom
 
 from monai.config.type_definitions import NdarrayOrTensor
 from monai.data.box_utils import COMPUTE_DTYPE, TO_REMOVE, get_spatial_dims
 from monai.transforms.utils import create_scale
-from monai.utils import look_up_option
+from monai.utils import look_up_option, optional_import
 from monai.utils.misc import ensure_tuple, ensure_tuple_rep
 from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
+scipy_ndimage, _ = optional_import("scipy.ndimage")
+
 
 def _apply_affine_to_points(points: torch.Tensor, affine: torch.Tensor, include_shift: bool = True) -> torch.Tensor:
     """
@@ -257,7 +258,7 @@ def convert_boxes_to_masks(
             boxes_only_mask[dist_from_center <= radius**2] = np.int16(labels_np[b])
             # squeeze it to a ellipse/ellipsoid mask
             zoom_factor = [box_size[axis] / float(max_box_size) for axis in range(spatial_dims)]
-            boxes_only_mask = scipy_zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False)
+            boxes_only_mask = scipy_ndimage.zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False)
         else:
             # generate a rect mask
             boxes_only_mask = np.ones(box_size, dtype=np.int16) * np.int16(labels_np[b])
diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py
index 1cdb4dec55..39aebe5bee 100644
--- a/monai/apps/detection/transforms/dictionary.py
+++ b/monai/apps/detection/transforms/dictionary.py
@@ -73,7 +73,7 @@
     "BoxToBoxMaskDict",
     "BoxMaskToBoxd",
     "BoxMaskToBoxD",
-    "BoxMaskToBoxDict"
+    "BoxMaskToBoxDict",
 ]
 
 DEFAULT_POST_FIX = PostFix.meta()

From 49399e3a127034ef836147cbeb1d6181148a45b7 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 00:35:25 -0400
Subject: [PATCH 07/18] add import

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/apps/detection/transforms/array.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py
index a44b2f5d52..66388a6df2 100644
--- a/monai/apps/detection/transforms/array.py
+++ b/monai/apps/detection/transforms/array.py
@@ -50,6 +50,7 @@
     "FlipBox",
     "ClipBoxToImage",
     "BoxToBoxMask",
+    "BoxMaskToBox",
 ]
 
 

From 3ff7f2e3f9b30bb9dbc1f6b699603e72fe74d0b6 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 00:37:26 -0400
Subject: [PATCH 08/18] add docstring

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/apps/detection/transforms/array.py   | 6 +++++-
 monai/apps/detection/transforms/box_ops.py | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py
index 66388a6df2..1eda277fbd 100644
--- a/monai/apps/detection/transforms/array.py
+++ b/monai/apps/detection/transforms/array.py
@@ -390,7 +390,11 @@ class BoxToBoxMask(Transform):
 
     Args:
         bg_label: background labels for the output mask image, make sure it is smaller than any foreground(fg) labels.
-        ellipse_mask: whether to make the foregound mask an ellipse (2D) or ellipsoid (3D).
+        ellipse_mask: bool.
+            If True, it assumes the object shape is close to ellipse or ellipsoid.
+            If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box.
+            If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True
+            See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021.
     """
 
     backend = [TransformBackends.NUMPY]
diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py
index e5606cd3af..adad36db1d 100644
--- a/monai/apps/detection/transforms/box_ops.py
+++ b/monai/apps/detection/transforms/box_ops.py
@@ -207,7 +207,11 @@ def convert_boxes_to_masks(
         labels: classification foreground(fg) labels corresponding to `boxes`, dtype should be int, sized (N,).
         spatial_size: image spatial size.
         bg_label: background labels for the output mask image, make sure it is smaller than any fg labels.
-        ellipse_mask: whether to make the foregound mask an ellipse (2D) or ellipsoid (3D).
+        ellipse_mask: bool.
+            If True, it assumes the object shape is close to ellipse or ellipsoid.
+            If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box.
+            If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True
+            See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021.
 
     Return:
         - int16 array, sized (num_box, H, W). Each channel represents a box.

From c5199be64866180a01c4733437102874cda4e866 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 00:39:34 -0400
Subject: [PATCH 09/18] update test

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 tests/test_box_transform.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_box_transform.py b/tests/test_box_transform.py
index 86f666e5f9..a37be6a2de 100644
--- a/tests/test_box_transform.py
+++ b/tests/test_box_transform.py
@@ -81,6 +81,7 @@ def test_value_2d(self, data, expected_mask):
             assert_allclose(data_mask["box_mask"], expected_mask, type_test=True, device_test=True, atol=1e-3)
             data_back = transform_to_box(data_mask)
             assert_allclose(data_back["boxes"], data["boxes"], type_test=False, device_test=False, atol=1e-3)
+            assert_allclose(data_back["labels"], data["labels"], type_test=False, device_test=False, atol=1e-3)
 
     @parameterized.expand(TESTS_3D)
     def test_value_3d(

From 38eadb737dacd5e8e521880f79e7747feb5c7b3a Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 00:48:39 -0400
Subject: [PATCH 10/18] update import

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/apps/detection/transforms/box_ops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py
index adad36db1d..7865cb216b 100644
--- a/monai/apps/detection/transforms/box_ops.py
+++ b/monai/apps/detection/transforms/box_ops.py
@@ -22,7 +22,7 @@
 from monai.utils.misc import ensure_tuple, ensure_tuple_rep
 from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
-scipy_ndimage, _ = optional_import("scipy.ndimage")
+scipy, _ = optional_import("scipy")
 
 
 def _apply_affine_to_points(points: torch.Tensor, affine: torch.Tensor, include_shift: bool = True) -> torch.Tensor:
@@ -262,7 +262,7 @@ def convert_boxes_to_masks(
             boxes_only_mask[dist_from_center <= radius**2] = np.int16(labels_np[b])
             # squeeze it to a ellipse/ellipsoid mask
             zoom_factor = [box_size[axis] / float(max_box_size) for axis in range(spatial_dims)]
-            boxes_only_mask = scipy_ndimage.zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False)
+            boxes_only_mask = scipy.ndimage.zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False)
         else:
             # generate a rect mask
             boxes_only_mask = np.ones(box_size, dtype=np.int16) * np.int16(labels_np[b])

From 71589a13ea675f62d7b494fbf6fd251573259535 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 01:05:26 -0400
Subject: [PATCH 11/18] update test

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/apps/detection/transforms/box_ops.py | 1 +
 tests/test_box_transform.py                | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py
index 7865cb216b..c962b8f8cc 100644
--- a/monai/apps/detection/transforms/box_ops.py
+++ b/monai/apps/detection/transforms/box_ops.py
@@ -262,6 +262,7 @@ def convert_boxes_to_masks(
             boxes_only_mask[dist_from_center <= radius**2] = np.int16(labels_np[b])
             # squeeze it to a ellipse/ellipsoid mask
             zoom_factor = [box_size[axis] / float(max_box_size) for axis in range(spatial_dims)]
+            # scipy zoom does not support float16 cpu
             boxes_only_mask = scipy.ndimage.zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False)
         else:
             # generate a rect mask
diff --git a/tests/test_box_transform.py b/tests/test_box_transform.py
index a37be6a2de..4752aaf732 100644
--- a/tests/test_box_transform.py
+++ b/tests/test_box_transform.py
@@ -94,7 +94,7 @@ def test_value_3d(
         expected_flip_result,
         expected_clip_result,
     ):
-        test_dtype = [torch.float16]
+        test_dtype = [torch.float32]
         for dtype in test_dtype:
             data = CastToTyped(keys=["image", "boxes"], dtype=dtype)(data)
             # test ConvertBoxToStandardModed

From 9d3ea7bed67dea5d4d2c239a82f330de9bbd570a Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 16:30:03 -0400
Subject: [PATCH 12/18] change func name

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/apps/detection/transforms/dictionary.py | 54 +++++++++----------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py
index 39aebe5bee..e765939846 100644
--- a/monai/apps/detection/transforms/dictionary.py
+++ b/monai/apps/detection/transforms/dictionary.py
@@ -24,8 +24,8 @@
 
 from monai.apps.detection.transforms.array import (
     AffineBox,
-    BoxMaskToBox,
-    BoxToBoxMask,
+    MaskToBox,
+    BoxToMask,
     ClipBoxToImage,
     ConvertBoxMode,
     ConvertBoxToStandardMode,
@@ -68,12 +68,12 @@
     "ClipBoxToImaged",
     "ClipBoxToImageD",
     "ClipBoxToImageDict",
-    "BoxToBoxMaskd",
-    "BoxToBoxMaskD",
-    "BoxToBoxMaskDict",
-    "BoxMaskToBoxd",
-    "BoxMaskToBoxD",
-    "BoxMaskToBoxDict",
+    "BoxToMaskd",
+    "BoxToMaskD",
+    "BoxToMaskDict",
+    "MaskToBoxd",
+    "MaskToBoxD",
+    "MaskToBoxDict",
 ]
 
 DEFAULT_POST_FIX = PostFix.meta()
@@ -767,18 +767,18 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N
         return d
 
 
-class BoxToBoxMaskd(MapTransform):
+class BoxToMaskd(MapTransform):
     """
-    Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.BoxToBoxMask`.
-    Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxMaskToBoxd` .
+    Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.BoxToMask`.
+    Pairs with :py:class:`monai.apps.detection.transforms.dictionary.MaskToBoxd` .
     Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs.
     The output d[box_mask_key] will have background intensity 0, since the following operations may pad 0 on the border.
 
     This is the general solution for transforms that need to be applied on images and boxes simultaneously.
     It is performed with the following steps.
-    1) use BoxToBoxMaskd to covert boxes and labels to box_masks;
+    1) use BoxToMaskd to convert boxes and labels to box_masks;
     2) do transforms, e.g., rotation or cropping, on images and box_masks together;
-    3) use BoxMaskToBoxd to convert box_masks back to boxes and labels.
+    3) use MaskToBoxd to convert box_masks back to boxes and labels.
 
     Args:
         box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``.
@@ -801,7 +801,7 @@ class BoxToBoxMaskd(MapTransform):
             from monai.transforms import Compose, RandRotated, RandSpatialCropd, DeleteItemsd
             transforms = Compose(
                 [
-                    BoxToBoxMaskd(
+                    BoxToMaskd(
                         box_keys="boxes", label_keys="labels",
                         box_mask_keys="box_mask", box_ref_image_keys="image",
                         min_fg_label=0, ellipse_mask=True
@@ -811,7 +811,7 @@ class BoxToBoxMaskd(MapTransform):
                         keep_size=True,padding_mode="zeros"
                     ),
                     RandSpatialCropd(keys=["image","box_mask"],roi_size=128, random_size=False),
-                    BoxMaskToBoxd(
+                    MaskToBoxd(
                         box_mask_keys="box_mask", box_keys="boxes",
                         label_keys="labels", min_fg_label=0
                     )
@@ -839,7 +839,7 @@ def __init__(
             raise ValueError("Please make sure len(label_keys)==len(box_keys)==len(box_mask_keys)!")
         self.box_ref_image_keys = ensure_tuple_rep(box_ref_image_keys, len(self.box_keys))
         self.bg_label = min_fg_label - 1  # make sure background label is always smaller than fg labels.
-        self.converter = BoxToBoxMask(bg_label=self.bg_label, ellipse_mask=ellipse_mask)
+        self.converter = BoxToMask(bg_label=self.bg_label, ellipse_mask=ellipse_mask)
 
     def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
@@ -854,17 +854,17 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N
         return d
 
 
-class BoxMaskToBoxd(MapTransform):
+class MaskToBoxd(MapTransform):
     """
-    Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.BoxMaskToBox`.
-    Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxToBoxMaskd` .
+    Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.MaskToBox`.
+    Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxToMaskd` .
     Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs.
 
     This is the general solution for transforms that need to be applied on images and boxes simultaneously.
     It is performed with the following steps.
-    1) use BoxToBoxMaskd to covert boxes and labels to box_masks;
+    1) use BoxToMaskd to convert boxes and labels to box_masks;
     2) do transforms, e.g., rotation or cropping, on images and box_masks together;
-    3) use BoxMaskToBoxd to convert box_masks back to boxes and labels.
+    3) use MaskToBoxd to convert box_masks back to boxes and labels.
 
     Args:
         box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``.
@@ -883,7 +883,7 @@ class BoxMaskToBoxd(MapTransform):
             from monai.transforms import Compose, RandRotated, RandSpatialCropd, DeleteItemsd
             transforms = Compose(
                 [
-                    BoxToBoxMaskd(
+                    BoxToMaskd(
                         box_keys="boxes", label_keys="labels",
                         box_mask_keys="box_mask", box_ref_image_keys="image",
                         min_fg_label=0, ellipse_mask=True
@@ -893,7 +893,7 @@ class BoxMaskToBoxd(MapTransform):
                         keep_size=True,padding_mode="zeros"
                     ),
                     RandSpatialCropd(keys=["image","box_mask"],roi_size=128, random_size=False),
-                    BoxMaskToBoxd(
+                    MaskToBoxd(
                         box_mask_keys="box_mask", box_keys="boxes",
                         label_keys="labels", min_fg_label=0
                     )
@@ -919,14 +919,14 @@ def __init__(
         if not len(self.label_keys) == len(self.box_keys) == len(self.box_mask_keys):
             raise ValueError("Please make sure len(label_keys)==len(box_keys)==len(box_mask_keys)!")
         self.bg_label = min_fg_label - 1  # make sure background label is always smaller than fg labels.
-        self.converter = BoxMaskToBox(bg_label=self.bg_label, box_dtype=box_dtype, label_dtype=label_dtype)
+        self.converter = MaskToBox(bg_label=self.bg_label, box_dtype=box_dtype, label_dtype=label_dtype)
         self.box_dtype = box_dtype
 
     def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
 
         for box_key, label_key, box_mask_key in zip(self.box_keys, self.label_keys, self.box_mask_keys):
-            d[box_mask_key] += self.bg_label  # pairs with the operation in BoxToBoxMaskd
+            d[box_mask_key] += self.bg_label  # pairs with the operation in BoxToMaskd
             d[box_key], d[label_key] = self.converter(d[box_mask_key])
         return d
 
@@ -939,5 +939,5 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N
 FlipBoxD = FlipBoxDict = FlipBoxd
 RandFlipBoxD = RandFlipBoxDict = RandFlipBoxd
 ClipBoxToImageD = ClipBoxToImageDict = ClipBoxToImaged
-BoxToBoxMaskD = BoxToBoxMaskDict = BoxToBoxMaskd
-BoxMaskToBoxD = BoxMaskToBoxDict = BoxMaskToBoxd
+BoxToMaskD = BoxToMaskDict = BoxToMaskd
+MaskToBoxD = MaskToBoxDict = MaskToBoxd

From dddbe57d40365de2e1074c781f9e435d7447c27d Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 16:30:50 -0400
Subject: [PATCH 13/18] change func name

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 tests/test_box_transform.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_box_transform.py b/tests/test_box_transform.py
index 4752aaf732..0cb529ff9e 100644
--- a/tests/test_box_transform.py
+++ b/tests/test_box_transform.py
@@ -17,8 +17,8 @@
 
 from monai.apps.detection.transforms.dictionary import (
     AffineBoxToImageCoordinated,
-    BoxMaskToBoxd,
-    BoxToBoxMaskd,
+    MaskToBoxd,
+    BoxToMaskd,
     ClipBoxToImaged,
     ConvertBoxModed,
     FlipBoxd,
@@ -66,7 +66,7 @@ def test_value_2d(self, data, expected_mask):
         test_dtype = [torch.float32, torch.float16]
         for dtype in test_dtype:
             data = CastToTyped(keys=["image", "boxes"], dtype=dtype)(data)
-            transform_to_mask = BoxToBoxMaskd(
+            transform_to_mask = BoxToMaskd(
                 box_keys="boxes",
                 box_mask_keys="box_mask",
                 box_ref_image_keys="image",
@@ -74,7 +74,7 @@ def test_value_2d(self, data, expected_mask):
                 min_fg_label=0,
                 ellipse_mask=True,
             )
-            transform_to_box = BoxMaskToBoxd(
+            transform_to_box = MaskToBoxd(
                 box_keys="boxes", box_mask_keys="box_mask", label_keys="labels", min_fg_label=0
             )
             data_mask = transform_to_mask(data)

From 7460d05a5d14b89e449d6762343e2e3ccad2b73e Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 16:31:42 -0400
Subject: [PATCH 14/18] change func name

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/apps/detection/transforms/array.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py
index 1eda277fbd..7165b86b6d 100644
--- a/monai/apps/detection/transforms/array.py
+++ b/monai/apps/detection/transforms/array.py
@@ -49,8 +49,8 @@
     "ResizeBox",
     "FlipBox",
     "ClipBoxToImage",
-    "BoxToBoxMask",
-    "BoxMaskToBox",
+    "BoxToMask",
+    "MaskToBox",
 ]
 
 
@@ -384,7 +384,7 @@ def __call__(  # type: ignore
         return boxes_clip, tuple(labels_clip_list)
 
 
-class BoxToBoxMask(Transform):
+class BoxToMask(Transform):
     """
     Convert box to int16 mask image, which has the same size with the input image.
 
@@ -420,10 +420,10 @@ def __call__(  # type: ignore
         return convert_boxes_to_masks(boxes, labels, spatial_size, self.bg_label, self.ellipse_mask)
 
 
-class BoxMaskToBox(Transform):
+class MaskToBox(Transform):
     """
     Convert int16 mask image to box, which has the same size with the input image.
-    Pairs with :py:class:`monai.apps.detection.transforms.array.BoxToBoxMask`.
+    Pairs with :py:class:`monai.apps.detection.transforms.array.BoxToMask`.
     Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs.
 
     Args:

From 19fa66112a3fa66ede373b677e14a3acbf35e188 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 16:33:04 -0400
Subject: [PATCH 15/18] change func name

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/apps/detection/transforms/array.py   | 8 ++++----
 monai/apps/detection/transforms/box_ops.py | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py
index 7165b86b6d..1c57db79f3 100644
--- a/monai/apps/detection/transforms/array.py
+++ b/monai/apps/detection/transforms/array.py
@@ -34,8 +34,8 @@
 
 from .box_ops import (
     apply_affine_to_boxes,
-    convert_boxes_to_masks,
-    convert_masks_to_boxes,
+    convert_box_to_mask,
+    convert_mask_to_box,
     flip_boxes,
     resize_boxes,
     zoom_boxes,
@@ -417,7 +417,7 @@ def __call__(  # type: ignore
                 The foreground region in channel c has intensity of labels[c].
                 The background intensity is bg_label.
         """
-        return convert_boxes_to_masks(boxes, labels, spatial_size, self.bg_label, self.ellipse_mask)
+        return convert_box_to_mask(boxes, labels, spatial_size, self.bg_label, self.ellipse_mask)
 
 
 class MaskToBox(Transform):
@@ -450,4 +450,4 @@ def __call__(self, boxes_mask: NdarrayOrTensor) -> Tuple[NdarrayOrTensor, Ndarra
             - bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``.
             - classification foreground(fg) labels, dtype should be int, sized (N,).
         """
-        return convert_masks_to_boxes(boxes_mask, self.bg_label, self.box_dtype, self.label_dtype)
+        return convert_mask_to_box(boxes_mask, self.bg_label, self.box_dtype, self.label_dtype)
diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py
index c962b8f8cc..b149be5468 100644
--- a/monai/apps/detection/transforms/box_ops.py
+++ b/monai/apps/detection/transforms/box_ops.py
@@ -192,7 +192,7 @@ def flip_boxes(
     return flip_boxes
 
 
-def convert_boxes_to_masks(
+def convert_box_to_mask(
     boxes: NdarrayOrTensor,
     labels: NdarrayOrTensor,
     spatial_size: Union[Sequence[int], int],
@@ -284,7 +284,7 @@ def convert_boxes_to_masks(
     return convert_to_dst_type(src=boxes_mask_np, dst=boxes, dtype=torch.int16)[0]
 
 
-def convert_masks_to_boxes(
+def convert_mask_to_box(
     boxes_mask: NdarrayOrTensor, bg_label: int = -1, box_dtype=torch.float32, label_dtype=torch.long
 ) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]:
     """

From 66b94d8a0e8bd304306bcb407eaef189dc65bd06 Mon Sep 17 00:00:00 2001
From: monai-bot <monai.miccai2019@gmail.com>
Date: Fri, 27 May 2022 22:08:08 +0000
Subject: [PATCH 16/18] [MONAI] code formatting

Signed-off-by: monai-bot <monai.miccai2019@gmail.com>
---
 monai/apps/detection/transforms/dictionary.py | 2 +-
 tests/test_box_transform.py                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py
index e765939846..5aeacfd63e 100644
--- a/monai/apps/detection/transforms/dictionary.py
+++ b/monai/apps/detection/transforms/dictionary.py
@@ -24,12 +24,12 @@
 
 from monai.apps.detection.transforms.array import (
     AffineBox,
-    MaskToBox,
     BoxToMask,
     ClipBoxToImage,
     ConvertBoxMode,
     ConvertBoxToStandardMode,
     FlipBox,
+    MaskToBox,
     ZoomBox,
 )
 from monai.config import KeysCollection
diff --git a/tests/test_box_transform.py b/tests/test_box_transform.py
index 0cb529ff9e..8dbd446b16 100644
--- a/tests/test_box_transform.py
+++ b/tests/test_box_transform.py
@@ -17,11 +17,11 @@
 
 from monai.apps.detection.transforms.dictionary import (
     AffineBoxToImageCoordinated,
-    MaskToBoxd,
     BoxToMaskd,
     ClipBoxToImaged,
     ConvertBoxModed,
     FlipBoxd,
+    MaskToBoxd,
     RandFlipBoxd,
     RandZoomBoxd,
     ZoomBoxd,

From 1d44667c0220b0c8c6c9e0207e79fc6fbed79a65 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Fri, 27 May 2022 18:17:22 -0400
Subject: [PATCH 17/18] correct a corner case in NMS

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index dfe9f3798b..6bfd89080a 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -1039,7 +1039,7 @@ def non_max_suppression(
 
     # returns empty array if boxes is empty
     if boxes.shape[0] == 0:
-        return convert_to_dst_type(src=np.array([]), dst=boxes)[0]
+        return convert_to_dst_type(src=np.array([]), dst=boxes, dtype=torch.long)[0]
 
     if boxes.shape[0] != scores.shape[0]:
         raise ValueError(

From 1994175ddcc5d29b41804f1193b3447bc99260bd Mon Sep 17 00:00:00 2001
From: Wenqi Li <wenqil@nvidia.com>
Date: Sat, 28 May 2022 08:57:31 +0100
Subject: [PATCH 18/18] update docstring, simplify convert_box_to_mask, add
 test cases

Signed-off-by: Wenqi Li <wenqil@nvidia.com>
---
 monai/apps/detection/transforms/array.py      |  9 +--
 monai/apps/detection/transforms/box_ops.py    | 56 +++++++------------
 monai/apps/detection/transforms/dictionary.py | 34 ++++++-----
 tests/test_box_transform.py                   | 26 ++++++++-
 4 files changed, 70 insertions(+), 55 deletions(-)

diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py
index 1c57db79f3..42aeda71cf 100644
--- a/monai/apps/detection/transforms/array.py
+++ b/monai/apps/detection/transforms/array.py
@@ -391,10 +391,11 @@ class BoxToMask(Transform):
     Args:
         bg_label: background labels for the output mask image, make sure it is smaller than any foreground(fg) labels.
         ellipse_mask: bool.
-            If True, it assumes the object shape is close to ellipse or ellipsoid.
-            If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box.
-            If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True
-            See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021.
+
+            - If True, it assumes the object shape is close to ellipse or ellipsoid.
+            - If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box.
+            - If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True
+              See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021.
     """
 
     backend = [TransformBackends.NUMPY]
diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py
index b149be5468..6b9c2ac87b 100644
--- a/monai/apps/detection/transforms/box_ops.py
+++ b/monai/apps/detection/transforms/box_ops.py
@@ -17,6 +17,7 @@
 
 from monai.config.type_definitions import NdarrayOrTensor
 from monai.data.box_utils import COMPUTE_DTYPE, TO_REMOVE, get_spatial_dims
+from monai.transforms import Resize
 from monai.transforms.utils import create_scale
 from monai.utils import look_up_option, optional_import
 from monai.utils.misc import ensure_tuple, ensure_tuple_rep
@@ -184,12 +185,12 @@ def flip_boxes(
     flip_axes = ensure_tuple(flip_axes)
 
     # flip box
-    flip_boxes = deepcopy(boxes)
+    _flip_boxes = deepcopy(boxes)
     for axis in flip_axes:
-        flip_boxes[:, axis + spatial_dims] = spatial_size[axis] - boxes[:, axis] - TO_REMOVE
-        flip_boxes[:, axis] = spatial_size[axis] - boxes[:, axis + spatial_dims] - TO_REMOVE
+        _flip_boxes[:, axis + spatial_dims] = spatial_size[axis] - boxes[:, axis] - TO_REMOVE
+        _flip_boxes[:, axis] = spatial_size[axis] - boxes[:, axis + spatial_dims] - TO_REMOVE
 
-    return flip_boxes
+    return _flip_boxes
 
 
 def convert_box_to_mask(
@@ -208,10 +209,11 @@ def convert_box_to_mask(
         spatial_size: image spatial size.
         bg_label: background labels for the output mask image, make sure it is smaller than any fg labels.
         ellipse_mask: bool.
-            If True, it assumes the object shape is close to ellipse or ellipsoid.
-            If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box.
-            If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True
-            See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021.
+
+            - If True, it assumes the object shape is close to ellipse or ellipsoid.
+            - If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box.
+            - If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True
+              See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021.
 
     Return:
         - int16 array, sized (num_box, H, W). Each channel represents a box.
@@ -230,8 +232,8 @@ def convert_box_to_mask(
     # bg_label should be smaller than labels
     if bg_label >= min(labels):
         raise ValueError(
-            f"bg_label should be smaller than any foreground box labels. \
-min(labels)={min(labels)}, while bg_label={bg_label}"
+            f"bg_label should be smaller than any foreground box labels.\n"
+            f"min(labels)={min(labels)}, while bg_label={bg_label}"
         )
 
     if labels.shape[0] != boxes.shape[0]:
@@ -240,8 +242,7 @@ def convert_box_to_mask(
     # allocate memory for boxes_mask_np
     boxes_mask_np = np.ones((labels.shape[0],) + spatial_size, dtype=np.int16) * np.int16(bg_label)
 
-    boxes_np: np.ndarray = convert_data_type(boxes, np.ndarray)[0]
-    boxes_np = np.round(boxes_np).astype(np.int32)
+    boxes_np: np.ndarray = convert_data_type(boxes, np.ndarray, dtype=np.int32)[0]
     labels_np, *_ = convert_to_dst_type(src=labels, dst=boxes_np)
     for b in range(boxes_np.shape[0]):
         # generate a foreground mask
@@ -253,34 +254,19 @@ def convert_box_to_mask(
             center = (max_box_size - 1) / 2.0
             boxes_only_mask = np.ones([max_box_size] * spatial_dims, dtype=np.int16) * np.int16(bg_label)
             # apply label intensity to circle/ball foreground
-            if spatial_dims == 2:
-                grid_y, grid_x = np.ogrid[:max_box_size, :max_box_size]
-                dist_from_center = (grid_x - center) ** 2 + (grid_y - center) ** 2
-            elif spatial_dims == 3:
-                grid_y, grid_x, grid_z = np.ogrid[:max_box_size, :max_box_size, :max_box_size]
-                dist_from_center = (grid_x - center) ** 2 + (grid_y - center) ** 2 + (grid_z - center) ** 2
+            ranges = tuple(slice(0, max_box_size) for _ in range(spatial_dims))
+            dist_from_center = sum((grid - center) ** 2 for grid in np.ogrid[ranges])
             boxes_only_mask[dist_from_center <= radius**2] = np.int16(labels_np[b])
             # squeeze it to a ellipse/ellipsoid mask
-            zoom_factor = [box_size[axis] / float(max_box_size) for axis in range(spatial_dims)]
-            # scipy zoom does not support float16 cpu
-            boxes_only_mask = scipy.ndimage.zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False)
+            resizer = Resize(spatial_size=box_size, mode="nearest", anti_aliasing=False)
+            boxes_only_mask = resizer(boxes_only_mask[None])[0]  # type: ignore
         else:
             # generate a rect mask
-            boxes_only_mask = np.ones(box_size, dtype=np.int16) * np.int16(labels_np[b])
-
+            boxes_only_mask = np.ones(box_size, dtype=np.int16) * np.int16(labels_np[b])  # type: ignore
         # apply to global mask
-        if spatial_dims == 2:
-            boxes_mask_np[
-                b, boxes_np[b, 0] : boxes_np[b, spatial_dims], boxes_np[b, 1] : boxes_np[b, 1 + spatial_dims]
-            ] = boxes_only_mask
-        if spatial_dims == 3:
-            boxes_mask_np[
-                b,
-                boxes_np[b, 0] : boxes_np[b, spatial_dims],
-                boxes_np[b, 1] : boxes_np[b, 1 + spatial_dims],
-                boxes_np[b, 2] : boxes_np[b, 2 + spatial_dims],
-            ] = boxes_only_mask
-
+        slicing = [b]
+        slicing.extend(slice(boxes_np[b, d], boxes_np[b, d + spatial_dims]) for d in range(spatial_dims))  # type:ignore
+        boxes_mask_np[tuple(slicing)] = boxes_only_mask
     return convert_to_dst_type(src=boxes_mask_np, dst=boxes, dtype=torch.int16)[0]
 
 
diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py
index 5aeacfd63e..b0d9c5f1f9 100644
--- a/monai/apps/detection/transforms/dictionary.py
+++ b/monai/apps/detection/transforms/dictionary.py
@@ -772,31 +772,34 @@ class BoxToMaskd(MapTransform):
     Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.BoxToMask`.
     Pairs with :py:class:`monai.apps.detection.transforms.dictionary.MaskToBoxd` .
     Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs.
-    The output d[box_mask_key] will have background intensity 0, since the following operations may pad 0 on the border.
+    The output ``d[box_mask_key]`` will have background intensity 0, since the following operations
+    may pad 0 on the border.
 
     This is the general solution for transforms that need to be applied on images and boxes simultaneously.
     It is performed with the following steps.
-    1) use BoxToMaskd to covert boxes and labels to box_masks;
-    2) do transforms, e.g., rotation or cropping, on images and box_masks together;
-    3) use MaskToBoxd to convert box_masks back to boxes and labels.
+
+        1) use ``BoxToMaskd`` to convert boxes and labels to box_masks;
+        2) do transforms, e.g., rotation or cropping, on images and box_masks together;
+        3) use ``MaskToBoxd`` to convert box_masks back to boxes and labels.
 
     Args:
         box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``.
         box_mask_keys: Keys to store output box mask results for transformation. Same length with ``box_keys``.
-        label_keys: Keys that represents the lables corresponding to the ``box_keys``. Same length with ``box_keys``.
+        label_keys: Keys that represents the labels corresponding to the ``box_keys``. Same length with ``box_keys``.
         box_ref_image_keys: Keys that represents the reference images to which ``box_keys`` are attached.
         min_fg_label: min foreground box label.
         ellipse_mask: bool.
-            If True, it assumes the object shape is close to ellipse or ellipsoid.
-            If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box.
-            If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True
-            See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021.
+
+            - If True, it assumes the object shape is close to ellipse or ellipsoid.
+            - If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box.
+            - If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True
+              See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021.
         allow_missing_keys: don't raise exception if key is missing.
 
     Example:
         .. code-block:: python
 
-            # This code snippet creates transforms (random rotation and croppping) on boxes, labels, and image together.
+            # This code snippet creates transforms (random rotation and cropping) on boxes, labels, and image together.
             import numpy as np
             from monai.transforms import Compose, RandRotated, RandSpatialCropd, DeleteItemsd
             transforms = Compose(
@@ -862,14 +865,15 @@ class MaskToBoxd(MapTransform):
 
     This is the general solution for transforms that need to be applied on images and boxes simultaneously.
     It is performed with the following steps.
-    1) use BoxToMaskd to covert boxes and labels to box_masks;
-    2) do transforms, e.g., rotation or cropping, on images and box_masks together;
-    3) use MaskToBoxd to convert box_masks back to boxes and labels.
+
+        1) use ``BoxToMaskd`` to convert boxes and labels to box_masks;
+        2) do transforms, e.g., rotation or cropping, on images and box_masks together;
+        3) use ``MaskToBoxd`` to convert box_masks back to boxes and labels.
 
     Args:
         box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``.
         box_mask_keys: Keys to store output box mask results for transformation. Same length with ``box_keys``.
-        label_keys: Keys that represents the lables corresponding to the ``box_keys``. Same length with ``box_keys``.
+        label_keys: Keys that represents the labels corresponding to the ``box_keys``. Same length with ``box_keys``.
         min_fg_label: min foreground box label.
         box_dtype: output dtype for box_keys
         label_dtype: output dtype for label_keys
@@ -878,7 +882,7 @@ class MaskToBoxd(MapTransform):
     Example:
         .. code-block:: python
 
-            # This code snippet creates transforms (random rotation and croppping) on boxes, labels, and images together.
+            # This code snippet creates transforms (random rotation and cropping) on boxes, labels, and images together.
             import numpy as np
             from monai.transforms import Compose, RandRotated, RandSpatialCropd, DeleteItemsd
             transforms = Compose(
diff --git a/tests/test_box_transform.py b/tests/test_box_transform.py
index 8dbd446b16..0def9e1458 100644
--- a/tests/test_box_transform.py
+++ b/tests/test_box_transform.py
@@ -72,7 +72,7 @@ def test_value_2d(self, data, expected_mask):
                 box_ref_image_keys="image",
                 label_keys="labels",
                 min_fg_label=0,
-                ellipse_mask=True,
+                ellipse_mask=False,
             )
             transform_to_box = MaskToBoxd(
                 box_keys="boxes", box_mask_keys="box_mask", label_keys="labels", min_fg_label=0
@@ -83,6 +83,30 @@ def test_value_2d(self, data, expected_mask):
             assert_allclose(data_back["boxes"], data["boxes"], type_test=False, device_test=False, atol=1e-3)
             assert_allclose(data_back["labels"], data["labels"], type_test=False, device_test=False, atol=1e-3)
 
+    def test_value_3d_mask(self):
+        test_dtype = [torch.float32, torch.float16]
+        image = np.zeros((1, 32, 33, 34))
+        boxes = np.array([[7, 8, 9, 10, 12, 13], [1, 3, 5, 2, 5, 9], [0, 0, 0, 1, 1, 1]])
+        data = {"image": image, "boxes": boxes, "labels": np.array((1, 0, 3))}
+        for dtype in test_dtype:
+            data = CastToTyped(keys=["image", "boxes"], dtype=dtype)(data)
+            transform_to_mask = BoxToMaskd(
+                box_keys="boxes",
+                box_mask_keys="box_mask",
+                box_ref_image_keys="image",
+                label_keys="labels",
+                min_fg_label=0,
+                ellipse_mask=False,
+            )
+            transform_to_box = MaskToBoxd(
+                box_keys="boxes", box_mask_keys="box_mask", label_keys="labels", min_fg_label=0
+            )
+            data_mask = transform_to_mask(data)
+            assert_allclose(data_mask["box_mask"].shape, (3, 32, 33, 34), type_test=True, device_test=True, atol=1e-3)
+            data_back = transform_to_box(data_mask)
+            assert_allclose(data_back["boxes"], data["boxes"], type_test=False, device_test=False, atol=1e-3)
+            assert_allclose(data_back["labels"], data["labels"], type_test=False, device_test=False, atol=1e-3)
+
     @parameterized.expand(TESTS_3D)
     def test_value_3d(
         self,