From dd5dd5a0a740337e6444dfdbcc070516dc04659e Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Thu, 26 May 2022 21:18:22 -0400 Subject: [PATCH 01/18] add box -- mask converter Signed-off-by: Can Zhao --- monai/apps/detection/transforms/box_ops.py | 105 +++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py index ef8d248c02..dcc38401ff 100644 --- a/monai/apps/detection/transforms/box_ops.py +++ b/monai/apps/detection/transforms/box_ops.py @@ -14,6 +14,7 @@ import torch +from monai.utils import look_up_option from monai.config.type_definitions import NdarrayOrTensor from monai.data.box_utils import COMPUTE_DTYPE, TO_REMOVE, get_spatial_dims from monai.transforms.utils import create_scale @@ -186,3 +187,107 @@ def flip_boxes( flip_boxes[:, axis] = spatial_size[axis] - boxes[:, axis + spatial_dims] - TO_REMOVE return flip_boxes + +def convert_boxes_to_mask( + boxes: NdarrayOrTensor, + spatial_size: Union[Sequence[int], int], + labels: Union[Sequence[int], torch.Tensor, np.ndarray], + bg_label: int = -1, + ellipse_mask: bool = False + ) -> NdarrayOrTensor: + """ + Convert box to int8 image, which has the same size with the input image, + Each channel represents one box. The box region will have intensity of labels, the background intensity is bg_label + Box mask may take a lot of memory, so we generate box mask as numpy array + + Args: + bg_label: background labels for the output box image, just in case one of the fg labels is 0 + """ + spatial_dims: int = get_spatial_dims(boxes=boxes) + spatial_size = ensure_tuple_rep(spatial_size, spatial_dims) + + labels = box_utils.convert_to_list(labels) + # if no box, return empty mask + if len(labels)==0: + return np.ones([1]+spatial_size,dtype=np.int8)*np.int8(bg_label) + + if bg_label >= min(labels): + raise ValueError(f"bg_label should be smaller than any foreground box labels. min(box_labels)={min(labels)}, while bg_label={bg_label}") + + if len(labels) != boxes.shape[0]: + raise ValueError("Number of labels should equal to number of boxes.") + + boxes_mask = np.ones([len(labels)]+spatial_size,dtype=np.int8)*np.int8(bg_label) + boxes,_,_ = convert_data_type(boxes,dtype=np.int16) + for b in range(boxes.shape[0]): + # draw a circle/ball mask + box_size = [boxes[b,axis+spatial_dims].item()-boxes[b,axis].item() for axis in range(spatial_dims)] + if self.ellipse_mask: + max_box_size = max(box_size) + radius = max_box_size/2.0 + center = (max_box_size-1)/2.0 + boxes_only_mask = np.ones([max_box_size]*spatial_dims,dtype=np.int8)*np.int8(bg_label) # a square/cube mask + if spatial_dims == 2: + Y, X = np.ogrid[:max_box_size, :max_box_size] + dist_from_center = (X-center)**2 + (Y-center)**2 + elif spatial_dims == 3: + Y, X, Z = np.ogrid[:max_box_size, :max_box_size, :max_box_size] + dist_from_center = (X-center)**2 + (Y-center)**2+ (Z-center)**2 + boxes_only_mask[dist_from_center <= radius**2] = np.int8(labels[b]) + + # squeeze it to a ellipse/ellipsoid + zoom_factor = [box_size[axis]/float(max_box_size) for axis in range(spatial_dims)] + boxes_only_mask = scipy.ndimage.zoom(boxes_only_mask,zoom=zoom_factor,mode='nearest',prefilter=False) + else: + boxes_only_mask = np.ones(box_size,dtype=np.int8)*np.int8(labels[b]) + + # apply to global mask + if spatial_dims == 2: + boxes_mask[b, boxes[b,0]:boxes[b,spatial_dims], boxes[b,1]:boxes[b,1+spatial_dims] ] = boxes_only_mask + if spatial_dims == 3: + boxes_mask[b, boxes[b,0]:boxes[b,spatial_dims], boxes[b,1]:boxes[b,1+spatial_dims], boxes[b,2]:boxes[b,2+spatial_dims] ] = boxes_only_mask + + # if isinstance(boxes, torch.Tensor): + # boxes_mask = torch.from_numpy(boxes_mask) + + return boxes_mask + +def convert_mask_to_boxes(boxes_mask: NdarrayOrTensor, bg_label: int = -1) -> NdarrayOrTensor: + """ + Convert binary mask image to box, which has the same size with the input image + + Args: + boxes_mask: sized (num_box, H, W) or (num_box, H, W, D) + bg_label: background labels for the boxes_mask + + Return: + - boxes + - labelss + """ + look_up_option(len(boxes_mask.shape), [3,4]) + spatial_size = list(boxes_mask.shape[1:]) + spatial_dims = box_utils.get_spatial_dims(spatial_size=spatial_size) + + if isinstance(boxes_mask, torch.Tensor): + boxes_mask = boxes_mask.cpu().detach().numpy() + + boxes = [] + labels = [] + for b in range(boxes_mask.shape[0]): + fg_indices = np.nonzero(boxes_mask[b,...]-bg_label) + if fg_indices[0].shape[0] == 0: + continue + boxes_b = [] + for fd_i in fg_indices: + boxes_b.append(min(fd_i)) # top left corner + for fd_i in fg_indices: + boxes_b.append(max(fd_i)+1) # bottom right corner + if spatial_dims == 2: + labels.append(boxes_mask[b,boxes_b[0],boxes_b[1]]) + if spatial_dims == 3: + labels.append(boxes_mask[b,boxes_b[0],boxes_b[1],boxes_b[2]]) + boxes.append(boxes_b) + + if len(boxes) == 0: + return np.zeros([0,2*spatial_dims]), np.zeros([0]) + return np.asarray(boxes),np.asarray(labels) From c642be0b56350840666c1fadd2a6d9ece24b3a7c Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 00:05:27 -0400 Subject: [PATCH 02/18] add box mask transform Signed-off-by: Can Zhao --- monai/apps/detection/transforms/array.py | 73 +++++++- monai/apps/detection/transforms/box_ops.py | 167 +++++++++++------- monai/apps/detection/transforms/dictionary.py | 131 +++++++++++++- tests/test_box_transform.py | 44 ++++- 4 files changed, 340 insertions(+), 75 deletions(-) diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py index b2587a213a..49ac70ea54 100644 --- a/monai/apps/detection/transforms/array.py +++ b/monai/apps/detection/transforms/array.py @@ -32,7 +32,14 @@ from monai.utils.enums import TransformBackends from monai.utils.type_conversion import convert_data_type, convert_to_dst_type -from .box_ops import apply_affine_to_boxes, flip_boxes, resize_boxes, zoom_boxes +from .box_ops import ( + apply_affine_to_boxes, + convert_boxes_to_masks, + convert_masks_to_boxes, + flip_boxes, + resize_boxes, + zoom_boxes, +) __all__ = [ "ConvertBoxToStandardMode", @@ -42,6 +49,7 @@ "ResizeBox", "FlipBox", "ClipBoxToImage", + "BoxToBoxMask", ] @@ -373,3 +381,66 @@ def __call__( # type: ignore labels_t = deepcopy(labels_t[keep_t, ...]) labels_clip_list.append(convert_to_dst_type(src=labels_t, dst=labels_tuple[i])[0]) return boxes_clip, tuple(labels_clip_list) + + +class BoxToBoxMask(Transform): + """ + Convert box to int16 mask image, which has the same size with the input image. + + Args: + bg_label: background labels for the output mask image, make sure it is smaller than any foreground(fg) labels. + ellipse_mask: whether to make the foregound mask an ellipse (2D) or ellipsoid (3D). + """ + backend = [TransformBackends.NUMPY] + + def __init__(self, bg_label: int = -1, ellipse_mask: bool = False) -> None: + self.bg_label = bg_label + self.ellipse_mask = ellipse_mask + + def __call__( # type: ignore + self, boxes: NdarrayOrTensor, labels: NdarrayOrTensor, spatial_size: Union[Sequence[int], int] + ) -> NdarrayOrTensor: + """ + Args: + boxes: bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``. + labels: classification foreground(fg) labels corresponding to `boxes`, dtype should be int, sized (N,). + spatial_size: image spatial size. + + Return: + - int16 array, sized (num_box, H, W). Each channel represents a box. + The foreground region in channel c has intensity of labels[c]. + The background intensity is bg_label. + """ + return convert_boxes_to_masks(boxes, labels, spatial_size, self.bg_label, self.ellipse_mask) + + +class BoxMaskToBox(Transform): + """ + Convert int16 mask image to box, which has the same size with the input image. + Pairs with :py:class:`monai.apps.detection.transforms.array.BoxToBoxMask`. + Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs. + + Args: + bg_label: background labels for the output mask image, make sure it is smaller than any foreground(fg) labels. + box_dtype: output dtype for boxes + label_dtype: output dtype for labels + """ + backend = [TransformBackends.NUMPY] + + def __init__(self, bg_label: int = -1, box_dtype=torch.float32, label_dtype=torch.long) -> None: + self.bg_label = bg_label + self.box_dtype = box_dtype + self.label_dtype = label_dtype + + def __call__(self, boxes_mask: NdarrayOrTensor) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]: + """ + Args: + boxes_mask: int16 array, sized (num_box, H, W). Each channel represents a box. + The foreground region in channel c has intensity of labels[c]. + The background intensity is bg_label. + + Return: + - bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``. + - classification foreground(fg) labels, dtype should be int, sized (N,). + """ + return convert_masks_to_boxes(boxes_mask, self.bg_label, self.box_dtype, self.label_dtype) diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py index dcc38401ff..37728ef07e 100644 --- a/monai/apps/detection/transforms/box_ops.py +++ b/monai/apps/detection/transforms/box_ops.py @@ -10,14 +10,16 @@ # limitations under the License. from copy import deepcopy -from typing import Optional, Sequence, Union +from typing import Optional, Sequence, Tuple, Union +import numpy as np import torch +from scipy.ndimage import zoom as scipy_zoom -from monai.utils import look_up_option from monai.config.type_definitions import NdarrayOrTensor from monai.data.box_utils import COMPUTE_DTYPE, TO_REMOVE, get_spatial_dims from monai.transforms.utils import create_scale +from monai.utils import look_up_option from monai.utils.misc import ensure_tuple, ensure_tuple_rep from monai.utils.type_conversion import convert_data_type, convert_to_dst_type @@ -188,106 +190,139 @@ def flip_boxes( return flip_boxes -def convert_boxes_to_mask( - boxes: NdarrayOrTensor, + +def convert_boxes_to_masks( + boxes: NdarrayOrTensor, + labels: NdarrayOrTensor, spatial_size: Union[Sequence[int], int], - labels: Union[Sequence[int], torch.Tensor, np.ndarray], - bg_label: int = -1, - ellipse_mask: bool = False - ) -> NdarrayOrTensor: + bg_label: int = -1, + ellipse_mask: bool = False, +) -> NdarrayOrTensor: """ - Convert box to int8 image, which has the same size with the input image, - Each channel represents one box. The box region will have intensity of labels, the background intensity is bg_label - Box mask may take a lot of memory, so we generate box mask as numpy array + Convert box to int16 mask image, which has the same size with the input image. Args: - bg_label: background labels for the output box image, just in case one of the fg labels is 0 + boxes: bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``. + labels: classification foreground(fg) labels corresponding to `boxes`, dtype should be int, sized (N,). + spatial_size: image spatial size. + bg_label: background labels for the output mask image, make sure it is smaller than any fg labels. + ellipse_mask: whether to make the foregound mask an ellipse (2D) or ellipsoid (3D). + + Return: + - int16 array, sized (num_box, H, W). Each channel represents a box. + The foreground region in channel c has intensity of labels[c]. + The background intensity is bg_label. """ spatial_dims: int = get_spatial_dims(boxes=boxes) spatial_size = ensure_tuple_rep(spatial_size, spatial_dims) - labels = box_utils.convert_to_list(labels) # if no box, return empty mask - if len(labels)==0: - return np.ones([1]+spatial_size,dtype=np.int8)*np.int8(bg_label) - + if len(labels) == 0: + boxes_mask_np = np.ones((1,) + spatial_size, dtype=np.int16) * np.int16(bg_label) + boxes_mask, *_ = convert_to_dst_type(src=boxes_mask_np, dst=boxes, dtype=torch.int16) + return boxes_mask + + # bg_label should be smaller than labels if bg_label >= min(labels): - raise ValueError(f"bg_label should be smaller than any foreground box labels. min(box_labels)={min(labels)}, while bg_label={bg_label}") + raise ValueError( + f"bg_label should be smaller than any foreground box labels. \ +min(labels)={min(labels)}, while bg_label={bg_label}" + ) - if len(labels) != boxes.shape[0]: + if labels.shape[0] != boxes.shape[0]: raise ValueError("Number of labels should equal to number of boxes.") - - boxes_mask = np.ones([len(labels)]+spatial_size,dtype=np.int8)*np.int8(bg_label) - boxes,_,_ = convert_data_type(boxes,dtype=np.int16) - for b in range(boxes.shape[0]): - # draw a circle/ball mask - box_size = [boxes[b,axis+spatial_dims].item()-boxes[b,axis].item() for axis in range(spatial_dims)] - if self.ellipse_mask: + + # allocate memory for boxes_mask_np + boxes_mask_np = np.ones((labels.shape[0],) + spatial_size, dtype=np.int16) * np.int16(bg_label) + + boxes_np: np.ndarray = convert_data_type(boxes, np.ndarray)[0] + boxes_np = np.round(boxes_np).astype(np.int32) + labels_np, *_ = convert_to_dst_type(src=labels, dst=boxes_np) + for b in range(boxes_np.shape[0]): + # generate a foreground mask + box_size = [boxes_np[b, axis + spatial_dims] - boxes_np[b, axis] for axis in range(spatial_dims)] + if ellipse_mask: + # initialize a square/cube mask max_box_size = max(box_size) - radius = max_box_size/2.0 - center = (max_box_size-1)/2.0 - boxes_only_mask = np.ones([max_box_size]*spatial_dims,dtype=np.int8)*np.int8(bg_label) # a square/cube mask + radius = max_box_size / 2.0 + center = (max_box_size - 1) / 2.0 + boxes_only_mask = np.ones([max_box_size] * spatial_dims, dtype=np.int16) * np.int16(bg_label) + # apply label intensity to circle/ball foreground if spatial_dims == 2: - Y, X = np.ogrid[:max_box_size, :max_box_size] - dist_from_center = (X-center)**2 + (Y-center)**2 + grid_y, grid_x = np.ogrid[:max_box_size, :max_box_size] + dist_from_center = (grid_x - center) ** 2 + (grid_y - center) ** 2 elif spatial_dims == 3: - Y, X, Z = np.ogrid[:max_box_size, :max_box_size, :max_box_size] - dist_from_center = (X-center)**2 + (Y-center)**2+ (Z-center)**2 - boxes_only_mask[dist_from_center <= radius**2] = np.int8(labels[b]) - - # squeeze it to a ellipse/ellipsoid - zoom_factor = [box_size[axis]/float(max_box_size) for axis in range(spatial_dims)] - boxes_only_mask = scipy.ndimage.zoom(boxes_only_mask,zoom=zoom_factor,mode='nearest',prefilter=False) + grid_y, grid_x, grid_z = np.ogrid[:max_box_size, :max_box_size, :max_box_size] + dist_from_center = (grid_x - center) ** 2 + (grid_y - center) ** 2 + (grid_z - center) ** 2 + boxes_only_mask[dist_from_center <= radius**2] = np.int16(labels_np[b]) + # squeeze it to a ellipse/ellipsoid mask + zoom_factor = [box_size[axis] / float(max_box_size) for axis in range(spatial_dims)] + boxes_only_mask = scipy_zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False) else: - boxes_only_mask = np.ones(box_size,dtype=np.int8)*np.int8(labels[b]) + # generate a rect mask + boxes_only_mask = np.ones(box_size, dtype=np.int16) * np.int16(labels_np[b]) # apply to global mask if spatial_dims == 2: - boxes_mask[b, boxes[b,0]:boxes[b,spatial_dims], boxes[b,1]:boxes[b,1+spatial_dims] ] = boxes_only_mask + boxes_mask_np[ + b, boxes_np[b, 0] : boxes_np[b, spatial_dims], boxes_np[b, 1] : boxes_np[b, 1 + spatial_dims] + ] = boxes_only_mask if spatial_dims == 3: - boxes_mask[b, boxes[b,0]:boxes[b,spatial_dims], boxes[b,1]:boxes[b,1+spatial_dims], boxes[b,2]:boxes[b,2+spatial_dims] ] = boxes_only_mask + boxes_mask_np[ + b, + boxes_np[b, 0] : boxes_np[b, spatial_dims], + boxes_np[b, 1] : boxes_np[b, 1 + spatial_dims], + boxes_np[b, 2] : boxes_np[b, 2 + spatial_dims], + ] = boxes_only_mask - # if isinstance(boxes, torch.Tensor): - # boxes_mask = torch.from_numpy(boxes_mask) + return convert_to_dst_type(src=boxes_mask_np, dst=boxes, dtype=torch.int16)[0] - return boxes_mask -def convert_mask_to_boxes(boxes_mask: NdarrayOrTensor, bg_label: int = -1) -> NdarrayOrTensor: +def convert_masks_to_boxes( + boxes_mask: NdarrayOrTensor, bg_label: int = -1, box_dtype=torch.float32, label_dtype=torch.long +) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]: """ - Convert binary mask image to box, which has the same size with the input image + Convert int16 mask image to box, which has the same size with the input image Args: - boxes_mask: sized (num_box, H, W) or (num_box, H, W, D) + boxes_mask: int16 array, sized (num_box, H, W). Each channel represents a box. + The foreground region in channel c has intensity of labels[c]. + The background intensity is bg_label. bg_label: background labels for the boxes_mask + box_dtype: output dtype for boxes + label_dtype: output dtype for labels Return: - - boxes - - labelss + - bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``. + - classification foreground(fg) labels, dtype should be int, sized (N,). """ - look_up_option(len(boxes_mask.shape), [3,4]) + look_up_option(len(boxes_mask.shape), [3, 4]) spatial_size = list(boxes_mask.shape[1:]) - spatial_dims = box_utils.get_spatial_dims(spatial_size=spatial_size) - - if isinstance(boxes_mask, torch.Tensor): - boxes_mask = boxes_mask.cpu().detach().numpy() - - boxes = [] - labels = [] - for b in range(boxes_mask.shape[0]): - fg_indices = np.nonzero(boxes_mask[b,...]-bg_label) + spatial_dims = get_spatial_dims(spatial_size=spatial_size) + + boxes_mask_np, *_ = convert_data_type(boxes_mask, np.ndarray) + + boxes_list = [] + labels_list = [] + for b in range(boxes_mask_np.shape[0]): + fg_indices = np.nonzero(boxes_mask_np[b, ...] - bg_label) if fg_indices[0].shape[0] == 0: continue boxes_b = [] for fd_i in fg_indices: - boxes_b.append(min(fd_i)) # top left corner + boxes_b.append(min(fd_i)) # top left corner for fd_i in fg_indices: - boxes_b.append(max(fd_i)+1) # bottom right corner + boxes_b.append(max(fd_i) + 1 - TO_REMOVE) # bottom right corner if spatial_dims == 2: - labels.append(boxes_mask[b,boxes_b[0],boxes_b[1]]) + labels_list.append(boxes_mask_np[b, boxes_b[0], boxes_b[1]]) if spatial_dims == 3: - labels.append(boxes_mask[b,boxes_b[0],boxes_b[1],boxes_b[2]]) - boxes.append(boxes_b) + labels_list.append(boxes_mask_np[b, boxes_b[0], boxes_b[1], boxes_b[2]]) + boxes_list.append(boxes_b) - if len(boxes) == 0: - return np.zeros([0,2*spatial_dims]), np.zeros([0]) - return np.asarray(boxes),np.asarray(labels) + if len(boxes_list) == 0: + boxes_np, labels_np = np.zeros([0, 2 * spatial_dims]), np.zeros([0]) + else: + boxes_np, labels_np = np.asarray(boxes_list), np.asarray(labels_list) + boxes, *_ = convert_to_dst_type(src=boxes_np, dst=boxes_mask, dtype=box_dtype) + labels, *_ = convert_to_dst_type(src=labels_np, dst=boxes_mask, dtype=label_dtype) + return boxes, labels diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py index b802ebcfe2..a5b50e1557 100644 --- a/monai/apps/detection/transforms/dictionary.py +++ b/monai/apps/detection/transforms/dictionary.py @@ -24,6 +24,8 @@ from monai.apps.detection.transforms.array import ( AffineBox, + BoxMaskToBox, + BoxToBoxMask, ClipBoxToImage, ConvertBoxMode, ConvertBoxToStandardMode, @@ -32,7 +34,7 @@ ) from monai.config import KeysCollection from monai.config.type_definitions import NdarrayOrTensor -from monai.data.box_utils import BoxMode +from monai.data.box_utils import COMPUTE_DTYPE, BoxMode from monai.data.utils import orientation_ras_lps from monai.transforms import Flip, RandFlip, RandZoom, SpatialPad, Zoom from monai.transforms.inverse import InvertibleTransform @@ -66,6 +68,12 @@ "ClipBoxToImaged", "ClipBoxToImageD", "ClipBoxToImageDict", + "BoxToBoxMaskd", + "BoxToBoxMaskD", + "BoxToBoxMaskDict", + "BoxMaskToBoxd", + "BoxMaskToBoxD", + "BoxMaskToBoxDict" ] DEFAULT_POST_FIX = PostFix.meta() @@ -246,7 +254,8 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N # when convert boxes from world coordinate to image coordinate, # we apply inverse affine transform affine_t, *_ = convert_data_type(affine, torch.Tensor) - inv_affine_t = torch.inverse(affine_t) + # torch.inverse should not run in half precision + inv_affine_t = torch.inverse(affine_t.to(COMPUTE_DTYPE)) for key in self.key_iterator(d): self.push_transform(d, key, extra_info={"affine": affine}) @@ -758,6 +767,122 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N return d +class BoxToBoxMaskd(MapTransform): + """ + Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.BoxToBoxMask`. + Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxMaskToBoxd` . + Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs. + The output d[box_mask_key] will have background intensity 0, since the following operations may pad 0 on the border. + + Args: + box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``. + box_mask_keys: Keys to store output box mask results for transformation. Same length with ``box_keys``. + label_keys: Keys that represents the lables corresponding to the ``box_keys``. Same length with ``box_keys``. + box_ref_image_keys: Keys that represents the reference images to which ``box_keys`` are attached. + min_fg_label: min foreground box label. + ellipse_mask: bool. + If True, it assumes the object shape is close to ellipse or ellipsoid. + If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box. + If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True + See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021. + allow_missing_keys: don't raise exception if key is missing. + + Example: + .. code-block:: python + + BoxToBoxMaskd( + box_keys="boxes", box_mask_keys="box_mask", + box_ref_image_keys="image", label_keys="labels", + min_fg_label=0, ellipse_mask=True + ) + """ + + def __init__( + self, + box_keys: KeysCollection, + box_mask_keys: KeysCollection, + label_keys: KeysCollection, + box_ref_image_keys: KeysCollection, + min_fg_label: int, + ellipse_mask: bool = False, + allow_missing_keys: bool = False, + ) -> None: + super().__init__(box_keys, allow_missing_keys) + self.box_keys = ensure_tuple(box_keys) + self.label_keys = ensure_tuple(label_keys) + self.box_mask_keys = ensure_tuple(box_mask_keys) + if not len(self.label_keys) == len(self.box_keys) == len(self.box_mask_keys): + raise ValueError("Please make sure len(label_keys)==len(box_keys)==len(box_mask_keys)!") + self.box_ref_image_keys = ensure_tuple_rep(box_ref_image_keys, len(self.box_keys)) + self.bg_label = min_fg_label - 1 # make sure background label is always smaller than fg labels. + self.converter = BoxToBoxMask(bg_label=self.bg_label, ellipse_mask=ellipse_mask) + + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: + d = dict(data) + + for box_key, label_key, box_mask_key, box_ref_image_key in zip( + self.box_keys, self.label_keys, self.box_mask_keys, self.box_ref_image_keys + ): + spatial_size = d[box_ref_image_key].shape[1:] + d[box_mask_key] = self.converter(d[box_key], d[label_key], spatial_size) + # make box mask background intensity to be 0, since the following operations may pad 0 on the border. + d[box_mask_key] -= self.bg_label + return d + + +class BoxMaskToBoxd(MapTransform): + """ + Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.BoxMaskToBox`. + Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxToBoxMaskd` . + Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs. + + Args: + box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``. + box_mask_keys: Keys to store output box mask results for transformation. Same length with ``box_keys``. + label_keys: Keys that represents the lables corresponding to the ``box_keys``. Same length with ``box_keys``. + min_fg_label: min foreground box label. + box_dtype: output dtype for box_keys + label_dtype: output dtype for label_keys + allow_missing_keys: don't raise exception if key is missing. + + Example: + .. code-block:: python + + BoxMaskToBoxd( + box_keys="boxes", box_mask_keys="box_mask", + label_keys="labels", min_fg_label=0 + ) + """ + + def __init__( + self, + box_keys: KeysCollection, + box_mask_keys: KeysCollection, + label_keys: KeysCollection, + min_fg_label: int, + box_dtype=torch.float32, + label_dtype=torch.long, + allow_missing_keys: bool = False, + ) -> None: + super().__init__(box_keys, allow_missing_keys) + self.box_keys = ensure_tuple(box_keys) + self.label_keys = ensure_tuple(label_keys) + self.box_mask_keys = ensure_tuple(box_mask_keys) + if not len(self.label_keys) == len(self.box_keys) == len(self.box_mask_keys): + raise ValueError("Please make sure len(label_keys)==len(box_keys)==len(box_mask_keys)!") + self.bg_label = min_fg_label - 1 # make sure background label is always smaller than fg labels. + self.converter = BoxMaskToBox(bg_label=self.bg_label, box_dtype=box_dtype, label_dtype=label_dtype) + self.box_dtype = box_dtype + + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: + d = dict(data) + + for box_key, label_key, box_mask_key in zip(self.box_keys, self.label_keys, self.box_mask_keys): + d[box_mask_key] += self.bg_label # pairs with the operation in BoxToBoxMaskd + d[box_key], d[label_key] = self.converter(d[box_mask_key]) + return d + + ConvertBoxModeD = ConvertBoxModeDict = ConvertBoxModed ConvertBoxToStandardModeD = ConvertBoxToStandardModeDict = ConvertBoxToStandardModed ZoomBoxD = ZoomBoxDict = ZoomBoxd @@ -766,3 +891,5 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N FlipBoxD = FlipBoxDict = FlipBoxd RandFlipBoxD = RandFlipBoxDict = RandFlipBoxd ClipBoxToImageD = ClipBoxToImageDict = ClipBoxToImaged +BoxToBoxMaskD = BoxToBoxMaskDict = BoxToBoxMaskd +BoxMaskToBoxD = BoxMaskToBoxDict = BoxMaskToBoxd diff --git a/tests/test_box_transform.py b/tests/test_box_transform.py index f290ce5726..86f666e5f9 100644 --- a/tests/test_box_transform.py +++ b/tests/test_box_transform.py @@ -17,6 +17,8 @@ from monai.apps.detection.transforms.dictionary import ( AffineBoxToImageCoordinated, + BoxMaskToBoxd, + BoxToBoxMaskd, ClipBoxToImaged, ConvertBoxModed, FlipBoxd, @@ -27,8 +29,7 @@ from monai.transforms import CastToTyped, Invertd from tests.utils import TEST_NDARRAYS, assert_allclose -TESTS = [] - +TESTS_3D = [] boxes = [[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 3, 3], [0, 1, 1, 2, 3, 4]] labels = [1, 1, 0] scores = [[0.2, 0.8], [0.3, 0.7], [0.6, 0.4]] @@ -36,7 +37,7 @@ image = np.zeros(image_size) for p in TEST_NDARRAYS: - TESTS.append( + TESTS_3D.append( [ {"box_keys": "boxes", "dst_mode": "xyzwhd"}, {"boxes": p(boxes), "image": p(image), "labels": p(labels), "scores": p(scores)}, @@ -48,10 +49,41 @@ ] ) +TESTS_2D = [] +boxes = [[0, 1, 2, 2], [0, 0, 1, 1]] +labels = [1, 0] +image_size = [1, 2, 2] +image = np.zeros(image_size) +for p in TEST_NDARRAYS: + TESTS_2D.append( + [{"boxes": p(boxes), "image": p(image), "labels": p(labels)}, p([[[0, 2], [0, 2]], [[1, 0], [0, 0]]])] + ) + class TestBoxTransform(unittest.TestCase): - @parameterized.expand(TESTS) - def test_value( + @parameterized.expand(TESTS_2D) + def test_value_2d(self, data, expected_mask): + test_dtype = [torch.float32, torch.float16] + for dtype in test_dtype: + data = CastToTyped(keys=["image", "boxes"], dtype=dtype)(data) + transform_to_mask = BoxToBoxMaskd( + box_keys="boxes", + box_mask_keys="box_mask", + box_ref_image_keys="image", + label_keys="labels", + min_fg_label=0, + ellipse_mask=True, + ) + transform_to_box = BoxMaskToBoxd( + box_keys="boxes", box_mask_keys="box_mask", label_keys="labels", min_fg_label=0 + ) + data_mask = transform_to_mask(data) + assert_allclose(data_mask["box_mask"], expected_mask, type_test=True, device_test=True, atol=1e-3) + data_back = transform_to_box(data_mask) + assert_allclose(data_back["boxes"], data["boxes"], type_test=False, device_test=False, atol=1e-3) + + @parameterized.expand(TESTS_3D) + def test_value_3d( self, keys, data, @@ -61,7 +93,7 @@ def test_value( expected_flip_result, expected_clip_result, ): - test_dtype = [torch.float32] + test_dtype = [torch.float16] for dtype in test_dtype: data = CastToTyped(keys=["image", "boxes"], dtype=dtype)(data) # test ConvertBoxToStandardModed From 93f584479d6e132cc7bee4781c25ff16841c4626 Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 00:21:21 -0400 Subject: [PATCH 03/18] add examples Signed-off-by: Can Zhao --- monai/apps/detection/transforms/dictionary.py | 60 ++++++++++++++++--- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py index a5b50e1557..056f4aed19 100644 --- a/monai/apps/detection/transforms/dictionary.py +++ b/monai/apps/detection/transforms/dictionary.py @@ -774,6 +774,12 @@ class BoxToBoxMaskd(MapTransform): Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs. The output d[box_mask_key] will have background intensity 0, since the following operations may pad 0 on the border. + This is the general solution for transforms that need to be applied on images and boxes simultaneously. + It is performed with the following steps. + 1) use BoxToBoxMaskd to covert boxes and labels to box_masks; + 2) do transforms, e.g., rotation or cropping, on images and box_masks together; + 3) use BoxMaskToBoxd to convert box_masks back to boxes and labels. + Args: box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``. box_mask_keys: Keys to store output box mask results for transformation. Same length with ``box_keys``. @@ -790,11 +796,28 @@ class BoxToBoxMaskd(MapTransform): Example: .. code-block:: python - BoxToBoxMaskd( - box_keys="boxes", box_mask_keys="box_mask", - box_ref_image_keys="image", label_keys="labels", - min_fg_label=0, ellipse_mask=True + # This code snippet creates transforms (random rotation and croppping) on boxes, labels, and images together. + import numpy as np + from monai.transforms import Compose, RandRotated, RandSpatialCropd + transforms = Compose( + [ + BoxToBoxMaskd( + box_keys="boxes", label_keys="labels", + box_mask_keys="box_mask", box_ref_image_keys="image", + min_fg_label=0, ellipse_mask=True + ), + RandRotated(keys=["image","box_mask"],mode=["nearest","nearest"], + prob=0.2,range_x=np.pi/6,range_y=np.pi/6,range_z=np.pi/6, + keep_size=True,padding_mode="zeros" + ), + RandSpatialCropd(keys=["image","box_mask"],roi_size=128, random_size=False), + BoxMaskToBoxd( + box_mask_keys="box_mask", box_keys="boxes", + label_keys="labels", min_fg_label=0 + ) + ] ) + """ def __init__( @@ -836,6 +859,12 @@ class BoxMaskToBoxd(MapTransform): Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxToBoxMaskd` . Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs. + This is the general solution for transforms that need to be applied on images and boxes simultaneously. + It is performed with the following steps. + 1) use BoxToBoxMaskd to covert boxes and labels to box_masks; + 2) do transforms, e.g., rotation or cropping, on images and box_masks together; + 3) use BoxMaskToBoxd to convert box_masks back to boxes and labels. + Args: box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``. box_mask_keys: Keys to store output box mask results for transformation. Same length with ``box_keys``. @@ -848,9 +877,26 @@ class BoxMaskToBoxd(MapTransform): Example: .. code-block:: python - BoxMaskToBoxd( - box_keys="boxes", box_mask_keys="box_mask", - label_keys="labels", min_fg_label=0 + # This code snippet creates transforms (random rotation and croppping) on boxes, labels, and images together. + import numpy as np + from monai.transforms import Compose, RandRotated, RandSpatialCropd + transforms = Compose( + [ + BoxToBoxMaskd( + box_keys="boxes", label_keys="labels", + box_mask_keys="box_mask", box_ref_image_keys="image", + min_fg_label=0, ellipse_mask=True + ), + RandRotated(keys=["image","box_mask"],mode=["nearest","nearest"], + prob=0.2,range_x=np.pi/6,range_y=np.pi/6,range_z=np.pi/6, + keep_size=True,padding_mode="zeros" + ), + RandSpatialCropd(keys=["image","box_mask"],roi_size=128, random_size=False), + BoxMaskToBoxd( + box_mask_keys="box_mask", box_keys="boxes", + label_keys="labels", min_fg_label=0 + ) + ] ) """ From 0e97bf65b11ec72322c466647d9ed8f27ab50415 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 27 May 2022 04:21:50 +0000 Subject: [PATCH 04/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- monai/apps/detection/transforms/dictionary.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py index 056f4aed19..454a1ec8d8 100644 --- a/monai/apps/detection/transforms/dictionary.py +++ b/monai/apps/detection/transforms/dictionary.py @@ -774,9 +774,9 @@ class BoxToBoxMaskd(MapTransform): Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs. The output d[box_mask_key] will have background intensity 0, since the following operations may pad 0 on the border. - This is the general solution for transforms that need to be applied on images and boxes simultaneously. - It is performed with the following steps. - 1) use BoxToBoxMaskd to covert boxes and labels to box_masks; + This is the general solution for transforms that need to be applied on images and boxes simultaneously. + It is performed with the following steps. + 1) use BoxToBoxMaskd to covert boxes and labels to box_masks; 2) do transforms, e.g., rotation or cropping, on images and box_masks together; 3) use BoxMaskToBoxd to convert box_masks back to boxes and labels. @@ -803,7 +803,7 @@ class BoxToBoxMaskd(MapTransform): [ BoxToBoxMaskd( box_keys="boxes", label_keys="labels", - box_mask_keys="box_mask", box_ref_image_keys="image", + box_mask_keys="box_mask", box_ref_image_keys="image", min_fg_label=0, ellipse_mask=True ), RandRotated(keys=["image","box_mask"],mode=["nearest","nearest"], @@ -812,12 +812,12 @@ class BoxToBoxMaskd(MapTransform): ), RandSpatialCropd(keys=["image","box_mask"],roi_size=128, random_size=False), BoxMaskToBoxd( - box_mask_keys="box_mask", box_keys="boxes", + box_mask_keys="box_mask", box_keys="boxes", label_keys="labels", min_fg_label=0 ) ] ) - + """ def __init__( @@ -859,9 +859,9 @@ class BoxMaskToBoxd(MapTransform): Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxToBoxMaskd` . Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs. - This is the general solution for transforms that need to be applied on images and boxes simultaneously. - It is performed with the following steps. - 1) use BoxToBoxMaskd to covert boxes and labels to box_masks; + This is the general solution for transforms that need to be applied on images and boxes simultaneously. + It is performed with the following steps. + 1) use BoxToBoxMaskd to covert boxes and labels to box_masks; 2) do transforms, e.g., rotation or cropping, on images and box_masks together; 3) use BoxMaskToBoxd to convert box_masks back to boxes and labels. @@ -884,7 +884,7 @@ class BoxMaskToBoxd(MapTransform): [ BoxToBoxMaskd( box_keys="boxes", label_keys="labels", - box_mask_keys="box_mask", box_ref_image_keys="image", + box_mask_keys="box_mask", box_ref_image_keys="image", min_fg_label=0, ellipse_mask=True ), RandRotated(keys=["image","box_mask"],mode=["nearest","nearest"], @@ -893,7 +893,7 @@ class BoxMaskToBoxd(MapTransform): ), RandSpatialCropd(keys=["image","box_mask"],roi_size=128, random_size=False), BoxMaskToBoxd( - box_mask_keys="box_mask", box_keys="boxes", + box_mask_keys="box_mask", box_keys="boxes", label_keys="labels", min_fg_label=0 ) ] From cc6a6833fb9d0221e217aaf229a8baa5f3678b61 Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 00:22:20 -0400 Subject: [PATCH 05/18] add examples Signed-off-by: Can Zhao --- monai/apps/detection/transforms/dictionary.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py index 056f4aed19..bf1276b858 100644 --- a/monai/apps/detection/transforms/dictionary.py +++ b/monai/apps/detection/transforms/dictionary.py @@ -796,9 +796,9 @@ class BoxToBoxMaskd(MapTransform): Example: .. code-block:: python - # This code snippet creates transforms (random rotation and croppping) on boxes, labels, and images together. + # This code snippet creates transforms (random rotation and croppping) on boxes, labels, and image together. import numpy as np - from monai.transforms import Compose, RandRotated, RandSpatialCropd + from monai.transforms import Compose, RandRotated, RandSpatialCropd, DeleteItemsd transforms = Compose( [ BoxToBoxMaskd( @@ -815,6 +815,7 @@ class BoxToBoxMaskd(MapTransform): box_mask_keys="box_mask", box_keys="boxes", label_keys="labels", min_fg_label=0 ) + DeleteItemsd(keys=["box_mask"]), ] ) @@ -879,7 +880,7 @@ class BoxMaskToBoxd(MapTransform): # This code snippet creates transforms (random rotation and croppping) on boxes, labels, and images together. import numpy as np - from monai.transforms import Compose, RandRotated, RandSpatialCropd + from monai.transforms import Compose, RandRotated, RandSpatialCropd, DeleteItemsd transforms = Compose( [ BoxToBoxMaskd( @@ -896,6 +897,7 @@ class BoxMaskToBoxd(MapTransform): box_mask_keys="box_mask", box_keys="boxes", label_keys="labels", min_fg_label=0 ) + DeleteItemsd(keys=["box_mask"]), ] ) """ From 091401e797909671d8f3f9a8e03c0a199086b429 Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 00:33:54 -0400 Subject: [PATCH 06/18] add import Signed-off-by: Can Zhao --- monai/apps/detection/transforms/array.py | 2 ++ monai/apps/detection/transforms/box_ops.py | 7 ++++--- monai/apps/detection/transforms/dictionary.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py index 49ac70ea54..a44b2f5d52 100644 --- a/monai/apps/detection/transforms/array.py +++ b/monai/apps/detection/transforms/array.py @@ -391,6 +391,7 @@ class BoxToBoxMask(Transform): bg_label: background labels for the output mask image, make sure it is smaller than any foreground(fg) labels. ellipse_mask: whether to make the foregound mask an ellipse (2D) or ellipsoid (3D). """ + backend = [TransformBackends.NUMPY] def __init__(self, bg_label: int = -1, ellipse_mask: bool = False) -> None: @@ -425,6 +426,7 @@ class BoxMaskToBox(Transform): box_dtype: output dtype for boxes label_dtype: output dtype for labels """ + backend = [TransformBackends.NUMPY] def __init__(self, bg_label: int = -1, box_dtype=torch.float32, label_dtype=torch.long) -> None: diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py index 37728ef07e..e5606cd3af 100644 --- a/monai/apps/detection/transforms/box_ops.py +++ b/monai/apps/detection/transforms/box_ops.py @@ -14,15 +14,16 @@ import numpy as np import torch -from scipy.ndimage import zoom as scipy_zoom from monai.config.type_definitions import NdarrayOrTensor from monai.data.box_utils import COMPUTE_DTYPE, TO_REMOVE, get_spatial_dims from monai.transforms.utils import create_scale -from monai.utils import look_up_option +from monai.utils import look_up_option, optional_import from monai.utils.misc import ensure_tuple, ensure_tuple_rep from monai.utils.type_conversion import convert_data_type, convert_to_dst_type +scipy_ndimage, _ = optional_import("scipy.ndimage") + def _apply_affine_to_points(points: torch.Tensor, affine: torch.Tensor, include_shift: bool = True) -> torch.Tensor: """ @@ -257,7 +258,7 @@ def convert_boxes_to_masks( boxes_only_mask[dist_from_center <= radius**2] = np.int16(labels_np[b]) # squeeze it to a ellipse/ellipsoid mask zoom_factor = [box_size[axis] / float(max_box_size) for axis in range(spatial_dims)] - boxes_only_mask = scipy_zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False) + boxes_only_mask = scipy_ndimage.zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False) else: # generate a rect mask boxes_only_mask = np.ones(box_size, dtype=np.int16) * np.int16(labels_np[b]) diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py index 1cdb4dec55..39aebe5bee 100644 --- a/monai/apps/detection/transforms/dictionary.py +++ b/monai/apps/detection/transforms/dictionary.py @@ -73,7 +73,7 @@ "BoxToBoxMaskDict", "BoxMaskToBoxd", "BoxMaskToBoxD", - "BoxMaskToBoxDict" + "BoxMaskToBoxDict", ] DEFAULT_POST_FIX = PostFix.meta() From 49399e3a127034ef836147cbeb1d6181148a45b7 Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 00:35:25 -0400 Subject: [PATCH 07/18] add import Signed-off-by: Can Zhao --- monai/apps/detection/transforms/array.py | 1 + 1 file changed, 1 insertion(+) diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py index a44b2f5d52..66388a6df2 100644 --- a/monai/apps/detection/transforms/array.py +++ b/monai/apps/detection/transforms/array.py @@ -50,6 +50,7 @@ "FlipBox", "ClipBoxToImage", "BoxToBoxMask", + "BoxMaskToBox", ] From 3ff7f2e3f9b30bb9dbc1f6b699603e72fe74d0b6 Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 00:37:26 -0400 Subject: [PATCH 08/18] add docstring Signed-off-by: Can Zhao --- monai/apps/detection/transforms/array.py | 6 +++++- monai/apps/detection/transforms/box_ops.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py index 66388a6df2..1eda277fbd 100644 --- a/monai/apps/detection/transforms/array.py +++ b/monai/apps/detection/transforms/array.py @@ -390,7 +390,11 @@ class BoxToBoxMask(Transform): Args: bg_label: background labels for the output mask image, make sure it is smaller than any foreground(fg) labels. - ellipse_mask: whether to make the foregound mask an ellipse (2D) or ellipsoid (3D). + ellipse_mask: bool. + If True, it assumes the object shape is close to ellipse or ellipsoid. + If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box. + If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True + See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021. """ backend = [TransformBackends.NUMPY] diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py index e5606cd3af..adad36db1d 100644 --- a/monai/apps/detection/transforms/box_ops.py +++ b/monai/apps/detection/transforms/box_ops.py @@ -207,7 +207,11 @@ def convert_boxes_to_masks( labels: classification foreground(fg) labels corresponding to `boxes`, dtype should be int, sized (N,). spatial_size: image spatial size. bg_label: background labels for the output mask image, make sure it is smaller than any fg labels. - ellipse_mask: whether to make the foregound mask an ellipse (2D) or ellipsoid (3D). + ellipse_mask: bool. + If True, it assumes the object shape is close to ellipse or ellipsoid. + If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box. + If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True + See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021. Return: - int16 array, sized (num_box, H, W). Each channel represents a box. From c5199be64866180a01c4733437102874cda4e866 Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 00:39:34 -0400 Subject: [PATCH 09/18] update test Signed-off-by: Can Zhao --- tests/test_box_transform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_box_transform.py b/tests/test_box_transform.py index 86f666e5f9..a37be6a2de 100644 --- a/tests/test_box_transform.py +++ b/tests/test_box_transform.py @@ -81,6 +81,7 @@ def test_value_2d(self, data, expected_mask): assert_allclose(data_mask["box_mask"], expected_mask, type_test=True, device_test=True, atol=1e-3) data_back = transform_to_box(data_mask) assert_allclose(data_back["boxes"], data["boxes"], type_test=False, device_test=False, atol=1e-3) + assert_allclose(data_back["labels"], data["labels"], type_test=False, device_test=False, atol=1e-3) @parameterized.expand(TESTS_3D) def test_value_3d( From 38eadb737dacd5e8e521880f79e7747feb5c7b3a Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 00:48:39 -0400 Subject: [PATCH 10/18] update import Signed-off-by: Can Zhao --- monai/apps/detection/transforms/box_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py index adad36db1d..7865cb216b 100644 --- a/monai/apps/detection/transforms/box_ops.py +++ b/monai/apps/detection/transforms/box_ops.py @@ -22,7 +22,7 @@ from monai.utils.misc import ensure_tuple, ensure_tuple_rep from monai.utils.type_conversion import convert_data_type, convert_to_dst_type -scipy_ndimage, _ = optional_import("scipy.ndimage") +scipy, _ = optional_import("scipy") def _apply_affine_to_points(points: torch.Tensor, affine: torch.Tensor, include_shift: bool = True) -> torch.Tensor: @@ -262,7 +262,7 @@ def convert_boxes_to_masks( boxes_only_mask[dist_from_center <= radius**2] = np.int16(labels_np[b]) # squeeze it to a ellipse/ellipsoid mask zoom_factor = [box_size[axis] / float(max_box_size) for axis in range(spatial_dims)] - boxes_only_mask = scipy_ndimage.zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False) + boxes_only_mask = scipy.ndimage.zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False) else: # generate a rect mask boxes_only_mask = np.ones(box_size, dtype=np.int16) * np.int16(labels_np[b]) From 71589a13ea675f62d7b494fbf6fd251573259535 Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 01:05:26 -0400 Subject: [PATCH 11/18] update test Signed-off-by: Can Zhao --- monai/apps/detection/transforms/box_ops.py | 1 + tests/test_box_transform.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py index 7865cb216b..c962b8f8cc 100644 --- a/monai/apps/detection/transforms/box_ops.py +++ b/monai/apps/detection/transforms/box_ops.py @@ -262,6 +262,7 @@ def convert_boxes_to_masks( boxes_only_mask[dist_from_center <= radius**2] = np.int16(labels_np[b]) # squeeze it to a ellipse/ellipsoid mask zoom_factor = [box_size[axis] / float(max_box_size) for axis in range(spatial_dims)] + # scipy zoom does not support float16 cpu boxes_only_mask = scipy.ndimage.zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False) else: # generate a rect mask diff --git a/tests/test_box_transform.py b/tests/test_box_transform.py index a37be6a2de..4752aaf732 100644 --- a/tests/test_box_transform.py +++ b/tests/test_box_transform.py @@ -94,7 +94,7 @@ def test_value_3d( expected_flip_result, expected_clip_result, ): - test_dtype = [torch.float16] + test_dtype = [torch.float32] for dtype in test_dtype: data = CastToTyped(keys=["image", "boxes"], dtype=dtype)(data) # test ConvertBoxToStandardModed From 9d3ea7bed67dea5d4d2c239a82f330de9bbd570a Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 16:30:03 -0400 Subject: [PATCH 12/18] change func name Signed-off-by: Can Zhao --- monai/apps/detection/transforms/dictionary.py | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py index 39aebe5bee..e765939846 100644 --- a/monai/apps/detection/transforms/dictionary.py +++ b/monai/apps/detection/transforms/dictionary.py @@ -24,8 +24,8 @@ from monai.apps.detection.transforms.array import ( AffineBox, - BoxMaskToBox, - BoxToBoxMask, + MaskToBox, + BoxToMask, ClipBoxToImage, ConvertBoxMode, ConvertBoxToStandardMode, @@ -68,12 +68,12 @@ "ClipBoxToImaged", "ClipBoxToImageD", "ClipBoxToImageDict", - "BoxToBoxMaskd", - "BoxToBoxMaskD", - "BoxToBoxMaskDict", - "BoxMaskToBoxd", - "BoxMaskToBoxD", - "BoxMaskToBoxDict", + "BoxToMaskd", + "BoxToMaskD", + "BoxToMaskDict", + "MaskToBoxd", + "MaskToBoxD", + "MaskToBoxDict", ] DEFAULT_POST_FIX = PostFix.meta() @@ -767,18 +767,18 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N return d -class BoxToBoxMaskd(MapTransform): +class BoxToMaskd(MapTransform): """ - Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.BoxToBoxMask`. - Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxMaskToBoxd` . + Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.BoxToMask`. + Pairs with :py:class:`monai.apps.detection.transforms.dictionary.MaskToBoxd` . Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs. The output d[box_mask_key] will have background intensity 0, since the following operations may pad 0 on the border. This is the general solution for transforms that need to be applied on images and boxes simultaneously. It is performed with the following steps. - 1) use BoxToBoxMaskd to covert boxes and labels to box_masks; + 1) use BoxToMaskd to covert boxes and labels to box_masks; 2) do transforms, e.g., rotation or cropping, on images and box_masks together; - 3) use BoxMaskToBoxd to convert box_masks back to boxes and labels. + 3) use MaskToBoxd to convert box_masks back to boxes and labels. Args: box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``. @@ -801,7 +801,7 @@ class BoxToBoxMaskd(MapTransform): from monai.transforms import Compose, RandRotated, RandSpatialCropd, DeleteItemsd transforms = Compose( [ - BoxToBoxMaskd( + BoxToMaskd( box_keys="boxes", label_keys="labels", box_mask_keys="box_mask", box_ref_image_keys="image", min_fg_label=0, ellipse_mask=True @@ -811,7 +811,7 @@ class BoxToBoxMaskd(MapTransform): keep_size=True,padding_mode="zeros" ), RandSpatialCropd(keys=["image","box_mask"],roi_size=128, random_size=False), - BoxMaskToBoxd( + MaskToBoxd( box_mask_keys="box_mask", box_keys="boxes", label_keys="labels", min_fg_label=0 ) @@ -839,7 +839,7 @@ def __init__( raise ValueError("Please make sure len(label_keys)==len(box_keys)==len(box_mask_keys)!") self.box_ref_image_keys = ensure_tuple_rep(box_ref_image_keys, len(self.box_keys)) self.bg_label = min_fg_label - 1 # make sure background label is always smaller than fg labels. - self.converter = BoxToBoxMask(bg_label=self.bg_label, ellipse_mask=ellipse_mask) + self.converter = BoxToMask(bg_label=self.bg_label, ellipse_mask=ellipse_mask) def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) @@ -854,17 +854,17 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N return d -class BoxMaskToBoxd(MapTransform): +class MaskToBoxd(MapTransform): """ - Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.BoxMaskToBox`. - Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxToBoxMaskd` . + Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.MaskToBox`. + Pairs with :py:class:`monai.apps.detection.transforms.dictionary.BoxToMaskd` . Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs. This is the general solution for transforms that need to be applied on images and boxes simultaneously. It is performed with the following steps. - 1) use BoxToBoxMaskd to covert boxes and labels to box_masks; + 1) use BoxToMaskd to covert boxes and labels to box_masks; 2) do transforms, e.g., rotation or cropping, on images and box_masks together; - 3) use BoxMaskToBoxd to convert box_masks back to boxes and labels. + 3) use MaskToBoxd to convert box_masks back to boxes and labels. Args: box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``. @@ -883,7 +883,7 @@ class BoxMaskToBoxd(MapTransform): from monai.transforms import Compose, RandRotated, RandSpatialCropd, DeleteItemsd transforms = Compose( [ - BoxToBoxMaskd( + BoxToMaskd( box_keys="boxes", label_keys="labels", box_mask_keys="box_mask", box_ref_image_keys="image", min_fg_label=0, ellipse_mask=True @@ -893,7 +893,7 @@ class BoxMaskToBoxd(MapTransform): keep_size=True,padding_mode="zeros" ), RandSpatialCropd(keys=["image","box_mask"],roi_size=128, random_size=False), - BoxMaskToBoxd( + MaskToBoxd( box_mask_keys="box_mask", box_keys="boxes", label_keys="labels", min_fg_label=0 ) @@ -919,14 +919,14 @@ def __init__( if not len(self.label_keys) == len(self.box_keys) == len(self.box_mask_keys): raise ValueError("Please make sure len(label_keys)==len(box_keys)==len(box_mask_keys)!") self.bg_label = min_fg_label - 1 # make sure background label is always smaller than fg labels. - self.converter = BoxMaskToBox(bg_label=self.bg_label, box_dtype=box_dtype, label_dtype=label_dtype) + self.converter = MaskToBox(bg_label=self.bg_label, box_dtype=box_dtype, label_dtype=label_dtype) self.box_dtype = box_dtype def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for box_key, label_key, box_mask_key in zip(self.box_keys, self.label_keys, self.box_mask_keys): - d[box_mask_key] += self.bg_label # pairs with the operation in BoxToBoxMaskd + d[box_mask_key] += self.bg_label # pairs with the operation in BoxToMaskd d[box_key], d[label_key] = self.converter(d[box_mask_key]) return d @@ -939,5 +939,5 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N FlipBoxD = FlipBoxDict = FlipBoxd RandFlipBoxD = RandFlipBoxDict = RandFlipBoxd ClipBoxToImageD = ClipBoxToImageDict = ClipBoxToImaged -BoxToBoxMaskD = BoxToBoxMaskDict = BoxToBoxMaskd -BoxMaskToBoxD = BoxMaskToBoxDict = BoxMaskToBoxd +BoxToMaskD = BoxToMaskDict = BoxToMaskd +MaskToBoxD = MaskToBoxDict = MaskToBoxd From dddbe57d40365de2e1074c781f9e435d7447c27d Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 16:30:50 -0400 Subject: [PATCH 13/18] change func name Signed-off-by: Can Zhao --- tests/test_box_transform.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_box_transform.py b/tests/test_box_transform.py index 4752aaf732..0cb529ff9e 100644 --- a/tests/test_box_transform.py +++ b/tests/test_box_transform.py @@ -17,8 +17,8 @@ from monai.apps.detection.transforms.dictionary import ( AffineBoxToImageCoordinated, - BoxMaskToBoxd, - BoxToBoxMaskd, + MaskToBoxd, + BoxToMaskd, ClipBoxToImaged, ConvertBoxModed, FlipBoxd, @@ -66,7 +66,7 @@ def test_value_2d(self, data, expected_mask): test_dtype = [torch.float32, torch.float16] for dtype in test_dtype: data = CastToTyped(keys=["image", "boxes"], dtype=dtype)(data) - transform_to_mask = BoxToBoxMaskd( + transform_to_mask = BoxToMaskd( box_keys="boxes", box_mask_keys="box_mask", box_ref_image_keys="image", @@ -74,7 +74,7 @@ def test_value_2d(self, data, expected_mask): min_fg_label=0, ellipse_mask=True, ) - transform_to_box = BoxMaskToBoxd( + transform_to_box = MaskToBoxd( box_keys="boxes", box_mask_keys="box_mask", label_keys="labels", min_fg_label=0 ) data_mask = transform_to_mask(data) From 7460d05a5d14b89e449d6762343e2e3ccad2b73e Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 16:31:42 -0400 Subject: [PATCH 14/18] change func name Signed-off-by: Can Zhao --- monai/apps/detection/transforms/array.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py index 1eda277fbd..7165b86b6d 100644 --- a/monai/apps/detection/transforms/array.py +++ b/monai/apps/detection/transforms/array.py @@ -49,8 +49,8 @@ "ResizeBox", "FlipBox", "ClipBoxToImage", - "BoxToBoxMask", - "BoxMaskToBox", + "BoxToMask", + "MaskToBox", ] @@ -384,7 +384,7 @@ def __call__( # type: ignore return boxes_clip, tuple(labels_clip_list) -class BoxToBoxMask(Transform): +class BoxToMask(Transform): """ Convert box to int16 mask image, which has the same size with the input image. @@ -420,10 +420,10 @@ def __call__( # type: ignore return convert_boxes_to_masks(boxes, labels, spatial_size, self.bg_label, self.ellipse_mask) -class BoxMaskToBox(Transform): +class MaskToBox(Transform): """ Convert int16 mask image to box, which has the same size with the input image. - Pairs with :py:class:`monai.apps.detection.transforms.array.BoxToBoxMask`. + Pairs with :py:class:`monai.apps.detection.transforms.array.BoxToMask`. Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs. Args: From 19fa66112a3fa66ede373b677e14a3acbf35e188 Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 16:33:04 -0400 Subject: [PATCH 15/18] change func name Signed-off-by: Can Zhao --- monai/apps/detection/transforms/array.py | 8 ++++---- monai/apps/detection/transforms/box_ops.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py index 7165b86b6d..1c57db79f3 100644 --- a/monai/apps/detection/transforms/array.py +++ b/monai/apps/detection/transforms/array.py @@ -34,8 +34,8 @@ from .box_ops import ( apply_affine_to_boxes, - convert_boxes_to_masks, - convert_masks_to_boxes, + convert_box_to_mask, + convert_mask_to_box, flip_boxes, resize_boxes, zoom_boxes, @@ -417,7 +417,7 @@ def __call__( # type: ignore The foreground region in channel c has intensity of labels[c]. The background intensity is bg_label. """ - return convert_boxes_to_masks(boxes, labels, spatial_size, self.bg_label, self.ellipse_mask) + return convert_box_to_mask(boxes, labels, spatial_size, self.bg_label, self.ellipse_mask) class MaskToBox(Transform): @@ -450,4 +450,4 @@ def __call__(self, boxes_mask: NdarrayOrTensor) -> Tuple[NdarrayOrTensor, Ndarra - bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``. - classification foreground(fg) labels, dtype should be int, sized (N,). """ - return convert_masks_to_boxes(boxes_mask, self.bg_label, self.box_dtype, self.label_dtype) + return convert_mask_to_box(boxes_mask, self.bg_label, self.box_dtype, self.label_dtype) diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py index c962b8f8cc..b149be5468 100644 --- a/monai/apps/detection/transforms/box_ops.py +++ b/monai/apps/detection/transforms/box_ops.py @@ -192,7 +192,7 @@ def flip_boxes( return flip_boxes -def convert_boxes_to_masks( +def convert_box_to_mask( boxes: NdarrayOrTensor, labels: NdarrayOrTensor, spatial_size: Union[Sequence[int], int], @@ -284,7 +284,7 @@ def convert_boxes_to_masks( return convert_to_dst_type(src=boxes_mask_np, dst=boxes, dtype=torch.int16)[0] -def convert_masks_to_boxes( +def convert_mask_to_box( boxes_mask: NdarrayOrTensor, bg_label: int = -1, box_dtype=torch.float32, label_dtype=torch.long ) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]: """ From 66b94d8a0e8bd304306bcb407eaef189dc65bd06 Mon Sep 17 00:00:00 2001 From: monai-bot Date: Fri, 27 May 2022 22:08:08 +0000 Subject: [PATCH 16/18] [MONAI] code formatting Signed-off-by: monai-bot --- monai/apps/detection/transforms/dictionary.py | 2 +- tests/test_box_transform.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py index e765939846..5aeacfd63e 100644 --- a/monai/apps/detection/transforms/dictionary.py +++ b/monai/apps/detection/transforms/dictionary.py @@ -24,12 +24,12 @@ from monai.apps.detection.transforms.array import ( AffineBox, - MaskToBox, BoxToMask, ClipBoxToImage, ConvertBoxMode, ConvertBoxToStandardMode, FlipBox, + MaskToBox, ZoomBox, ) from monai.config import KeysCollection diff --git a/tests/test_box_transform.py b/tests/test_box_transform.py index 0cb529ff9e..8dbd446b16 100644 --- a/tests/test_box_transform.py +++ b/tests/test_box_transform.py @@ -17,11 +17,11 @@ from monai.apps.detection.transforms.dictionary import ( AffineBoxToImageCoordinated, - MaskToBoxd, BoxToMaskd, ClipBoxToImaged, ConvertBoxModed, FlipBoxd, + MaskToBoxd, RandFlipBoxd, RandZoomBoxd, ZoomBoxd, From 1d44667c0220b0c8c6c9e0207e79fc6fbed79a65 Mon Sep 17 00:00:00 2001 From: Can Zhao Date: Fri, 27 May 2022 18:17:22 -0400 Subject: [PATCH 17/18] corrent a corner case in NMS Signed-off-by: Can Zhao --- monai/data/box_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py index dfe9f3798b..6bfd89080a 100644 --- a/monai/data/box_utils.py +++ b/monai/data/box_utils.py @@ -1039,7 +1039,7 @@ def non_max_suppression( # returns empty array if boxes is empty if boxes.shape[0] == 0: - return convert_to_dst_type(src=np.array([]), dst=boxes)[0] + return convert_to_dst_type(src=np.array([]), dst=boxes, dtype=torch.long)[0] if boxes.shape[0] != scores.shape[0]: raise ValueError( From 1994175ddcc5d29b41804f1193b3447bc99260bd Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Sat, 28 May 2022 08:57:31 +0100 Subject: [PATCH 18/18] update docstring, simplify convert_box_to_mask, add test cases Signed-off-by: Wenqi Li --- monai/apps/detection/transforms/array.py | 9 +-- monai/apps/detection/transforms/box_ops.py | 56 +++++++------------ monai/apps/detection/transforms/dictionary.py | 34 ++++++----- tests/test_box_transform.py | 26 ++++++++- 4 files changed, 70 insertions(+), 55 deletions(-) diff --git a/monai/apps/detection/transforms/array.py b/monai/apps/detection/transforms/array.py index 1c57db79f3..42aeda71cf 100644 --- a/monai/apps/detection/transforms/array.py +++ b/monai/apps/detection/transforms/array.py @@ -391,10 +391,11 @@ class BoxToMask(Transform): Args: bg_label: background labels for the output mask image, make sure it is smaller than any foreground(fg) labels. ellipse_mask: bool. - If True, it assumes the object shape is close to ellipse or ellipsoid. - If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box. - If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True - See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021. + + - If True, it assumes the object shape is close to ellipse or ellipsoid. + - If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box. + - If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True + See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021. """ backend = [TransformBackends.NUMPY] diff --git a/monai/apps/detection/transforms/box_ops.py b/monai/apps/detection/transforms/box_ops.py index b149be5468..6b9c2ac87b 100644 --- a/monai/apps/detection/transforms/box_ops.py +++ b/monai/apps/detection/transforms/box_ops.py @@ -17,6 +17,7 @@ from monai.config.type_definitions import NdarrayOrTensor from monai.data.box_utils import COMPUTE_DTYPE, TO_REMOVE, get_spatial_dims +from monai.transforms import Resize from monai.transforms.utils import create_scale from monai.utils import look_up_option, optional_import from monai.utils.misc import ensure_tuple, ensure_tuple_rep @@ -184,12 +185,12 @@ def flip_boxes( flip_axes = ensure_tuple(flip_axes) # flip box - flip_boxes = deepcopy(boxes) + _flip_boxes = deepcopy(boxes) for axis in flip_axes: - flip_boxes[:, axis + spatial_dims] = spatial_size[axis] - boxes[:, axis] - TO_REMOVE - flip_boxes[:, axis] = spatial_size[axis] - boxes[:, axis + spatial_dims] - TO_REMOVE + _flip_boxes[:, axis + spatial_dims] = spatial_size[axis] - boxes[:, axis] - TO_REMOVE + _flip_boxes[:, axis] = spatial_size[axis] - boxes[:, axis + spatial_dims] - TO_REMOVE - return flip_boxes + return _flip_boxes def convert_box_to_mask( @@ -208,10 +209,11 @@ def convert_box_to_mask( spatial_size: image spatial size. bg_label: background labels for the output mask image, make sure it is smaller than any fg labels. ellipse_mask: bool. - If True, it assumes the object shape is close to ellipse or ellipsoid. - If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box. - If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True - See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021. + + - If True, it assumes the object shape is close to ellipse or ellipsoid. + - If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box. + - If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True + See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021. Return: - int16 array, sized (num_box, H, W). Each channel represents a box. @@ -230,8 +232,8 @@ def convert_box_to_mask( # bg_label should be smaller than labels if bg_label >= min(labels): raise ValueError( - f"bg_label should be smaller than any foreground box labels. \ -min(labels)={min(labels)}, while bg_label={bg_label}" + f"bg_label should be smaller than any foreground box labels.\n" + f"min(labels)={min(labels)}, while bg_label={bg_label}" ) if labels.shape[0] != boxes.shape[0]: @@ -240,8 +242,7 @@ def convert_box_to_mask( # allocate memory for boxes_mask_np boxes_mask_np = np.ones((labels.shape[0],) + spatial_size, dtype=np.int16) * np.int16(bg_label) - boxes_np: np.ndarray = convert_data_type(boxes, np.ndarray)[0] - boxes_np = np.round(boxes_np).astype(np.int32) + boxes_np: np.ndarray = convert_data_type(boxes, np.ndarray, dtype=np.int32)[0] labels_np, *_ = convert_to_dst_type(src=labels, dst=boxes_np) for b in range(boxes_np.shape[0]): # generate a foreground mask @@ -253,34 +254,19 @@ def convert_box_to_mask( center = (max_box_size - 1) / 2.0 boxes_only_mask = np.ones([max_box_size] * spatial_dims, dtype=np.int16) * np.int16(bg_label) # apply label intensity to circle/ball foreground - if spatial_dims == 2: - grid_y, grid_x = np.ogrid[:max_box_size, :max_box_size] - dist_from_center = (grid_x - center) ** 2 + (grid_y - center) ** 2 - elif spatial_dims == 3: - grid_y, grid_x, grid_z = np.ogrid[:max_box_size, :max_box_size, :max_box_size] - dist_from_center = (grid_x - center) ** 2 + (grid_y - center) ** 2 + (grid_z - center) ** 2 + ranges = tuple(slice(0, max_box_size) for _ in range(spatial_dims)) + dist_from_center = sum((grid - center) ** 2 for grid in np.ogrid[ranges]) boxes_only_mask[dist_from_center <= radius**2] = np.int16(labels_np[b]) # squeeze it to a ellipse/ellipsoid mask - zoom_factor = [box_size[axis] / float(max_box_size) for axis in range(spatial_dims)] - # scipy zoom does not support float16 cpu - boxes_only_mask = scipy.ndimage.zoom(boxes_only_mask, zoom=zoom_factor, mode="nearest", prefilter=False) + resizer = Resize(spatial_size=box_size, mode="nearest", anti_aliasing=False) + boxes_only_mask = resizer(boxes_only_mask[None])[0] # type: ignore else: # generate a rect mask - boxes_only_mask = np.ones(box_size, dtype=np.int16) * np.int16(labels_np[b]) - + boxes_only_mask = np.ones(box_size, dtype=np.int16) * np.int16(labels_np[b]) # type: ignore # apply to global mask - if spatial_dims == 2: - boxes_mask_np[ - b, boxes_np[b, 0] : boxes_np[b, spatial_dims], boxes_np[b, 1] : boxes_np[b, 1 + spatial_dims] - ] = boxes_only_mask - if spatial_dims == 3: - boxes_mask_np[ - b, - boxes_np[b, 0] : boxes_np[b, spatial_dims], - boxes_np[b, 1] : boxes_np[b, 1 + spatial_dims], - boxes_np[b, 2] : boxes_np[b, 2 + spatial_dims], - ] = boxes_only_mask - + slicing = [b] + slicing.extend(slice(boxes_np[b, d], boxes_np[b, d + spatial_dims]) for d in range(spatial_dims)) # type:ignore + boxes_mask_np[tuple(slicing)] = boxes_only_mask return convert_to_dst_type(src=boxes_mask_np, dst=boxes, dtype=torch.int16)[0] diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py index 5aeacfd63e..b0d9c5f1f9 100644 --- a/monai/apps/detection/transforms/dictionary.py +++ b/monai/apps/detection/transforms/dictionary.py @@ -772,31 +772,34 @@ class BoxToMaskd(MapTransform): Dictionary-based wrapper of :py:class:`monai.apps.detection.transforms.array.BoxToMask`. Pairs with :py:class:`monai.apps.detection.transforms.dictionary.MaskToBoxd` . Please make sure the same ``min_fg_label`` is used when using the two transforms in pairs. - The output d[box_mask_key] will have background intensity 0, since the following operations may pad 0 on the border. + The output ``d[box_mask_key]`` will have background intensity 0, since the following operations + may pad 0 on the border. This is the general solution for transforms that need to be applied on images and boxes simultaneously. It is performed with the following steps. - 1) use BoxToMaskd to covert boxes and labels to box_masks; - 2) do transforms, e.g., rotation or cropping, on images and box_masks together; - 3) use MaskToBoxd to convert box_masks back to boxes and labels. + + 1) use ``BoxToMaskd`` to covert boxes and labels to box_masks; + 2) do transforms, e.g., rotation or cropping, on images and box_masks together; + 3) use ``MaskToBoxd`` to convert box_masks back to boxes and labels. Args: box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``. box_mask_keys: Keys to store output box mask results for transformation. Same length with ``box_keys``. - label_keys: Keys that represents the lables corresponding to the ``box_keys``. Same length with ``box_keys``. + label_keys: Keys that represents the labels corresponding to the ``box_keys``. Same length with ``box_keys``. box_ref_image_keys: Keys that represents the reference images to which ``box_keys`` are attached. min_fg_label: min foreground box label. ellipse_mask: bool. - If True, it assumes the object shape is close to ellipse or ellipsoid. - If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box. - If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True - See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021. + + - If True, it assumes the object shape is close to ellipse or ellipsoid. + - If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box. + - If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True + See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021. allow_missing_keys: don't raise exception if key is missing. Example: .. code-block:: python - # This code snippet creates transforms (random rotation and croppping) on boxes, labels, and image together. + # This code snippet creates transforms (random rotation and cropping) on boxes, labels, and image together. import numpy as np from monai.transforms import Compose, RandRotated, RandSpatialCropd, DeleteItemsd transforms = Compose( @@ -862,14 +865,15 @@ class MaskToBoxd(MapTransform): This is the general solution for transforms that need to be applied on images and boxes simultaneously. It is performed with the following steps. - 1) use BoxToMaskd to covert boxes and labels to box_masks; - 2) do transforms, e.g., rotation or cropping, on images and box_masks together; - 3) use MaskToBoxd to convert box_masks back to boxes and labels. + + 1) use ``BoxToMaskd`` to covert boxes and labels to box_masks; + 2) do transforms, e.g., rotation or cropping, on images and box_masks together; + 3) use ``MaskToBoxd`` to convert box_masks back to boxes and labels. Args: box_keys: Keys to pick box data for transformation. The box mode is assumed to be ``StandardMode``. box_mask_keys: Keys to store output box mask results for transformation. Same length with ``box_keys``. - label_keys: Keys that represents the lables corresponding to the ``box_keys``. Same length with ``box_keys``. + label_keys: Keys that represents the labels corresponding to the ``box_keys``. Same length with ``box_keys``. min_fg_label: min foreground box label. box_dtype: output dtype for box_keys label_dtype: output dtype for label_keys @@ -878,7 +882,7 @@ class MaskToBoxd(MapTransform): Example: .. code-block:: python - # This code snippet creates transforms (random rotation and croppping) on boxes, labels, and images together. + # This code snippet creates transforms (random rotation and cropping) on boxes, labels, and images together. import numpy as np from monai.transforms import Compose, RandRotated, RandSpatialCropd, DeleteItemsd transforms = Compose( diff --git a/tests/test_box_transform.py b/tests/test_box_transform.py index 8dbd446b16..0def9e1458 100644 --- a/tests/test_box_transform.py +++ b/tests/test_box_transform.py @@ -72,7 +72,7 @@ def test_value_2d(self, data, expected_mask): box_ref_image_keys="image", label_keys="labels", min_fg_label=0, - ellipse_mask=True, + ellipse_mask=False, ) transform_to_box = MaskToBoxd( box_keys="boxes", box_mask_keys="box_mask", label_keys="labels", min_fg_label=0 @@ -83,6 +83,30 @@ def test_value_2d(self, data, expected_mask): assert_allclose(data_back["boxes"], data["boxes"], type_test=False, device_test=False, atol=1e-3) assert_allclose(data_back["labels"], data["labels"], type_test=False, device_test=False, atol=1e-3) + def test_value_3d_mask(self): + test_dtype = [torch.float32, torch.float16] + image = np.zeros((1, 32, 33, 34)) + boxes = np.array([[7, 8, 9, 10, 12, 13], [1, 3, 5, 2, 5, 9], [0, 0, 0, 1, 1, 1]]) + data = {"image": image, "boxes": boxes, "labels": np.array((1, 0, 3))} + for dtype in test_dtype: + data = CastToTyped(keys=["image", "boxes"], dtype=dtype)(data) + transform_to_mask = BoxToMaskd( + box_keys="boxes", + box_mask_keys="box_mask", + box_ref_image_keys="image", + label_keys="labels", + min_fg_label=0, + ellipse_mask=False, + ) + transform_to_box = MaskToBoxd( + box_keys="boxes", box_mask_keys="box_mask", label_keys="labels", min_fg_label=0 + ) + data_mask = transform_to_mask(data) + assert_allclose(data_mask["box_mask"].shape, (3, 32, 33, 34), type_test=True, device_test=True, atol=1e-3) + data_back = transform_to_box(data_mask) + assert_allclose(data_back["boxes"], data["boxes"], type_test=False, device_test=False, atol=1e-3) + assert_allclose(data_back["labels"], data["labels"], type_test=False, device_test=False, atol=1e-3) + @parameterized.expand(TESTS_3D) def test_value_3d( self,