Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 80 additions & 1 deletion monai/apps/detection/transforms/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,14 @@
from monai.utils.enums import TransformBackends
from monai.utils.type_conversion import convert_data_type, convert_to_dst_type

from .box_ops import apply_affine_to_boxes, flip_boxes, resize_boxes, zoom_boxes
from .box_ops import (
apply_affine_to_boxes,
convert_box_to_mask,
convert_mask_to_box,
flip_boxes,
resize_boxes,
zoom_boxes,
)

__all__ = [
"ConvertBoxToStandardMode",
Expand All @@ -42,6 +49,8 @@
"ResizeBox",
"FlipBox",
"ClipBoxToImage",
"BoxToMask",
"MaskToBox",
]


Expand Down Expand Up @@ -373,3 +382,73 @@ def __call__( # type: ignore
labels_t = deepcopy(labels_t[keep_t, ...])
labels_clip_list.append(convert_to_dst_type(src=labels_t, dst=labels_tuple[i])[0])
return boxes_clip, tuple(labels_clip_list)


class BoxToMask(Transform):
    """
    Render bounding boxes as an int16 mask image with the same spatial size as the input image.

    Every box gets its own channel in the output. Inside a channel the box region is filled with
    the box's label and everything else is filled with ``bg_label``.

    Args:
        bg_label: intensity used for background in the output mask image.
            It must be smaller than every foreground(fg) label.
        ellipse_mask: how the foreground region of each box is drawn.

            - If True, the object is assumed to be close to an ellipse or ellipsoid.
            - If False, the object is assumed to be close to a rectangle or cube that well occupies the bounding box.
            - When random rotation is used as data augmentation, ellipse_mask=True is suggested.
              See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021.
    """

    backend = [TransformBackends.NUMPY]

    def __init__(self, bg_label: int = -1, ellipse_mask: bool = False) -> None:
        self.bg_label = bg_label
        self.ellipse_mask = ellipse_mask

    def __call__(  # type: ignore
        self, boxes: NdarrayOrTensor, labels: NdarrayOrTensor, spatial_size: Union[Sequence[int], int]
    ) -> NdarrayOrTensor:
        """
        Args:
            boxes: bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``.
            labels: classification foreground(fg) labels corresponding to `boxes`, dtype should be int, sized (N,).
            spatial_size: image spatial size.

        Return:
            - int16 array, sized (num_box, H, W) for 2D or (num_box, H, W, D) for 3D.
              Each channel represents a box.
              The foreground region in channel c has intensity of labels[c].
              The background intensity is bg_label.
        """
        # All the work is delegated to the functional implementation in box_ops.
        return convert_box_to_mask(
            boxes=boxes,
            labels=labels,
            spatial_size=spatial_size,
            bg_label=self.bg_label,
            ellipse_mask=self.ellipse_mask,
        )


class MaskToBox(Transform):
    """
    Convert an int16 mask image (one channel per box) back to bounding boxes and their labels.
    Pairs with :py:class:`monai.apps.detection.transforms.array.BoxToMask`.
    Please make sure the same ``bg_label`` is used when using the two transforms in pairs.

    Args:
        bg_label: background label of the input mask image, make sure it is smaller than any foreground(fg) labels.
        box_dtype: output dtype for boxes
        label_dtype: output dtype for labels
    """

    backend = [TransformBackends.NUMPY]

    def __init__(self, bg_label: int = -1, box_dtype=torch.float32, label_dtype=torch.long) -> None:
        self.bg_label = bg_label
        self.box_dtype = box_dtype
        self.label_dtype = label_dtype

    def __call__(self, boxes_mask: NdarrayOrTensor) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]:
        """
        Args:
            boxes_mask: int16 array, sized (num_box, H, W) for 2D or (num_box, H, W, D) for 3D.
                Each channel represents a box.
                The foreground region in channel c has intensity of labels[c].
                The background intensity is bg_label.

        Return:
            - bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``.
            - classification foreground(fg) labels, dtype should be int, sized (N,).
        """
        # All the work is delegated to the functional implementation in box_ops.
        return convert_mask_to_box(boxes_mask, self.bg_label, self.box_dtype, self.label_dtype)
142 changes: 137 additions & 5 deletions monai/apps/detection/transforms/box_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,21 @@
# limitations under the License.

from copy import deepcopy
from typing import Optional, Sequence, Union
from typing import Optional, Sequence, Tuple, Union

import numpy as np
import torch

from monai.config.type_definitions import NdarrayOrTensor
from monai.data.box_utils import COMPUTE_DTYPE, TO_REMOVE, get_spatial_dims
from monai.transforms import Resize
from monai.transforms.utils import create_scale
from monai.utils import look_up_option, optional_import
from monai.utils.misc import ensure_tuple, ensure_tuple_rep
from monai.utils.type_conversion import convert_data_type, convert_to_dst_type

scipy, _ = optional_import("scipy")


def _apply_affine_to_points(points: torch.Tensor, affine: torch.Tensor, include_shift: bool = True) -> torch.Tensor:
"""
Expand Down Expand Up @@ -180,9 +185,136 @@ def flip_boxes(
flip_axes = ensure_tuple(flip_axes)

# flip box
flip_boxes = deepcopy(boxes)
_flip_boxes = deepcopy(boxes)
for axis in flip_axes:
flip_boxes[:, axis + spatial_dims] = spatial_size[axis] - boxes[:, axis] - TO_REMOVE
flip_boxes[:, axis] = spatial_size[axis] - boxes[:, axis + spatial_dims] - TO_REMOVE
_flip_boxes[:, axis + spatial_dims] = spatial_size[axis] - boxes[:, axis] - TO_REMOVE
_flip_boxes[:, axis] = spatial_size[axis] - boxes[:, axis + spatial_dims] - TO_REMOVE

return _flip_boxes


def convert_box_to_mask(
    boxes: NdarrayOrTensor,
    labels: NdarrayOrTensor,
    spatial_size: Union[Sequence[int], int],
    bg_label: int = -1,
    ellipse_mask: bool = False,
) -> NdarrayOrTensor:
    """
    Convert box to int16 mask image, which has the same size with the input image.

    Args:
        boxes: bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``.
            Coordinates are cast to int32 before the mask is drawn.
        labels: classification foreground(fg) labels corresponding to `boxes`, dtype should be int, sized (N,).
        spatial_size: image spatial size.
        bg_label: background labels for the output mask image, make sure it is smaller than any fg labels.
        ellipse_mask: bool.

            - If True, it assumes the object shape is close to ellipse or ellipsoid.
            - If False, it assumes the object shape is close to rectangle or cube and well occupies the bounding box.
            - If the users are going to apply random rotation as data augmentation, we suggest setting ellipse_mask=True
              See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021.

    Return:
        - int16 array, sized (num_box, H, W) for 2D or (num_box, H, W, D) for 3D. Each channel represents a box.
          The foreground region in channel c has intensity of labels[c].
          The background intensity is bg_label.
          If there is no box, a single-channel mask filled with bg_label is returned.

    Raises:
        ValueError: when ``bg_label`` is not smaller than every label, when the number of labels differs
            from the number of boxes, or when a box does not lie inside the image.
    """
    spatial_dims: int = get_spatial_dims(boxes=boxes)
    spatial_size = ensure_tuple_rep(spatial_size, spatial_dims)

    # if no box, return empty mask
    if len(labels) == 0:
        boxes_mask_np = np.ones((1,) + spatial_size, dtype=np.int16) * np.int16(bg_label)
        boxes_mask, *_ = convert_to_dst_type(src=boxes_mask_np, dst=boxes, dtype=torch.int16)
        return boxes_mask

    # bg_label should be smaller than labels
    min_label = min(labels)
    if bg_label >= min_label:
        raise ValueError(
            f"bg_label should be smaller than any foreground box labels.\n"
            f"min(labels)={min_label}, while bg_label={bg_label}"
        )

    if labels.shape[0] != boxes.shape[0]:
        raise ValueError("Number of labels should equal to number of boxes.")

    boxes_np: np.ndarray = convert_data_type(boxes, np.ndarray, dtype=np.int32)[0]

    # Boxes must lie inside the image. Without these checks a negative corner is read by numpy as a
    # wrap-around slice index (silently painting the wrong region), and an oversized box only fails
    # later with an opaque shape-broadcast error.
    if np.any(boxes_np[:, :spatial_dims] < 0):
        raise ValueError("Some boxes have negative coordinates.")
    if np.any(boxes_np[:, spatial_dims:] > np.asarray(spatial_size)):
        raise ValueError("Some boxes are larger than the image.")

    # allocate memory for boxes_mask_np
    boxes_mask_np = np.ones((labels.shape[0],) + spatial_size, dtype=np.int16) * np.int16(bg_label)

    labels_np, *_ = convert_to_dst_type(src=labels, dst=boxes_np)
    for b in range(boxes_np.shape[0]):
        # generate a foreground mask
        box_size = [boxes_np[b, axis + spatial_dims] - boxes_np[b, axis] for axis in range(spatial_dims)]
        if ellipse_mask:
            # initialize a square/cube mask whose edge is the longest box edge
            max_box_size = max(box_size)
            radius = max_box_size / 2.0
            center = (max_box_size - 1) / 2.0
            boxes_only_mask = np.ones([max_box_size] * spatial_dims, dtype=np.int16) * np.int16(bg_label)
            # apply label intensity to circle/ball foreground
            ranges = tuple(slice(0, max_box_size) for _ in range(spatial_dims))
            dist_from_center = sum((grid - center) ** 2 for grid in np.ogrid[ranges])
            boxes_only_mask[dist_from_center <= radius**2] = np.int16(labels_np[b])
            # squeeze it to a ellipse/ellipsoid mask
            resizer = Resize(spatial_size=box_size, mode="nearest", anti_aliasing=False)
            boxes_only_mask = resizer(boxes_only_mask[None])[0]  # type: ignore
        else:
            # generate a rect mask
            boxes_only_mask = np.ones(box_size, dtype=np.int16) * np.int16(labels_np[b])  # type: ignore
        # apply to global mask
        slicing = [b]
        slicing.extend(slice(boxes_np[b, d], boxes_np[b, d + spatial_dims]) for d in range(spatial_dims))  # type:ignore
        boxes_mask_np[tuple(slicing)] = boxes_only_mask
    return convert_to_dst_type(src=boxes_mask_np, dst=boxes, dtype=torch.int16)[0]


def convert_mask_to_box(
    boxes_mask: NdarrayOrTensor, bg_label: int = -1, box_dtype=torch.float32, label_dtype=torch.long
) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]:
    """
    Convert int16 mask image to box, which has the same size with the input image

    Args:
        boxes_mask: int16 array, sized (num_box, H, W) for 2D or (num_box, H, W, D) for 3D.
            Each channel represents a box.
            The foreground region in channel c has intensity of labels[c].
            The background intensity is bg_label.
        bg_label: background labels for the boxes_mask
        box_dtype: output dtype for boxes
        label_dtype: output dtype for labels

    Return:
        - bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``.
        - classification foreground(fg) labels, dtype should be int, sized (N,).

        Channels that contain no foreground are skipped, so N may be smaller than the number of channels.
    """
    look_up_option(len(boxes_mask.shape), [3, 4])
    spatial_size = list(boxes_mask.shape[1:])
    spatial_dims = get_spatial_dims(spatial_size=spatial_size)

    boxes_mask_np, *_ = convert_data_type(boxes_mask, np.ndarray)

    boxes_list = []
    labels_list = []
    for channel_mask in boxes_mask_np:
        # one index array per spatial axis, listing every foreground voxel of this channel
        fg_indices = np.nonzero(channel_mask != bg_label)
        if fg_indices[0].shape[0] == 0:
            # channel holds background only, i.e. no box
            continue
        top_left = [axis_indices.min() for axis_indices in fg_indices]
        bottom_right = [axis_indices.max() + 1 - TO_REMOVE for axis_indices in fg_indices]
        boxes_list.append(top_left + bottom_right)
        # Read the label from an actual foreground voxel. The top-left corner of the bounding box is
        # background whenever the foreground is not a full rectangle/cube (e.g. an ellipse mask),
        # so sampling the corner would return bg_label instead of the class label.
        labels_list.append(channel_mask[tuple(axis_indices[0] for axis_indices in fg_indices)])

    if len(boxes_list) == 0:
        boxes_np, labels_np = np.zeros([0, 2 * spatial_dims]), np.zeros([0])
    else:
        boxes_np, labels_np = np.asarray(boxes_list), np.asarray(labels_list)
    boxes, *_ = convert_to_dst_type(src=boxes_np, dst=boxes_mask, dtype=box_dtype)
    labels, *_ = convert_to_dst_type(src=labels_np, dst=boxes_mask, dtype=label_dtype)
    return boxes, labels
Loading