diff --git a/packages/examples/cvat/exchange-oracle/src/core/tasks/skeletons_from_boxes.py b/packages/examples/cvat/exchange-oracle/src/core/tasks/skeletons_from_boxes.py index dc70d33a91..d48397e5d1 100644 --- a/packages/examples/cvat/exchange-oracle/src/core/tasks/skeletons_from_boxes.py +++ b/packages/examples/cvat/exchange-oracle/src/core/tasks/skeletons_from_boxes.py @@ -23,6 +23,9 @@ class RoiInfo: bbox_y: int bbox_label: int + point_x: int + point_y: int + # RoI is centered on the bbox center # Coordinates can be out of image boundaries. # In this case RoI includes extra margins to be centered on bbox center @@ -117,7 +120,10 @@ def parse_skeleton_bbox_mapping(self, skeleton_bbox_mapping_data: bytes) -> Skel return {int(k): int(v) for k, v in parse_json(skeleton_bbox_mapping_data).items()} def parse_roi_info(self, rois_info_data: bytes) -> RoiInfos: - return [RoiInfo(**roi_info) for roi_info in parse_json(rois_info_data)] + return [ + RoiInfo(**{"point_x": 0, "point_y": 0, **roi_info}) + for roi_info in parse_json(rois_info_data) + ] def parse_roi_filenames(self, roi_filenames_data: bytes) -> RoiFilenames: return {int(k): v for k, v in parse_json(roi_filenames_data).items()} diff --git a/packages/examples/cvat/exchange-oracle/src/handlers/job_creation.py b/packages/examples/cvat/exchange-oracle/src/handlers/job_creation.py index c9a474fe67..96949a1e23 100644 --- a/packages/examples/cvat/exchange-oracle/src/handlers/job_creation.py +++ b/packages/examples/cvat/exchange-oracle/src/handlers/job_creation.py @@ -20,7 +20,7 @@ import datumaro as dm import numpy as np from datumaro.util import filter_dict, take_by -from datumaro.util.annotation_util import BboxCoords, bbox_iou +from datumaro.util.annotation_util import BboxCoords, bbox_iou, find_instances from datumaro.util.image import IMAGE_EXTENSIONS, decode_image, encode_image import src.core.tasks.boxes_from_points as boxes_from_points_task @@ -1709,13 +1709,18 @@ def __init__(self, manifest: TaskManifest, escrow_address: str, chain_id: int) - ) "Minimum absolute ROI size, (w, h)" - self.boxes_format = "coco_instances" + self.boxes_format = "coco_person_keypoints" self.embed_bbox_in_roi_image = True "Put a bbox into the extracted skeleton RoI images" self.embed_tile_border = True + self.embedded_point_radius = 15 + self.min_embedded_point_radius_percent = 0.005 + self.max_embedded_point_radius_percent = 0.01 + self.embedded_point_color = (0, 255, 255) + self.roi_embedded_bbox_color = (0, 255, 255) # BGR self.roi_background_color = (245, 240, 242) # BGR - CVAT background color @@ -1729,6 +1734,9 @@ def __init__(self, manifest: TaskManifest, escrow_address: str, chain_id: int) - GT annotations or samples for successful job launch """ + self.gt_id_attribute = "object_id" + "An additional way to match GT skeletons with input boxes" + # TODO: probably, need to also add an absolute number of minimum GT RoIs per class def _download_input_data(self): @@ -1948,7 +1956,7 @@ def _validate_boxes_filenames(self): ) ) - def _validate_boxes_annotations(self): + def _validate_boxes_annotations(self): # noqa: PLR0912 # Convert possible polygons and masks into boxes self._boxes_dataset.transform(InstanceSegmentsToBbox) self._boxes_dataset.init_cache() @@ -1962,15 +1970,70 @@ def _validate_boxes_annotations(self): # Could fail on this as well image_h, image_w = sample.media_as(dm.Image).size - sample_boxes = [a for a in sample.annotations if isinstance(a, dm.Bbox)] - valid_boxes = [] - for bbox in sample_boxes: - if not ( - (0 <= int(bbox.x) < int(bbox.x + bbox.w) <= image_w) - and (0 <= int(bbox.y) < int(bbox.y + bbox.h) <= image_h) - ): + valid_instances: list[tuple[dm.Bbox, dm.Points]] = [] + instances = find_instances( + [a for a in sample.annotations if isinstance(a, dm.Bbox | dm.Skeleton)] + ) + for instance_anns in instances: + if len(instance_anns) != 2: + excluded_boxes_info.add_message( + "Sample '{}': object #{} ({}) skipped - unexpected group size ({})".format( + sample.id, + instance_anns[0].id, + label_cat[instance_anns[0].label].name, + len(instance_anns), + ), + sample_id=sample.id, + sample_subset=sample.subset, + ) + continue + + bbox = next((a for a in instance_anns if isinstance(a, dm.Bbox)), None) + if not bbox: + excluded_boxes_info.add_message( + "Sample '{}': object #{} ({}) skipped - no matching bbox".format( + sample.id, instance_anns[0].id, label_cat[instance_anns[0].label].name + ), + sample_id=sample.id, + sample_subset=sample.subset, + ) + continue + + skeleton = next((a for a in instance_anns if isinstance(a, dm.Skeleton)), None) + if not skeleton: + excluded_boxes_info.add_message( + "Sample '{}': object #{} ({}) skipped - no matching skeleton".format( + sample.id, instance_anns[0].id, label_cat[instance_anns[0].label].name + ), + sample_id=sample.id, + sample_subset=sample.subset, + ) + continue + + if len(skeleton.elements) != 1 or len(skeleton.elements[0].points) != 2: + excluded_boxes_info.add_message( + "Sample '{}': object #{} ({}) skipped - invalid skeleton points".format( + sample.id, skeleton.id, label_cat[skeleton.label].name + ), + sample_id=sample.id, + sample_subset=sample.subset, + ) + continue + + point = skeleton.elements[0] + if not is_point_in_bbox(point.points[0], point.points[1], (0, 0, image_w, image_h)): excluded_boxes_info.add_message( - "Sample '{}': bbox #{} ({}) skipped - invalid coordinates".format( + "Sample '{}': object #{} ({}) skipped - invalid point coordinates".format( + sample.id, skeleton.id, label_cat[skeleton.label].name + ), + sample_id=sample.id, + sample_subset=sample.subset, + ) + continue + + if not is_point_in_bbox(int(bbox.x), int(bbox.y), (0, 0, image_w, image_h)): + excluded_boxes_info.add_message( + "Sample '{}': object #{} ({}) skipped - invalid bbox coordinates".format( sample.id, bbox.id, label_cat[bbox.label].name ), sample_id=sample.id, @@ -1978,6 +2041,16 @@ def _validate_boxes_annotations(self): ) continue + if not is_point_in_bbox(point.points[0], point.points[1], bbox): + excluded_boxes_info.add_message( + "Sample '{}': object #{} ({}) skipped - point is outside the bbox".format( + sample.id, skeleton.id, label_cat[skeleton.label].name + ), + sample_id=sample.id, + sample_subset=sample.subset, + ) + continue + if bbox.id in visited_ids: excluded_boxes_info.add_message( "Sample '{}': bbox #{} ({}) skipped - repeated annotation id {}".format( @@ -1988,14 +2061,18 @@ def _validate_boxes_annotations(self): ) continue - valid_boxes.append(bbox) + valid_instances.append( + (bbox, point.wrap(group=bbox.group, id=bbox.id, attributes=bbox.attributes)) + ) visited_ids.add(bbox.id) - excluded_boxes_info.excluded_count += len(sample_boxes) - len(valid_boxes) - excluded_boxes_info.total_count += len(sample_boxes) + excluded_boxes_info.excluded_count += len(instances) - len(valid_instances) + excluded_boxes_info.total_count += len(instances) - if len(valid_boxes) != len(sample.annotations): - self._boxes_dataset.put(sample.wrap(annotations=valid_boxes)) + if len(valid_instances) != len(sample.annotations): + self._boxes_dataset.put( + sample.wrap(annotations=list(chain.from_iterable(valid_instances))) + ) if excluded_boxes_info.excluded_count > ceil( excluded_boxes_info.total_count * self.max_discarded_threshold @@ -2066,8 +2143,14 @@ def _find_unambiguous_matches( input_boxes: list[dm.Bbox], gt_skeletons: list[dm.Skeleton], *, + input_points: list[dm.Points], gt_annotations: list[dm.Annotation], ) -> list[tuple[dm.Bbox, dm.Skeleton]]: + bbox_point_mapping: dict[int, dm.Points] = { + bbox.id: next(p for p in input_points if p.group == bbox.group) + for bbox in input_boxes + } + matches = [ [ (input_bbox.label == gt_skeleton.label) @@ -2077,6 +2160,18 @@ def _find_unambiguous_matches( self._get_skeleton_bbox(gt_skeleton, gt_annotations), ) ) + and (input_point := bbox_point_mapping[input_bbox.id]) + and is_point_in_bbox( + input_point.points[0], + input_point.points[1], + self._get_skeleton_bbox(gt_skeleton, gt_annotations), + ) + and ( + # a way to customize matching if the default method is too rough + not (bbox_id := input_bbox.attributes.get(self.gt_id_attribute)) + or not (skeleton_id := gt_skeleton.attributes.get(self.gt_id_attribute)) + or bbox_id == skeleton_id + ) for gt_skeleton in gt_skeletons ] for input_bbox in input_boxes @@ -2167,10 +2262,11 @@ def _find_good_gt_skeletons( input_boxes: list[dm.Bbox], gt_skeletons: list[dm.Skeleton], *, + input_points: list[dm.Points], gt_annotations: list[dm.Annotation], ) -> list[dm.Skeleton]: matches = _find_unambiguous_matches( - input_boxes, gt_skeletons, gt_annotations=gt_annotations + input_boxes, gt_skeletons, input_points=input_points, gt_annotations=gt_annotations ) matched_skeletons = [] @@ -2221,13 +2317,18 @@ def _find_good_gt_skeletons( gt_skeletons = [a for a in gt_sample.annotations if isinstance(a, dm.Skeleton)] input_boxes = [a for a in boxes_sample.annotations if isinstance(a, dm.Bbox)] + input_points = [a for a in boxes_sample.annotations if isinstance(a, dm.Points)] + assert len(input_boxes) == len(input_points) # Samples without boxes are allowed, so we just skip them without an error if not gt_skeletons: continue matched_skeletons = _find_good_gt_skeletons( - input_boxes, gt_skeletons, gt_annotations=gt_sample.annotations + input_boxes, + gt_skeletons, + input_points=input_points, + gt_annotations=gt_sample.annotations, ) if not matched_skeletons: continue @@ -2294,9 +2395,10 @@ def _prepare_roi_infos(self): rois: list[skeletons_from_boxes_task.RoiInfo] = [] for sample in self._boxes_dataset: - for bbox in sample.annotations: - if not isinstance(bbox, dm.Bbox): - continue + instances = find_instances(sample.annotations) + for instance_anns in instances: + bbox = next(a for a in instance_anns if isinstance(a, dm.Bbox)) + point = next(a for a in instance_anns if isinstance(a, dm.Points)) # RoI is centered on bbox center original_bbox_cx = int(bbox.x + bbox.w / 2) @@ -2320,6 +2422,8 @@ def _prepare_roi_infos(self): bbox_label=bbox.label, bbox_x=new_bbox_x, bbox_y=new_bbox_y, + point_x=point.points[0] - roi_x, + point_y=point.points[1] - roi_y, roi_x=roi_x, roi_y=roi_y, roi_w=roi_w, @@ -2511,6 +2615,32 @@ def _draw_roi_bbox(self, roi_image: np.ndarray, bbox: dm.Bbox) -> np.ndarray: cv2.LINE_4, ) + def _draw_roi_point(self, roi_image: np.ndarray, point: tuple[float, float]) -> np.ndarray: + roi_r = (roi_image.shape[0] ** 2 + roi_image.shape[1] ** 2) ** 0.5 / 2 + radius = int( + min( + self.max_embedded_point_radius_percent * roi_r, + max(self.embedded_point_radius, self.min_embedded_point_radius_percent * roi_r), + ) + ) + + roi_image = cv2.circle( + roi_image, + tuple(map(int, (point[0], point[1]))), + radius + 1, + (255, 255, 255), + -1, + cv2.LINE_4, + ) + return cv2.circle( + roi_image, + tuple(map(int, (point[0], point[1]))), + radius, + self.embedded_point_color, + -1, + cv2.LINE_4, + ) + def _extract_and_upload_rois(self): assert self._roi_filenames is not _unset assert self._roi_infos is not _unset @@ -2564,6 +2694,9 @@ def process_file(filename: str, image_pixels: np.ndarray): if self.embed_bbox_in_roi_image: roi_pixels = self._draw_roi_bbox(roi_pixels, bbox_by_id[roi_info.bbox_id]) + roi_pixels = self._draw_roi_point( + roi_pixels, (roi_info.point_x, roi_info.point_y) + ) filename = self._roi_filenames[roi_info.bbox_id] roi_bytes = encode_image(roi_pixels, os.path.splitext(filename)[-1]) diff --git a/packages/examples/cvat/exchange-oracle/src/utils/annotations.py b/packages/examples/cvat/exchange-oracle/src/utils/annotations.py index 075ce7d035..110ec2b069 100644 --- a/packages/examples/cvat/exchange-oracle/src/utils/annotations.py +++ b/packages/examples/cvat/exchange-oracle/src/utils/annotations.py @@ -8,7 +8,7 @@ import datumaro as dm import numpy as np from datumaro.util import filter_dict, mask_tools -from datumaro.util.annotation_util import find_group_leader, find_instances, max_bbox +from datumaro.util.annotation_util import BboxCoords, find_group_leader, find_instances, max_bbox from defusedxml import ElementTree @@ -343,8 +343,12 @@ def transform_item(self, item): return item.wrap(annotations=annotations) -def is_point_in_bbox(px: float, py: float, bbox: dm.Bbox) -> bool: - return (bbox.x <= px <= bbox.x + bbox.w) and (bbox.y <= py <= bbox.y + bbox.h) +def is_point_in_bbox(px: float, py: float, bbox: dm.Bbox | BboxCoords) -> bool: + if isinstance(bbox, dm.Bbox): + bbox = bbox.get_bbox() + + x, y, w, h = bbox + return (x <= px <= x + w) and (y <= py <= y + h) class InstanceSegmentsToBbox(dm.ItemTransform):