						|  | """ | 
					
						
						|  | Common data processing utilities that are used in a | 
					
						
						|  | typical object detection data pipeline. | 
					
						
						|  | """ | 
					
						
						|  | import logging | 
					
						
						|  | import numpy as np | 
					
						
						|  | from typing import List, Union | 
					
						
						|  | import pycocotools.mask as mask_util | 
					
						
						|  | import torch | 
					
						
						|  | from PIL import Image | 
					
						
						|  |  | 
					
						
						|  | from detectron2.structures import ( | 
					
						
						|  | BitMasks, | 
					
						
						|  | Boxes, | 
					
						
						|  | BoxMode, | 
					
						
						|  | Instances, | 
					
						
						|  | Keypoints, | 
					
						
						|  | PolygonMasks, | 
					
						
						|  | RotatedBoxes, | 
					
						
						|  | polygons_to_bitmask, | 
					
						
						|  | ) | 
					
						
						|  | from detectron2.utils.file_io import PathManager | 
					
						
						|  |  | 
					
						
						|  | from . import transforms as T | 
					
						
						|  | from .catalog import MetadataCatalog | 
					
						
						|  |  | 
					
						
						|  | __all__ = [ | 
					
						
						|  | "SizeMismatchError", | 
					
						
						|  | "convert_image_to_rgb", | 
					
						
						|  | "check_image_size", | 
					
						
						|  | "transform_proposals", | 
					
						
						|  | "transform_instance_annotations", | 
					
						
						|  | "annotations_to_instances", | 
					
						
						|  | "annotations_to_instances_rotated", | 
					
						
						|  | "build_augmentation", | 
					
						
						|  | "build_transform_gen", | 
					
						
						|  | "create_keypoint_hflip_indices", | 
					
						
						|  | "filter_empty_instances", | 
					
						
						|  | "read_image", | 
					
						
						|  | ] | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | class SizeMismatchError(ValueError): | 
					
						
						|  | """ | 
					
						
						|  | When loaded image has difference width/height compared with annotation. | 
					
						
						|  | """ | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | _M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] | 
					
						
						|  | _M_YUV2RGB = [[1.0, 0.0, 1.13983], [1.0, -0.39465, -0.58060], [1.0, 2.03211, 0.0]] | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | _EXIF_ORIENT = 274 | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
def convert_PIL_to_numpy(image, format):
    """
    Convert a PIL image to a numpy array of the target format.

    Args:
        image (PIL.Image): a PIL image
        format (str): the format of the output image

    Returns:
        (np.ndarray): also see `read_image`
    """
    if format is not None:
        # PIL only supports RGB, so convert to RGB and flip channels below
        conversion_format = format
        if format in ["BGR", "YUV-BT.601"]:
            conversion_format = "RGB"
        image = image.convert(conversion_format)
    image = np.asarray(image)
    # PIL squeezes out the channel dimension for "L", so make it HWC
    if format == "L":
        image = np.expand_dims(image, -1)

    # handle formats not supported by PIL
    elif format == "BGR":
        # flip channels from RGB to BGR
        image = image[:, :, ::-1]
    elif format == "YUV-BT.601":
        image = image / 255.0
        image = np.dot(image, np.array(_M_RGB2YUV).T)

    return image
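
# Illustrative sketch (comment only, not executed): round-tripping an image through
# the two conversion helpers; "example.jpg" is a placeholder path.
#
#   pil_img = Image.open("example.jpg")
#   bgr = convert_PIL_to_numpy(pil_img, "BGR")   # HWC uint8, channels reversed
#   rgb = convert_image_to_rgb(bgr, "BGR")       # see `convert_image_to_rgb` below
#   assert bgr.shape == rgb.shape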
					
						
def convert_image_to_rgb(image, format):
    """
    Convert an image from given format to RGB.

    Args:
        image (np.ndarray or Tensor): an HWC image
        format (str): the format of input image, also see `read_image`

    Returns:
        (np.ndarray): (H,W,3) RGB image in 0-255 range, can be either float or uint8
    """
    if isinstance(image, torch.Tensor):
        image = image.cpu().numpy()
    if format == "BGR":
        image = image[:, :, [2, 1, 0]]
    elif format == "YUV-BT.601":
        image = np.dot(image, np.array(_M_YUV2RGB).T)
        image = image * 255.0
    else:
        if format == "L":
            image = image[:, :, 0]
        image = image.astype(np.uint8)
        image = np.asarray(Image.fromarray(image, mode=format).convert("RGB"))
    return image


def _apply_exif_orientation(image):
    """
    Applies the exif orientation correctly.

    This code exists per the bug:
    https://github.com/python-pillow/Pillow/issues/3973
    with the function `ImageOps.exif_transpose`. The Pillow source raises errors with
    various methods, especially `tobytes`.

    Function based on:
    https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59
    https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527

    Args:
        image (PIL.Image): a PIL image

    Returns:
        (PIL.Image): the PIL image with exif orientation applied, if applicable
    """
    if not hasattr(image, "getexif"):
        return image

    try:
        exif = image.getexif()
    except Exception:
        exif = None

    if exif is None:
        return image

    orientation = exif.get(_EXIF_ORIENT)

    method = {
        2: Image.FLIP_LEFT_RIGHT,
        3: Image.ROTATE_180,
        4: Image.FLIP_TOP_BOTTOM,
        5: Image.TRANSPOSE,
        6: Image.ROTATE_270,
        7: Image.TRANSVERSE,
        8: Image.ROTATE_90,
    }.get(orientation)

    if method is not None:
        return image.transpose(method)
    return image


def read_image(file_name, format=None):
    """
    Read an image into the given format.
    Will apply rotation and flipping if the image has such exif information.

    Args:
        file_name (str): image file path
        format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601".

    Returns:
        image (np.ndarray):
            an HWC image in the given format, which is 0-255, uint8 for
            supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601.
    """
    with PathManager.open(file_name, "rb") as f:
        image = Image.open(f)

        # work around the EXIF orientation bug:
        # https://github.com/python-pillow/Pillow/issues/3973
        image = _apply_exif_orientation(image)
        return convert_PIL_to_numpy(image, format)
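
# Usage sketch (comment only; the path is a placeholder):
#
#   img = read_image("datasets/coco/val2017/000000000139.jpg", format="BGR")
#   assert img.ndim == 3 and img.shape[2] == 3 and img.dtype == "uint8"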
					
						
def check_image_size(dataset_dict, image):
    """
    Raise an error if the image does not match the size specified in the dict.
    """
    if "width" in dataset_dict or "height" in dataset_dict:
        image_wh = (image.shape[1], image.shape[0])
        expected_wh = (dataset_dict["width"], dataset_dict["height"])
        if not image_wh == expected_wh:
            raise SizeMismatchError(
                "Mismatched image shape{}, got {}, expect {}.".format(
                    " for image " + dataset_dict["file_name"]
                    if "file_name" in dataset_dict
                    else "",
                    image_wh,
                    expected_wh,
                )
                + " Please check the width/height in your annotation."
            )

    # Fill in missing sizes so that bboxes can always be remapped to the original image size
    if "width" not in dataset_dict:
        dataset_dict["width"] = image.shape[1]
    if "height" not in dataset_dict:
        dataset_dict["height"] = image.shape[0]
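
# Contract sketch (comment only; values are made up):
#
#   d = {"file_name": "a.jpg", "width": 640, "height": 480}
#   check_image_size(d, image)  # raises SizeMismatchError unless image is 480x640 HWC
#   d2 = {}                     # missing sizes are filled in from the image itself
#   check_image_size(d2, image)
#   assert d2["width"] == image.shape[1] and d2["height"] == image.shape[0]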
					
						
def transform_proposals(dataset_dict, image_shape, transforms, *, proposal_topk, min_box_size=0):
    """
    Apply transformations to the proposals in dataset_dict, if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, possibly
            contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode"
        image_shape (tuple): height, width
        transforms (TransformList):
        proposal_topk (int): only keep top-K scoring proposals
        min_box_size (int): proposals with either side smaller than this
            threshold are removed

    The input dict is modified in-place, with the above-mentioned keys removed. A new
    key "proposals" will be added. Its value is an `Instances`
    object which contains the transformed proposals in its field
    "proposal_boxes" and "objectness_logits".
    """
    if "proposal_boxes" in dataset_dict:
        # Transform proposal boxes
        boxes = transforms.apply_box(
            BoxMode.convert(
                dataset_dict.pop("proposal_boxes"),
                dataset_dict.pop("proposal_bbox_mode"),
                BoxMode.XYXY_ABS,
            )
        )
        boxes = Boxes(boxes)
        objectness_logits = torch.as_tensor(
            dataset_dict.pop("proposal_objectness_logits").astype("float32")
        )

        boxes.clip(image_shape)
        keep = boxes.nonempty(threshold=min_box_size)
        boxes = boxes[keep]
        objectness_logits = objectness_logits[keep]

        proposals = Instances(image_shape)
        proposals.proposal_boxes = boxes[:proposal_topk]
        proposals.objectness_logits = objectness_logits[:proposal_topk]
        dataset_dict["proposals"] = proposals
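
# Sketch with a single hypothetical precomputed proposal; T.HFlipTransform flips
# boxes about the given image width (comment only, values made up):
#
#   d = {
#       "proposal_boxes": np.array([[10, 10, 50, 80]], dtype="float32"),
#       "proposal_bbox_mode": BoxMode.XYXY_ABS,
#       "proposal_objectness_logits": np.array([0.9], dtype="float32"),
#   }
#   tfl = T.TransformList([T.HFlipTransform(640)])
#   transform_proposals(d, (480, 640), tfl, proposal_topk=100)
#   # d["proposals"].proposal_boxes now holds the flipped box [590, 10, 630, 80]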
					
						
def get_bbox(annotation):
    """
    Get the bbox from a single instance annotation.
    Args:
        annotation (dict): dict of instance annotations for a single instance.
    Returns:
        bbox (ndarray): x1, y1, x2, y2 coordinates
    """
    # bbox is 1d (per-instance bounding box)
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
    return bbox


def transform_instance_annotations(
    annotation, transforms, image_size, *, keypoint_hflip_indices=None
):
    """
    Apply transforms to box, segmentation and keypoints annotations of a single instance.

    It will use `transforms.apply_box` for the box, and
    `transforms.apply_coords` for segmentation polygons & keypoints.
    If you need anything more specially designed for each data structure,
    you'll need to implement your own version of this function or the transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList or list[Transform]):
        image_size (tuple): the height, width of the transformed image
        keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`.

    Returns:
        dict:
            the same input dict with fields "bbox", "segmentation", "keypoints"
            transformed according to `transforms`.
            The "bbox_mode" field will be set to XYXY_ABS.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)
    # bbox is 1d (per-instance bounding box)
    bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
    # clip the transformed bbox to the image size
    bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0)
    annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1])
    annotation["bbox_mode"] = BoxMode.XYXY_ABS

    if "segmentation" in annotation:
        # each instance contains 1 or more polygons, or an RLE dict
        segm = annotation["segmentation"]
        if isinstance(segm, list):
            # polygons
            polygons = [np.asarray(p).reshape(-1, 2) for p in segm]
            annotation["segmentation"] = [
                p.reshape(-1) for p in transforms.apply_polygons(polygons)
            ]
        elif isinstance(segm, dict):
            # RLE
            mask = mask_util.decode(segm)
            mask = transforms.apply_segmentation(mask)
            assert tuple(mask.shape[:2]) == image_size
            annotation["segmentation"] = mask
        else:
            raise ValueError(
                "Cannot transform segmentation of type '{}'!"
                "Supported types are: polygons as list[list[float] or ndarray],"
                " COCO-style RLE as a dict.".format(type(segm))
            )

    if "keypoints" in annotation:
        keypoints = transform_keypoint_annotations(
            annotation["keypoints"], transforms, image_size, keypoint_hflip_indices
        )
        annotation["keypoints"] = keypoints

    return annotation
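
# Sketch: flipping one annotation horizontally (comment only, values made up):
#
#   anno = {"bbox": [10, 10, 40, 30], "bbox_mode": BoxMode.XYWH_ABS, "category_id": 0}
#   tfl = T.TransformList([T.HFlipTransform(640)])
#   anno = transform_instance_annotations(anno, tfl, (480, 640))
#   # anno["bbox"] is now XYXY_ABS and flipped: [590.0, 10.0, 630.0, 40.0]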
					
						
def transform_keypoint_annotations(keypoints, transforms, image_size, keypoint_hflip_indices=None):
    """
    Transform keypoint annotations of an image.
    If a keypoint is transformed out of image boundary, it will be marked "unlabeled" (visibility=0)

    Args:
        keypoints (list[float]): Nx3 float in Detectron2's Dataset format.
            Each point is represented by (x, y, visibility).
        transforms (TransformList):
        image_size (tuple): the height, width of the transformed image
        keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`.
            When `transforms` includes horizontal flip, will use the index
            mapping to flip keypoints.
    """
    # (N*3,) -> (N, 3)
    keypoints = np.asarray(keypoints, dtype="float64").reshape(-1, 3)
    keypoints_xy = transforms.apply_coords(keypoints[:, :2])

    # Set all out-of-boundary points to "unlabeled"
    inside = (keypoints_xy >= np.array([0, 0])) & (keypoints_xy <= np.array(image_size[::-1]))
    inside = inside.all(axis=1)
    keypoints[:, :2] = keypoints_xy
    keypoints[:, 2][~inside] = 0

    # An odd number of horizontal flips means the image is flipped overall.
    # This assumes HFlipTransform is the only transform that flips horizontally.
    do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1

    # If flipped, swap each keypoint with its opposite-handed equivalent
    if do_hflip:
        if keypoint_hflip_indices is None:
            raise ValueError("Cannot flip keypoints without providing flip indices!")
        if len(keypoints) != len(keypoint_hflip_indices):
            raise ValueError(
                "Keypoint data has {} points, but metadata "
                "contains {} points!".format(len(keypoints), len(keypoint_hflip_indices))
            )
        keypoints = keypoints[np.asarray(keypoint_hflip_indices, dtype=np.int32), :]

    # Maintain the COCO convention that if visibility == 0 (unlabeled), then x = y = 0
    keypoints[keypoints[:, 2] == 0] = 0
    return keypoints
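
# Sketch: two keypoints forming a left/right pair, flipped horizontally
# (comment only; the flip indices [1, 0] swap the pair):
#
#   kpts = [100.0, 200.0, 2.0, 500.0, 200.0, 2.0]  # flattened (x, y, v) triples
#   tfl = T.TransformList([T.HFlipTransform(640)])
#   out = transform_keypoint_annotations(kpts, tfl, (480, 640), keypoint_hflip_indices=[1, 0])
#   # out[0] is old keypoint 1 at x = 640 - 500 = 140; out[1] is old keypoint 0 at x = 540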
					
						
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = (
        np.stack(
            [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
        )
        if len(annos)
        else np.zeros((0, 4))
    )
    target = Instances(image_size)
    target.gt_boxes = Boxes(boxes)

    classes = [int(obj["category_id"]) for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            try:
                masks = PolygonMasks(segms)
            except ValueError as e:
                raise ValueError(
                    "Failed to use mask_format=='polygon' from the given annotations!"
                ) from e
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO-style RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim
                    )
                    # binary mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a binary segmentation mask "
                        " in a 2D numpy array of shape HxW.".format(type(segm))
                    )
            # torch.from_numpy does not support arrays with negative strides
            masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
            )
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
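
# Sketch: two box-only annotations turned into training targets (comment only):
#
#   annos = [
#       {"bbox": [10, 10, 40, 30], "bbox_mode": BoxMode.XYWH_ABS, "category_id": 0},
#       {"bbox": [0, 0, 20, 20], "bbox_mode": BoxMode.XYWH_ABS, "category_id": 2},
#   ]
#   inst = annotations_to_instances(annos, (480, 640))
#   assert len(inst) == 2 and inst.gt_classes.tolist() == [0, 2]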
					
						
def annotations_to_instances_rotated(annos, image_size):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.
    Compared to `annotations_to_instances`, this function is for rotated boxes only.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            Containing fields "gt_boxes", "gt_classes",
            if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [obj["bbox"] for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = RotatedBoxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    return target


def filter_empty_instances(
    instances, by_box=True, by_mask=True, box_threshold=1e-5, return_mask=False
):
    """
    Filter out empty instances in an `Instances` object.

    Args:
        instances (Instances):
        by_box (bool): whether to filter out instances with empty boxes
        by_mask (bool): whether to filter out instances with empty masks
        box_threshold (float): minimum width and height to be considered non-empty
        return_mask (bool): whether to return boolean mask of filtered instances

    Returns:
        Instances: the filtered instances.
        tensor[bool], optional: boolean mask of filtered instances
    """
    assert by_box or by_mask
    r = []
    if by_box:
        r.append(instances.gt_boxes.nonempty(threshold=box_threshold))
    if instances.has("gt_masks") and by_mask:
        r.append(instances.gt_masks.nonempty())

    if not r:
        return instances
    # keep only the instances that pass every enabled filter
    m = r[0]
    for x in r[1:]:
        m = m & x
    if return_mask:
        return instances[m], m
    return instances[m]
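
# Sketch: the second, zero-area box below is filtered out (comment only):
#
#   inst = Instances((480, 640))
#   inst.gt_boxes = Boxes(torch.tensor([[10.0, 10.0, 50.0, 50.0], [30.0, 30.0, 30.0, 30.0]]))
#   inst.gt_classes = torch.tensor([0, 1])
#   assert len(filter_empty_instances(inst)) == 1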
					
						
def create_keypoint_hflip_indices(dataset_names: Union[str, List[str]]) -> List[int]:
    """
    Args:
        dataset_names: list of dataset names

    Returns:
        list[int]: a list of size=#keypoints, storing the
        horizontally-flipped keypoint indices.
    """
    if isinstance(dataset_names, str):
        dataset_names = [dataset_names]

    check_metadata_consistency("keypoint_names", dataset_names)
    check_metadata_consistency("keypoint_flip_map", dataset_names)

    meta = MetadataCatalog.get(dataset_names[0])
    names = meta.keypoint_names
    # make the flip map symmetric so lookups work in both directions
    flip_map = dict(meta.keypoint_flip_map)
    flip_map.update({v: k for k, v in flip_map.items()})
    flipped_names = [i if i not in flip_map else flip_map[i] for i in names]
    flip_indices = [names.index(i) for i in flipped_names]
    return flip_indices
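
# Toy walkthrough (comment only; real datasets register "keypoint_names" and
# "keypoint_flip_map" in their metadata):
#
#   names = ("nose", "left_eye", "right_eye")
#   flip_map = (("left_eye", "right_eye"),)  # only one direction needs listing
#   # -> flipped_names = ["nose", "right_eye", "left_eye"] -> flip_indices = [0, 2, 1]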
					
						
def get_fed_loss_cls_weights(dataset_names: Union[str, List[str]], freq_weight_power=1.0):
    """
    Get the frequency weight for each class, sorted by class id.
    The frequency weight is computed as image_count raised to the power freq_weight_power.

    Args:
        dataset_names: list of dataset names
        freq_weight_power: power value
    """
    if isinstance(dataset_names, str):
        dataset_names = [dataset_names]

    check_metadata_consistency("class_image_count", dataset_names)

    meta = MetadataCatalog.get(dataset_names[0])
    class_freq_meta = meta.class_image_count
    class_freq = torch.tensor(
        [c["image_count"] for c in sorted(class_freq_meta, key=lambda x: x["id"])]
    )
    class_freq_weight = class_freq.float() ** freq_weight_power
    return class_freq_weight
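
# Worked example of the formula (comment only; counts are made up): with class
# image counts [10, 90] and freq_weight_power=0.5, the weights are
# [10 ** 0.5, 90 ** 0.5] ~= [3.16, 9.49].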
					
						
def gen_crop_transform_with_instance(crop_size, image_size, instance):
    """
    Generate a CropTransform so that the cropping region contains
    the center of the given instance.

    Args:
        crop_size (tuple): h, w in pixels
        image_size (tuple): h, w
        instance (dict): an annotation dict of one instance, in Detectron2's
            dataset format.
    """
    crop_size = np.asarray(crop_size, dtype=np.int32)
    bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
    center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5
    assert (
        image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1]
    ), "The annotation bounding box is outside of the image!"
    assert (
        image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1]
    ), "Crop size is larger than image size!"

    # Sample the crop's top-left corner so the crop stays inside the image
    # and still covers the instance center.
    min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0)
    max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0)
    max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32))

    y0 = np.random.randint(min_yx[0], max_yx[0] + 1)
    x0 = np.random.randint(min_yx[1], max_yx[1] + 1)
    return T.CropTransform(x0, y0, crop_size[1], crop_size[0])
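
# Sketch: sample a 300x300 crop guaranteed to contain the instance center
# (comment only; the box is made up):
#
#   inst = {"bbox": [100, 120, 200, 260], "bbox_mode": BoxMode.XYXY_ABS}
#   t = gen_crop_transform_with_instance((300, 300), (480, 640), inst)
#   # t is a T.CropTransform; use t.apply_image(img) / t.apply_box(boxes)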
					
						
def check_metadata_consistency(key, dataset_names):
    """
    Check that the datasets have consistent metadata.

    Args:
        key (str): a metadata key
        dataset_names (list[str]): a list of dataset names

    Raises:
        AttributeError: if the key does not exist in the metadata
        ValueError: if the given datasets do not have the same metadata values defined by key
    """
    if len(dataset_names) == 0:
        return
    logger = logging.getLogger(__name__)
    entries_per_dataset = [getattr(MetadataCatalog.get(d), key) for d in dataset_names]
    for idx, entry in enumerate(entries_per_dataset):
        if entry != entries_per_dataset[0]:
            logger.error(
                "Metadata '{}' for dataset '{}' is '{}'".format(key, dataset_names[idx], str(entry))
            )
            logger.error(
                "Metadata '{}' for dataset '{}' is '{}'".format(
                    key, dataset_names[0], str(entries_per_dataset[0])
                )
            )
            raise ValueError("Datasets have different metadata '{}'!".format(key))


def build_augmentation(cfg, is_train):
    """
    Create a list of default :class:`Augmentation` from config.
    Now it includes resizing and flipping.

    Returns:
        list[Augmentation]
    """
    if is_train:
        min_size = cfg.INPUT.MIN_SIZE_TRAIN
        max_size = cfg.INPUT.MAX_SIZE_TRAIN
        sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
    else:
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        sample_style = "choice"
    augmentation = [T.ResizeShortestEdge(min_size, max_size, sample_style)]
    if is_train and cfg.INPUT.RANDOM_FLIP != "none":
        augmentation.append(
            T.RandomFlip(
                horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal",
                vertical=cfg.INPUT.RANDOM_FLIP == "vertical",
            )
        )
    return augmentation
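
# Usage sketch with the default config (comment only):
#
#   from detectron2.config import get_cfg
#   augs = build_augmentation(get_cfg(), is_train=True)
#   # -> [ResizeShortestEdge, RandomFlip] under the default INPUT settings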
					
						
build_transform_gen = build_augmentation
"""
Alias for backward-compatibility.
"""