from typing import List, Optional, Union, Tuple

import cv2
import numpy as np
from supervision.detection.core import Detections
from supervision.draw.color import Color, ColorPalette


class BoxAnnotator:
    """
    A class for drawing bounding boxes on an image using detections provided.

    Attributes:
        color (Union[Color, ColorPalette]): The color to draw the bounding box,
            can be a single color or a color palette
        thickness (int): The thickness of the bounding box lines, default is 3
        text_color (Color): The color of the text on the bounding box, default is black
        text_scale (float): The scale of the text on the bounding box, default is 0.5
        text_thickness (int): The thickness of the text on the bounding box,
            default is 2
        text_padding (int): The padding around the text on the bounding box,
            default is 10
        avoid_overlap (bool): If True, label positions are chosen so that the text
            background avoids overlapping other detections, default is True
    """
    def __init__(
        self,
        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
        thickness: int = 3,  # 1 for seeclick, 2 for mind2web, 3 for demo
        text_color: Color = Color.BLACK,
        text_scale: float = 0.5,  # 0.8 for mobile/web, 0.3 for desktop, 0.4 for mind2web
        text_thickness: int = 2,  # 1 normally, 2 for demo
        text_padding: int = 10,
        avoid_overlap: bool = True,
    ):
        self.color: Union[Color, ColorPalette] = color
        self.thickness: int = thickness
        self.text_color: Color = text_color
        self.text_scale: float = text_scale
        self.text_thickness: int = text_thickness
        self.text_padding: int = text_padding
        self.avoid_overlap: bool = avoid_overlap
    def annotate(
        self,
        scene: np.ndarray,
        detections: Detections,
        labels: Optional[List[str]] = None,
        skip_label: bool = False,
        image_size: Optional[Tuple[int, int]] = None,
    ) -> np.ndarray:
        """
        Draws bounding boxes on the frame using the detections provided.

        Args:
            scene (np.ndarray): The image on which the bounding boxes will be drawn
            detections (Detections): The detections for which the
                bounding boxes will be drawn
            labels (Optional[List[str]]): An optional list of labels
                corresponding to each detection. If `labels` is not provided,
                the corresponding `class_id` will be used as the label.
            skip_label (bool): If set to `True`, skips bounding box label annotation.
            image_size (Optional[Tuple[int, int]]): The (width, height) of the image,
                used to keep labels inside the frame when `avoid_overlap` is enabled.

        Returns:
            np.ndarray: The image with the bounding boxes drawn on it

        Example:
            ```python
            import supervision as sv

            classes = ['person', ...]
            image = ...
            detections = sv.Detections(...)

            box_annotator = sv.BoxAnnotator()
            labels = [
                f"{classes[class_id]} {confidence:0.2f}"
                for _, _, confidence, class_id, _ in detections
            ]
            annotated_frame = box_annotator.annotate(
                scene=image.copy(),
                detections=detections,
                labels=labels
            )
            ```
        """
        font = cv2.FONT_HERSHEY_SIMPLEX
        for i in range(len(detections)):
            x1, y1, x2, y2 = detections.xyxy[i].astype(int)
            class_id = (
                detections.class_id[i] if detections.class_id is not None else None
            )
            idx = class_id if class_id is not None else i
            color = (
                self.color.by_idx(idx)
                if isinstance(self.color, ColorPalette)
                else self.color
            )
            cv2.rectangle(
                img=scene,
                pt1=(x1, y1),
                pt2=(x2, y2),
                color=color.as_bgr(),
                thickness=self.thickness,
            )
            if skip_label:
                continue

            text = (
                f"{class_id}"
                if (labels is None or len(detections) != len(labels))
                else labels[i]
            )
            text_width, text_height = cv2.getTextSize(
                text=text,
                fontFace=font,
                fontScale=self.text_scale,
                thickness=self.text_thickness,
            )[0]

            if not self.avoid_overlap:
                # Default placement: label sits just inside the box's top-left corner.
                text_x = x1 + self.text_padding
                text_y = y1 - self.text_padding
                text_background_x1 = x1
                text_background_y1 = y1 - 2 * self.text_padding - text_height
                text_background_x2 = x1 + 2 * self.text_padding + text_width
                text_background_y2 = y1
            else:
                # Try several candidate positions and pick one whose background box
                # does not overlap other detections or run outside the image.
                (
                    text_x,
                    text_y,
                    text_background_x1,
                    text_background_y1,
                    text_background_x2,
                    text_background_y2,
                ) = get_optimal_label_pos(
                    self.text_padding,
                    text_width,
                    text_height,
                    x1,
                    y1,
                    x2,
                    y2,
                    detections,
                    image_size,
                )

            cv2.rectangle(
                img=scene,
                pt1=(text_background_x1, text_background_y1),
                pt2=(text_background_x2, text_background_y2),
                color=color.as_bgr(),
                thickness=cv2.FILLED,
            )

            # Pick black or white text depending on the perceived luminance of the
            # box color, so the label stays readable on light and dark backgrounds.
            box_color = color.as_rgb()
            luminance = 0.299 * box_color[0] + 0.587 * box_color[1] + 0.114 * box_color[2]
            text_color = (0, 0, 0) if luminance > 160 else (255, 255, 255)
            cv2.putText(
                img=scene,
                text=text,
                org=(text_x, text_y),
                fontFace=font,
                fontScale=self.text_scale,
                color=text_color,
                thickness=self.text_thickness,
                lineType=cv2.LINE_AA,
            )
        return scene
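
# Hedged usage sketch (illustrative, not part of the original module): how the
# annotator above is typically called. The image path and the single detection
# below are hypothetical placeholders; `image_size` is passed as (width, height)
# so the avoid_overlap logic can also reject label positions outside the frame.
#
#     image = cv2.imread("screenshot.png")
#     detections = Detections(xyxy=np.array([[10.0, 10.0, 200.0, 80.0]]))
#     annotator = BoxAnnotator(avoid_overlap=True)
#     annotated = annotator.annotate(
#         scene=image.copy(),
#         detections=detections,
#         labels=["0"],
#         image_size=(image.shape[1], image.shape[0]),  # (w, h)
#     )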


def box_area(box):
    """Area of an [x1, y1, x2, y2] box."""
    return (box[2] - box[0]) * (box[3] - box[1])


def intersection_area(box1, box2):
    """Overlap area of two [x1, y1, x2, y2] boxes (0 if they are disjoint)."""
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    return max(0, x2 - x1) * max(0, y2 - y1)


def IoU(box1, box2, return_max=True):
    """Intersection over union; with return_max, also consider containment ratios."""
    intersection = intersection_area(box1, box2)
    union = box_area(box1) + box_area(box2) - intersection
    if union == 0:
        # Both boxes are degenerate; treat as no overlap.
        return 0
    if box_area(box1) > 0 and box_area(box2) > 0:
        ratio1 = intersection / box_area(box1)
        ratio2 = intersection / box_area(box2)
    else:
        ratio1, ratio2 = 0, 0
    if return_max:
        return max(intersection / union, ratio1, ratio2)
    else:
        return intersection / union
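
# Worked example (illustrative): for box1 = [0, 0, 10, 10] and box2 = [5, 5, 15, 15],
# the intersection is 5 * 5 = 25 and the union is 100 + 100 - 25 = 175, so the plain
# IoU is 25 / 175 ≈ 0.14 while each containment ratio is 25 / 100 = 0.25. With
# return_max=True the function therefore returns 0.25, which is what the label
# placement below compares against its 0.3 overlap threshold.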


def get_optimal_label_pos(text_padding, text_width, text_height, x1, y1, x2, y2, detections, image_size):
    """Choose a label position whose background box does not overlap other detections.

    Candidate positions are tried in order: 'top left', 'outer left', 'outer right',
    'top right'. A candidate is rejected if its background box overlaps any detection
    with an overlap score above 0.3, or if it falls outside the image. If every
    candidate overlaps, the last one ('top right') is returned.
    """

    def get_is_overlap(detections, text_background_x1, text_background_y1, text_background_x2, text_background_y2, image_size):
        is_overlap = False
        for i in range(len(detections)):
            detection = detections.xyxy[i].astype(int)
            if IoU([text_background_x1, text_background_y1, text_background_x2, text_background_y2], detection) > 0.3:
                is_overlap = True
                break
        # check if the text is out of the image
        if image_size is not None and (
            text_background_x1 < 0
            or text_background_x2 > image_size[0]
            or text_background_y1 < 0
            or text_background_y2 > image_size[1]
        ):
            is_overlap = True
        return is_overlap

    # Candidate: top left (above the box, anchored at its left corner)
    text_x = x1 + text_padding
    text_y = y1 - text_padding
    text_background_x1 = x1
    text_background_y1 = y1 - 2 * text_padding - text_height
    text_background_x2 = x1 + 2 * text_padding + text_width
    text_background_y2 = y1
    is_overlap = get_is_overlap(detections, text_background_x1, text_background_y1, text_background_x2, text_background_y2, image_size)
    if not is_overlap:
        return text_x, text_y, text_background_x1, text_background_y1, text_background_x2, text_background_y2

    # Candidate: outer left (to the left of the box)
    text_x = x1 - text_padding - text_width
    text_y = y1 + text_padding + text_height
    text_background_x1 = x1 - 2 * text_padding - text_width
    text_background_y1 = y1
    text_background_x2 = x1
    text_background_y2 = y1 + 2 * text_padding + text_height
    is_overlap = get_is_overlap(detections, text_background_x1, text_background_y1, text_background_x2, text_background_y2, image_size)
    if not is_overlap:
        return text_x, text_y, text_background_x1, text_background_y1, text_background_x2, text_background_y2

    # Candidate: outer right (to the right of the box)
    text_x = x2 + text_padding
    text_y = y1 + text_padding + text_height
    text_background_x1 = x2
    text_background_y1 = y1
    text_background_x2 = x2 + 2 * text_padding + text_width
    text_background_y2 = y1 + 2 * text_padding + text_height
    is_overlap = get_is_overlap(detections, text_background_x1, text_background_y1, text_background_x2, text_background_y2, image_size)
    if not is_overlap:
        return text_x, text_y, text_background_x1, text_background_y1, text_background_x2, text_background_y2

    # Candidate: top right (above the box, anchored at its right corner)
    text_x = x2 - text_padding - text_width
    text_y = y1 - text_padding
    text_background_x1 = x2 - 2 * text_padding - text_width
    text_background_y1 = y1 - 2 * text_padding - text_height
    text_background_x2 = x2
    text_background_y2 = y1
    is_overlap = get_is_overlap(detections, text_background_x1, text_background_y1, text_background_x2, text_background_y2, image_size)
    if not is_overlap:
        return text_x, text_y, text_background_x1, text_background_y1, text_background_x2, text_background_y2

    # All candidates overlap: fall back to the last one ('top right').
    return text_x, text_y, text_background_x1, text_background_y1, text_background_x2, text_background_y2
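

if __name__ == "__main__":
    # Hedged, self-contained demo (not part of the original module): exercises the
    # label-placement fallback on a synthetic image. The canvas size, boxes, and
    # labels below are arbitrary illustrative values.
    width, height = 640, 360
    canvas = np.zeros((height, width, 3), dtype=np.uint8)

    # Two nearby boxes, so the default 'top left' label position of the second box
    # is likely to overlap the first one and trigger a fallback position.
    boxes = np.array(
        [
            [20.0, 20.0, 220.0, 120.0],
            [60.0, 140.0, 260.0, 240.0],
        ]
    )
    detections = Detections(xyxy=boxes)

    annotator = BoxAnnotator(avoid_overlap=True)
    annotated = annotator.annotate(
        scene=canvas.copy(),
        detections=detections,
        labels=["box 0", "box 1"],
        image_size=(width, height),
    )
    print("annotated image shape:", annotated.shape)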