Spaces:
Running
Running
import math | |
from typing import List | |
import numpy as np | |
from PIL import Image | |
def _yolo_xywh2xyxy(x: np.ndarray) -> np.ndarray: | |
""" | |
Copied from yolov8. | |
Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the | |
top-left corner and (x2, y2) is the bottom-right corner. | |
Args: | |
x (np.ndarray) or (torch.Tensor): The input bounding box coordinates in (x, y, width, height) format. | |
Returns: | |
y (np.ndarray) or (torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format. | |
""" | |
y = np.copy(x) | |
y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x | |
y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y | |
y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x | |
y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y | |
return y | |
def _yolo_nms(boxes, scores, thresh: float = 0.7) -> List[int]: | |
""" | |
dets: ndarray, (num_boxes, 5) | |
每一行表示一个bounding box:[xmin, ymin, xmax, ymax, score] | |
其中xmin, ymin, xmax, ymax分别表示框的左上角和右下角坐标,score表示框的分数 | |
thresh: float | |
两个框的IoU阈值 | |
""" | |
x1 = boxes[:, 0] | |
y1 = boxes[:, 1] | |
x2 = boxes[:, 2] | |
y2 = boxes[:, 3] | |
areas = (x2 - x1 + 1) * (y2 - y1 + 1) | |
# 按照score降序排列 | |
order = scores.argsort()[::-1] | |
keep = [] | |
while order.size > 0: | |
i = order[0] | |
keep.append(i) | |
# 计算其他所有框与当前框的IoU | |
xx1 = np.maximum(x1[i], x1[order[1:]]) | |
yy1 = np.maximum(y1[i], y1[order[1:]]) | |
xx2 = np.minimum(x2[i], x2[order[1:]]) | |
yy2 = np.minimum(y2[i], y2[order[1:]]) | |
w = np.maximum(0.0, xx2 - xx1 + 1) | |
h = np.maximum(0.0, yy2 - yy1 + 1) | |
inter = w * h | |
iou = inter / (areas[i] + areas[order[1:]] - inter) | |
# 保留IoU小于阈值的框 | |
inds = np.where(iou <= thresh)[0] | |
order = order[inds + 1] | |
return keep | |
def _image_preprocess(image: Image.Image, max_infer_size: int = 640, align: int = 32):
    """
    Resize an image for inference.

    If the longer side exceeds ``max_infer_size``, both sides are scaled down
    by the same factor so the longer side equals ``max_infer_size``; smaller
    images are not scaled up. Each side is then rounded up independently to
    the next multiple of ``align``, and the image is resized to that size.

    Args:
        image (Image.Image): The input image.
        max_infer_size (int): Upper bound for the longer side before alignment.
        align (int): Both output dimensions are multiples of this value.

    Returns:
        tuple: ``(resized_image, (old_width, old_height), (new_width, new_height))``.
    """
    old_width, old_height = image.width, image.height

    scale = max_infer_size / max(old_width, old_height)
    if scale < 1:
        # Shrink only; images already within the limit keep their size here.
        target_w, target_h = old_width * scale, old_height * scale
    else:
        target_w, target_h = old_width, old_height

    # Round each dimension up to a multiple of `align`.
    new_width = int(math.ceil(target_w / align) * align)
    new_height = int(math.ceil(target_h / align) * align)

    resized = image.resize((new_width, new_height))
    return resized, (old_width, old_height), (new_width, new_height)
def _xy_postprocess(x, y, old_size, new_size): | |
old_width, old_height = old_size | |
new_width, new_height = new_size | |
x, y = x / new_width * old_width, y / new_height * old_height | |
x = int(np.clip(x, a_min=0, a_max=old_width).round()) | |
y = int(np.clip(y, a_min=0, a_max=old_height).round()) | |
return x, y | |
def _data_postprocess(output, conf_threshold, iou_threshold, old_size, new_size, labels: List[str]):
    """
    Turn a raw model output array into a list of detections.

    ``output`` is indexed as a 2-D array with one column per candidate: rows
    0-3 hold the box as (x, y, width, height) and the remaining rows hold one
    score per class. Candidates whose best class score is not above
    ``conf_threshold`` are discarded, the rest go through NMS, and the
    surviving boxes are mapped back to the original image size.

    Args:
        output: Raw prediction array, laid out as described above.
        conf_threshold: Minimum best-class score (exclusive) to keep a candidate.
        iou_threshold: IoU threshold passed to ``_yolo_nms``.
        old_size: ``(width, height)`` of the original image.
        new_size: ``(width, height)`` of the image fed to the model.
        labels (List[str]): Class names, indexed by class-score position.

    Returns:
        list: One ``((x0, y0, x1, y1), label, score)`` tuple per kept box, with
        integer corner coordinates in the original image and the score as a
        Python float. Empty when no candidate passes the confidence threshold.
    """
    best_per_candidate = output[4:, :].max(axis=0)
    # Keep confident candidates and switch to one row per candidate.
    candidates = output[:, best_per_candidate > conf_threshold].transpose(1, 0)
    xywh, class_scores = candidates[:, :4], candidates[:, 4:]
    if not xywh.size:
        return []

    xyxy = _yolo_xywh2xyxy(xywh)
    kept = _yolo_nms(xyxy, class_scores.max(axis=1), thresh=iou_threshold)

    results = []
    for corners, per_class in zip(xyxy[kept], class_scores[kept]):
        left, top = _xy_postprocess(corners[0], corners[1], old_size, new_size)
        right, bottom = _xy_postprocess(corners[2], corners[3], old_size, new_size)
        best_class = per_class.argmax()
        results.append(((left, top, right, bottom), labels[best_class], float(per_class[best_class])))
    return results