Spaces:

deepghs
/

anime_object_detection

Running

narugo1992

dev(narugo): update sizes

4818b14 over 1 year ago

3.75 kB

	import math
	from typing import List

	import numpy as np
	from PIL import Image


	def _yolo_xywh2xyxy(x: np.ndarray) -> np.ndarray:
	    """
	    Turn centre-format boxes into corner-format boxes (adapted from yolov8).

	    The last axis of ``x`` is read as ``(cx, cy, width, height)``; the result
	    holds ``(x1, y1, x2, y2)`` in those same four slots, with ``(x1, y1)`` the
	    top-left corner and ``(x2, y2)`` the bottom-right corner.

	    Args:
	        x (np.ndarray): Boxes whose last axis starts with (cx, cy, width, height).
	    Returns:
	        np.ndarray: A copy of ``x`` (same shape and dtype) with the first four
	        entries of the last axis replaced by the corner coordinates.
	    """
	    corners = np.copy(x)
	    centre = x[..., :2]
	    half_extent = x[..., 2:4] / 2
	    corners[..., :2] = centre - half_extent  # top-left (x1, y1)
	    corners[..., 2:4] = centre + half_extent  # bottom-right (x2, y2)
	    return corners


	def _yolo_nms(boxes, scores, thresh: float = 0.7) -> List[int]:
	    """
	    Greedy non-maximum suppression over axis-aligned boxes.

	    Boxes are visited from highest to lowest score. Each visited box is kept,
	    and every remaining box whose IoU with it is greater than ``thresh`` is
	    discarded.

	    Args:
	        boxes: 2-D ndarray; columns 0..3 are read as (xmin, ymin, xmax, ymax),
	            i.e. the top-left and bottom-right corners of each box.
	        scores: 1-D ndarray holding one score per row of ``boxes``.
	        thresh (float): IoU threshold between two boxes.
	    Returns:
	        List[int]: Row indices of the kept boxes, ordered by descending score,
	        as plain Python ints. The list is empty when there are no boxes.
	    """
	    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
	    # The "+ 1" treats coordinates as inclusive pixel indices. It is kept
	    # unchanged so that suppression decisions stay exactly as before.
	    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

	    # Candidate indices sorted by descending score.
	    order = scores.argsort()[::-1]

	    keep = []
	    while order.size > 0:
	        i = order[0]
	        rest = order[1:]
	        # Convert to a built-in int so the result matches the List[int] annotation.
	        keep.append(int(i))

	        # Intersection of the current box with every remaining candidate.
	        xx1 = np.maximum(x1[i], x1[rest])
	        yy1 = np.maximum(y1[i], y1[rest])
	        xx2 = np.minimum(x2[i], x2[rest])
	        yy2 = np.minimum(y2[i], y2[rest])

	        w = np.maximum(0.0, xx2 - xx1 + 1)
	        h = np.maximum(0.0, yy2 - yy1 + 1)

	        inter = w * h
	        iou = inter / (areas[i] + areas[rest] - inter)

	        # Only candidates whose IoU does not exceed the threshold survive.
	        order = rest[iou <= thresh]

	    return keep


	def _image_preprocess(image: Image.Image, max_infer_size: int = 640, align: int = 32):
	    """
	    Resize an image so it fits the inference size and is aligned to ``align``.

	    If the longest side is larger than ``max_infer_size``, both sides are
	    scaled down by the same factor so the longest side equals
	    ``max_infer_size``. Each side is then rounded up to the next multiple of
	    ``align`` and the image is resized to that size. Smaller images are not
	    scaled, only aligned.

	    Args:
	        image (Image.Image): Source image; only ``width``, ``height`` and
	            ``resize`` are used.
	        max_infer_size (int): Upper bound for the longest side before alignment.
	        align (int): Both output sides are multiples of this value.
	    Returns:
	        tuple: ``(resized_image, (old_width, old_height), (new_width, new_height))``.
	    Raises:
	        ZeroDivisionError: If both image dimensions are zero.
	    """
	    old_width, old_height = image.width, image.height
	    new_width, new_height = old_width, old_height
	    longest_side = max(old_width, old_height)
	    r = max_infer_size / longest_side
	    if r < 1:
	        # Multiply first, then divide. Scaling by the already-rounded ratio
	        # ``r`` can land a hair above ``max_infer_size`` (e.g. 640.0000000000001),
	        # which ``ceil`` below would bump a whole extra ``align`` step.
	        new_width = old_width * max_infer_size / longest_side
	        new_height = old_height * max_infer_size / longest_side
	    new_width = int(math.ceil(new_width / align) * align)
	    new_height = int(math.ceil(new_height / align) * align)
	    image = image.resize((new_width, new_height))
	    return image, (old_width, old_height), (new_width, new_height)


	def _xy_postprocess(x, y, old_size, new_size):
	    """
	    Map a point from the resized image back onto the original image.

	    Each coordinate is rescaled by the ratio of original to resized size,
	    clipped to ``[0, original size]`` and rounded to an integer.

	    Args:
	        x: Horizontal coordinate in the resized image.
	        y: Vertical coordinate in the resized image.
	        old_size: ``(width, height)`` of the original image.
	        new_size: ``(width, height)`` of the resized image.
	    Returns:
	        tuple: ``(x, y)`` as Python ints in original-image coordinates.
	    """
	    (orig_w, orig_h), (infer_w, infer_h) = old_size, new_size
	    scaled_x = x / infer_w * orig_w
	    scaled_y = y / infer_h * orig_h
	    clipped_x = np.clip(scaled_x, a_min=0, a_max=orig_w)
	    clipped_y = np.clip(scaled_y, a_min=0, a_max=orig_h)
	    return int(clipped_x.round()), int(clipped_y.round())


	def _data_postprocess(output, conf_threshold, iou_threshold, old_size, new_size, labels: List[str]):
	    """
	    Turn a raw detector output array into a list of labelled detections.

	    ``output`` is indexed as ``(4 + num_classes, num_candidates)``: rows 0..3
	    hold each candidate box as (cx, cy, width, height) and the remaining rows
	    hold one score per class. Candidates whose best class score does not exceed
	    ``conf_threshold`` are dropped, the rest go through NMS, and the surviving
	    boxes are mapped back to original-image coordinates.

	    Args:
	        output: 2-D ndarray laid out as described above.
	        conf_threshold: Minimum (exclusive) best-class score to keep a candidate.
	        iou_threshold: IoU threshold handed to the NMS step.
	        old_size: ``(width, height)`` of the original image.
	        new_size: ``(width, height)`` of the image given to the model.
	        labels (List[str]): Class names, indexed by class-score position.
	    Returns:
	        list: One ``((x0, y0, x1, y1), label, score)`` tuple per kept box;
	        empty when no candidate passes the confidence threshold.
	    """
	    confident = output[4:, :].max(axis=0) > conf_threshold
	    candidates = output[:, confident].transpose(1, 0)
	    xywh, class_scores = candidates[:, :4], candidates[:, 4:]

	    # Nothing passed the confidence filter.
	    if xywh.size == 0:
	        return []

	    corners = _yolo_xywh2xyxy(xywh)
	    kept = _yolo_nms(corners, class_scores.max(axis=1), thresh=iou_threshold)
	    corners, class_scores = corners[kept], class_scores[kept]

	    # Best class per surviving box (first index wins on ties).
	    best_ids = class_scores.argmax(axis=1)
	    return [
	        (
	            _xy_postprocess(corner[0], corner[1], old_size, new_size)
	            + _xy_postprocess(corner[2], corner[3], old_size, new_size),
	            labels[best],
	            float(row[best]),
	        )
	        for corner, row, best in zip(corners, class_scores, best_ids)
	    ]