Spaces:
Runtime error
Runtime error
import cv2 | |
import numpy as np | |
def prepare_input(image, input_shape):
    """Turn a BGR image into a batched, channel-first float32 tensor.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2RGB``.
        input_shape: ``(height, width)`` pair the image is resized to.

    Returns:
        A ``float32`` array of shape ``(1, channels, height, width)`` whose
        values are the resized RGB pixels divided by 255.
    """
    target_height, target_width = input_shape

    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # cv2.resize takes its target size as (width, height).
    resized = cv2.resize(rgb, (target_width, target_height))

    # Divide by 255, then move the channel axis first (HWC -> CHW).
    channel_first = (resized / 255.0).transpose(2, 0, 1)

    # Prepend the batch axis and cast to float32.
    return channel_first[np.newaxis, :, :, :].astype(np.float32)
def process_output(output, ori_shape, input_shape, conf_threshold, iou_threshold, classes=None):
    """Filter, rescale and de-duplicate raw detections.

    Each row of ``output[0]`` is read as
    ``[_, x1, y1, x2, y2, class_id, score]``. Column 0 is ignored here
    (presumably a batch index -- confirm against the model exporter).

    Args:
        output: Sequence whose first element is a 2-D array of detections.
        ori_shape: ``(height, width)`` of the original image.
        input_shape: ``(height, width)`` of the network input.
        conf_threshold: Rows whose score is not strictly greater are dropped.
        iou_threshold: IoU threshold forwarded to ``nms``.
        classes: Optional collection of class ids to keep. ``None`` or an
            empty collection keeps every class.

    Returns:
        ``([], [], [])`` when no row passes ``conf_threshold`` (kept as-is
        for backward compatibility; note it differs from the normal return).
        Otherwise an array with one row ``[x1, y1, x2, y2, score, class_id]``
        per kept detection, with boxes scaled to ``ori_shape``.
    """
    predictions = output[0]

    # Column 6 is the detection score; one mask filters rows and scores alike.
    confident = predictions[:, 6] > conf_threshold
    predictions = predictions[confident]
    scores = predictions[:, 6]

    if len(scores) == 0:
        return [], [], []

    class_ids = predictions[:, 5].astype(np.uint16)

    # Columns 1..4 are the box corners, expressed in network-input pixels.
    boxes = rescale_boxes(predictions[:, 1:5], ori_shape, input_shape)

    # Suppress weaker boxes that overlap a stronger one too much.
    indices = nms(boxes, scores, iou_threshold)

    # len() rather than truthiness so a NumPy array of class ids also works.
    restrict_classes = classes is not None and len(classes) > 0

    dets = [
        [*boxes[i], scores[i], class_ids[i]]
        for i in indices
        if not restrict_classes or class_ids[i] in classes
    ]
    return np.array(dets)
def rescale_boxes(boxes, ori_shape, input_shape):
    """Map boxes from network-input pixels to original-image pixels.

    Args:
        boxes: Array whose last axis holds four values in x, y, x, y order.
        ori_shape: ``(height, width)`` of the original image.
        input_shape: ``(height, width)`` of the network input.

    Returns:
        A new ``float32`` array of the same shape as ``boxes``.
    """
    net_height, net_width = input_shape
    image_height, image_width = ori_shape

    net_dims = np.array([net_width, net_height, net_width, net_height])
    image_dims = np.array([image_width, image_height, image_width, image_height])

    # Normalise to fractions of the network input, then scale up to the image.
    scaled = np.divide(boxes, net_dims, dtype=np.float32)
    scaled *= image_dims
    return scaled
# Class labels, indexed by class id.
class_names = [
    # people and vehicles
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
    # street furniture
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    # animals
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    # bags and accessories
    "backpack", "umbrella", "handbag", "tie", "suitcase",
    # sports gear
    "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket",
    # tableware
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
    # food
    "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza",
    "donut", "cake",
    # furniture
    "chair", "couch", "potted plant", "bed", "dining table", "toilet",
    # electronics
    "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    # appliances
    "microwave", "oven", "toaster", "sink", "refrigerator",
    # household items
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]

# One colour per class: a (len(class_names), 3) array of floats drawn
# uniformly from [0, 255). The fixed seed keeps colours stable between runs.
rng = np.random.default_rng(3)
colors = rng.uniform(low=0, high=255, size=(len(class_names), 3))
def nms(boxes, scores, iou_threshold):
    """Greedy non-maximum suppression.

    Args:
        boxes: 2-D array of boxes, one ``x1, y1, x2, y2`` row per box.
        scores: 1-D array with one score per box.
        iou_threshold: A candidate is kept only while its IoU with every
            already-selected box is strictly below this value.

    Returns:
        List of indices into ``boxes`` in descending score order.
    """
    # Candidates ordered from highest to lowest score.
    order = np.argsort(scores)[::-1]

    selected = []
    while order.size > 0:
        # The best remaining candidate is always kept.
        best = order[0]
        selected.append(best)

        # Drop every other candidate that overlaps it too much.
        remaining = order[1:]
        overlaps = compute_iou(boxes[best, :], boxes[remaining, :])
        order = remaining[overlaps < iou_threshold]

    return selected
def compute_iou(box, boxes):
    """Intersection-over-union of one box against many.

    Args:
        box: Single box as ``x1, y1, x2, y2``.
        boxes: 2-D array with one ``x1, y1, x2, y2`` row per box.

    Returns:
        1-D array with the IoU of ``box`` and each row of ``boxes``.
    """
    # Corners of the overlap rectangle (may be inverted when disjoint).
    left = np.maximum(box[0], boxes[:, 0])
    top = np.maximum(box[1], boxes[:, 1])
    right = np.minimum(box[2], boxes[:, 2])
    bottom = np.minimum(box[3], boxes[:, 3])

    # Clamp negative extents to zero so disjoint boxes contribute no area.
    overlap_width = np.maximum(0, right - left)
    overlap_height = np.maximum(0, bottom - top)
    intersection_area = overlap_width * overlap_height

    box_area = (box[2] - box[0]) * (box[3] - box[1])
    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    return intersection_area / (box_area + boxes_area - intersection_area)
def xywh2xyxy(x):
    """Convert centre-format boxes to corner-format boxes.

    Args:
        x: Array whose last axis holds ``(x, y, w, h)``.

    Returns:
        A copy of ``x`` (same dtype) whose first four last-axis entries are
        ``(x - w/2, y - h/2, x + w/2, y + h/2)``. The input is not modified.
    """
    half_width = x[..., 2] / 2
    half_height = x[..., 3] / 2

    corners = np.copy(x)
    corners[..., 0] = x[..., 0] - half_width
    corners[..., 1] = x[..., 1] - half_height
    corners[..., 2] = x[..., 0] + half_width
    corners[..., 3] = x[..., 1] + half_height
    return corners
def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
    """Draw boxes and captions for each detection and return the result.

    Two copies of ``image`` are drawn on: one with box outlines and one with
    filled boxes. They are blended so the fill shows through at
    ``mask_alpha`` opacity.

    The earlier version wrote a debug file and called ``exit()``, which
    terminated the calling process and left ``mask_alpha`` unused. This
    version returns the blended image instead and writes nothing to disk.

    Args:
        image: Image array to annotate; it is not modified.
        boxes: Iterable of ``x1, y1, x2, y2`` boxes in image pixels.
        scores: Iterable of scores; shown as an integer percentage.
        class_ids: Iterable of class ids indexing ``class_names``/``colors``.
        mask_alpha: Weight of the filled-box layer in the blend.

    Returns:
        The annotated image produced by ``cv2.addWeighted``.
    """
    mask_img = image.copy()
    det_img = image.copy()

    img_height, img_width = image.shape[:2]
    shorter_side = min(img_height, img_width)
    # Font scale and stroke thickness grow with the image's shorter side.
    size = shorter_side * 0.0006
    text_thickness = int(shorter_side * 0.001)

    for box, score, class_id in zip(boxes, scores, class_ids):
        # Class ids may arrive as floats (e.g. sliced from a float array of
        # detections); they must be integers to be used as indices.
        class_id = int(class_id)
        color = colors[class_id]
        x1, y1, x2, y2 = np.asarray(box).astype(int).tolist()

        # Outline on the detection layer, solid fill on the mask layer.
        cv2.rectangle(det_img, (x1, y1), (x2, y2), color, 2)
        cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)

        label = class_names[class_id]
        caption = f'{label} {int(score * 100)}%'
        (tw, th), _ = cv2.getTextSize(text=caption, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                      fontScale=size, thickness=text_thickness)
        th = int(th * 1.2)

        # Filled caption background just above the box's top-left corner,
        # followed by the caption text, on both layers.
        for layer in (det_img, mask_img):
            cv2.rectangle(layer, (x1, y1), (x1 + tw, y1 - th), color, -1)
        for layer in (det_img, mask_img):
            cv2.putText(layer, caption, (x1, y1),
                        cv2.FONT_HERSHEY_SIMPLEX, size, (255, 255, 255), text_thickness, cv2.LINE_AA)

    return cv2.addWeighted(mask_img, mask_alpha, det_img, 1 - mask_alpha, 0)
# def draw_detections(image, boxes, scores, class_ids, draw_scores=True, mask_alpha=0.4): | |
# return draw_detections(image, boxes, scores, | |
# class_ids, mask_alpha) |