|
|
|
|
|
|
|
|
|
import cv2 |
|
import tensorflow as tf |
|
import numpy as np |
|
import time |
|
import random |
|
|
|
# Fixed seed so the randomly generated per-class box colors are
# reproducible across runs.
random.seed(42)



# Quantized YOLOv4-tiny model, COCO label list and demo input image,
# all expected in the current working directory.
OBJECT_DETECTOR_TFLITE = 'yolov4-tiny_416_quant.tflite'

LABELS_FILE = 'coco-labels-2014_2017.txt'

IMAGE_FILENAME = 'example_input.jpg'



# Post-processing parameters.
SCORE_THRESHOLD = 0.20  # minimum objectness score for a box to be kept

NMS_IOU_THRESHOLD = 0.5  # IoU above which overlapping boxes are suppressed

INFERENCE_IMG_SIZE = 416  # square network input resolution in pixels

MAX_DETS = 100  # maximum number of detections returned by NMS



# YOLOv4-tiny head parameters: one anchor set per output scale.
# NOTE(review): these look like the standard YOLO-tiny anchors for a
# 416x416 input -- confirm they match the converted model.
ANCHORS = [[[81, 82], [135, 169], [344, 319]], [[23, 27], [37, 58], [81, 82]]]

# Scale factor applied around the sigmoid when decoding box centers
# (YOLOv4's "scale_x_y" trick) -- see decode_boxes_prediction().
SIGMOID_FACTOR = [1.05, 1.05]

NUM_ANCHORS = 3  # anchors per grid cell at each output scale

STRIDES = [32, 16]  # downsampling factor of each of the two output heads

# Grid side length of each output head (416/32 = 13, 416/16 = 26).
GRID_SIZES = [int(INFERENCE_IMG_SIZE / s) for s in STRIDES]
|
|
|
# Load the class names once at import time; one label per line.
with open(LABELS_FILE, 'r') as f:

    COCO_CLASSES = [line.strip() for line in f.readlines()]



# Build the TFLite interpreter and allocate its tensors once so repeated
# calls to run_inference() reuse the same buffers.
interpreter = tf.lite.Interpreter(OBJECT_DETECTOR_TFLITE)

interpreter.allocate_tensors()
|
|
|
|
|
def gen_box_colors():
    """Return one random RGB tuple per COCO class for drawing boxes.

    Channel values are drawn from [100, 255] so the colors stay bright
    enough to be visible on most photographs.
    """
    return [
        (random.randint(100, 255),
         random.randint(100, 255),
         random.randint(100, 255))
        for _ in COCO_CLASSES
    ]
|
|
|
|
|
# One fixed color per class index, used when drawing detection boxes.
BOX_COLORS = gen_box_colors()
|
|
|
|
|
def load_image(filename):
    """Load an image from disk and prepare it for the network.

    Args:
        filename: path to the image file.

    Returns:
        (orig_image, image): the original BGR image as read by OpenCV,
        and a float batch of shape (1, 416, 416, 3) in RGB order,
        scaled to [0, 1], ready to be passed to run_inference().

    Raises:
        FileNotFoundError: if the file is missing or cannot be decoded.
    """
    orig_image = cv2.imread(filename, 1)
    # cv2.imread returns None instead of raising on a bad path or an
    # undecodable file; fail loudly here rather than crashing in cvtColor.
    if orig_image is None:
        raise FileNotFoundError(f"Could not read image: {filename}")
    image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (INFERENCE_IMG_SIZE, INFERENCE_IMG_SIZE))
    image = np.expand_dims(image, axis=0)
    image = image / 255.0
    return orig_image, image
|
|
|
|
|
def np_sigmoid(x):
    """Numerically stable elementwise sigmoid.

    Computed as exp(-logaddexp(0, -x)) == 1 / (1 + exp(-x)). The naive
    form overflows in np.exp(-x) (RuntimeWarning, intermediate inf) for
    large negative logits; this form never overflows and returns the
    same values.
    """
    return np.exp(-np.logaddexp(0, -x))
|
|
|
|
|
def reciprocal_sigmoid(x):
    """Inverse of the sigmoid (the logit function): log(x / (1 - x)).

    Valid for x in (0, 1); used to compare raw network logits against
    a probability threshold without applying the sigmoid everywhere.
    """
    return np.log(x / (1.0 - x))
|
|
|
|
|
def decode_boxes_prediction(yolo_output):
    """Decode raw YOLOv4-tiny head tensors into candidate boxes.

    Args:
        yolo_output: list of two raw output tensors, one per scale,
            each reshapeable to (NUM_ANCHORS * grid**2, 85), where the
            85 channels are [tx, ty, tw, th, objectness, 80 class
            logits]. NOTE(review): the (cell-major, anchor-minor) row
            layout is assumed from the index arithmetic below --
            confirm against the converted model.

    Returns:
        boxes: (N, 4) float array of [x1, y1, x2, y2] corners in the
            416x416 inference referential.
        scores: (N,) array of sigmoid objectness scores.
        classes: (N,) array of argmax class indices.
        All three are empty (N == 0) when nothing clears
        SCORE_THRESHOLD.
    """
    boxes_list = []
    scores_list = []
    classes_list = []

    # Comparing raw logits against the logit of the threshold avoids
    # applying the sigmoid to every cell. Hoisted out of the loop: it
    # does not depend on the scale.
    logit_threshold = reciprocal_sigmoid(SCORE_THRESHOLD)

    for idx, feats in enumerate(yolo_output):

        # Flatten to one row per (cell, anchor) pair.
        features = np.reshape(feats, (NUM_ANCHORS * GRID_SIZES[idx] ** 2, 85))

        anchor = np.array(ANCHORS[idx])
        factor = SIGMOID_FACTOR[idx]
        grid_size = GRID_SIZES[idx]
        stride = STRIDES[idx]

        # Keep only rows whose objectness logit clears the threshold.
        cell_confidence = features[..., 4]
        over_threshold_list = np.where(cell_confidence > logit_threshold)

        if over_threshold_list[0].size > 0:
            indices = np.array(over_threshold_list[0])

            # Rows are grouped by cell: integer-dividing by the number
            # of anchors recovers the flat grid-cell index (was a magic
            # literal 3; NUM_ANCHORS matches the modulo used below).
            box_positions = np.floor_divide(indices, NUM_ANCHORS)

            # Convert the flat cell index to (col, row) grid coords.
            list_xy = np.array(np.divmod(box_positions, grid_size)).T
            list_xy = list_xy[..., ::-1]
            boxes_xy = np.reshape(list_xy, (int(list_xy.size / 2), 2))

            outxy = features[indices, :2]

            # YOLOv4 "scale_x_y" decoding: scaled sigmoid of the raw
            # offsets, recentered, plus the cell origin, times stride.
            centers = np_sigmoid(outxy * factor) - 0.5 * (factor - 1)
            centers += boxes_xy
            centers *= stride

            # Box size: exp of the raw w/h times the matching anchor
            # (indices % NUM_ANCHORS selects the anchor within a cell).
            width_height = np.exp(features[indices, 2:4])
            width_height *= anchor[np.divmod(indices, NUM_ANCHORS)[1]]

            # Center/size -> corner [x1, y1, x2, y2] format.
            boxes_list.append(np.stack([centers[:, 0] - width_height[:, 0]/2,
                                        centers[:, 1] - width_height[:, 1]/2,
                                        centers[:, 0] + width_height[:, 0]/2,
                                        centers[:, 1] + width_height[:, 1]/2],
                                       axis=1))

            # Objectness only; class probabilities are not folded in.
            scores_list.append(np_sigmoid(features[indices, 4:5]))

            # Most likely class per kept row.
            classes_list.append(np.argmax(features[indices, 5:], axis=1))

    if len(boxes_list) > 0:
        boxes = np.concatenate(boxes_list, axis=0)
        scores = np.concatenate(scores_list, axis=0)[:, 0]
        classes = np.concatenate(classes_list, axis=0)
        return boxes, scores, classes
    else:
        return np.zeros((0, 4)), np.zeros((0)), np.zeros((0))
|
|
|
|
|
def decode_output(yolo_outputs,
                  score_threshold=SCORE_THRESHOLD,
                  iou_threshold=NMS_IOU_THRESHOLD):
    """Decode output from YOLOv4 tiny in inference size referential
    (416x416).

    Args:
        yolo_outputs: list of raw head tensors from run_inference().
        score_threshold: minimum score for a box to survive NMS.
        iou_threshold: IoU above which overlapping boxes are suppressed.

    Returns:
        (scores, boxes, classes) tensors for the kept detections.
    """
    candidate_boxes, candidate_scores, candidate_classes = \
        decode_boxes_prediction(yolo_outputs)

    # Non-maximum suppression keeps at most MAX_DETS boxes, dropping
    # anything below the score threshold or overlapping a higher-scoring
    # box beyond the IoU threshold.
    keep = tf.image.non_max_suppression(candidate_boxes,
                                        candidate_scores,
                                        MAX_DETS,
                                        score_threshold=score_threshold,
                                        iou_threshold=iou_threshold)

    return (tf.gather(candidate_scores, keep),
            tf.gather(candidate_boxes, keep),
            tf.gather(candidate_classes, keep))
|
|
|
|
|
def run_inference(interpreter, image, threshold=SCORE_THRESHOLD):
    """Quantize the input, invoke the TFLite interpreter, return raw outputs.

    Args:
        interpreter: an allocated tf.lite.Interpreter.
        image: float batch of shape (1, 416, 416, 3) scaled to [0, 1],
            as produced by load_image().
        threshold: unused; kept for backward compatibility with callers
            that pass it (score filtering happens in decode_output()).

    Returns:
        List of raw output tensors, one per model output, in the order
        reported by interpreter.get_output_details().
    """
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Quantize the float image with the model's input scale/zero-point,
    # casting to whatever dtype the model declares (int8 for this
    # quantized YOLOv4-tiny) instead of hard-coding np.int8.
    input_scale, input_zero_point = input_details[0]["quantization"]
    image = image / input_scale + input_zero_point
    image = image.astype(input_details[0]["dtype"])

    interpreter.set_tensor(input_details[0]['index'], image)
    interpreter.invoke()

    # Generalized over the number of outputs (this model has two heads).
    return [interpreter.get_tensor(d['index']) for d in output_details]
|
|
|
|
|
if __name__ == "__main__": |
|
|
|
orig_image, processed_image = load_image(IMAGE_FILENAME) |
|
|
|
start = time.time() |
|
yolo_output = run_inference(interpreter, processed_image) |
|
end = time.time() |
|
|
|
scores, boxes, classes = decode_output(yolo_output) |
|
|
|
|
|
shp = orig_image.shape |
|
boxes = boxes.numpy() |
|
boxes /= INFERENCE_IMG_SIZE |
|
boxes *= np.array([shp[1], shp[0], shp[1], shp[0]]) |
|
|
|
boxes = boxes.astype(np.int32) |
|
|
|
print("Inference time", end - start, "ms") |
|
print("Detected", boxes.shape[0], "object(s)") |
|
print("Box coordinates:") |
|
|
|
for i in range(boxes.shape[0]): |
|
box = boxes[i, :] |
|
print(box, end=" ") |
|
class_name = COCO_CLASSES[classes[i].numpy()] |
|
score = scores[i].numpy() |
|
color = BOX_COLORS[classes[i]] |
|
print("class", class_name, end=" ") |
|
print("score", score) |
|
cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), |
|
color, 3) |
|
cv2.putText(orig_image, f"{class_name} {score:.2f}", |
|
(box[0], box[1] - 10), |
|
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2) |
|
|
|
cv2.imwrite('example_output.jpg', orig_image) |
|
cv2.imshow('', orig_image) |
|
cv2.waitKey() |
|
|