#!/usr/bin/env python3
"""This module contains the class Yolo (YOLO v3 object detection)."""
import os

import cv2
import numpy as np
import tensorflow.keras as K


def sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))``."""
    return 1 / (1 + np.exp(-x))


class Yolo:
    """Run a Keras YOLO v3 model and post-process its raw predictions."""

    def __init__(self, model_path, classes_path, class_t, nms_t, anchors):
        """Load the model and the class names.

        Args:
            model_path: path to the saved Keras model.
            classes_path: path to a text file with one class name per line.
            class_t: box score threshold used by ``filter_boxes``.
            nms_t: IoU threshold used by ``non_max_suppression``.
            anchors: array indexed as ``anchors[output, anchor_box, 0|1]``
                giving anchor width and height for each model output.

        Raises:
            FileNotFoundError: if either path does not exist.
        """
        if not os.path.exists(model_path):
            raise FileNotFoundError("Wrong model file path")
        if not os.path.exists(classes_path):
            raise FileNotFoundError("Wrong classes file path")

        self.model = K.models.load_model(model_path)
        with open(classes_path, 'r') as f:
            # Strip only the line terminator: slicing off the last character
            # would truncate the final name when the file lacks a trailing
            # newline.
            self.class_names = [line.rstrip('\r\n') for line in f]
        self.class_t = class_t
        self.nms_t = nms_t
        self.anchors = anchors

    def _input_size(self):
        """Return the model input size as ``(height, width)``.

        NOTE(review): assumes a channels-last model input laid out as
        (batch, height, width, channels) -- confirm for this model.
        """
        shape = self.model.input.shape
        return shape[1], shape[2]

    def process_outputs(self, outputs, image_size):
        """Convert raw model outputs into boxes in image coordinates.

        Args:
            outputs: list of arrays, one per model output, each of shape
                (grid_height, grid_width, anchor_boxes, 4 + 1 + classes)
                for a single image.
            image_size: the image's original size
                ``[image_height, image_width]``.

        Returns:
            Tuple ``(boxes, box_confidences, box_class_probs)`` of lists with
            one entry per output:
            boxes of shape (grid_height, grid_width, anchor_boxes, 4) holding
            ``(x1, y1, x2, y2)`` scaled by the image size;
            confidences of shape (..., 1); class probabilities of shape
            (..., classes).
        """
        boxes = []
        box_confidences = []
        box_class_probs = []
        img_h, img_w = image_size
        input_h, input_w = self._input_size()

        for i, output in enumerate(outputs):
            grid_h, grid_w, nb_box, _ = output.shape

            box_confidences.append(sigmoid(output[..., 4:5]))
            box_class_probs.append(sigmoid(output[..., 5:]))

            # Raw box parameters predicted by the network.
            t_x = output[..., 0]
            t_y = output[..., 1]
            t_w = output[..., 2]
            t_h = output[..., 3]

            # Cell offsets, shaped to broadcast against
            # (grid_h, grid_w, nb_box); this also works for non-square grids.
            c_x = np.arange(grid_w).reshape(1, grid_w, 1)
            c_y = np.arange(grid_h).reshape(grid_h, 1, 1)

            # Anchor dimensions for this output.
            p_w = self.anchors[i, :, 0]
            p_h = self.anchors[i, :, 1]

            # Box centre relative to the grid, size relative to model input.
            b_x = (sigmoid(t_x) + c_x) / grid_w
            b_y = (sigmoid(t_y) + c_y) / grid_h
            b_w = (np.exp(t_w) * p_w) / input_w
            b_h = (np.exp(t_h) * p_h) / input_h

            # Corner coordinates scaled to the original image size.
            box = np.zeros((grid_h, grid_w, nb_box, 4))
            box[..., 0] = (b_x - b_w / 2) * img_w
            box[..., 1] = (b_y - b_h / 2) * img_h
            box[..., 2] = (b_x + b_w / 2) * img_w
            box[..., 3] = (b_y + b_h / 2) * img_h
            boxes.append(box)

        return boxes, box_confidences, box_class_probs

    def filter_boxes(self, boxes, box_confidences, box_class_probs):
        """Keep the boxes whose best class score reaches ``self.class_t``.

        Args:
            boxes: list of arrays of shape
                (grid_height, grid_width, anchor_boxes, 4).
            box_confidences: list of arrays of shape
                (grid_height, grid_width, anchor_boxes, 1).
            box_class_probs: list of arrays of shape
                (grid_height, grid_width, anchor_boxes, classes).

        Returns:
            Tuple ``(filtered_boxes, box_classes, box_scores)`` with shapes
            (?, 4), (?,) and (?,), where ? is the number of kept boxes.
        """
        if len(boxes) == 0:
            # np.concatenate rejects an empty list.
            return (np.empty((0, 4)), np.empty((0,), dtype=int),
                    np.empty((0,)))

        filtered_boxes = []
        box_classes = []
        box_scores = []

        for box, conf, probs in zip(boxes, box_confidences, box_class_probs):
            # Box score = objectness confidence * best class probability.
            class_indices = np.argmax(probs, axis=-1)
            score = conf[..., 0] * np.max(probs, axis=-1)

            mask = score >= self.class_t
            filtered_boxes.append(box[mask])
            box_classes.append(class_indices[mask])
            box_scores.append(score[mask])

        return (np.concatenate(filtered_boxes),
                np.concatenate(box_classes),
                np.concatenate(box_scores))

    def non_max_suppression(self, filtered_boxes, box_classes, box_scores):
        """Apply per-class non-max suppression.

        Args:
            filtered_boxes: array of shape (?, 4) with ``(x1, y1, x2, y2)``.
            box_classes: array of shape (?,) with the class index per box.
            box_scores: array of shape (?,) with the score per box.

        Returns:
            Tuple ``(box_predictions, predicted_box_classes,
            predicted_box_scores)`` ordered by class, then by decreasing
            score. Empty arrays are returned when there are no input boxes.
        """
        nms_t = self.nms_t
        box_predictions = []
        predicted_box_classes = []
        predicted_box_scores = []

        for cls in np.unique(box_classes):
            idx = np.where(box_classes == cls)
            boxes_of_cls = filtered_boxes[idx]
            classes_of_cls = box_classes[idx]
            scores_of_cls = box_scores[idx]

            x1 = boxes_of_cls[:, 0]
            y1 = boxes_of_cls[:, 1]
            x2 = boxes_of_cls[:, 2]
            y2 = boxes_of_cls[:, 3]
            # The "+ 1" is kept from the original implementation; it appears
            # to treat box edges as inclusive pixel coordinates.
            areas = (x2 - x1 + 1) * (y2 - y1 + 1)

            # Candidate indices, highest score first.
            order = scores_of_cls.argsort()[::-1]
            keep = []
            while order.shape[0] > 0:
                best = order[0]
                rest = order[1:]
                keep.append(best)

                # Intersection of the best box with every remaining box.
                xx1 = np.maximum(x1[best], x1[rest])
                yy1 = np.maximum(y1[best], y1[rest])
                xx2 = np.minimum(x2[best], x2[rest])
                yy2 = np.minimum(y2[best], y2[rest])
                w = np.maximum(0.0, xx2 - xx1 + 1)
                h = np.maximum(0.0, yy2 - yy1 + 1)
                inter = w * h

                union = areas[best] + areas[rest] - inter
                overlap = inter / union

                # Drop the best box itself and everything overlapping it by
                # more than the threshold.
                order = rest[overlap <= nms_t]

            box_predictions.append(boxes_of_cls[keep])
            predicted_box_classes.append(classes_of_cls[keep])
            predicted_box_scores.append(scores_of_cls[keep])

        if not box_predictions:
            # No boxes at all: np.concatenate would raise on an empty list.
            return (np.empty((0, 4)), np.empty((0,), dtype=int),
                    np.empty((0,)))

        return (np.concatenate(box_predictions),
                np.concatenate(predicted_box_classes),
                np.concatenate(predicted_box_scores))

    @staticmethod
    def load_images(folder_path):
        """Load every readable image found directly inside a folder.

        Args:
            folder_path: directory to read.

        Returns:
            ``None`` if ``folder_path`` does not exist, otherwise a tuple
            ``(images, image_paths)``: the images as returned by
            ``cv2.imread`` and the matching paths inside ``folder_path``.
            Entries that ``cv2.imread`` cannot decode are skipped.
        """
        if not os.path.exists(folder_path):
            return None

        images = []
        paths = []
        # Sorted so the result does not depend on directory listing order.
        for name in sorted(os.listdir(folder_path)):
            path = os.path.join(folder_path, name)
            img = cv2.imread(path)
            if img is not None:
                images.append(img)
                paths.append(path)
        return (images, paths)

    def preprocess_images(self, images):
        """Resize the images to the model input size and rescale to [0, 1].

        Args:
            images: list of images as numpy arrays.

        Returns:
            Tuple ``(pimages, image_shapes)``: the stacked preprocessed
            images and an array of the original ``(height, width)`` of each.
        """
        input_h, input_w = self._input_size()

        image_shapes = []
        pimages = []
        for image in images:
            image_shapes.append(image.shape[:2])
            # cv2.resize takes the target size as (width, height).
            pimage = cv2.resize(image, (input_w, input_h),
                                interpolation=cv2.INTER_CUBIC)
            pimages.append(pimage / 255)

        return np.array(pimages), np.array(image_shapes)

    def show_boxes(self, image, boxes, box_classes, box_scores, file_name):
        """Display an image with its boxes, class names and scores.

        The window is titled ``file_name`` and stays open until a key is
        pressed. Pressing ``s`` saves the annotated image as
        ``./detections/<file_name>`` (the directory is created if needed).

        Args:
            image: the unprocessed image.
            boxes: array of ``(x1, y1, x2, y2)`` boxes.
            box_classes: class index for each box.
            box_scores: score for each box.
            file_name: window title and output file name.
        """
        imagec = image.copy()
        color = (255, 0, 0)

        for idx, box in enumerate(boxes):
            top_left = (int(box[0]), int(box[1]))
            bottom_right = (int(box[2]), int(box[3]))
            class_name = self.class_names[box_classes[idx]]
            text = class_name + " " + "{:.2f}".format(box_scores[idx])

            cv2.rectangle(imagec, top_left, bottom_right, color, 2)
            cv2.putText(imagec, text,
                        (top_left[0], top_left[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5, color, 1, cv2.LINE_AA)

        cv2.imshow(file_name, imagec)
        key = cv2.waitKey(0)
        if key == ord('s'):
            os.makedirs('./detections', exist_ok=True)
            cv2.imwrite(os.path.join('./detections', file_name), imagec)
        cv2.destroyAllWindows()

    def _detect(self, outputs, image_size):
        """Post-process one image's raw outputs into final detections."""
        boxes, box_confidences, box_class_probs = self.process_outputs(
            outputs, image_size)
        filtered_boxes, box_classes, box_scores = self.filter_boxes(
            boxes, box_confidences, box_class_probs)
        return self.non_max_suppression(
            filtered_boxes, box_classes, box_scores)

    def predict(self, folder_path):
        """Detect and display objects for every image in a folder.

        Args:
            folder_path: directory holding the images.

        Returns:
            Tuple ``(predictions, image_paths)`` where ``predictions`` is a
            list of ``(boxes, box_classes, box_scores)``, one per image.

        Raises:
            FileNotFoundError: if ``folder_path`` does not exist.
        """
        loaded = self.load_images(folder_path)
        if loaded is None:
            raise FileNotFoundError(
                "Wrong folder path: {}".format(folder_path))
        images, image_paths = loaded
        if not images:
            # Nothing to feed to the model.
            return [], image_paths

        pimages, image_shapes = self.preprocess_images(images)
        outputs = self.model.predict(pimages)

        predictions = []
        for i, image in enumerate(images):
            per_image = [out[i] for out in outputs]
            boxes, box_classes, box_scores = self._detect(
                per_image, image_shapes[i])
            self.show_boxes(image, boxes, box_classes, box_scores,
                            os.path.basename(image_paths[i]))
            predictions.append((boxes, box_classes, box_scores))

        return predictions, image_paths

    def predict_frame(self, frame):
        """Run detection on a single frame without displaying it.

        Args:
            frame: image as a numpy array.

        Returns:
            Tuple ``(boxes, box_classes, box_scores)`` for the frame.
        """
        input_h, input_w = self._input_size()
        # cv2.resize takes the target size as (width, height).
        image = cv2.resize(frame, (input_w, input_h),
                           interpolation=cv2.INTER_CUBIC)
        image = np.expand_dims(image / 255, axis=0)

        outputs = self.model.predict(image)
        per_image = [out[0] for out in outputs]
        return self._detect(per_image, frame.shape[:2])