#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 8 16:18:28 2022

@author: ariellee
"""
# import argparse
from pathlib import Path
import cv2
import numpy as np
from imutils.video import FPS
import pandas as pd
import os


# def str2bool(v):
#     """
#     Converts a string to bool, enabling command line
#     arguments in the format '--arg1 true --arg2 false'.
#     """
#     if isinstance(v, bool):
#         return v
#     if v.lower() in ('yes', 'true', 't', 'y', '1'):
#         return True
#     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
#         return False
#     else:
#         raise argparse.ArgumentTypeError('Boolean value expected (true/false)')


# def get_args_parser():
#     parser = argparse.ArgumentParser('Wheelock evaluation script for classroom object detection',
#                                      add_help=False)
#     parser.add_argument('--output_dir', default='', type=str,
#                         help='path to save the feature extraction results')
#     parser.add_argument('--output_name', default='video_out', type=str,
#                         help='name of the csv file with object features and of the annotated '
#                              'video with object tracking and bounding boxes')
#     parser.add_argument('--video_path', default='short', type=str,
#                         help='path to input video, without the file extension')
#     parser.add_argument('--is_mp4', type=str2bool, default=False,
#                         help='must be an mp4 file')
#     parser.add_argument('--save_csv', type=str2bool, default=True,
#                         help='if true, a csv file of extracted features is saved in output_dir')
#     parser.add_argument('--labels', default='coco.names', type=str,
#                         help='labels for the classes the model can detect')
#     parser.add_argument('--weights', default='yolov3.weights', type=str,
#                         help='weights for the pretrained yolo model')
#     parser.add_argument('--cfg', default='yolov3.cfg', type=str,
#                         help='model configuration parameters')
#     return parser


def video_object_extraction(video_path, frames):
    '''
    Object detection and feature extraction with YOLOv3,
    using the darknet model files by pjreddie.

    Runs detection once every `frames` frames of the input video.

    Returns:
        (1) path to an mp4 video with object bounding boxes drawn on it
        (2) DataFrame of extracted object features with columns:
            frame, x_left, y_top, x_right, y_bottom, label, confidence
    '''
    # video_path = args.video_path + '.mp4'
    print('Reading from video {}...'.format(video_path))
    cap = cv2.VideoCapture(video_path)

    # get total number of frames in the video
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)

    # get height and width of video
    H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

    fps = FPS().start()
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')

    # VideoWriter expects the frame size as (width, height), i.e. (cols, rows)
    # root = os.path.join(args.output_dir, args.output_name)
    wp = 'object_detection.mp4'
    g_fps = int(cap.get(cv2.CAP_PROP_FPS))
    writer = cv2.VideoWriter(wp, fourcc, g_fps, (W, H))

    # labels = open(args.labels).read().strip().split('\n')
    labels = open('coco.names').read().strip().split('\n')
    bbox_colors = np.random.randint(0, 255, size=(len(labels), 3), dtype='uint8')

    yolo = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
    out_layers = yolo.getLayerNames()
    # getUnconnectedOutLayers() returns 1-based indices into the layer list
    layers = [out_layers[i - 1] for i in yolo.getUnconnectedOutLayers()]

    count = 0
    stat_list = []

    while count < total_frames:
        ret, frame = cap.read()
        if not ret:
            break

        # run detection on every `frames`-th frame (this also covers count == 0)
        if count % frames == 0:
            blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True)
            yolo.setInput(blob)
            layer_outputs = yolo.forward(layers)

            boxes = []
            confidences = []
            classes = []

            # loop over layer outputs and objects detected
            for output in layer_outputs:
                for obj in output:
                    # extract class and detection likelihood of current object
                    scores = obj[5:]
                    obj_class = np.argmax(scores)
                    confidence = scores[obj_class]
                    # get rid of bad predictions
                    if confidence > 0.4:
                        # scale bbox coordinates relative to frame size
                        box = obj[0:4] * np.array([W, H, W, H])
                        centerX, centerY, width, height = box.astype('int')

                        # final top-left coordinates
                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))

                        # update lists of bbox coordinates, confidences, classes
                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        classes.append(obj_class)

            # non-max suppression for overlapping bounding boxes;
            # NMSBoxes returns an empty tuple when nothing survives, so guard
            # before flattening
            idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.4, 0.4)
            if len(idxs) > 0:
                for i in np.array(idxs).flatten():
                    # extract coordinates
                    (x, y) = (boxes[i][0], boxes[i][1])
                    (w, h) = (boxes[i][2], boxes[i][3])

                    # set up + add bboxes to frame
                    color = [int(c) for c in bbox_colors[classes[i]]]
                    cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                    text = '{}: {:.4f}'.format(labels[classes[i]], confidences[i])
                    (text_width, text_height), _ = cv2.getTextSize(
                        text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
                    cv2.rectangle(frame, (x, y - text_height), (x + text_width, y),
                                  color, cv2.FILLED)
                    cv2.putText(frame, text, (x, y), cv2.FONT_HERSHEY_SIMPLEX,
                                0.5, (38, 38, 38), 2)

                    # each csv row is: frame / x_left / y_top / x_right /
                    # y_bottom / label / confidence (box corners, not
                    # width/height, to match the column names below)
                    stat_list.append([count, x, y, x + w, y + h,
                                      labels[classes[i]], confidences[i]])

        writer.write(frame)
        fps.update()
        count += 1

    df = pd.DataFrame(stat_list, columns=['frame', 'x_left', 'y_top', 'x_right',
                                          'y_bottom', 'label', 'confidence'])
    fps.stop()
    print('Time elapsed (seconds): {:.2f}'.format(fps.elapsed()))
    writer.release()
    cap.release()
    return wp, df


# if __name__ == '__main__':
#     parser = argparse.ArgumentParser('Wheelock evaluation script for classroom object detection',
#                                      parents=[get_args_parser()])
#     args = parser.parse_args()
#     if not args.is_mp4:
#         print('Video must be an mp4 file.')
#     else:
#         if args.output_dir:
#             Path(args.output_dir).mkdir(parents=True, exist_ok=True)
#         main(args)  # NOTE: no main() is defined here; see the usage sketch below
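

# Minimal usage sketch (an assumption, not part of the original entry point):
# drives video_object_extraction directly while the argparse CLI above stays
# commented out. 'short.mp4' (derived from the commented --video_path default)
# and the detection stride of 30 frames are hypothetical placeholders;
# coco.names, yolov3.cfg, and yolov3.weights must sit in the working
# directory, as the function assumes.
if __name__ == '__main__':
    out_video, features = video_object_extraction('short.mp4', frames=30)
    features.to_csv('video_out.csv', index=False)  # save extracted features
    print('Annotated video written to {}'.format(out_video))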