#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 8 16:18:28 2022
@author: ariellee
"""
# import argparse
from pathlib import Path
import cv2
import numpy as np
from imutils.video import FPS
import pandas as pd
import os
# def str2bool(v):
#     """
#     Converts string to bool type, enables command line
#     arguments in the format of '--arg1 true --arg2 false'
#     """
#     if isinstance(v, bool):
#         return v
#     if v.lower() in ('yes', 'true', 't', 'y', '1'):
#         return True
#     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
#         return False
#     else:
#         raise argparse.ArgumentTypeError('Boolean value expected (true/false)')


# def get_args_parser():
#     parser = argparse.ArgumentParser('Wheelock evaluation script for classroom object detection',
#                                      add_help=False)
#     parser.add_argument('--output_dir', default='', type=str,
#                         help='path to save the feature extraction results')
#     parser.add_argument('--output_name', default='video_out', type=str,
#                         help='name of the csv file with object features and of the annotated '
#                              'video with object tracking and bounding boxes')
#     parser.add_argument('--video_path', default='short', type=str,
#                         help='path to input video, do not include file extension')
#     parser.add_argument('--is_mp4', type=str2bool, default=False,
#                         help='must be an mp4 file')
#     parser.add_argument('--save_csv', type=str2bool, default=True,
#                         help='if true, a csv file of extracted features will be saved in output_dir')
#     parser.add_argument('--labels', default='coco.names', type=str,
#                         help='labels for classes model can detect')
#     parser.add_argument('--weights', default='yolov3.weights', type=str,
#                         help='weights for pretrained yolo model')
#     parser.add_argument('--cfg', default='yolov3.cfg', type=str,
#                         help='model configuration parameters')
#     return parser
def video_object_extraction(video_path, frames):
    '''
    Object detection and feature extraction with YOLOv3
    (pretrained Darknet weights and config from pjreddie).

    video_path: path to the input mp4 file
    frames: detection stride -- the detector runs on every `frames`-th frame

    Returns: (1) path to an mp4 video with object bounding boxes drawn
             (2) DataFrame of extracted object features with columns
                 frame, x_left, y_top, x_right, y_bottom, label, confidence
    '''
    # video_path = args.video_path + '.mp4'
    print('Reading from video {}...'.format(video_path))
    cap = cv2.VideoCapture(video_path)

    # total number of frames in the video (cap.get returns a float)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # height and width of the video frames
    H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = FPS().start()
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')

    # root = os.path.join(args.output_dir, args.output_name)
    wp = 'object_detection.mp4'
    g_fps = int(cap.get(cv2.CAP_PROP_FPS))
    # VideoWriter expects the frame size as (width, height), i.e. (cols, rows)
    writer = cv2.VideoWriter(wp, fourcc, g_fps, (W, H))
    # labels = open(args.labels).read().strip().split('\n')
    with open('coco.names') as f:
        labels = f.read().strip().split('\n')
    bbox_colors = np.random.randint(0, 255, size=(len(labels), 3), dtype='uint8')

    yolo = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
    out_layers = yolo.getLayerNames()
    layers = [out_layers[i - 1] for i in yolo.getUnconnectedOutLayers()]
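    # getUnconnectedOutLayers returns the (1-based) indices of the three
    # YOLO detection heads ('yolo_82', 'yolo_94', 'yolo_106' in YOLOv3),
    # hence the i - 1 when indexing into the layer-name list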
    count = 0
    stat_list = []
    while count < total_frames:
        ret, frame = cap.read()
        if not ret:
            # stop early if the stream ends before the reported frame count
            break
        # run the detector only on every `frames`-th frame
        if count % frames == 0:
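            # blobFromImage scales pixel values to [0, 1], resizes the frame
            # to the 416x416 input of this YOLOv3 config, and swaps
            # BGR -> RGB (swapRB=True), since OpenCV decodes frames as BGR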
            blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True)
            yolo.setInput(blob)
            layer_outputs = yolo.forward(layers)

            boxes = []
            confidences = []
            classes = []
            # loop over layer outputs and objects detected; each detection is
            # [center_x, center_y, width, height, objectness, class scores...]
            for output in layer_outputs:
                for obj in output:
                    # extract class and detection likelihood of current object
                    scores = obj[5:]
                    obj_class = np.argmax(scores)
                    confidence = scores[obj_class]
                    # discard weak predictions
                    if confidence > 0.4:
                        # scale bbox coordinates relative to frame size
                        box = obj[0:4] * np.array([W, H, W, H])
                        centerX, centerY, width, height = box.astype('int')
                        # final coordinates of the top-left corner
                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))
                        # update lists of bbox coordinates, confidences, classes
                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        classes.append(obj_class)
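            # YOLOv3 predicts at three scales, so the same object is often
            # detected more than once; NMS collapses those duplicates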
            # non-max suppression for overlapping bounding boxes
            # (0.4 score threshold, 0.4 IoU threshold)
            idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.4, 0.4)
            if len(idxs) > 0:
                for i in np.array(idxs).flatten():
                    # extract coordinates
                    (x, y) = (boxes[i][0], boxes[i][1])
                    (w, h) = (boxes[i][2], boxes[i][3])
                    # draw the bbox and a filled label banner on the frame
                    color = [int(c) for c in bbox_colors[classes[i]]]
                    cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                    text = "{}: {:.4f}".format(labels[classes[i]], confidences[i])
                    (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
                    cv2.rectangle(frame, (x, y - text_height), (x + text_width, y), color, cv2.FILLED)
                    cv2.putText(frame, text, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 38, 38), 2)
                    # csv row format: frame / x_left / y_top / x_right / y_bottom / label / confidence
                    stat_list.append([count, x, y, x + w, y + h, labels[classes[i]], confidences[i]])
        writer.write(frame)
        fps.update()
        count += 1
    df = pd.DataFrame(stat_list, columns=['frame', 'x_left', 'y_top', 'x_right',
                                          'y_bottom', 'label', 'confidence'])
    fps.stop()
    print('Time elapsed (seconds): {:.2f}'.format(fps.elapsed()))

    writer.release()
    cap.release()
    return wp, df
# if __name__ == '__main__':
#     parser = argparse.ArgumentParser('Wheelock evaluation script for classroom object detection',
#                                      parents=[get_args_parser()])
#     args = parser.parse_args()
#     if not args.is_mp4:
#         print('Video must be an mp4 file.')
#     else:
#         if args.output_dir:
#             Path(args.output_dir).mkdir(parents=True, exist_ok=True)
#         main(args)
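
# Example usage (a minimal sketch -- the file name 'classroom.mp4' and the
# detection stride of 30 are illustrative values, not part of this script):
#
#     video_out, features = video_object_extraction('classroom.mp4', frames=30)
#     features.to_csv('classroom_features.csv', index=False)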