#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 8 16:18:28 2022
@author: ariellee
"""
# import argparse
from pathlib import Path
import cv2
import numpy as np
from imutils.video import FPS
import pandas as pd
import os
# def str2bool(v):
#     """
#     Converts string to bool type, enables command line
#     arguments in the format of '--arg1 true --arg2 false'
#     """
#     if isinstance(v, bool):
#         return v
#     if v.lower() in ('yes', 'true', 't', 'y', '1'):
#         return True
#     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
#         return False
#     else:
#         raise argparse.ArgumentTypeError('Boolean value expected (true/false)')


# def get_args_parser():
#     parser = argparse.ArgumentParser('Wheelock evaluation script for classroom object detection',
#                                      add_help=False)
#     parser.add_argument('--output_dir', default='', type=str,
#                         help='path to save the feature extraction results')
#     parser.add_argument('--output_name', default='video_out', type=str,
#                         help='name of csv file with object features and annotated video '
#                              'with object tracking and bounding boxes')
#     parser.add_argument('--video_path', default='short', type=str,
#                         help='path to input video, do not include file extension')
#     parser.add_argument('--is_mp4', type=str2bool, default=False,
#                         help='must be an mp4 file')
#     parser.add_argument('--save_csv', type=str2bool, default=True,
#                         help='if true, a csv file of extracted features will be saved in output_dir')
#     parser.add_argument('--labels', default='coco.names', type=str,
#                         help='labels for classes model can detect')
#     parser.add_argument('--weights', default='yolov3.weights', type=str,
#                         help='weights for pretrained yolo model')
#     parser.add_argument('--cfg', default='yolov3.cfg', type=str,
#                         help='model configuration parameters')
#     return parser
def video_object_extraction(video_path, frames):
    '''
    Object detection and feature extraction with yolov3
    Uses darknet repo by pjreddie

    video_path: path to the input mp4 file
    frames: sampling interval -- detection runs on every `frames`-th frame

    Returns: (1) path to an mp4 video with object bounding boxes drawn on
                 the sampled frames
             (2) DataFrame of extracted object features
                 columns: frame, x_left, y_top, x_right, y_bottom, label, confidence
    '''
    # video_path = args.video_path + '.mp4'
    print('Reading from video {}...'.format(video_path))
    cap = cv2.VideoCapture(video_path)

    # get total number of frames in the video
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # get height and width of video
    H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

    fps = FPS().start()
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')

    # write the annotated video at the source frame rate; the frame size
    # is given in (cols, rows) format, i.e. (W, H)
    # root = os.path.join(args.output_dir, args.output_name)
    wp = 'object_detection.mp4'
    g_fps = int(cap.get(cv2.CAP_PROP_FPS))
    writer = cv2.VideoWriter(wp, fourcc, g_fps, (W, H))

    # labels = open(args.labels).read().strip().split('\n')
    with open('coco.names') as f:
        labels = f.read().strip().split('\n')
    bbox_colors = np.random.randint(0, 255, size=(len(labels), 3), dtype='uint8')

    yolo = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
    out_layers = yolo.getLayerNames()
    layers = [out_layers[i - 1] for i in yolo.getUnconnectedOutLayers()]
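    # note: getUnconnectedOutLayers() returns 1-based layer indices (hence
    # the i - 1); iterating over them directly assumes OpenCV >= 4.5.4,
    # where the call returns a flat array (older versions wrap each index
    # in a single-element array, i.e. out_layers[i[0] - 1])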
    count = 0
    stat_list = []

    while count < total_frames:
        ret, frame = cap.read()
        if not ret:
            # CAP_PROP_FRAME_COUNT can overestimate; stop when reads fail
            break
        # run detection on every `frames`-th frame (0, frames, 2*frames, ...)
        if count % frames == 0:
            # scale pixels to [0, 1] and resize to the 416x416 input
            # expected by this yolov3 config
            blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True)
            yolo.setInput(blob)
            layer_outputs = yolo.forward(layers)
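            # each detection row produced by yolov3 has the layout
            # [center_x, center_y, width, height, objectness, class scores...],
            # with box coordinates normalized to [0, 1]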
            boxes = []
            confidences = []
            classes = []

            # loop over layer outputs and objects detected
            for output in layer_outputs:
                for obj in output:
                    # extract class and detection likelihood of current object
                    scores = obj[5:]
                    obj_class = np.argmax(scores)
                    confidence = scores[obj_class]

                    # get rid of bad predictions
                    if confidence > 0.4:
                        # scale bbox coordinates relative to frame size
                        box = obj[0:4] * np.array([W, H, W, H])
                        centerX, centerY, width, height = box.astype('int')

                        # final coordinates of the top-left corner
                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))

                        # update list of bbox coordinates, confidences, classes
                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        classes.append(obj_class)
            # non-max suppression for overlapping bounding boxes
            # (0.4 = score threshold, 0.4 = IoU threshold)
            idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.4, 0.4)
            # NMSBoxes returns an empty tuple when nothing survives
            if len(idxs) > 0:
                for i in np.array(idxs).flatten():
                    # extract coordinates
                    (x, y) = (boxes[i][0], boxes[i][1])
                    (w, h) = (boxes[i][2], boxes[i][3])

                    # set up + add bboxes to frame
                    color = [int(c) for c in bbox_colors[classes[i]]]
                    cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                    text = "{}: {:.4f}".format(labels[classes[i]], confidences[i])
                    (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
                    cv2.rectangle(frame, (x, y - text_height), (x + text_width, y), color, cv2.FILLED)
                    cv2.putText(frame, text, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 38, 38), 2)

                    # each csv row is:
                    # frame number / x_left / y_top / x_right / y_bottom / label / confidence
                    stat_list.append([count, x, y, x + w, y + h,
                                      labels[classes[i]], confidences[i]])

        writer.write(frame)
        fps.update()
        count += 1
    df = pd.DataFrame(stat_list, columns=['frame', 'x_left', 'y_top', 'x_right',
                                          'y_bottom', 'label', 'confidence'])
    fps.stop()
    print('Time elapsed (seconds): {:.2f}'.format(fps.elapsed()))
    writer.release()
    cap.release()

    return wp, df
# if __name__ == '__main__':
#     parser = argparse.ArgumentParser('Wheelock evaluation script for classroom object detection',
#                                      parents=[get_args_parser()])
#     args = parser.parse_args()
#     if not args.is_mp4:
#         print('Video must be an mp4 file.')
#     else:
#         if args.output_dir:
#             Path(args.output_dir).mkdir(parents=True, exist_ok=True)
#         main(args)
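

if __name__ == '__main__':
    # Minimal usage sketch (not the original argparse entry point above):
    # the input file name and sampling interval here are illustrative
    # assumptions; coco.names, yolov3.cfg and yolov3.weights must sit in
    # the working directory.
    out_video, detections = video_object_extraction('short.mp4', frames=30)
    detections.to_csv('video_out.csv', index=False)
    print('Annotated video saved to {}'.format(out_video))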