#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 8 16:18:28 2022
@author: ariellee
"""
# import argparse
from pathlib import Path
import cv2
import numpy as np
from imutils.video import FPS
import pandas as pd
import os
# def str2bool(v):
#     """
#     Converts string to bool type, enables command line
#     arguments in the format of '--arg1 true --arg2 false'
#     """
#     if isinstance(v, bool):
#         return v
#     if v.lower() in ('yes', 'true', 't', 'y', '1'):
#         return True
#     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
#         return False
#     else:
#         raise argparse.ArgumentTypeError('Boolean value expected (true/false)')


# def get_args_parser():
#     parser = argparse.ArgumentParser('Wheelock evaluation script for classroom object detection',
#                                      add_help=False)
#     parser.add_argument('--output_dir', default='', type=str,
#                         help='path to save the feature extraction results')
#     parser.add_argument('--output_name', default='video_out', type=str,
#                         help='name of csv file with object features and annotated video '
#                              'with object tracking and bounding boxes')
#     parser.add_argument('--video_path', default='short', type=str,
#                         help='path to input video, do not include file extension')
#     parser.add_argument('--is_mp4', type=str2bool, default=False,
#                         help='must be an mp4 file')
#     parser.add_argument('--save_csv', type=str2bool, default=True,
#                         help='if true, a csv file of extracted features will be saved in output_dir')
#     parser.add_argument('--labels', default='coco.names', type=str,
#                         help='labels for classes model can detect')
#     parser.add_argument('--weights', default='yolov3.weights', type=str,
#                         help='weights for pretrained yolo model')
#     parser.add_argument('--cfg', default='yolov3.cfg', type=str,
#                         help='model configuration parameters')
#     return parser
def video_object_extraction(video_path, frames):
    '''
    Object detection and feature extraction with yolov3
    Uses darknet repo by pjreddie

    video_path: path to the input mp4 file
    frames: sampling interval -- detection runs on every `frames`-th frame

    Returns: (1) path to an mp4 video with object bounding boxes drawn on
                 the sampled frames
             (2) DataFrame of extracted object features
                 columns: frame, x_left, y_top, x_right, y_bottom, label, confidence
    '''
    # video_path = args.video_path + '.mp4'
    print('Reading from video {}...'.format(video_path))
    cap = cv2.VideoCapture(video_path)

    # get total number of frames in the video
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # get height and width of video
    H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

    fps = FPS().start()
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')

    # write the annotated video at the source frame rate; the frame size
    # is given in (cols, rows) format, i.e. (W, H)
    # root = os.path.join(args.output_dir, args.output_name)
    wp = 'object_detection.mp4'
    g_fps = int(cap.get(cv2.CAP_PROP_FPS))
    writer = cv2.VideoWriter(wp, fourcc, g_fps, (W, H))

    # labels = open(args.labels).read().strip().split('\n')
    with open('coco.names') as f:
        labels = f.read().strip().split('\n')
    bbox_colors = np.random.randint(0, 255, size=(len(labels), 3), dtype='uint8')

    yolo = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
    out_layers = yolo.getLayerNames()
    layers = [out_layers[i - 1] for i in yolo.getUnconnectedOutLayers()]
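    # note: getUnconnectedOutLayers() returns 1-based layer indices (hence
    # the i - 1); iterating over them directly assumes OpenCV >= 4.5.4,
    # where the call returns a flat array (older versions wrap each index
    # in a single-element array, i.e. out_layers[i[0] - 1])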
    count = 0
    stat_list = []

    while count < total_frames:
        ret, frame = cap.read()
        if not ret:
            # CAP_PROP_FRAME_COUNT can overestimate; stop when reads fail
            break
        # run detection on every `frames`-th frame (0, frames, 2*frames, ...)
        if count % frames == 0:
            # scale pixels to [0, 1] and resize to the 416x416 input
            # expected by this yolov3 config
            blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True)
            yolo.setInput(blob)
            layer_outputs = yolo.forward(layers)
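            # each detection row produced by yolov3 has the layout
            # [center_x, center_y, width, height, objectness, class scores...],
            # with box coordinates normalized to [0, 1]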
            boxes = []
            confidences = []
            classes = []

            # loop over layer outputs and objects detected
            for output in layer_outputs:
                for obj in output:
                    # extract class and detection likelihood of current object
                    scores = obj[5:]
                    obj_class = np.argmax(scores)
                    confidence = scores[obj_class]

                    # get rid of bad predictions
                    if confidence > 0.4:
                        # scale bbox coordinates relative to frame size
                        box = obj[0:4] * np.array([W, H, W, H])
                        centerX, centerY, width, height = box.astype('int')

                        # final coordinates of the top-left corner
                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))

                        # update list of bbox coordinates, confidences, classes
                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        classes.append(obj_class)
            # non-max suppression for overlapping bounding boxes
            # (0.4 = score threshold, 0.4 = IoU threshold)
            idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.4, 0.4)
            # NMSBoxes returns an empty tuple when nothing survives
            if len(idxs) > 0:
                for i in np.array(idxs).flatten():
                    # extract coordinates
                    (x, y) = (boxes[i][0], boxes[i][1])
                    (w, h) = (boxes[i][2], boxes[i][3])

                    # set up + add bboxes to frame
                    color = [int(c) for c in bbox_colors[classes[i]]]
                    cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                    text = "{}: {:.4f}".format(labels[classes[i]], confidences[i])
                    (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
                    cv2.rectangle(frame, (x, y - text_height), (x + text_width, y), color, cv2.FILLED)
                    cv2.putText(frame, text, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 38, 38), 2)

                    # each csv row is:
                    # frame number / x_left / y_top / x_right / y_bottom / label / confidence
                    stat_list.append([count, x, y, x + w, y + h,
                                      labels[classes[i]], confidences[i]])

        writer.write(frame)
        fps.update()
        count += 1
    df = pd.DataFrame(stat_list, columns=['frame', 'x_left', 'y_top', 'x_right',
                                          'y_bottom', 'label', 'confidence'])
    fps.stop()
    print('Time elapsed (seconds): {:.2f}'.format(fps.elapsed()))
    writer.release()
    cap.release()

    return wp, df
# if __name__ == '__main__':
#     parser = argparse.ArgumentParser('Wheelock evaluation script for classroom object detection',
#                                      parents=[get_args_parser()])
#     args = parser.parse_args()
#     if not args.is_mp4:
#         print('Video must be an mp4 file.')
#     else:
#         if args.output_dir:
#             Path(args.output_dir).mkdir(parents=True, exist_ok=True)
#         main(args)
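

if __name__ == '__main__':
    # Minimal usage sketch (not the original argparse entry point above):
    # the input file name and sampling interval here are illustrative
    # assumptions; coco.names, yolov3.cfg and yolov3.weights must sit in
    # the working directory.
    out_video, detections = video_object_extraction('short.mp4', frames=30)
    detections.to_csv('video_out.csv', index=False)
    print('Annotated video saved to {}'.format(out_video))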