Spaces:
Build error
Build error
File size: 7,037 Bytes
6155c0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 8 16:18:28 2022
@author: ariellee
"""
# import argparse
from pathlib import Path
import cv2
import numpy as np
from imutils.video import FPS
import pandas as pd
import os
# def str2bool(v):
# """
# Converts string to bool type, enables command line
# arguments in the format of '--arg1 true --arg2 false'
# """
# if isinstance(v, bool):
# return v
# if v.lower() in ('yes', 'true', 't', 'y', '1'):
# return True
# elif v.lower() in ('no', 'false', 'f', 'n', '0'):
# return False
# else:
# raise argparse.ArgumentTypeError('Boolean value expected (true/false)')
# def get_args_parser():
# parser = argparse.ArgumentParser('Wheelock evaluation script for classroom object detection',
# add_help=False)
# parser.add_argument('--output_dir', default='', type=str,
# help='path to save the feature extraction results')
# parser.add_argument('--output_name', default='video_out', type=str, help='name of csv \
# file with object features and annotated video with object tracking \
# and bounding boxes')
# parser.add_argument('--video_path', default='short',
# type=str, help='path to input video, do not include file extension')
# parser.add_argument('--is_mp4', type=str2bool, default=False,
# help='must be an mp4 file')
# parser.add_argument('--save_csv', type=str2bool, default=True,
# help='if true, a csv file of extracted features will be saved in output_dir')
# parser.add_argument('--labels', default='coco.names', type=str,
# help='labels for classes model can detect')
# parser.add_argument('--weights', default='yolov3.weights', type=str,
# help='weights for pretrained yolo model')
# parser.add_argument('--cfg', default='yolov3.cfg', type=str,
# help='model configuration parameters')
# return parser
def video_object_extraction(video_path, frames):
    """
    Detect objects in a video with YOLOv3 and collect per-detection features.

    Uses the darknet YOLOv3 model (pjreddie) through OpenCV's dnn module. The
    files 'yolov3.cfg', 'yolov3.weights' and 'coco.names' are read from the
    current working directory.

    Args:
        video_path: path of the input video, handed to cv2.VideoCapture.
        frames: sampling interval. Detection runs on frame 0 and on every
            frame whose index is a multiple of this value. Must be positive.

    Returns:
        A tuple (wp, df):
        wp: path of the annotated output video ('object_detection.mp4'),
            written with the source resolution and frame rate.
        df: pandas DataFrame with one row per detection kept after non-max
            suppression and the columns
            frame, x_left, y_top, x_right, y_bottom, label, confidence.

    Raises:
        ValueError: if frames is not positive.
    """
    # Reject a non-positive interval up front; 0 would otherwise surface as a
    # ZeroDivisionError in the middle of the video.
    if frames <= 0:
        raise ValueError('frames must be a positive number, got {!r}'.format(frames))

    conf_threshold = 0.4  # minimum class score for a detection to be kept
    nms_threshold = 0.4   # overlap threshold passed to non-max suppression

    print('Reading from video {}...'.format(video_path))
    cap = cv2.VideoCapture(video_path)

    # get total number of frames in the video
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)

    # get height and width of video
    H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

    fps = FPS().start()

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    wp = 'object_detection.mp4'
    g_fps = int(cap.get(cv2.CAP_PROP_FPS))
    # frame size is given in (cols, rows) format
    writer = cv2.VideoWriter(wp, fourcc, g_fps, (W, H))

    try:
        with open('coco.names') as label_file:
            labels = label_file.read().strip().split('\n')
        bbox_colors = np.random.randint(0, 255, size=(len(labels), 3), dtype='uint8')

        yolo = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
        layer_names = yolo.getLayerNames()
        # getUnconnectedOutLayers() returns 1-based indices; flatten so both
        # the 1-D and the older (N, 1) return shapes are handled.
        out_ids = np.asarray(yolo.getUnconnectedOutLayers()).flatten()
        layers = [layer_names[i - 1] for i in out_ids]

        count = 0
        stat_list = []
        while count < total_frames:
            ok, frame = cap.read()
            if not ok:
                # The container's frame count can overstate how many frames
                # actually decode; stop instead of processing a None frame.
                break

            if count % frames == 0:
                blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True)
                yolo.setInput(blob)
                layer_outputs = yolo.forward(layers)

                boxes = []
                confidences = []
                classes = []

                # loop over layer outputs and objects detected
                for output in layer_outputs:
                    for obj in output:
                        # extract class and detection likelihood of current object
                        scores = obj[5:]
                        obj_class = np.argmax(scores)
                        confidence = scores[obj_class]

                        # get rid of bad predictions
                        if confidence > conf_threshold:
                            # scale bbox coordinates relative to frame size
                            box = obj[0:4] * np.array([W, H, W, H])
                            centerX, centerY, width, height = box.astype('int')

                            # final coordinates (top-left corner)
                            x = int(centerX - (width / 2))
                            y = int(centerY - (height / 2))

                            # update list of bbox coordinates, confidences, classes
                            boxes.append([x, y, int(width), int(height)])
                            confidences.append(float(confidence))
                            classes.append(obj_class)

                # non-max suppression for overlapping bounding boxes
                idxs = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

                # NMSBoxes yields an empty tuple when nothing was detected, so
                # normalise to an integer array before flattening.
                for i in np.asarray(idxs, dtype=int).flatten():
                    # extract coordinates
                    (x, y) = (boxes[i][0], boxes[i][1])
                    (w, h) = (boxes[i][2], boxes[i][3])

                    # set up + add bboxes to frame
                    color = [int(c) for c in bbox_colors[classes[i]]]
                    cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                    text = "{}: {:.4f}".format(labels[classes[i]], confidences[i])
                    (text_width, text_height), _ = cv2.getTextSize(
                        text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
                    cv2.rectangle(frame, (x, y - text_height), (x + text_width, y),
                                  color, cv2.FILLED)
                    cv2.putText(frame, text, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (38, 38, 38), 2)

                    # each row: frame number / x_left / y_top / x_right / y_bottom /
                    # label / confidence. Corner coordinates are stored so the
                    # values match the column names below.
                    stat_list.append([count, x, y, x + w, y + h,
                                      labels[classes[i]], confidences[i]])

            # NOTE(review): every frame is written, annotated or not, so the
            # output keeps the source duration at g_fps -- confirm that this is
            # the intended placement relative to the sampling check above.
            writer.write(frame)
            fps.update()
            count += 1

        df = pd.DataFrame(stat_list, columns=['frame', 'x_left', 'y_top', 'x_right',
                                              'y_bottom', 'label', 'confidence'])
        fps.stop()
        print('Time elapsed (seconds): {:.2f}'.format(fps.elapsed()))
    finally:
        # release the writer and the capture even if detection fails midway
        writer.release()
        cap.release()

    return wp, df
# if __name__ == '__main__':
# parser = argparse.ArgumentParser('Wheelock evaluation script for classroom object detection', parents=[get_args_parser()])
# args = parser.parse_args()
# if not args.is_mp4:
# print('Video must be an mp4 file.')
# else:
# if args.output_dir:
# Path(args.output_dir).mkdir(parents=True, exist_ok=True)
# main(args) |