Spaces:

spark-ds549
/

fal2022-videoanalysis-v2

Build error

fal2022-videoanalysis-v2

File size: 7,037 Bytes

6155c0e

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 8 16:18:28 2022

@author: ariellee
"""

# import argparse
from pathlib import Path
import cv2 
import numpy as np
from imutils.video import FPS
import pandas as pd
import os


# def str2bool(v):
#     """
#     Converts string to bool type, enables command line 
#     arguments in the format of '--arg1 true --arg2 false'
#     """
#     if isinstance(v, bool):
#         return v
#     if v.lower() in ('yes', 'true', 't', 'y', '1'):
#         return True
#     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
#         return False
#     else:
#         raise argparse.ArgumentTypeError('Boolean value expected (true/false)')
        

# def get_args_parser():
#     parser = argparse.ArgumentParser('Wheelock evaluation script for classroom object detection', 
#                                      add_help=False)
    
#     parser.add_argument('--output_dir', default='', type=str,
#                         help='path to save the feature extraction results')
    
#     parser.add_argument('--output_name', default='video_out', type=str, help='name of csv \
#                         file with object features and annotated video with object tracking \
#                             and bounding boxes')
    
#     parser.add_argument('--video_path', default='short', 
#                         type=str, help='path to input video, do not include file extension')
    
#     parser.add_argument('--is_mp4', type=str2bool, default=False,
#                         help='must be an mp4 file')
    
#     parser.add_argument('--save_csv', type=str2bool, default=True,
#                         help='if true, a csv file of extracted features will be saved in output_dir')
    
#     parser.add_argument('--labels', default='coco.names', type=str,
#                         help='labels for classes model can detect')
    
#     parser.add_argument('--weights', default='yolov3.weights', type=str,
#                         help='weights for pretrained yolo model')
    
#     parser.add_argument('--cfg', default='yolov3.cfg', type=str,
#                         help='model configuration parameters')
#     return parser


# Detection hyper-parameters (values unchanged from the original inline literals).
_CONF_THRESHOLD = 0.4   # minimum class score for a raw detection to be kept
_NMS_THRESHOLD = 0.4    # overlap threshold passed to non-max suppression
_YOLO_INPUT_SIZE = (416, 416)


def _load_labels(path='coco.names'):
    '''Return the class labels stored one per line in *path*.'''
    # Context manager so the file handle is closed deterministically.
    with open(path) as label_file:
        return label_file.read().strip().split('\n')


def _detect_objects(yolo, layers, frame, W, H):
    '''
    Run one YOLO forward pass on *frame*.

    Returns: (boxes, confidences, classes, keep) where boxes are
             [x, y, w, h] lists in pixels, and keep is the list of indices
             that survive non-max suppression (possibly empty).
    '''
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, _YOLO_INPUT_SIZE, swapRB=True)
    yolo.setInput(blob)

    boxes = []
    confidences = []
    classes = []
    scale = np.array([W, H, W, H])

    for output in yolo.forward(layers):
        for obj in output:
            # entries 5.. are the per-class scores of this candidate
            scores = obj[5:]
            obj_class = int(np.argmax(scores))
            confidence = float(scores[obj_class])

            # get rid of bad predictions
            if confidence <= _CONF_THRESHOLD:
                continue

            # scale bbox (center x, center y, width, height) to frame size
            centerX, centerY, width, height = (obj[0:4] * scale).astype('int')

            # top-left corner of the box
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))

            boxes.append([x, y, int(width), int(height)])
            confidences.append(confidence)
            classes.append(obj_class)

    # non-max suppression for overlapping bounding boxes
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, _CONF_THRESHOLD, _NMS_THRESHOLD)
    # NMSBoxes yields an empty tuple when nothing survives and an Nx1 or 1-D
    # array otherwise (depends on the OpenCV version); normalise to a flat list.
    keep = np.asarray(idxs, dtype=int).flatten().tolist()
    return boxes, confidences, classes, keep


def _draw_detection(frame, box, color, text):
    '''Draw one bounding box and its caption onto *frame* in place.'''
    x, y, w, h = box
    cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
    (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
    cv2.rectangle(frame, (x, y - text_height), (x + text_width, y), color, cv2.FILLED)
    cv2.putText(frame, text, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 38, 38), 2)


def video_object_extraction(video_path, frames):
    '''
    Object detection and feature extraction with yolov3
    Uses darknet repo by pjreddie

    Expects 'coco.names', 'yolov3.cfg' and 'yolov3.weights' in the current
    working directory and writes the annotated video to 'object_detection.mp4'.

    Args:
        video_path: path of the input video, passed to cv2.VideoCapture.
        frames: detection stride; the network runs on frame 0 and on every
                frame whose index is a multiple of `frames`. Frames in
                between reuse (draw and record) the most recent detections.
                Must be > 0.

    Returns: (1) path of the written mp4 video with object bounding boxes
             (2) pandas DataFrame of extracted object features
                 columns: frame, x_left, y_top, x_right, y_bottom, label, confidence

    Raises:
        ValueError: if `frames` is not positive.
        OSError: if the input video cannot be opened.
    '''
    # Validate first: a zero stride would otherwise surface as a
    # ZeroDivisionError deep inside the frame loop.
    if frames <= 0:
        raise ValueError('frames must be a positive number, got {!r}'.format(frames))

    print('Reading from video {}...'.format(video_path))
    cap = cv2.VideoCapture(video_path)
    writer = None
    fps = FPS().start()
    wp = 'object_detection.mp4'
    stat_list = []

    try:
        if not cap.isOpened():
            raise OSError('Could not open video {}'.format(video_path))

        # get height and width of video
        H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        # Keep the fractional source rate (e.g. 29.97); truncating to int
        # changes the duration of the written video. Frame size is (cols, rows).
        writer = cv2.VideoWriter(wp, fourcc, cap.get(cv2.CAP_PROP_FPS), (W, H))

        labels = _load_labels()
        bbox_colors = np.random.randint(0, 255, size=(len(labels), 3), dtype='uint8')

        yolo = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
        layer_names = yolo.getLayerNames()
        # getUnconnectedOutLayers() is Nx1 on older OpenCV builds and 1-D on
        # newer ones; flatten so the 1-based ids index correctly on both.
        out_ids = np.asarray(yolo.getUnconnectedOutLayers()).flatten()
        layers = [layer_names[i - 1] for i in out_ids]

        boxes, confidences, classes, keep = [], [], [], []
        count = 0

        # Read until the decoder reports no more frames; the container's
        # frame-count metadata is only an estimate and may over/under-report.
        while True:
            ok, frame = cap.read()
            if not ok:
                break

            if count % frames == 0:
                boxes, confidences, classes, keep = _detect_objects(yolo, layers, frame, W, H)

            for i in keep:
                x, y, w, h = boxes[i]
                label = labels[classes[i]]

                # set up + add bboxes to frame
                color = [int(c) for c in bbox_colors[classes[i]]]
                text = "{}: {:.4f}".format(label, confidences[i])
                _draw_detection(frame, boxes[i], color, text)

                # Row layout matches the DataFrame columns below: the last two
                # coordinates are the right/bottom edges, not width/height.
                stat_list.append([count, x, y, x + w, y + h, label, confidences[i]])

            writer.write(frame)
            fps.update()
            count += 1
    finally:
        # Always release native handles, even if detection fails mid-video.
        fps.stop()
        if writer is not None:
            writer.release()
        cap.release()

    print('Time elapsed (seconds): {:.2f}'.format(fps.elapsed()))
    df = pd.DataFrame(stat_list, columns=['frame', 'x_left', 'y_top', 'x_right',
                                          'y_bottom', 'label', 'confidence'])

    return wp, df


# if __name__ == '__main__':
    
#     parser = argparse.ArgumentParser('Wheelock evaluation script for classroom object detection', parents=[get_args_parser()])
#     args = parser.parse_args()
    
#     if not args.is_mp4:
#         print('Video must be an mp4 file.')
#     else:
#         if args.output_dir:
#             Path(args.output_dir).mkdir(parents=True, exist_ok=True)
#         main(args)