"""Video cropping utilities: crop a video frame-by-frame around a detected
object, using either per-frame YOLO detection or a single YOLO detection
followed by an OpenCV object tracker."""
import cv2
import numpy as np
import yolov5
class CropVideo:
    """Base class for cropping a video frame-by-frame using various object
    detection methods such as YOLO or cv2.Tracker.

    Warning: This class should not be used directly.
    Use derived classes instead.

    Parameters:
        method : name of the object detection method
    """

    def __init__(self, method=None):
        # Remember which detection backend the subclass represents.
        self.method = method

    def video_crop(self, video_frames):
        """Crops given list of frames by detecting an object using different
        methods such as YOLO or cv2.Tracker.

        Args:
            video_frames: A list of numpy arrays representing the input images

        Returns:
            A numpy array containing cropped frames
        """
        # Subclasses must provide the actual cropping strategy.
        raise NotImplementedError
class YOLOCrop(CropVideo):
    """Class for cropping a video frame-by-frame using the YOLO object
    detection method.

    Parameters:
        method : ignored; kept for signature compatibility (always 'yolo')
        model_path : path to the YOLO model weights
    """

    # Pixel margin added on every side of a detected bounding box.
    _MARGIN = 100

    def __init__(self, method=None, model_path=None):
        super().__init__('yolo')
        self.model_path = model_path or 'models/yolo/yolov5x.pt'
        self.load_model(self.model_path)

    def load_model(self, model_path):
        """Loads the YOLO object detection model and restricts detections
        to class id 0 (the first model class)."""
        self.model = yolov5.load(model_path)
        self.model.classes = 0

    def get_yolo_bbox(self, frame):
        """Runs YOLO object detection on an input image.

        Args:
            frame: A [height, width, 3] numpy array representing the input image

        Returns:
            A list containing bounding box parameters
            [x_min, y_min, x_max, y_max] of the largest detection, or an
            empty list when nothing was detected.
        """
        results = self.model(frame)
        predictions = results.pred[0]
        # .cpu() keeps this safe when inference happened on a GPU tensor.
        boxes = predictions[:, :4].cpu().numpy().astype(np.int32)
        if len(boxes) == 0:
            return []
        if len(boxes) == 1:
            return list(boxes[0])
        # Rank detections by rectangle area (x2-x1)*(y2-y1).
        # NOTE: cv2.contourArea on a two-point "contour" is always 0, so it
        # cannot be used to pick the largest axis-aligned box.
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        return list(boxes[np.argmax(areas)])

    def video_crop(self, video_frames):
        """Crops given list of frames by detecting an object using YOLO.

        Frames with no detection are skipped. The crop is a square whose
        side is the max of the 95th-percentile width and height of the
        margin-expanded detections, so a few outliers do not inflate it.

        Args:
            video_frames: A list of numpy arrays representing the input images

        Returns:
            A numpy array containing cropped frames; an empty array when no
            frame produced a detection.
        """
        # (frame_index, x_start, y_start, width, height) per detected frame.
        # Keeping the frame index is essential: frames without detections
        # are skipped, so positions in this list do NOT line up with
        # positions in video_frames.
        windows = []
        for idx, frame in enumerate(video_frames):
            bbox = self.get_yolo_bbox(frame)
            if not bbox:
                continue
            frame_height, frame_width = frame.shape[:2]
            xs = int(max(bbox[0] - self._MARGIN, 0))
            ys = int(max(bbox[1] - self._MARGIN, 0))
            xe = int(min(bbox[2] + self._MARGIN, frame_width))
            ye = int(min(bbox[3] + self._MARGIN, frame_height))
            windows.append((idx, xs, ys, xe - xs, ye - ys))

        if not windows:
            # No detections anywhere: nothing to crop.
            return np.array([])

        width = np.percentile([w for _, _, _, w, _ in windows], 95)
        height = np.percentile([h for _, _, _, _, h in windows], 95)
        box_len = int(max(width, height))

        cropped_frames = []
        for idx, xs, ys, _, _ in windows:
            frame = video_frames[idx]
            frame_height, frame_width = frame.shape[:2]
            xe = xs + box_len
            ye = ys + box_len
            # Shift the square window back inside this frame when it
            # overruns the bottom or right edge.
            if ye > frame_height:
                ye = frame_height
                ys = max(0, ye - box_len)
            if xe > frame_width:
                xe = frame_width
                xs = max(0, xe - box_len)
            cropped_frames.append(np.array(frame[int(ys):int(ye), int(xs):int(xe), :]))
        return np.array(cropped_frames)
class TrackerCrop(YOLOCrop):
    """Crops a video by detecting the object once with YOLO on the first
    frame, then following it across frames with an OpenCV MIL tracker.

    Parameters:
        model_path : path to the YOLO model used for the initial detection
    """

    def __init__(self, model_path=None):
        # Forward model_path: previously it was accepted but silently
        # dropped, so a custom model could never be loaded.
        super().__init__(method='yolo', model_path=model_path)
        self.tracker = cv2.TrackerMIL.create()

    @staticmethod
    def expand_bbox(bbox, frame_shape):
        """Expands a given [x, y, width, height] bounding box by 50 pixels,
        in place, clamped to the frame boundaries.

        Args:
            bbox: A mutable list [x, y, width, height] of bounding box
                parameters of the object
            frame_shape: (height, width, ...) of a frame
        """
        bbox[0] = max(bbox[0] - 50, 0)
        bbox[1] = max(bbox[1] - 50, 0)
        # Grow the size but keep the box inside the frame.
        # (Fixed: width was previously clamped from bbox[3] + 50, i.e. the
        # height, instead of bbox[2] + 50.)
        bbox[2] = min(bbox[2] + 50, frame_shape[1] - bbox[0] - 1)
        bbox[3] = min(bbox[3] + 50, frame_shape[0] - bbox[1] - 1)

    @staticmethod
    def pad_bbox(crop_frame, box_len):
        """Pads a cropped frame up to box_len x box_len.

        Args:
            crop_frame: A numpy array representing the cropped frame
            box_len: An integer value representing the target square side
                (maximum of width and height)

        Returns:
            A numpy array containing the cropped frame with zero padding
        """
        if box_len > crop_frame.shape[0] or box_len > crop_frame.shape[1]:
            # max(0, ...) guards against a negative pad when only one of
            # the two dimensions is short.
            crop_frame = np.pad(
                crop_frame, pad_width=(
                    (0, max(0, box_len - crop_frame.shape[0])),
                    (0, max(0, box_len - crop_frame.shape[1])), (0, 0))
            )
        return crop_frame

    @staticmethod
    def clip_coordinates(x, y, box_len, frame_shape):
        """Clips (x, y), the top-left corner of a box_len square, so the
        square stays inside the frame.

        Args:
            x: x-coordinate of the top-left corner of the bounding box
            y: y-coordinate of the top-left corner of the bounding box
            box_len: An integer value representing the square crop side
            frame_shape: (height, width, ...) of a frame

        Returns:
            (x, y) clipped coordinates
        """
        if x + box_len > frame_shape[1]:
            x = max(0, x - (x + box_len - frame_shape[1]))
        if y + box_len > frame_shape[0]:
            y = max(0, y - (y + box_len - frame_shape[0]))
        return (x, y)

    def video_crop(self, video_frames):
        """Crops given list of frames by tracking the object detected in the
        first frame with cv2.TrackerMIL.

        Args:
            video_frames: A list of numpy arrays representing the input images

        Returns:
            A numpy array containing square cropped frames

        Raises:
            ValueError: if YOLO detects nothing in the first frame, so the
                tracker cannot be initialised.
        """
        first_frame = video_frames[0]
        detection = self.get_yolo_bbox(first_frame)
        if not detection:
            raise ValueError(
                'No object detected in the first frame; cannot initialise '
                'the tracker.')
        # get_yolo_bbox returns [x_min, y_min, x_max, y_max]; the tracker
        # and expand_bbox work with [x, y, width, height].
        bbox = [detection[0], detection[1],
                detection[2] - detection[0], detection[3] - detection[1]]
        TrackerCrop.expand_bbox(bbox, first_frame.shape)
        self.tracker.init(first_frame, tuple(int(v) for v in bbox))

        output_frame_list = []
        for frame in video_frames:
            _, bbox = self.tracker.update(frame)
            x, y, w, h = (int(v) for v in bbox)
            box_len = max(w, h)
            x, y = TrackerCrop.clip_coordinates(x, y, box_len, frame.shape)
            crop_frame = np.array(frame[y:y + box_len, x:x + box_len, :])
            crop_frame = TrackerCrop.pad_bbox(crop_frame, box_len)
            output_frame_list.append(crop_frame)
        return np.array(output_frame_list)