# Source header (from hosting page): RishitJavia — "Fix model dependency." — commit a62a4b8
import cv2
import numpy as np
import yolov5
class CropVideo:
    """Abstract base for cropping a video frame-by-frame.

    Concrete subclasses implement ``video_crop`` with a specific object
    detection strategy (e.g. YOLO or a cv2 tracker).

    Warning: This class should not be used directly.
    Use derived classes instead.

    Parameters:
        method : name of the object detection method
    """

    def __init__(self, method=None):
        # Record which detection backend the subclass uses.
        self.method = method

    def video_crop(self, video_frames):
        """Crop each frame around the detected object.

        Args:
            video_frames: A list of numpy arrays representing the input images

        Returns:
            A numpy array containing cropped frames
        """
        raise NotImplementedError
class YOLOCrop(CropVideo):
    """Crops a video frame-by-frame around the largest YOLO detection.

    Parameters:
        method : accepted for backward compatibility; the method is always
            'yolo'
        model_path : path to the YOLOv5 weights file (defaults to
            'models/yolo/yolov5x.pt')
    """

    def __init__(self, method=None, model_path=None):
        super().__init__('yolo')
        self.model_path = model_path or 'models/yolo/yolov5x.pt'
        self.load_model(self.model_path)

    def load_model(self, model_path):
        """Loads the YOLOv5 object detection model.
        """
        self.model = yolov5.load(model_path)
        # Restrict detections to class 0 (presumably "person" in the COCO
        # label set — confirm against the trained model).
        self.model.classes = 0

    def get_yolo_bbox(self, frame):
        """Runs YOLO object detection on an input image.

        Args:
            frame: A [height, width, 3] numpy array representing the input image

        Returns:
            Bounding box parameters [x_min, y_min, x_max, y_max] of the
            largest detection, or [] when nothing is detected.
        """
        results = self.model(frame)
        predictions = results.pred[0]
        boxes = predictions[:, :4].numpy().astype(np.int32)
        if len(boxes) == 0:
            return []
        if len(boxes) == 1:
            return list(boxes[0])
        # BUGFIX: the previous code passed the two corner points to
        # cv2.contourArea, which is always 0 for a 2-point contour, so the
        # argmax degenerated to "first box". Compute the rectangle area
        # directly instead.
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        return list(boxes[int(np.argmax(areas))])

    def video_crop(self, video_frames):
        """Crops given list of frames by detecting object using YOLO.

        Frames with no detection are skipped. The square crop side is the
        95th percentile of the per-frame padded box sizes, so a few outlier
        detections cannot blow up the crop size.

        Args:
            video_frames: A list of numpy arrays representing the input images

        Returns:
            A numpy array containing cropped frames (one per frame with a
            detection); an empty array when nothing was detected at all.
        """
        x_width_start = []
        y_height_start = []
        widths = []
        heights = []
        # BUGFIX: remember WHICH frames produced a detection. The original
        # indexed video_frames with the position in `widths`, which drifts
        # out of sync as soon as any frame is skipped, cropping the wrong
        # frames.
        detected_idx = []
        for idx, frame in enumerate(video_frames):
            bbox = self.get_yolo_bbox(frame)
            if len(bbox) == 0:
                continue
            frame_height, frame_width = frame.shape[:2]
            # Pad the detection by 100 px on every side, clamped to the frame.
            xs = int(max(bbox[0] - 100, 0))
            ys = int(max(bbox[1] - 100, 0))
            xe = int(min(bbox[2] + 100, frame_width))
            ye = int(min(bbox[3] + 100, frame_height))
            x_width_start.append(xs)
            y_height_start.append(ys)
            widths.append(xe - xs)
            heights.append(ye - ys)
            detected_idx.append(idx)

        # Robustness: with no detections at all, np.percentile used to crash
        # on an empty array.
        if not widths:
            return np.array([])

        width = np.percentile(np.array(widths), 95)
        height = np.percentile(np.array(heights), 95)
        box_len = int(max(width, height))

        cropped_frames = []
        for i, frame_idx in enumerate(detected_idx):
            frame = video_frames[frame_idx]
            # BUGFIX: clamp against THIS frame's size — the original reused
            # the dimensions of whichever frame the first loop saw last.
            frame_height, frame_width = frame.shape[:2]
            xs = x_width_start[i]
            xe = xs + box_len
            ys = y_height_start[i]
            ye = ys + box_len
            if ye > frame_height:
                ye = frame_height
                ys = max(0, ye - box_len)
            if xe > frame_width:
                xe = frame_width
                xs = max(0, xe - box_len)
            cropped = frame[int(ys): int(ye), int(xs): int(xe), :]
            cropped_frames.append(np.array(cropped))
        return np.array(cropped_frames)
class TrackerCrop(YOLOCrop):
    """Crops a video frame-by-frame with a cv2 MIL tracker seeded by a YOLO
    detection on the first frame.

    Parameters:
        model_path : path to the YOLO model used to seed the tracker
    """

    def __init__(self, model_path=None):
        # BUGFIX: forward model_path to the YOLO loader; it was previously
        # dropped, so a custom model path was silently ignored.
        super().__init__(model_path=model_path)
        self.tracker = cv2.TrackerMIL.create()

    @staticmethod
    def expand_bbox(bbox, frame_shape):
        """Expands the given bounding box in place by 50 pixels, clamped to
        the frame.

        Args:
            bbox: A mutable sequence [x, y, width, height] of bounding box
                parameters of the object
            frame_shape: (height, width, ...) of a frame
        """
        bbox[0] = max(bbox[0] - 50, 0)
        bbox[1] = max(bbox[1] - 50, 0)
        # BUGFIX: the width was expanded from bbox[3] (the height); use the
        # width itself.
        bbox[2] = min(bbox[2] + 50, frame_shape[1] - bbox[0] - 1)
        bbox[3] = min(bbox[3] + 50, frame_shape[0] - bbox[1] - 1)

    @staticmethod
    def pad_bbox(crop_frame, box_len):
        """Pads given cropped frame with zeros up to box_len per axis.

        Args:
            crop_frame: A numpy array representing the cropped frame
            box_len: An integer value representing maximum out of width and height

        Returns:
            A numpy array containing the cropped frame with padding; shape is
            (max(h, box_len), max(w, box_len), channels).
        """
        # BUGFIX: pad each axis independently. Previously a crop short in
        # only one dimension produced a negative pad width for the other
        # axis, making np.pad raise.
        pad_h = max(0, box_len - crop_frame.shape[0])
        pad_w = max(0, box_len - crop_frame.shape[1])
        if pad_h or pad_w:
            crop_frame = np.pad(
                crop_frame,
                pad_width=((0, pad_h), (0, pad_w), (0, 0))
            )
        return crop_frame

    @staticmethod
    def clip_coordinates(x, y, box_len, frame_shape):
        """Clips (x, y), the top-left corner of the crop window, so that the
        window fits inside the frame.

        Args:
            x: x-coordinate of the crop window
            y: y-coordinate of the crop window
            box_len: An integer value representing maximum out of width and height
            frame_shape: (height, width, ...) of a frame

        Returns:
            (x, y) clipped coordinates
        """
        if x + box_len > frame_shape[1]:
            diff = x + box_len - frame_shape[1]
            x = max(0, x - diff)
        if y + box_len > frame_shape[0]:
            diff = y + box_len - frame_shape[0]
            y = max(0, y - diff)
        return (x, y)

    def video_crop(self, video_frames):
        """Crops given list of frames by tracking the object detected by
        YOLO in the first frame.

        Args:
            video_frames: A list of numpy arrays representing the input images

        Returns:
            A numpy array containing cropped frames

        Raises:
            ValueError: if YOLO detects nothing in the first frame (the
                tracker cannot be seeded).
        """
        first_frame = video_frames[0]
        xyxy = self.get_yolo_bbox(first_frame)
        if len(xyxy) == 0:
            # Previously this crashed with an opaque IndexError inside
            # expand_bbox; fail with an explicit message instead.
            raise ValueError('no object detected in the first frame')
        # BUGFIX: get_yolo_bbox returns [x_min, y_min, x_max, y_max], but the
        # tracker and expand_bbox expect [x, y, width, height] — convert.
        bbox = [int(xyxy[0]), int(xyxy[1]),
                int(xyxy[2] - xyxy[0]), int(xyxy[3] - xyxy[1])]
        TrackerCrop.expand_bbox(bbox, first_frame.shape)
        self.tracker.init(first_frame, tuple(bbox))
        output_frame_list = []
        for frame in video_frames:
            _, tracked = self.tracker.update(frame)
            x, y, w, h = (int(v) for v in tracked)
            box_len = max(w, h)
            x, y = TrackerCrop.clip_coordinates(x, y, box_len, frame.shape)
            crop_frame = np.array(frame[y:y + box_len, x:x + box_len, :])
            crop_frame = TrackerCrop.pad_bbox(crop_frame, box_len)
            output_frame_list.append(crop_frame)
        return np.array(output_frame_list)