# VITRA/visualization/video_utils.py
# Utils for video processing, frame manipulation, and visualization.
import cv2
import numpy as np
from typing import List, Optional
import imageio
def rotate_frame(frame: np.ndarray) -> np.ndarray:
"""Rotate a frame 90 degrees clockwise."""
return cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
def center_crop_image(frame: np.ndarray, crop_percent: float = 1.0) -> np.ndarray:
"""
Center crop an image to a specified percentage of its original size.
Args:
frame (np.ndarray): Input image.
crop_percent (float): Percentage of original size to crop to (0 < crop_percent <= 1).
Returns:
np.ndarray: Cropped image.
"""
if crop_percent == 1.0:
return frame
# Get original dimensions
original_height, original_width = frame.shape[:2]
# Calculate new dimensions
new_width = int(original_width * crop_percent)
new_height = int(original_height * crop_percent)
# Calculate top-left corner of the cropping box
start_x = (original_width - new_width) // 2
start_y = (original_height - new_height) // 2
# Perform the crop
cropped_frame = frame[start_y:start_y + new_height, start_x:start_x + new_width]
return cropped_frame
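
# Example (illustrative sketch, not part of the original module): cropping a
# synthetic 640x480 frame to 80% of its size yields a 512x384 center crop.
def _example_center_crop() -> np.ndarray:
    frame = np.zeros((480, 640, 3), dtype=np.uint8)
    cropped = center_crop_image(frame, crop_percent=0.8)
    assert cropped.shape[:2] == (384, 512)
    return cropped
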
def read_video_frames(
    cap: cv2.VideoCapture,
    start_frame: Optional[int] = None,
    end_frame: Optional[int] = None,
interval: int = 1,
rotate: bool = False,
crop_percent: float = 1.0
) -> List[np.ndarray]:
"""
Read frames from a video capture object with optional rotation and cropping.
Args:
cap: OpenCV VideoCapture object.
        start_frame (int, optional): Starting frame index; defaults to the capture's current position.
        end_frame (int, optional): Ending frame index (exclusive); defaults to the last frame.
        interval (int): Keep every `interval`-th frame (by absolute frame index).
rotate (bool): Whether to rotate frames 90 degrees clockwise.
crop_percent (float): Center crop percentage.
Returns:
List[np.ndarray]: List of frames.
"""
frame_count = 0
frame_list = []
# If a start frame is specified, move the video pointer
if start_frame is not None:
cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
frame_count += start_frame
# If no end frame is specified, read until the end of the video
if end_frame is None:
end_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# Read frames in a loop
while frame_count < end_frame:
ret, frame = cap.read()
if not ret:
break
# Process frames at the specified interval
if frame_count % interval == 0:
# Optionally rotate the frame
if rotate:
frame = rotate_frame(frame)
# Optionally center crop the frame
frame = center_crop_image(frame, crop_percent=crop_percent)
frame_list.append(frame)
frame_count += 1
return frame_list
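
# Usage sketch (assumed workflow; "input.mp4" is a placeholder path): open a
# capture, read every 5th frame of the first 100 frames with a light center
# crop, and release the capture afterwards.
def _example_read_video_frames(video_path: str = "input.mp4") -> List[np.ndarray]:
    cap = cv2.VideoCapture(video_path)
    try:
        return read_video_frames(cap, start_frame=0, end_frame=100, interval=5,
                                 rotate=False, crop_percent=0.9)
    finally:
        cap.release()
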
def save_to_video(frames: List[np.ndarray], output_path: str, fps: int = 30):
"""
Save a list of frames to a video file.
Args:
frames (List[np.ndarray]): List of frames.
output_path (str): Output video path.
fps (int): Frames per second.
"""
imageio.mimsave(output_path, frames, fps=fps, codec='libx264')
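
# Usage sketch (assumed convention): OpenCV decodes frames in BGR order while
# imageio/ffmpeg writes RGB, so convert before encoding. "out.mp4" is a
# placeholder path.
def _example_save_bgr_frames(bgr_frames: List[np.ndarray], output_path: str = "out.mp4") -> None:
    rgb_frames = [cv2.cvtColor(f, cv2.COLOR_BGR2RGB) for f in bgr_frames]
    save_to_video(rgb_frames, output_path, fps=30)
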
def resize_frames_to_long_side(frames: List[np.ndarray], target_long_side: int) -> List[np.ndarray]:
"""
Resize frames so the longer side matches the target size, preserving aspect ratio.
Args:
frames (List[np.ndarray]): List of frames.
target_long_side (int): Desired length of the longer side.
Returns:
List[np.ndarray]: Resized frames.
"""
# If target_long_side is None, return original frames without resizing
if target_long_side is None:
return frames
resized_frames = []
# Loop over each frame
for frame in frames:
height, width = frame.shape[:2]
# Determine the scaling factor and new dimensions
if width > height:
scale_factor = target_long_side / width
else:
scale_factor = target_long_side / height
new_width = int(width * scale_factor)
new_height = int(height * scale_factor)
# Resize the frame while maintaining the aspect ratio
resized_frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_AREA)
resized_frames.append(resized_frame)
return resized_frames
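
# Example (illustrative sketch): a 1920x1080 frame resized so its longer side
# becomes 640 ends up 640x360, preserving the 16:9 aspect ratio.
def _example_resize_to_long_side() -> np.ndarray:
    frame = np.zeros((1080, 1920, 3), dtype=np.uint8)
    resized = resize_frames_to_long_side([frame], target_long_side=640)[0]
    assert resized.shape[:2] == (360, 640)
    return resized
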
def sample_frames_evenly(video_frames: List[np.ndarray], num_frames: int) -> List[np.ndarray]:
"""
Sample frames evenly from a video sequence.
Args:
video_frames (List[np.ndarray]): List of frames.
num_frames (int): Number of frames to sample.
Returns:
List[np.ndarray]: Sampled frames.
"""
total = len(video_frames)
if num_frames >= total:
return video_frames.copy()
indices = np.linspace(0, total - 1, num=num_frames, dtype=int)
return [video_frames[i] for i in indices]
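
# Example (illustrative sketch): sampling 4 frames from a 10-frame clip picks
# indices 0, 3, 6, 9 via np.linspace(0, 9, num=4).
def _example_sample_evenly() -> List[np.ndarray]:
    clip = [np.full((8, 8, 3), i, dtype=np.uint8) for i in range(10)]
    sampled = sample_frames_evenly(clip, num_frames=4)
    assert [int(f[0, 0, 0]) for f in sampled] == [0, 3, 6, 9]
    return sampled
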
def wrap_text(text: str, max_width: int, font, font_scale: float) -> List[str]:
"""
Wraps text to fit within a given width.
Args:
text (str): The text to wrap.
max_width (int): The maximum width in pixels.
font: The font to use.
font_scale (float): The scale of the font.
Returns:
List of lines of wrapped text.
"""
words = text.split(' ')
lines = []
current_line = ''
for word in words:
test_line = current_line + word + ' '
# Measure the width of the line
size = cv2.getTextSize(test_line, font, font_scale, 1)[0]
        # Start a new line when the candidate exceeds the limit, unless the line
        # is empty (a single over-long word stays on its own line).
        if size[0] > max_width and current_line:
            lines.append(current_line.strip())
            current_line = word + ' '
else:
current_line = test_line
if current_line:
lines.append(current_line.strip())
return lines
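
# Example (illustrative sketch): wrap a caption to fit a 200 px column using
# the same Hershey font and scale that add_overlay_text draws with.
def _example_wrap_text() -> List[str]:
    caption = "A fairly long caption that will not fit on a single line"
    return wrap_text(caption, max_width=200, font=cv2.FONT_HERSHEY_SIMPLEX, font_scale=0.5)
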
def add_overlay_text(frame: np.ndarray, caption: str) -> np.ndarray:
"""
Add overlay text to a frame.
Args:
frame (np.ndarray): Input image.
caption (str): Text to overlay.
Returns:
np.ndarray: Frame with text.
"""
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    w = frame.shape[1]  # Width of the frame
    y0, dy = 30, 20     # Starting Y position and line height
    # Wrap with the same font scale used for drawing so the measured widths
    # match, leaving a 10 px margin on each side.
    for i, line in enumerate(wrap_text(caption, w - 20, font, font_scale)):
        y = y0 + i * dy
        cv2.putText(frame, line, (10, y), font, font_scale, (0, 255, 255), 2)
return frame
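
# End-to-end sketch (assumed pipeline; "input.mp4" and "output.mp4" are
# placeholder paths): read and downsample frames, resize them, overlay a
# caption, convert BGR to RGB, and encode the result as a new video.
if __name__ == "__main__":
    cap = cv2.VideoCapture("input.mp4")
    frames = read_video_frames(cap, interval=2, crop_percent=0.9)
    cap.release()
    frames = resize_frames_to_long_side(frames, target_long_side=720)
    frames = sample_frames_evenly(frames, num_frames=120)
    frames = [add_overlay_text(f, "example caption") for f in frames]
    # Convert OpenCV BGR frames to RGB before handing them to imageio.
    frames = [cv2.cvtColor(f, cv2.COLOR_BGR2RGB) for f in frames]
    save_to_video(frames, "output.mp4", fps=30)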