Par-ity_Project / app /utils /video_processor.py
chenemii's picture
llm
bf56017
"""
Video processing and object detection module
"""
import cv2
import platform
import numpy as np
from tqdm import tqdm
from ultralytics import YOLO
class Detection:
"""Class to store detection results"""
def __init__(self, frame_idx, class_id, class_name, bbox, confidence):
self.frame_idx = frame_idx
self.class_id = class_id
self.class_name = class_name
self.bbox = bbox # [x1, y1, x2, y2]
self.confidence = confidence
def process_video(video_path, sample_rate=1):
"""
Process video and detect golfer, club, and ball
Args:
video_path (str): Path to the video file
sample_rate (int): Process every nth frame (default: 1 for all frames)
Returns:
tuple: (frames, detections)
- frames: List of processed frames
- detections: List of Detection objects
"""
model = YOLO("yolov8n.pt")
class_names = model.names
# On macOS ("Darwin"), the AVFoundation backend is often more reliable.
# For other systems, FFMPEG is a good choice.
backend = cv2.CAP_AVFOUNDATION if platform.system() == "Darwin" else cv2.CAP_FFMPEG
cap = cv2.VideoCapture(video_path, backend)
if not cap.isOpened():
backend_name = "AVFoundation" if platform.system() == "Darwin" else "FFMPEG"
print(f"Warning: Could not open video with {backend_name} backend. Trying default.")
cap = cv2.VideoCapture(video_path) # Fallback to default
if not cap.isOpened():
raise ValueError("Error opening video file with any available backend.")
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# Process all frames by default
frames = []
detections = []
for frame_idx in tqdm(range(0, frame_count, sample_rate), desc="Processing frames"):
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
ret, frame = cap.read()
if not ret:
print(f"Warning: Could not read frame {frame_idx}")
continue
# Store original frame
frames.append(frame)
# Run YOLOv8 detection
results = model(frame)
# Process detection results
for result in results:
boxes = result.boxes
for box in boxes:
class_id = int(box.cls.item())
class_name = class_names[class_id]
bbox = box.xyxy[0].tolist()
confidence = box.conf.item()
detections.append(Detection(frame_idx, class_id, class_name, bbox, confidence))
cap.release()
return frames, detections