# Spaces:
# Runtime error
# Runtime error
import argparse

import cv2
import numpy as np
from ultralytics import YOLO, SAM
def process_video(
    video_path,
    output_path,
    *,
    person_class=0,
    background_color=(0, 255, 0),
    fallback_fps=30.0,
):
    """Write a copy of a video in which everything but detected people is masked.

    Every frame of ``video_path`` is passed to a YOLO detector
    (``yolo11n.pt``). The boxes whose class equals ``person_class`` are used
    as box prompts for SAM 2 (``sam2_b.pt``), and the returned masks are
    OR-ed into one foreground mask. Pixels outside that mask are overwritten
    with ``background_color`` and the frame is appended to ``output_path``
    using the ``mp4v`` codec.

    Args:
        video_path: Path to the input video file.
        output_path: Path of the video file to create.
        person_class: Detector class index to keep (0 by default, which the
            original code treats as "person").
        background_color: Three channel values written to every
            non-foreground pixel, in the frame's channel order (OpenCV
            frames are BGR). Defaults to green.
        fallback_fps: Frame rate used for the output when the input does
            not report a positive FPS.

    Raises:
        OSError: If the input video cannot be opened or the output video
            cannot be created.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Fail before loading the models; otherwise the loop below would
        # silently produce an empty output file.
        cap.release()
        raise OSError(f"Could not open input video: {video_path}")

    out = None
    try:
        # Load models - Ultralytics will handle caching automatically
        yolo_model = YOLO("yolo11n.pt")
        sam2_model = SAM("sam2_b.pt")

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Some inputs report 0 (or NaN) FPS, which the writer cannot use.
            fps = fallback_fps

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise OSError(f"Could not create output video: {output_path}")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Detect objects and keep only the boxes of the requested class.
            detections = yolo_results_boxes = yolo_model(frame)[0].boxes
            person_boxes = (
                yolo_results_boxes[detections.cls == person_class]
                .xyxy.cpu()
                .numpy()
            )

            # Union of all instance masks; stays all-False when nothing is
            # detected, so the whole frame becomes background.
            combined_mask = np.zeros(frame.shape[:2], dtype=bool)
            if len(person_boxes) > 0:
                sam_results = sam2_model(frame, bboxes=person_boxes)
                masks = sam_results[0].masks
                if masks is not None:
                    # One device-to-host transfer, then OR across instances.
                    # astype(bool) guards against non-bool mask dtypes,
                    # which would make the in-place OR fail.
                    stacked = masks.data.cpu().numpy().astype(bool)
                    combined_mask |= stacked.any(axis=0)

            # Paint everything outside the foreground mask.
            segmented_frame = frame.copy()
            segmented_frame[~combined_mask] = background_color
            out.write(segmented_frame)
    finally:
        # Release the handles even if a frame fails, so the output file is
        # finalized and the capture device is freed.
        cap.release()
        if out is not None:
            out.release()
def main(argv=None):
    """Parse command-line arguments and run the video pipeline.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default), argparse reads the arguments from ``sys.argv``.

    The two positional arguments, ``input_video`` and ``output_video``, are
    forwarded to ``process_video``.
    """
    parser = argparse.ArgumentParser(description="Process video with YOLO and SAM2")
    parser.add_argument("input_video", help="Path to the input video file")
    parser.add_argument("output_video", help="Path to the output video file")
    args = parser.parse_args(argv)
    process_video(args.input_video, args.output_video)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()