import cv2
import torch
from torchvision import transforms


def process_video(video_path, model, output_path, device):
    """Run a detection model over a video and write an annotated copy.

    The model is expected to be a torchvision-style detector already moved to
    `device` and set to eval mode; it returns dicts with "boxes", "scores",
    and "labels" for each input image.
    """
    transform = transforms.Compose([
        transforms.ToTensor()
    ])

    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        raise IOError(f"Cannot open video: {video_path}")

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))

    frame_skip = 2   # run detection only on every second frame
    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Detection and box drawing happen at a fixed 640x480 resolution.
        resized_frame = cv2.resize(frame, (640, 480))
        if frame_count % frame_skip == 0:
            # OpenCV delivers BGR; the model expects RGB tensors in [0, 1].
            rgb_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
            img_tensor = transform(rgb_frame).to(device)
            with torch.no_grad():
                prediction = model([img_tensor])[0]
            for box, score, label in zip(prediction["boxes"], prediction["scores"], prediction["labels"]):
                if score.item() > 0.5:
                    x1, y1, x2, y2 = map(int, box)
                    cv2.rectangle(resized_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(resized_frame, f"{label.item()}:{score.item():.2f}", (x1, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

        # Skipped frames are written without annotations; scale back to the
        # source resolution so the output matches the writer's frame size.
        output_frame = cv2.resize(resized_frame, (width, height))
        out.write(output_frame)
        frame_count += 1

    cap.release()
    out.release()
    return output_path
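

# Minimal usage sketch, not part of the original snippet: it assumes a
# pretrained torchvision Faster R-CNN as the detector and uses placeholder
# file paths ("input.mp4", "annotated.avi"); substitute whichever
# detection model and paths your project actually uses.
if __name__ == "__main__":
    from torchvision.models.detection import fasterrcnn_resnet50_fpn

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Newer torchvision uses weights="DEFAULT"; older releases take
    # pretrained=True instead.
    detector = fasterrcnn_resnet50_fpn(weights="DEFAULT")
    detector.to(device)
    detector.eval()  # process_video expects the model in eval mode

    # XVID-encoded output is typically written to an .avi container.
    process_video("input.mp4", detector, "annotated.avi", device)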