Spaces:
Runtime error
Runtime error
import numpy as np | |
from base64 import b64encode | |
import cv2 | |
import torch | |
import supervision as sv | |
from models.common import DetectMultiBackend, AutoShape | |
from utils.torch_utils import select_device | |
from utils.general import set_logging | |
from supervision import Detections as BaseDetections | |
from supervision.config import CLASS_NAME_DATA_FIELD | |
from IPython.display import HTML | |
# Extending Supervision's `Detections` to Handle YOLOv9 Results | |
class ExtendedDetections(BaseDetections): | |
def from_yolov9(cls, yolov9_results) -> 'ExtendedDetections': | |
xyxy, confidences, class_ids = [], [], [] | |
for det in yolov9_results.pred: | |
for *xyxy_coords, conf, cls_id in reversed(det): | |
xyxy.append(torch.stack(xyxy_coords).cpu().numpy()) | |
confidences.append(float(conf)) | |
class_ids.append(int(cls_id)) | |
class_names = np.array([yolov9_results.names[i] for i in class_ids]) | |
if not xyxy: | |
return cls.empty() | |
return cls( | |
xyxy=np.vstack(xyxy), | |
confidence=np.array(confidences), | |
class_id=np.array(class_ids), | |
data={CLASS_NAME_DATA_FIELD: class_names}, | |
) | |
# Loading the Model | |
set_logging(verbose=False) | |
device = select_device('cpu') | |
model = DetectMultiBackend(weights='best.pt', device=device, data='data/coco.yaml', fuse=True) | |
model = AutoShape(model) | |
# Function to Set YOLOv9 Post-processing Parameters | |
def prepare_yolov9(model, conf=0.2, iou=0.7, classes=None, agnostic_nms=False, max_det=1000): | |
model.conf = conf | |
model.iou = iou | |
model.classes = classes | |
model.agnostic = agnostic_nms | |
model.max_det = max_det | |
return model | |
# Function to Play Videos | |
def play(filename, width=500): | |
html = '' | |
video = open(filename, 'rb').read() | |
src = 'data:video/mp4;base64,' + b64encode(video).decode() | |
html += fr'<video width=500 controls autoplay loop><source src="%s" type="video/mp4"></video>' % src | |
return HTML(html) | |
# Constants | |
SOURCE_VIDEO_PATH = "test.mp4" | |
TARGET_VIDEO_PATH = "output.mp4" | |
# Simple Object Detection with YOLOv9 and Supervision | |
def prepare_model_and_video_info(model, config, source_path): | |
model = prepare_yolov9(model, **config) | |
video_info = sv.VideoInfo.from_video_path(source_path) | |
return model, video_info | |
def setup_annotator(): | |
return sv.BoundingBoxAnnotator(thickness=2) | |
def simple_annotate_frame(frame, model, annotator): | |
frame_rgb = frame[..., ::-1] | |
results = model(frame_rgb, size=640, augment=False) | |
detections = ExtendedDetections.from_yolov9(results) | |
# Display the frame with detections using cv2.imshow | |
annotated_frame = annotator.annotate(scene=frame.copy(), detections=detections) | |
cv2.imshow("Detections", annotated_frame) | |
cv2.waitKey(1) # Adjust the delay as needed | |
return annotated_frame | |
def simple_process_video(model, config=dict(conf=0.1, iou=0.45, classes=None,), source_path=SOURCE_VIDEO_PATH, target_path=TARGET_VIDEO_PATH): | |
model, _ = prepare_model_and_video_info(model, config, source_path) | |
annotator = setup_annotator() | |
def callback(frame: np.ndarray, index: int) -> np.ndarray: | |
return simple_annotate_frame(frame, model, annotator) | |
sv.process_video(source_path=source_path, target_path=target_path, callback=callback) | |
# Advanced Detection, Tracking, and Counting with YOLOv9 and Supervision | |
def setup_model_and_video_info(model, config, source_path): | |
model = prepare_yolov9(model, **config) | |
video_info = sv.VideoInfo.from_video_path(source_path) | |
return model, video_info | |
def create_byte_tracker(video_info): | |
return sv.ByteTrack(track_thresh=0.25, track_buffer=250, match_thresh=0.95, frame_rate=video_info.fps) | |
def setup_annotators(): | |
bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=2, color_lookup=sv.ColorLookup.TRACK) | |
round_box_annotator = sv.RoundBoxAnnotator(thickness=2, color_lookup=sv.ColorLookup.TRACK) | |
corner_annotator = sv.BoxCornerAnnotator(thickness=2, color_lookup=sv.ColorLookup.TRACK) | |
trace_annotator = sv.TraceAnnotator(thickness=2, trace_length=50, color_lookup=sv.ColorLookup.TRACK) | |
label_annotator = sv.LabelAnnotator(text_scale=0.5, color_lookup=sv.ColorLookup.TRACK) | |
return [bounding_box_annotator, round_box_annotator, corner_annotator], trace_annotator, label_annotator | |
def setup_counting_zone(counting_zone, video_info): | |
if counting_zone == 'whole_frame': | |
polygon = np.array([[0, 0], [video_info.width-1, 0], [video_info.width-1, video_info.height-1], [0, video_info.height-1]]) | |
else: | |
polygon = np.array(counting_zone) | |
polygon_zone = sv.PolygonZone(polygon=polygon, frame_resolution_wh=(video_info.width, video_info.height), triggering_position=sv.Position.CENTER) | |
polygon_zone_annotator = sv.PolygonZoneAnnotator(polygon_zone, sv.Color.ROBOFLOW, thickness=2*(2 if counting_zone=='whole_frame' else 1), text_thickness=1, text_scale=0.5) | |
return polygon_zone, polygon_zone_annotator | |
def annotate_frame(frame, index, video_info, detections, byte_tracker, counting_zone, polygon_zone, polygon_zone_annotator, trace_annotator, annotators_list, label_annotator, show_labels, model): | |
detections = byte_tracker.update_with_detections(detections) | |
annotated_frame = frame.copy() | |
if counting_zone is not None: | |
is_inside_polygon = polygon_zone.trigger(detections) | |
detections = detections[is_inside_polygon] | |
annotated_frame = polygon_zone_annotator.annotate(annotated_frame) | |
annotated_frame = trace_annotator.annotate(scene=annotated_frame, detections=detections) | |
section_index = int(index / (video_info.total_frames / len(annotators_list))) | |
annotated_frame = annotators_list[section_index].annotate(scene=annotated_frame, detections=detections) | |
if show_labels: | |
annotated_frame = add_labels_to_frame(label_annotator, annotated_frame, detections, model) | |
return annotated_frame | |
def add_labels_to_frame(annotator, frame, detections, model): | |
labels = [f"#{tracker_id} {model.model.names[class_id]} {confidence:0.2f}" for confidence, class_id, tracker_id in zip(detections.confidence, detections.class_id, detections.tracker_id)] | |
return annotator.annotate(scene=frame, detections=detections, labels=labels) | |
def process_video(model, config=dict(conf=0.1, iou=0.45, classes=True,), counting_zone=True, show_labels=True, source_path=SOURCE_VIDEO_PATH, target_path=TARGET_VIDEO_PATH): | |
model, video_info = setup_model_and_video_info(model, config, source_path) | |
byte_tracker = create_byte_tracker(video_info) | |
annotators_list, trace_annotator, label_annotator = setup_annotators() | |
polygon_zone, polygon_zone_annotator = setup_counting_zone(counting_zone, video_info) if counting_zone else (None, None) | |
def callback(frame: np.ndarray, index: int) -> np.ndarray: | |
frame_rgb = frame[..., ::-1] | |
results = model(frame_rgb, size=608, augment=False) | |
detections = ExtendedDetections.from_yolov9(results) | |
# Display the frame with detections using cv2.imshow | |
annotated_frame = annotate_frame(frame, index, video_info, detections, byte_tracker, counting_zone, polygon_zone, polygon_zone_annotator, trace_annotator, annotators_list, label_annotator, show_labels, model) | |
cv2.imshow("Detections", annotated_frame) | |
cv2.waitKey(1) # Adjust the delay as needed | |
return annotated_frame | |
sv.process_video(source_path=source_path, target_path=target_path, callback=callback) | |
# Detection, Tracking, and Counting in Full Frame | |
yolov9_config=dict(conf=0.3, iou=0.45, classes=[0, 2, 3]) | |
process_video(model, config=yolov9_config, counting_zone='whole_frame', show_labels=True, target_path='demo_file.mp4') | |
cv2.destroyAllWindows() |