import cv2
import gradio as gr
import supervision as sv
from ultralytics import YOLO
from PIL import Image
import torch
import time
import numpy as np
import uuid
import spaces
"""| YOLOv8 | `yolov8n.pt` `yolov8s.pt` `yolov8m.pt` `yolov8l.pt` `yolov8x.pt` …
| YOLOv8-seg | `yolov8n-seg.pt` `yolov8s-seg.pt` `yolov8m-seg.pt` `yolov8l-seg.pt` `yolov8x-seg.pt` …
| YOLOv8-pose | `yolov8n-pose.pt` `yolov8s-pose.pt` `yolov8m-pose.pt` `yolov8l-pose.pt` `yolov8x-pose.pt` `yolov8x-pose…
| YOLOv8-obb | `yolov8n-obb.pt` `yolov8s-obb.pt` `yolov8m-obb.pt` `yolov8l-obb.pt` `yolov8x-obb.pt` …
| YOLOv8-cls | `yolov8n-cls.pt` `yolov8s-cls.pt` `yolov8m-cls.pt` `yolov8l-cls.pt` `yolov8x-cls.pt` """
# Build a weight name from version / size / task, e.g. "yolov8s.pt".
ver = [6, 7, 8, 9, 10, 11]
ltr = ["n", "s", "m", "l", "x"]
tsk = ["", "seg", "pose", "obb", "cls"]
task_suffix = f"-{tsk[0]}" if tsk[0] else ""  # plain detection has no task suffix
modin = f"yolov{ver[2]}{ltr[1]}{task_suffix}.pt"  # yolov8s.pt
print(modin)
model = YOLO(modin)
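
# Any other entry from the table above can be loaded the same way; a minimal
# sketch (ultralytics downloads official weights on first use):
#
#   seg_model = YOLO("yolov8n-seg.pt")    # instance segmentation variant
#   pose_model = YOLO("yolov8n-pose.pt")  # pose estimation variant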
def draw_box(image, det):
    """Draw a rectangle around every detection in `det` (an sv.Detections)."""
    for ea in det.xyxy:
        start_point = (int(ea[0]), int(ea[1]))  # top-left corner
        end_point = (int(ea[2]), int(ea[3]))    # bottom-right corner
        color = (255, 0, 0)  # red: the frame is RGB at this point in the pipeline
        thickness = 2
        image = cv2.rectangle(image, start_point, end_point, color, thickness)
    return image
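
# Note: supervision ships its own box annotator, which could replace the
# hand-rolled draw_box above. A minimal sketch (check the annotator API of
# your installed supervision version):
#
#   box_annotator = sv.BoxAnnotator()
#   annotated = box_annotator.annotate(scene=frame.copy(), detections=detections)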
@spaces.GPU
def stream_object_detection(video):
    SUBSAMPLE = 1  # keep every frame; raising this only lowers the output fps here
    cap = cv2.VideoCapture(video)
    # Output mp4 chunks so the streaming gr.Video component can play them.
    video_codec = cv2.VideoWriter_fourcc(*"mp4v")  # type: ignore
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    desired_fps = fps // SUBSAMPLE
    # Frames are downscaled by half below, so the writer gets half-size dimensions.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) // 2
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) // 2
    iterating, frame = cap.read()
    n_frames = 0
    output_video_name = f"output_{uuid.uuid4()}.mp4"
    output_video = cv2.VideoWriter(output_video_name, video_codec, desired_fps, (width, height))  # type: ignore
    while iterating:
        frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = model(Image.fromarray(frame))[0]
        detections = sv.Detections.from_ultralytics(result)
        frame = np.array(draw_box(frame, detections))
        # Convert RGB back to BGR for OpenCV's writer.
        frame = frame[:, :, ::-1].copy()
        output_video.write(frame)
        # Close the current chunk and stream it out with the raw boxes for the JSON output.
        output_video.release()
        yield output_video_name, detections.xyxy.tolist()
        output_video_name = f"output_{uuid.uuid4()}.mp4"
        output_video = cv2.VideoWriter(output_video_name, video_codec, desired_fps, (width, height))  # type: ignore
        iterating, frame = cap.read()
        n_frames += 1
    cap.release()
    output_video.release()
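
# Streaming contract: gr.Video(streaming=True) plays each yielded mp4 path in
# sequence, which is why the writer above is released and re-created on every
# frame. A sketch of flushing a multi-frame chunk instead (hypothetical `boxes`
# holds the last detections; the interval is an assumption):
#
#   if n_frames % (desired_fps * 2) == 0:  # roughly every two seconds of video
#       output_video.release()
#       yield output_video_name, boxes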
with gr.Blocks() as app:
    gr.HTML("<div style='font-size: 50px;font-weight: 800;'>Supervision</div><div style='font-size: 30px;'>Video Object Detection</div><div>Github:<a href='https://github.com/roboflow/supervision' target='_blank'>https://github.com/roboflow/supervision</a></div>")
    with gr.Row():
        with gr.Column():
            inp = gr.Video()
            btn = gr.Button()
        outp_v = gr.Video(label="Processed Video", streaming=True, autoplay=True)
        outp_j = gr.JSON(label="Boxes (xyxy)")
    btn.click(stream_object_detection, inp, [outp_v, outp_j])
app.queue().launch()