"""
YOLO Object Detection with Gradio Interface
Optimized for Hugging Face Spaces deployment
"""

import gradio as gr
import cv2
import numpy as np
from ultralytics import YOLO
from PIL import Image
import torch
import spaces
import tempfile
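
# Model cache: YOLO checkpoints are loaded lazily and kept in this dict so
# switching model sizes in the UI does not re-download or re-initialize them.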
models = {}
current_model_size = 'nano'


def load_model(model_size='nano'):
    """
    Load YOLO model based on selected size
    """
    global models, current_model_size

    model_names = {
        'nano': 'yolov8n.pt',
        'small': 'yolov8s.pt',
        'medium': 'yolov8m.pt',
        'large': 'yolov8l.pt',
        'xlarge': 'yolov8x.pt'
    }

    model_name = model_names.get(model_size, 'yolov8n.pt')

    if model_size not in models:
        print(f"Loading {model_name}...")
        models[model_size] = YOLO(model_name)
        current_model_size = model_size
        if torch.cuda.is_available():
            return f"✅ Model {model_name} loaded successfully! (GPU enabled)"
        else:
            return f"✅ Model {model_name} loaded successfully! (CPU mode)"
    else:
        current_model_size = model_size
        return f"✅ Model {model_name} already loaded!"
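

# The @spaces.GPU decorator requests a ZeroGPU slot on Hugging Face Spaces for
# up to `duration` seconds per call; on hardware without ZeroGPU it is a no-op.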
@spaces.GPU(duration=60)
def detect_image(input_image, model_size, conf_threshold=0.25, iou_threshold=0.45):
    """
    Perform object detection on a single image
    """
    if model_size not in models:
        load_model(model_size)

    model = models[model_size]

    if input_image is None:
        return None, "No image provided"

    if isinstance(input_image, Image.Image):
        input_image = np.array(input_image)

    # Gradio and PIL provide RGB arrays, while Ultralytics assumes BGR for raw
    # numpy inputs, so convert before running inference.
    input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

    results = model(input_image, conf=conf_threshold, iou=iou_threshold)
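
    # plot() draws the detections and returns a BGR array; convert to RGB so
    # the Gradio Image component displays the correct colors.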
    annotated_image = cv2.cvtColor(results[0].plot(), cv2.COLOR_BGR2RGB)

    detections = []
    for r in results:
        if r.boxes is not None:
            for box in r.boxes:
                if box.cls is not None:
                    class_id = int(box.cls)
                    class_name = model.names[class_id]
                    confidence = float(box.conf)
                    bbox = box.xyxy[0].tolist()
                    detections.append({
                        'class': class_name,
                        'confidence': f"{confidence:.2%}",
                        'bbox': [int(x) for x in bbox]
                    })

    summary = f"Found {len(detections)} object(s)\n\n"
    if detections:
        class_counts = {}
        for det in detections:
            class_name = det['class']
            if class_name not in class_counts:
                class_counts[class_name] = 0
            class_counts[class_name] += 1

        summary += "Summary by class:\n"
        for class_name, count in class_counts.items():
            summary += f"  • {class_name}: {count}\n"

        summary += "\nDetailed detections:\n"
        for i, det in enumerate(detections, 1):
            summary += f"{i}. {det['class']} ({det['confidence']})\n"

    return annotated_image, summary
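

# The trailing gr.Progress() default parameter lets Gradio inject a live
# progress tracker; it must be declared as a default argument rather than
# instantiated inside the function body.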
@spaces.GPU(duration=120)
def detect_video(input_video, model_size, conf_threshold=0.25, iou_threshold=0.45, max_frames=300, progress=gr.Progress()):
    """
    Perform object detection on video
    """
    if model_size not in models:
        load_model(model_size)

    model = models[model_size]

    if input_video is None:
        return None, "No video provided"

    cap = cv2.VideoCapture(input_video)
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    if fps == 0:
        fps = 25
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
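
    # CAP_PROP_FRAME_COUNT can report 0 (or a negative value) for some streams
    # and containers; fall back to max_frames so the loop below still runs.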
    if total_frames <= 0:
        total_frames = max_frames
    if max_frames and total_frames > max_frames:
        total_frames = max_frames

    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
        output_path = tmp_file.name
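
    # mp4v is widely available in OpenCV builds, but some browsers cannot play
    # it inline; re-encoding to H.264 (e.g. via ffmpeg) is an option if the
    # preview fails to render.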
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    frame_count = 0
    detected_objects = set()

    while cap.isOpened() and frame_count < total_frames:
        ret, frame = cap.read()
        if not ret:
            break

        results = model(frame, conf=conf_threshold, iou=iou_threshold)

        for r in results:
            if r.boxes is not None:
                for box in r.boxes:
                    if box.cls is not None:
                        class_id = int(box.cls)
                        detected_objects.add(model.names[class_id])
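
        # OpenCV frames are already BGR and plot() also returns BGR, which is
        # exactly what cv2.VideoWriter expects, so no color conversion is needed.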
        annotated_frame = results[0].plot()
        out.write(annotated_frame)
        frame_count += 1

        if frame_count % 10 == 0:
            progress(frame_count / total_frames, desc=f"Processing frame {frame_count}/{total_frames}")

    cap.release()
    out.release()

    summary = f"Processed {frame_count} frames\n"
    summary += f"Detected objects: {', '.join(sorted(detected_objects))}" if detected_objects else "No objects detected"

    return output_path, summary
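

# Build the Gradio Blocks UI: an image tab, a video tab, and an About tab.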
def create_interface():
    with gr.Blocks(
        title="YOLO Object Detection",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        #title {
            text-align: center;
            margin-bottom: 1rem;
        }
        """
    ) as demo:
        gr.Markdown(
            """
            <div id="title">

            # 🎯 YOLO Real-Time Object Detection

            <p>Powered by <b>Ultralytics YOLOv8</b> - State-of-the-art object detection in your browser!</p>

            [Duplicate this Space](https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME?duplicate=true)
            [Ultralytics on GitHub](https://github.com/ultralytics/ultralytics)
            [License](https://github.com/ultralytics/ultralytics/blob/main/LICENSE)

            </div>
            """
        )

        with gr.Tabs() as tabs:

            with gr.TabItem("📷 Image Detection", id=0):
                with gr.Row():
                    with gr.Column():
                        image_input = gr.Image(
                            label="Upload Image",
                            type="numpy",
                            elem_id="image_input"
                        )

                        with gr.Row():
                            image_model_size = gr.Dropdown(
                                choices=['nano', 'small', 'medium', 'large', 'xlarge'],
                                value='nano',
                                label="Model Size",
                                info="Larger = more accurate but slower"
                            )

                        with gr.Row():
                            image_conf = gr.Slider(
                                minimum=0.0,
                                maximum=1.0,
                                value=0.25,
                                step=0.05,
                                label="Confidence Threshold",
                                info="Higher = fewer but more confident detections"
                            )
                            image_iou = gr.Slider(
                                minimum=0.0,
                                maximum=1.0,
                                value=0.45,
                                step=0.05,
                                label="IoU Threshold",
                                info="Higher = less overlap between boxes"
                            )

                        image_button = gr.Button("🔍 Detect Objects", variant="primary", size="lg")

                    with gr.Column():
                        image_output = gr.Image(label="Detection Result", elem_id="image_output")
                        image_text_output = gr.Textbox(
                            label="Detection Details",
                            lines=10,
                            max_lines=20
                        )

                with gr.Row():
                    gr.Examples(
                        examples=[
                            ["https://ultralytics.com/images/bus.jpg"],
                            ["https://ultralytics.com/images/zidane.jpg"],
                        ],
                        inputs=image_input,
                        label="Try these examples"
                    )

            with gr.TabItem("🎥 Video Detection", id=1):
                with gr.Row():
                    with gr.Column():
                        video_input = gr.Video(
                            label="Upload Video",
                            elem_id="video_input"
                        )

                        with gr.Row():
                            video_model_size = gr.Dropdown(
                                choices=['nano', 'small', 'medium'],
                                value='nano',
                                label="Model Size",
                                info="Nano recommended for videos"
                            )

                        with gr.Row():
                            video_conf = gr.Slider(
                                minimum=0.0,
                                maximum=1.0,
                                value=0.25,
                                step=0.05,
                                label="Confidence Threshold"
                            )
                            video_iou = gr.Slider(
                                minimum=0.0,
                                maximum=1.0,
                                value=0.45,
                                step=0.05,
                                label="IoU Threshold"
                            )

                        max_frames = gr.Slider(
                            minimum=10,
                            maximum=300,
                            value=100,
                            step=10,
                            label="Max Frames to Process",
                            info="Limit for Spaces resources"
                        )

                        video_button = gr.Button("🎬 Process Video", variant="primary", size="lg")

                    with gr.Column():
                        video_output = gr.Video(
                            label="Processed Video",
                            elem_id="video_output"
                        )
                        video_text_output = gr.Textbox(
                            label="Processing Summary",
                            lines=4
                        )

            with gr.TabItem("ℹ️ About", id=2):
                gr.Markdown(
                    """
                    ## About YOLO (You Only Look Once)

                    YOLO is a state-of-the-art, real-time object detection system. This app uses **YOLOv8** from Ultralytics,
                    the latest evolution building on Joseph Redmon's original YOLO architecture.

                    ### 📊 Model Sizes

                    | Model | Parameters | Speed (CPU) | mAP50-95 | Use Case |
                    |-------|-----------|-------------|----------|----------|
                    | Nano | 3.2M | ~100ms | 37.3 | Real-time, edge devices |
                    | Small | 11.2M | ~200ms | 44.9 | Balanced performance |
                    | Medium | 25.9M | ~400ms | 50.2 | Good accuracy |
                    | Large | 43.7M | ~800ms | 52.9 | High accuracy |
                    | XLarge | 68.2M | ~1600ms | 53.9 | Best accuracy |

                    ### 🎯 Detectable Objects (COCO Dataset)

                    YOLOv8 can detect 80 different object classes including:
                    - **People**: person
                    - **Vehicles**: bicycle, car, motorcycle, airplane, bus, train, truck, boat
                    - **Animals**: bird, cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe
                    - **Sports**: frisbee, skis, snowboard, sports ball, kite, baseball bat, skateboard, surfboard, tennis racket
                    - **Food**: banana, apple, sandwich, orange, broccoli, carrot, hot dog, pizza, donut, cake
                    - **Household**: chair, couch, bed, dining table, toilet, TV, laptop, mouse, keyboard, cell phone, book, clock
                    - And many more!

                    ### 📚 Resources

                    - [Ultralytics YOLOv8 Documentation](https://docs.ultralytics.com/)
                    - [Original YOLO Paper](https://arxiv.org/abs/1506.02640)
                    - [GitHub Repository](https://github.com/ultralytics/ultralytics)

                    ### 🙏 Credits

                    - Original YOLO by Joseph Redmon
                    - YOLOv8 by Ultralytics
                    - Gradio by Hugging Face
                    - Deployed on Hugging Face Spaces

                    ---

                    Made with ❤️ using Gradio and Ultralytics
                    """
                )

        image_button.click(
            fn=detect_image,
            inputs=[image_input, image_model_size, image_conf, image_iou],
            outputs=[image_output, image_text_output]
        )

        video_button.click(
            fn=detect_video,
            inputs=[video_input, video_model_size, video_conf, video_iou, max_frames],
            outputs=[video_output, video_text_output]
        )
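
        # Preload the nano model when the page loads so the first detection
        # request does not pay the model download/initialization cost.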
        demo.load(
            fn=lambda: load_model('nano'),
            inputs=None,
            outputs=None
        )

    return demo
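

# queue() enables Gradio's request queue, which progress tracking requires;
# 0.0.0.0:7860 is the standard host/port for a Hugging Face Space.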
if __name__ == "__main__":
    demo = create_interface()
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )