jfeldm02 committed
Commit 078038e · verified · 1 Parent(s): b05daaa

Upload app.py

Files changed (1): app.py (+417 -0)
app.py ADDED
@@ -0,0 +1,417 @@
#!/usr/bin/env python3
"""
YOLO Object Detection with Gradio Interface
Optimized for Hugging Face Spaces deployment
"""

import gradio as gr
import cv2
import numpy as np
from ultralytics import YOLO
from PIL import Image
import torch
import spaces
import tempfile

# Global variables for model state
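# `models` caches one YOLO instance per size, so switching sizes in the UI
# never reloads weights that are already in memory.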
models = {}
current_model_size = 'nano'

def load_model(model_size='nano'):
    """
    Load YOLO model based on selected size
    """
    global models, current_model_size

    model_names = {
        'nano': 'yolov8n.pt',
        'small': 'yolov8s.pt',
        'medium': 'yolov8m.pt',
        'large': 'yolov8l.pt',
        'xlarge': 'yolov8x.pt'
    }

    model_name = model_names.get(model_size, 'yolov8n.pt')

    # Check if model already loaded
    if model_size not in models:
        print(f"Loading {model_name}...")
        models[model_size] = YOLO(model_name)
        current_model_size = model_size

        # Check if CUDA is available
        if torch.cuda.is_available():
            return f"✅ Model {model_name} loaded successfully! (GPU enabled)"
        else:
            return f"✅ Model {model_name} loaded successfully! (CPU mode)"
    else:
        current_model_size = model_size
        return f"✅ Model {model_name} already loaded!"

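# Weights for a given size are fetched from the Ultralytics release assets on
# first use and cached on disk, so only the first load per size is slow
# (assuming the Space has network access at runtime).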
# Use @spaces.GPU decorator for GPU functions on Hugging Face Spaces
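# On ZeroGPU Spaces, `duration` caps how many seconds a single call may hold
# the GPU; run anywhere else, the decorator is effectively a no-op (assuming
# the `spaces` package is installed).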
@spaces.GPU(duration=60)
def detect_image(input_image, model_size, conf_threshold=0.25, iou_threshold=0.45):
    """
    Perform object detection on a single image
    """
    if input_image is None:
        return None, "No image provided"

    if model_size not in models:
        load_model(model_size)

    model = models[model_size]

    # Convert PIL Image to numpy array if necessary
    if isinstance(input_image, Image.Image):
        input_image = np.array(input_image.convert("RGB"))

    # Gradio supplies RGB arrays, while Ultralytics expects BGR for raw
    # numpy input, so swap channels before inference
    if input_image.ndim == 3 and input_image.shape[2] == 3:
        input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

    # Run inference
    results = model(input_image, conf=conf_threshold, iou=iou_threshold)

    # Get annotated image; plot() returns BGR, so convert back to RGB
    # for the Gradio image component
    annotated_image = cv2.cvtColor(results[0].plot(), cv2.COLOR_BGR2RGB)

    # Get detection details
    detections = []
    for r in results:
        if r.boxes is not None:
            for box in r.boxes:
                if box.cls is not None:
                    class_id = int(box.cls)
                    class_name = model.names[class_id]
                    confidence = float(box.conf)
                    bbox = box.xyxy[0].tolist()
                    detections.append({
                        'class': class_name,
                        'confidence': f"{confidence:.2%}",
                        'bbox': [int(x) for x in bbox]
                    })

    # Create detection summary
    summary = f"Found {len(detections)} object(s)\n\n"
    if detections:
        # Count occurrences of each class
        class_counts = {}
        for det in detections:
            class_counts[det['class']] = class_counts.get(det['class'], 0) + 1

        summary += "Summary by class:\n"
        for class_name, count in class_counts.items():
            summary += f"  • {class_name}: {count}\n"

        summary += "\nDetailed detections:\n"
        for i, det in enumerate(detections, 1):
            summary += f"{i}. {det['class']} ({det['confidence']})\n"

    return annotated_image, summary

@spaces.GPU(duration=120)
def detect_video(input_video, model_size, conf_threshold=0.25, iou_threshold=0.45,
                 max_frames=300, progress=gr.Progress()):
    """
    Perform object detection on video. The `progress` default argument lets
    Gradio inject a live progress tracker into each call.
    """
    if input_video is None:
        return None, "No video provided"

    if model_size not in models:
        load_model(model_size)

    model = models[model_size]

    # Open video
    cap = cv2.VideoCapture(input_video)
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    if fps == 0:
        fps = 25  # Default fallback FPS
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames <= 0:
        total_frames = max_frames  # Frame-count metadata can be missing

    # Limit processing for Spaces
    if max_frames and total_frames > max_frames:
        total_frames = max_frames

    # Create temporary output file
    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
        output_path = tmp_file.name
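    # NOTE: the file is created with delete=False and is never removed here;
    # Gradio serves the processed video from this path. A long-lived
    # deployment may want to sweep stale outputs from tempfile.gettempdir().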

    # Setup video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
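    # 'mp4v' works with OpenCV everywhere, but some browsers will not play
    # MPEG-4 Part 2 inline; if playback fails in the Gradio player, re-encoding
    # to H.264 (e.g. the 'avc1' fourcc, where available) is a common workaround.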

    frame_count = 0
    detected_objects = set()

    # Process video; `progress` is the tracker Gradio injects for the
    # default argument in the signature
    while cap.isOpened() and frame_count < total_frames:
        ret, frame = cap.read()
        if not ret:
            break

        # Run detection (OpenCV frames are already BGR, which is what
        # Ultralytics expects for raw numpy input)
        results = model(frame, conf=conf_threshold, iou=iou_threshold)

        # Collect detected classes
        for r in results:
            if r.boxes is not None:
                for box in r.boxes:
                    if box.cls is not None:
                        class_id = int(box.cls)
                        detected_objects.add(model.names[class_id])

        # Write the annotated frame (plot() returns BGR, matching the writer)
        annotated_frame = results[0].plot()
        out.write(annotated_frame)
        frame_count += 1

        # Update progress every 10 frames
        if frame_count % 10 == 0:
            progress(frame_count / total_frames,
                     desc=f"Processing frame {frame_count}/{total_frames}")

    # Clean up
    cap.release()
    out.release()

    # Create summary
    summary = f"Processed {frame_count} frames\n"
    if detected_objects:
        summary += f"Detected objects: {', '.join(sorted(detected_objects))}"
    else:
        summary += "No objects detected"

    return output_path, summary

# Create Gradio interface
def create_interface():
    with gr.Blocks(
        title="YOLO Object Detection",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        #title {
            text-align: center;
            margin-bottom: 1rem;
        }
        """
    ) as demo:
        gr.Markdown(
            """
            <div id="title">

            # 🎯 YOLO Real-Time Object Detection

            <p>Powered by <b>Ultralytics YOLOv8</b> - State-of-the-art object detection in your browser!</p>

            [![Duplicate Space](https://img.shields.io/badge/🤗%20Hugging%20Face-Duplicate%20Space-blue)](https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME?duplicate=true)
            [![Model](https://img.shields.io/badge/Model-YOLOv8-green)](https://github.com/ultralytics/ultralytics)
            [![License](https://img.shields.io/badge/License-AGPL--3.0-red)](https://github.com/ultralytics/ultralytics/blob/main/LICENSE)

            </div>
            """
        )
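        # The Duplicate Space badge above links to placeholder
        # YOUR_USERNAME/YOUR_SPACE_NAME values, left for whoever deploys a
        # copy to fill in with the real Space path.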

        # Main tabs
        with gr.Tabs() as tabs:
            # Image detection tab
            with gr.TabItem("📷 Image Detection", id=0):
                with gr.Row():
                    with gr.Column():
                        image_input = gr.Image(
                            label="Upload Image",
                            type="numpy",
                            elem_id="image_input"
                        )

                        with gr.Row():
                            image_model_size = gr.Dropdown(
                                choices=['nano', 'small', 'medium', 'large', 'xlarge'],
                                value='nano',
                                label="Model Size",
                                info="Larger = more accurate but slower"
                            )

                        with gr.Row():
                            image_conf = gr.Slider(
                                minimum=0.0,
                                maximum=1.0,
                                value=0.25,
                                step=0.05,
                                label="Confidence Threshold",
                                info="Higher = fewer but more confident detections"
                            )
                            image_iou = gr.Slider(
                                minimum=0.0,
                                maximum=1.0,
                                value=0.45,
                                step=0.05,
                                label="IoU Threshold",
                                info="NMS cutoff: higher values keep more overlapping boxes"
                            )

                        image_button = gr.Button("🔍 Detect Objects", variant="primary", size="lg")

                    with gr.Column():
                        image_output = gr.Image(label="Detection Result", elem_id="image_output")
                        image_text_output = gr.Textbox(
                            label="Detection Details",
                            lines=10,
                            max_lines=20
                        )

                # Example images
                with gr.Row():
                    gr.Examples(
                        examples=[
                            ["https://ultralytics.com/images/bus.jpg"],
                            ["https://ultralytics.com/images/zidane.jpg"],
                        ],
                        inputs=image_input,
                        label="Try these examples"
                    )

            # Video detection tab
            with gr.TabItem("🎥 Video Detection", id=1):
                with gr.Row():
                    with gr.Column():
                        video_input = gr.Video(
                            label="Upload Video",
                            elem_id="video_input"
                        )

                        with gr.Row():
                            video_model_size = gr.Dropdown(
                                choices=['nano', 'small', 'medium'],
                                value='nano',
                                label="Model Size",
                                info="Nano recommended for videos"
                            )

                        with gr.Row():
                            video_conf = gr.Slider(
                                minimum=0.0,
                                maximum=1.0,
                                value=0.25,
                                step=0.05,
                                label="Confidence Threshold"
                            )
                            video_iou = gr.Slider(
                                minimum=0.0,
                                maximum=1.0,
                                value=0.45,
                                step=0.05,
                                label="IoU Threshold"
                            )

                        max_frames = gr.Slider(
                            minimum=10,
                            maximum=300,
                            value=100,
                            step=10,
                            label="Max Frames to Process",
                            info="Limit for Spaces resources"
                        )

                        video_button = gr.Button("🎬 Process Video", variant="primary", size="lg")

                    with gr.Column():
                        video_output = gr.Video(
                            label="Processed Video",
                            elem_id="video_output"
                        )
                        video_text_output = gr.Textbox(
                            label="Processing Summary",
                            lines=4
                        )

            # About tab
            with gr.TabItem("ℹ️ About", id=2):
                gr.Markdown(
                    """
                    ## About YOLO (You Only Look Once)

                    YOLO is a state-of-the-art, real-time object detection system. This app uses **YOLOv8** from Ultralytics,
                    a modern evolution of Joseph Redmon's original YOLO architecture.

                    ### 🚀 Model Sizes

                    | Model | Parameters | Speed (CPU) | mAP50-95 (COCO) | Use Case |
                    |-------|-----------|-------------|-----------------|----------|
                    | Nano | 3.2M | ~100ms | 37.3 | Real-time, edge devices |
                    | Small | 11.2M | ~200ms | 44.9 | Balanced performance |
                    | Medium | 25.9M | ~400ms | 50.2 | Good accuracy |
                    | Large | 43.7M | ~800ms | 52.9 | High accuracy |
                    | XLarge | 68.2M | ~1600ms | 53.9 | Best accuracy |

                    ### 🎯 Detectable Objects (COCO Dataset)

                    YOLOv8 detects 80 object classes, including:
                    - **People**: person
                    - **Vehicles**: bicycle, car, motorcycle, airplane, bus, train, truck, boat
                    - **Animals**: bird, cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe
                    - **Sports**: frisbee, skis, snowboard, sports ball, kite, baseball bat, skateboard, surfboard, tennis racket
                    - **Food**: banana, apple, sandwich, orange, broccoli, carrot, hot dog, pizza, donut, cake
                    - **Household**: chair, couch, bed, dining table, toilet, TV, laptop, mouse, keyboard, cell phone, book, clock
                    - And many more!

                    ### 📖 Resources

                    - [Ultralytics YOLOv8 Documentation](https://docs.ultralytics.com/)
                    - [Original YOLO Paper](https://arxiv.org/abs/1506.02640)
                    - [GitHub Repository](https://github.com/ultralytics/ultralytics)

                    ### 🤝 Credits

                    - Original YOLO by Joseph Redmon
                    - YOLOv8 by Ultralytics
                    - Gradio by Hugging Face
                    - Deployed on Hugging Face Spaces

                    ---

                    Made with ❤️ using Gradio and Ultralytics
                    """
                )

        # Event handlers
        image_button.click(
            fn=detect_image,
            inputs=[image_input, image_model_size, image_conf, image_iou],
            outputs=[image_output, image_text_output]
        )

        video_button.click(
            fn=detect_video,
            inputs=[video_input, video_model_size, video_conf, video_iou, max_frames],
            outputs=[video_output, video_text_output]
        )

        # Load initial model on startup
        demo.load(
            fn=lambda: load_model('nano'),
            inputs=None,
            outputs=None
        )

    return demo

# Main execution
if __name__ == "__main__":
    # Create and launch interface
    demo = create_interface()
    demo.queue()  # Enable queue for better performance under concurrent use
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,  # The port Hugging Face Spaces serves
        show_error=True
    )
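
# ---------------------------------------------------------------------------
# A minimal local smoke test (a sketch, not part of the Space itself). It
# assumes this file is importable as `app`, that a test image bus.jpg sits
# next to it, and that it runs outside Spaces, where the spaces.GPU decorator
# is effectively a no-op:
#
#   from PIL import Image
#   from app import detect_image
#
#   annotated, summary = detect_image(Image.open("bus.jpg"), "nano")
#   print(summary)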