jfeldm02 committed
Commit 078038e · verified · 1 Parent(s): b05daaa

Upload app.py

Files changed (1): app.py (+417 -0)
app.py ADDED
@@ -0,0 +1,417 @@
#!/usr/bin/env python3
"""
YOLO Object Detection with Gradio Interface
Optimized for Hugging Face Spaces deployment
"""

import gradio as gr
import cv2
import numpy as np
from ultralytics import YOLO
from PIL import Image
import torch
import spaces
import tempfile

# Global variables for model state
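# `models` caches one YOLO instance per size, so switching sizes in the UI
# never reloads weights that are already in memory.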
models = {}
current_model_size = 'nano'

def load_model(model_size='nano'):
    """
    Load YOLO model based on selected size
    """
    global models, current_model_size

    model_names = {
        'nano': 'yolov8n.pt',
        'small': 'yolov8s.pt',
        'medium': 'yolov8m.pt',
        'large': 'yolov8l.pt',
        'xlarge': 'yolov8x.pt'
    }

    model_name = model_names.get(model_size, 'yolov8n.pt')

    # Check if model already loaded
    if model_size not in models:
        print(f"Loading {model_name}...")
        models[model_size] = YOLO(model_name)
        current_model_size = model_size

        # Check if CUDA is available
        if torch.cuda.is_available():
            return f"✅ Model {model_name} loaded successfully! (GPU enabled)"
        else:
            return f"✅ Model {model_name} loaded successfully! (CPU mode)"
    else:
        current_model_size = model_size
        return f"✅ Model {model_name} already loaded!"

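# Weights for a given size are fetched from the Ultralytics release assets on
# first use and cached on disk, so only the first load per size is slow
# (assuming the Space has network access at runtime).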
# Use @spaces.GPU decorator for GPU functions on Hugging Face Spaces
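# On ZeroGPU Spaces, `duration` caps how many seconds a single call may hold
# the GPU; run anywhere else, the decorator is effectively a no-op (assuming
# the `spaces` package is installed).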
@spaces.GPU(duration=60)
def detect_image(input_image, model_size, conf_threshold=0.25, iou_threshold=0.45):
    """
    Perform object detection on a single image
    """
    if input_image is None:
        return None, "No image provided"

    if model_size not in models:
        load_model(model_size)

    model = models[model_size]

    # Convert PIL Image to numpy array if necessary
    if isinstance(input_image, Image.Image):
        input_image = np.array(input_image.convert("RGB"))

    # Gradio supplies RGB arrays, while Ultralytics expects BGR for raw
    # numpy input, so swap channels before inference
    if input_image.ndim == 3 and input_image.shape[2] == 3:
        input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

    # Run inference
    results = model(input_image, conf=conf_threshold, iou=iou_threshold)

    # Get annotated image; plot() returns BGR, so convert back to RGB
    # for the Gradio image component
    annotated_image = cv2.cvtColor(results[0].plot(), cv2.COLOR_BGR2RGB)

    # Get detection details
    detections = []
    for r in results:
        if r.boxes is not None:
            for box in r.boxes:
                if box.cls is not None:
                    class_id = int(box.cls)
                    class_name = model.names[class_id]
                    confidence = float(box.conf)
                    bbox = box.xyxy[0].tolist()
                    detections.append({
                        'class': class_name,
                        'confidence': f"{confidence:.2%}",
                        'bbox': [int(x) for x in bbox]
                    })

    # Create detection summary
    summary = f"Found {len(detections)} object(s)\n\n"
    if detections:
        # Count occurrences of each class
        class_counts = {}
        for det in detections:
            class_counts[det['class']] = class_counts.get(det['class'], 0) + 1

        summary += "Summary by class:\n"
        for class_name, count in class_counts.items():
            summary += f"  • {class_name}: {count}\n"

        summary += "\nDetailed detections:\n"
        for i, det in enumerate(detections, 1):
            summary += f"{i}. {det['class']} ({det['confidence']})\n"

    return annotated_image, summary

@spaces.GPU(duration=120)
def detect_video(input_video, model_size, conf_threshold=0.25, iou_threshold=0.45,
                 max_frames=300, progress=gr.Progress()):
    """
    Perform object detection on video. The `progress` default argument lets
    Gradio inject a live progress tracker into each call.
    """
    if input_video is None:
        return None, "No video provided"

    if model_size not in models:
        load_model(model_size)

    model = models[model_size]

    # Open video
    cap = cv2.VideoCapture(input_video)
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    if fps == 0:
        fps = 25  # Default fallback FPS
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames <= 0:
        total_frames = max_frames  # Frame-count metadata can be missing

    # Limit processing for Spaces
    if max_frames and total_frames > max_frames:
        total_frames = max_frames

    # Create temporary output file
    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
        output_path = tmp_file.name
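    # NOTE: the file is created with delete=False and is never removed here;
    # Gradio serves the processed video from this path. A long-lived
    # deployment may want to sweep stale outputs from tempfile.gettempdir().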

    # Setup video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
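    # 'mp4v' works with OpenCV everywhere, but some browsers will not play
    # MPEG-4 Part 2 inline; if playback fails in the Gradio player, re-encoding
    # to H.264 (e.g. the 'avc1' fourcc, where available) is a common workaround.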

    frame_count = 0
    detected_objects = set()

    # Process video; `progress` is the tracker Gradio injects for the
    # default argument in the signature
    while cap.isOpened() and frame_count < total_frames:
        ret, frame = cap.read()
        if not ret:
            break

        # Run detection (OpenCV frames are already BGR, which is what
        # Ultralytics expects for raw numpy input)
        results = model(frame, conf=conf_threshold, iou=iou_threshold)

        # Collect detected classes
        for r in results:
            if r.boxes is not None:
                for box in r.boxes:
                    if box.cls is not None:
                        class_id = int(box.cls)
                        detected_objects.add(model.names[class_id])

        # Write the annotated frame (plot() returns BGR, matching the writer)
        annotated_frame = results[0].plot()
        out.write(annotated_frame)
        frame_count += 1

        # Update progress every 10 frames
        if frame_count % 10 == 0:
            progress(frame_count / total_frames,
                     desc=f"Processing frame {frame_count}/{total_frames}")

    # Clean up
    cap.release()
    out.release()

    # Create summary
    summary = f"Processed {frame_count} frames\n"
    if detected_objects:
        summary += f"Detected objects: {', '.join(sorted(detected_objects))}"
    else:
        summary += "No objects detected"

    return output_path, summary

# Create Gradio interface
def create_interface():
    with gr.Blocks(
        title="YOLO Object Detection",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        #title {
            text-align: center;
            margin-bottom: 1rem;
        }
        """
    ) as demo:
        gr.Markdown(
            """
            <div id="title">

            # 🎯 YOLO Real-Time Object Detection

            <p>Powered by <b>Ultralytics YOLOv8</b> - State-of-the-art object detection in your browser!</p>

            [![Duplicate Space](https://img.shields.io/badge/🤗%20Hugging%20Face-Duplicate%20Space-blue)](https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME?duplicate=true)
            [![Model](https://img.shields.io/badge/Model-YOLOv8-green)](https://github.com/ultralytics/ultralytics)
            [![License](https://img.shields.io/badge/License-AGPL--3.0-red)](https://github.com/ultralytics/ultralytics/blob/main/LICENSE)

            </div>
            """
        )
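        # The Duplicate Space badge above links to placeholder
        # YOUR_USERNAME/YOUR_SPACE_NAME values, left for whoever deploys a
        # copy to fill in with the real Space path.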

        # Main tabs
        with gr.Tabs() as tabs:
            # Image detection tab
            with gr.TabItem("📷 Image Detection", id=0):
                with gr.Row():
                    with gr.Column():
                        image_input = gr.Image(
                            label="Upload Image",
                            type="numpy",
                            elem_id="image_input"
                        )

                        with gr.Row():
                            image_model_size = gr.Dropdown(
                                choices=['nano', 'small', 'medium', 'large', 'xlarge'],
                                value='nano',
                                label="Model Size",
                                info="Larger = more accurate but slower"
                            )

                        with gr.Row():
                            image_conf = gr.Slider(
                                minimum=0.0,
                                maximum=1.0,
                                value=0.25,
                                step=0.05,
                                label="Confidence Threshold",
                                info="Higher = fewer but more confident detections"
                            )
                            image_iou = gr.Slider(
                                minimum=0.0,
                                maximum=1.0,
                                value=0.45,
                                step=0.05,
                                label="IoU Threshold",
                                info="NMS cutoff: higher values keep more overlapping boxes"
                            )

                        image_button = gr.Button("🔍 Detect Objects", variant="primary", size="lg")

                    with gr.Column():
                        image_output = gr.Image(label="Detection Result", elem_id="image_output")
                        image_text_output = gr.Textbox(
                            label="Detection Details",
                            lines=10,
                            max_lines=20
                        )

                # Example images
                with gr.Row():
                    gr.Examples(
                        examples=[
                            ["https://ultralytics.com/images/bus.jpg"],
                            ["https://ultralytics.com/images/zidane.jpg"],
                        ],
                        inputs=image_input,
                        label="Try these examples"
                    )

            # Video detection tab
            with gr.TabItem("🎥 Video Detection", id=1):
                with gr.Row():
                    with gr.Column():
                        video_input = gr.Video(
                            label="Upload Video",
                            elem_id="video_input"
                        )

                        with gr.Row():
                            video_model_size = gr.Dropdown(
                                choices=['nano', 'small', 'medium'],
                                value='nano',
                                label="Model Size",
                                info="Nano recommended for videos"
                            )

                        with gr.Row():
                            video_conf = gr.Slider(
                                minimum=0.0,
                                maximum=1.0,
                                value=0.25,
                                step=0.05,
                                label="Confidence Threshold"
                            )
                            video_iou = gr.Slider(
                                minimum=0.0,
                                maximum=1.0,
                                value=0.45,
                                step=0.05,
                                label="IoU Threshold"
                            )

                        max_frames = gr.Slider(
                            minimum=10,
                            maximum=300,
                            value=100,
                            step=10,
                            label="Max Frames to Process",
                            info="Limit for Spaces resources"
                        )

                        video_button = gr.Button("🎬 Process Video", variant="primary", size="lg")

                    with gr.Column():
                        video_output = gr.Video(
                            label="Processed Video",
                            elem_id="video_output"
                        )
                        video_text_output = gr.Textbox(
                            label="Processing Summary",
                            lines=4
                        )

            # About tab
            with gr.TabItem("ℹ️ About", id=2):
                gr.Markdown(
                    """
                    ## About YOLO (You Only Look Once)

                    YOLO is a state-of-the-art, real-time object detection system. This app uses **YOLOv8** from Ultralytics,
                    a modern evolution of Joseph Redmon's original YOLO architecture.

                    ### 🚀 Model Sizes

                    | Model | Parameters | Speed (CPU) | mAP50-95 (COCO) | Use Case |
                    |-------|-----------|-------------|-----------------|----------|
                    | Nano | 3.2M | ~100ms | 37.3 | Real-time, edge devices |
                    | Small | 11.2M | ~200ms | 44.9 | Balanced performance |
                    | Medium | 25.9M | ~400ms | 50.2 | Good accuracy |
                    | Large | 43.7M | ~800ms | 52.9 | High accuracy |
                    | XLarge | 68.2M | ~1600ms | 53.9 | Best accuracy |

                    ### 🎯 Detectable Objects (COCO Dataset)

                    YOLOv8 detects 80 object classes, including:
                    - **People**: person
                    - **Vehicles**: bicycle, car, motorcycle, airplane, bus, train, truck, boat
                    - **Animals**: bird, cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe
                    - **Sports**: frisbee, skis, snowboard, sports ball, kite, baseball bat, skateboard, surfboard, tennis racket
                    - **Food**: banana, apple, sandwich, orange, broccoli, carrot, hot dog, pizza, donut, cake
                    - **Household**: chair, couch, bed, dining table, toilet, TV, laptop, mouse, keyboard, cell phone, book, clock
                    - And many more!

                    ### 📖 Resources

                    - [Ultralytics YOLOv8 Documentation](https://docs.ultralytics.com/)
                    - [Original YOLO Paper](https://arxiv.org/abs/1506.02640)
                    - [GitHub Repository](https://github.com/ultralytics/ultralytics)

                    ### 🤝 Credits

                    - Original YOLO by Joseph Redmon
                    - YOLOv8 by Ultralytics
                    - Gradio by Hugging Face
                    - Deployed on Hugging Face Spaces

                    ---

                    Made with ❤️ using Gradio and Ultralytics
                    """
                )

        # Event handlers
        image_button.click(
            fn=detect_image,
            inputs=[image_input, image_model_size, image_conf, image_iou],
            outputs=[image_output, image_text_output]
        )

        video_button.click(
            fn=detect_video,
            inputs=[video_input, video_model_size, video_conf, video_iou, max_frames],
            outputs=[video_output, video_text_output]
        )

        # Load initial model on startup
        demo.load(
            fn=lambda: load_model('nano'),
            inputs=None,
            outputs=None
        )

    return demo

# Main execution
if __name__ == "__main__":
    # Create and launch interface
    demo = create_interface()
    demo.queue()  # Enable queue for better performance under concurrent use
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,  # The port Hugging Face Spaces serves
        show_error=True
    )
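
# ---------------------------------------------------------------------------
# A minimal local smoke test (a sketch, not part of the Space itself). It
# assumes this file is importable as `app`, that a test image bus.jpg sits
# next to it, and that it runs outside Spaces, where the spaces.GPU decorator
# is effectively a no-op:
#
#   from PIL import Image
#   from app import detect_image
#
#   annotated, summary = detect_image(Image.open("bus.jpg"), "nano")
#   print(summary)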