RakanAlsheraiwi committed (verified)
Commit a0b4f1b · Parent(s): 9b1ee29

Update app.py

Files changed (1):
  1. app.py +31 -39
app.py CHANGED
@@ -4,45 +4,37 @@ from PIL import Image, ImageDraw
 import gradio as gr
 import numpy as np
 import pandas as pd
-from transformers import pipeline
 
 # Load the YOLOv5 model
-# Use a local clone of YOLOv5
-yolo_repo = 'ultralytics/yolov5'
-model = torch.hub.load(yolo_repo, 'yolov5s', source='github')
-
-# Load the translation model
-translator = pipeline("translation_en_to_ar", model="Helsinki-NLP/opus-mt-en-ar")
+model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # Load the small YOLOv5 model
 
 # Define a function to detect objects and draw bounding boxes for images
 def detect_and_draw_image(input_image):
     results = model(input_image)
-    detections = results.xyxy[0].numpy()
+    detections = results.xyxy[0].numpy()  # Get detections
 
     draw = ImageDraw.Draw(input_image)
 
     counts = {}
     for detection in detections:
         xmin, ymin, xmax, ymax, conf, class_id = detection
 
         # Update counts for each label
         label = model.names[int(class_id)]
         counts[label] = counts.get(label, 0) + 1
 
         # Draw the bounding box
         draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=2)
+        # Draw the label and score
         draw.text((xmin, ymin), f"{label}: {conf:.2f}", fill="white")
 
-    # Translate counts to Arabic
-    translated_counts = translator(list(counts.keys()))
-
+    # Create DataFrame
     df = pd.DataFrame({
-        'label (English)': list(counts.keys()),
-        'label (Arabic)': [t['translation_text'] for t in translated_counts],
+        'label': list(counts.keys()),
         'counts': list(counts.values())
     })
 
-    return input_image, df
+    return input_image, df  # Return modified image and DataFrame
 
 # Define a function to detect objects and draw bounding boxes for videos
 def detect_and_draw_video(video_path):
@@ -50,57 +42,57 @@ def detect_and_draw_video(video_path):
     frames = []
     frame_shape = None
     overall_counts = {}
-    detected_objects = set()  # Set to keep track of unique detections
 
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
 
-        frame = cv2.resize(frame, (640, 480))
+        # Resize frame for faster processing
+        frame = cv2.resize(frame, (640, 480))  # Resize to 640x480
 
+        # Perform detection
         results = model(frame)
-        detections = results.xyxy[0].numpy()
+        detections = results.xyxy[0].numpy()  # Get detections
 
         for detection in detections:
             xmin, ymin, xmax, ymax, conf, class_id = detection
 
-            # Create a unique identifier for the object based on its bounding box
-            identifier = (model.names[int(class_id)], int((xmin + xmax) / 2), int((ymin + ymax) / 2))
-
-            # Count the object only if it hasn't been detected before
-            if identifier not in detected_objects:
-                detected_objects.add(identifier)
-                label = model.names[int(class_id)]
-                overall_counts[label] = overall_counts.get(label, 0) + 1
-
+            # Update counts for each label
+            label = model.names[int(class_id)]
+            overall_counts[label] = overall_counts.get(label, 0) + 1
+
+            # Draw the bounding box
             cv2.rectangle(frame, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 0, 0), 2)
-            cv2.putText(frame, f"{model.names[int(class_id)]}: {conf:.2f}", (int(xmin), int(ymin) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), 2)
+            # Draw the label and score
+            cv2.putText(frame, f"{label}: {conf:.2f}", (int(xmin), int(ymin) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
 
         frames.append(frame)
 
-    cap.release()
-
-    if frame_shape is None:
-        return None, None
+        # Store the shape of the first valid frame
+        if frame_shape is None:
+            frame_shape = frame.shape[1], frame.shape[0]
+
+    cap.release()
+
+    if frame_shape is None:  # Check if any frames were processed
+        return None, None  # Handle no frames case gracefully
 
+    # Create a temporary output video file
     output_path = 'output.mp4'
-    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), 20.0, (640, 480))
+    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), 20.0, frame_shape)
 
     for frame in frames:
         out.write(frame)
     out.release()
 
-    # Translate counts to Arabic
-    translated_counts = translator(list(overall_counts.keys()))
-
+    # Create DataFrame for video results
     df = pd.DataFrame({
-        'label (English)': list(overall_counts.keys()),
-        'label (Arabic)': [t['translation_text'] for t in translated_counts],
+        'label': list(overall_counts.keys()),
         'counts': list(overall_counts.values())
     })
 
-    return output_path, df
+    return output_path, df  # Return path to the output video and DataFrame
 
 # Create separate interfaces for images and videos
 image_interface = gr.Interface(
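
The diff is truncated at the image_interface = gr.Interface( line, so the exact interface arguments are not shown. Below is a minimal sketch of how the two detection functions could be wired into Gradio and served; the component choices, tab names, and the gr.TabbedInterface wrapper are illustrative assumptions, not necessarily what app.py actually does:

    import gradio as gr

    # Hypothetical wiring; app.py's real gr.Interface arguments are cut off above.
    image_interface = gr.Interface(
        fn=detect_and_draw_image,          # returns (annotated PIL image, DataFrame)
        inputs=gr.Image(type="pil"),       # PIL input matches the ImageDraw usage
        outputs=[gr.Image(type="pil"), gr.Dataframe()],
    )

    video_interface = gr.Interface(
        fn=detect_and_draw_video,          # returns (output video path, DataFrame)
        inputs=gr.Video(),                 # Gradio hands cv2.VideoCapture a file path
        outputs=[gr.Video(), gr.Dataframe()],
    )

    # One way to serve both: tab the interfaces together and launch.
    gr.TabbedInterface(
        [image_interface, video_interface],
        tab_names=["Image", "Video"],
    ).launch()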
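
For reference, both functions rely on YOLOv5's documented detection format: results.xyxy[0] is an (N, 6) array with one [xmin, ymin, xmax, ymax, confidence, class_id] row per detected object, and model.names maps class ids to labels. A standalone sketch (the sample image URL is only an illustration):

    import torch

    # Same model load as in the commit above.
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

    # YOLOv5 accepts a file path, URL, PIL image, or numpy array as input.
    results = model('https://ultralytics.com/images/zidane.jpg')

    # One [xmin, ymin, xmax, ymax, confidence, class_id] row per detection.
    for xmin, ymin, xmax, ymax, conf, class_id in results.xyxy[0].numpy():
        print(f"{model.names[int(class_id)]}: {conf:.2f} at ({xmin:.0f}, {ymin:.0f})")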