RakanAlsheraiwi committed (verified)
Commit a0b4f1b · Parent(s): 9b1ee29

Update app.py

Files changed (1):
  1. app.py +31 -39
app.py CHANGED
@@ -4,45 +4,37 @@ from PIL import Image, ImageDraw
 import gradio as gr
 import numpy as np
 import pandas as pd
-from transformers import pipeline
 
 # Load the YOLOv5 model
-# Use a local clone of YOLOv5
-yolo_repo = 'ultralytics/yolov5'
-model = torch.hub.load(yolo_repo, 'yolov5s', source='github')
-
-# Load the translation model
-translator = pipeline("translation_en_to_ar", model="Helsinki-NLP/opus-mt-en-ar")
+model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # Load the small YOLOv5 model
 
 # Define a function to detect objects and draw bounding boxes for images
 def detect_and_draw_image(input_image):
     results = model(input_image)
-    detections = results.xyxy[0].numpy()
+    detections = results.xyxy[0].numpy()  # Get detections
 
     draw = ImageDraw.Draw(input_image)
 
     counts = {}
     for detection in detections:
         xmin, ymin, xmax, ymax, conf, class_id = detection
 
         # Update counts for each label
         label = model.names[int(class_id)]
         counts[label] = counts.get(label, 0) + 1
 
         # Draw the bounding box
         draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=2)
+        # Draw the label and score
         draw.text((xmin, ymin), f"{label}: {conf:.2f}", fill="white")
 
-    # Translate counts to Arabic
-    translated_counts = translator(list(counts.keys()))
-
+    # Create DataFrame
     df = pd.DataFrame({
-        'label (English)': list(counts.keys()),
-        'label (Arabic)': [t['translation_text'] for t in translated_counts],
+        'label': list(counts.keys()),
         'counts': list(counts.values())
     })
 
-    return input_image, df
+    return input_image, df  # Return modified image and DataFrame
 
 # Define a function to detect objects and draw bounding boxes for videos
 def detect_and_draw_video(video_path):
@@ -50,57 +42,57 @@ def detect_and_draw_video(video_path):
     frames = []
     frame_shape = None
     overall_counts = {}
-    detected_objects = set()  # Set to keep track of unique detections
 
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
 
-        frame = cv2.resize(frame, (640, 480))
+        # Resize frame for faster processing
+        frame = cv2.resize(frame, (640, 480))  # Resize to 640x480
 
+        # Perform detection
         results = model(frame)
-        detections = results.xyxy[0].numpy()
+        detections = results.xyxy[0].numpy()  # Get detections
 
         for detection in detections:
             xmin, ymin, xmax, ymax, conf, class_id = detection
 
-            # Create a unique identifier for the object based on its bounding box
-            identifier = (model.names[int(class_id)], int((xmin + xmax) / 2), int((ymin + ymax) / 2))
-
-            # Count the object only if it hasn't been detected before
-            if identifier not in detected_objects:
-                detected_objects.add(identifier)
-                label = model.names[int(class_id)]
-                overall_counts[label] = overall_counts.get(label, 0) + 1
-
+            # Update counts for each label
+            label = model.names[int(class_id)]
+            overall_counts[label] = overall_counts.get(label, 0) + 1
+
+            # Draw the bounding box
             cv2.rectangle(frame, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 0, 0), 2)
-            cv2.putText(frame, f"{model.names[int(class_id)]}: {conf:.2f}", (int(xmin), int(ymin) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), 2)
+            # Draw the label and score
+            cv2.putText(frame, f"{label}: {conf:.2f}", (int(xmin), int(ymin) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
 
         frames.append(frame)
 
-    cap.release()
-
-    if frame_shape is None:
-        return None, None
+        # Store the shape of the first valid frame
+        if frame_shape is None:
+            frame_shape = frame.shape[1], frame.shape[0]
+
+    cap.release()
+
+    if frame_shape is None:  # Check if any frames were processed
+        return None, None  # Handle no frames case gracefully
 
+    # Create a temporary output video file
     output_path = 'output.mp4'
-    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), 20.0, (640, 480))
+    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), 20.0, frame_shape)
 
     for frame in frames:
         out.write(frame)
     out.release()
 
-    # Translate counts to Arabic
-    translated_counts = translator(list(overall_counts.keys()))
-
+    # Create DataFrame for video results
     df = pd.DataFrame({
-        'label (English)': list(overall_counts.keys()),
-        'label (Arabic)': [t['translation_text'] for t in translated_counts],
+        'label': list(overall_counts.keys()),
         'counts': list(overall_counts.values())
     })
 
-    return output_path, df
+    return output_path, df  # Return path to the output video and DataFrame
 
 # Create separate interfaces for images and videos
 image_interface = gr.Interface(
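
The diff is truncated at the image_interface = gr.Interface( line, so the exact interface arguments are not shown. Below is a minimal sketch of how the two detection functions could be wired into Gradio and served; the component choices, tab names, and the gr.TabbedInterface wrapper are illustrative assumptions, not necessarily what app.py actually does:

    import gradio as gr

    # Hypothetical wiring; app.py's real gr.Interface arguments are cut off above.
    image_interface = gr.Interface(
        fn=detect_and_draw_image,          # returns (annotated PIL image, DataFrame)
        inputs=gr.Image(type="pil"),       # PIL input matches the ImageDraw usage
        outputs=[gr.Image(type="pil"), gr.Dataframe()],
    )

    video_interface = gr.Interface(
        fn=detect_and_draw_video,          # returns (output video path, DataFrame)
        inputs=gr.Video(),                 # Gradio hands cv2.VideoCapture a file path
        outputs=[gr.Video(), gr.Dataframe()],
    )

    # One way to serve both: tab the interfaces together and launch.
    gr.TabbedInterface(
        [image_interface, video_interface],
        tab_names=["Image", "Video"],
    ).launch()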
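
For reference, both functions rely on YOLOv5's documented detection format: results.xyxy[0] is an (N, 6) array with one [xmin, ymin, xmax, ymax, confidence, class_id] row per detected object, and model.names maps class ids to labels. A standalone sketch (the sample image URL is only an illustration):

    import torch

    # Same model load as in the commit above.
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

    # YOLOv5 accepts a file path, URL, PIL image, or numpy array as input.
    results = model('https://ultralytics.com/images/zidane.jpg')

    # One [xmin, ymin, xmax, ymax, confidence, class_id] row per detection.
    for xmin, ymin, xmax, ymax, conf, class_id in results.xyxy[0].numpy():
        print(f"{model.names[int(class_id)]}: {conf:.2f} at ({xmin:.0f}, {ymin:.0f})")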