import cv2
import gradio as gr
import numpy as np
import os
import datetime
# Load YOLO model
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
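# Optional: if the local OpenCV build includes CUDA support, inference can be
# offloaded to the GPU. This is only a sketch and assumes such a build; the
# default CPU backend works without these two lines.
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)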
# Load the COCO class labels
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f]
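# Sanity check: this assumes the standard 80-class coco.names file, which
# includes a 'bird' entry; fail early if a different label file was supplied.
assert 'bird' in classes, "coco.names does not contain a 'bird' class"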
# Run YOLO detection over every frame of a video, drawing boxes and
# tallying per-class counts and on-screen durations
def detect_birds(video_file):
    cap = cv2.VideoCapture(video_file)
    fps = cap.get(cv2.CAP_PROP_FPS) or 30  # fall back to 30 if the container reports no FPS
    frame_count = 0
    output_frames = []
    os.makedirs('output_frames', exist_ok=True)  # the frame dump directory must exist before imwrite
    # Resolve the YOLO output layer names once; they never change between frames
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
    # Per-class detection counts and accumulated durations
    object_counts = {class_name: 0 for class_name in classes}
    object_durations = {class_name: datetime.timedelta() for class_name in classes}
    # Each frame in which an object appears contributes one frame interval of video time
    frame_duration = datetime.timedelta(seconds=1.0 / fps)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        height, width, _ = frame.shape
        # Create a blob from the frame and pass it through the network
        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        detections = net.forward(output_layers)
        # Process detections: each output row is [cx, cy, w, h, objectness, class scores...]
        boxes = []
        confidences = []
        class_ids = []
        for detection in detections:
            for detection_result in detection:
                scores = detection_result[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.5:
                    # Convert the normalised centre/size box to pixel corner coordinates
                    center_x = int(detection_result[0] * width)
                    center_y = int(detection_result[1] * height)
                    w = int(detection_result[2] * width)
                    h = int(detection_result[3] * height)
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)
        # Apply non-maximum suppression to eliminate redundant overlapping boxes
        # (score threshold 0.5, IoU threshold 0.4)
        indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
        # Draw bounding boxes and labels
        if len(indices) > 0:
            for i in indices.flatten():
                x, y, w, h = boxes[i]
                label = classes[class_ids[i]]
                confidence = confidences[i]
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, f'{label} {confidence:.2f}', (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            (0, 255, 0), 2)
                # Update the object count and accumulated on-screen time; adding the
                # frame interval measures video time rather than processing time
                object_counts[label] += 1
                object_durations[label] += frame_duration
        # Save the annotated frame as an image
        output_frame_path = f'output_frames/frame_{frame_count:04d}.jpg'
        cv2.imwrite(output_frame_path, frame)
        output_frames.append(output_frame_path)
        frame_count += 1
    cap.release()
    # Combine the output frames into a video file at the source frame rate
    output_video_path = 'output.mp4'
    if frame_count > 0:
        frame = cv2.imread(output_frames[0])
        if frame is not None:
            height, width, _ = frame.shape
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
            for frame_path in output_frames:
                frame = cv2.imread(frame_path)
                if frame is not None:
                    writer.write(frame)
            writer.release()
        else:
            output_video_path = None
    else:
        output_video_path = None
    # Remove the intermediate frame images
    for frame_path in output_frames:
        os.remove(frame_path)
    # Format object counts and durations as text (dropping sub-second precision)
    count_text = '\n'.join(f'{label}: {count}' for label, count in object_counts.items() if count > 0)
    duration_text = '\n'.join(f'{label}: {str(duration).split(".")[0]}' for label, duration in object_durations.items() if duration.total_seconds() > 0)
    return output_video_path, count_text, duration_text
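# For a quick local check without the web UI, the function can be called
# directly; 'sample.mp4' below is a placeholder path, not a bundled file:
#   video_path, counts, durations = detect_birds('sample.mp4')
#   print(counts or 'no objects detected')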
# Create a Gradio interface
inputs = gr.Video(label='Input Video')
outputs = [
    gr.Video(label='Output Video'),
    gr.Textbox(label='Object Count'),
    gr.Textbox(label='Duration')
]
gr.Interface(fn=detect_birds, inputs=inputs, outputs=outputs).launch(share=True)
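# Launched this way, Gradio serves the app locally (default http://127.0.0.1:7860)
# and, with share=True, also prints a temporary public link.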