import cv2
import gradio as gr
import numpy as np
import os
import datetime
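
# Demo app: run YOLOv3 over an uploaded video, save an annotated copy, and
# report per-class detection counts and on-screen durations through a Gradio UI.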

# Load YOLO model
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
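# yolov3.cfg and yolov3.weights must be present in the working directory
# (both ship with the official Darknet YOLOv3 release)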

# Load COCO class names
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f]

# Run YOLOv3 over every frame of the input video and annotate detections
def detect_birds(video_file):
    cap = cv2.VideoCapture(video_file)
    fps = cap.get(cv2.CAP_PROP_FPS) or 30  # fall back to 30 if FPS metadata is missing
    frame_count = 0
    output_frames = []
    os.makedirs('output_frames', exist_ok=True)  # cv2.imwrite fails silently without this directory

    # Per-class detection counts and accumulated on-screen time
    object_counts = {class_name: 0 for class_name in classes}
    object_durations = {class_name: datetime.timedelta() for class_name in classes}
    frame_duration = datetime.timedelta(seconds=1 / fps)

    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            break

        height, width, _ = frame.shape

        # Create a blob from the frame and pass it through the network
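        # 1/255 rescales pixels to [0, 1]; (416, 416) is YOLOv3's standard input size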
        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        layer_names = net.getLayerNames()
        # getUnconnectedOutLayers() returns 1-based indices; its shape varies across OpenCV versions
        output_layers = [layer_names[i - 1] for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]
        detections = net.forward(output_layers)

        # Process detections
        boxes = []
        confidences = []
        class_ids = []
        for detection in detections:
            for detection_result in detection:
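                # Each row: [center_x, center_y, w, h, objectness, 80 class scores]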
                scores = detection_result[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]

                if confidence > 0.5:
                    center_x = int(detection_result[0] * width)
                    center_y = int(detection_result[1] * height)
                    w = int(detection_result[2] * width)
                    h = int(detection_result[3] * height)

                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Apply non-maxima suppression to eliminate redundant overlapping boxes
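        # 0.5 = minimum score to keep a box, 0.4 = IoU threshold for suppression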
        indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

        # Draw bounding boxes and labels
        if len(indices) > 0:
            labels_in_frame = set()
            for i in np.asarray(indices).flatten():
                x, y, w, h = boxes[i]
                label = classes[class_ids[i]]
                confidence = confidences[i]

                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, f'{label} {confidence:.2f}', (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            (0, 255, 0), 2)

                # Count every detection; screen time is credited once per class per frame
                object_counts[label] += 1
                labels_in_frame.add(label)

            # Advance on-screen duration by one frame interval (video time, not wall-clock time)
            for label in labels_in_frame:
                object_durations[label] += frame_duration

        # Save the frame with bounding boxes as an image
        output_frame_path = f'output_frames/frame_{frame_count:04d}.jpg'
        cv2.imwrite(output_frame_path, frame)
        output_frames.append(output_frame_path)

        frame_count += 1

    cap.release()

    # Combine the output frames into a video file
    output_video_path = 'output.mp4'
    if frame_count > 0:
        frame = cv2.imread(output_frames[0])
        if frame is not None:
            height, width, _ = frame.shape

            # Use the source FPS so the output plays at the original speed
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

            for frame_path in output_frames:
                frame = cv2.imread(frame_path)
                if frame is not None:
                    writer.write(frame)

            writer.release()
        else:
            output_video_path = None
    else:
        output_video_path = None

    # Remove the intermediate frame images
    for frame_path in output_frames:
        os.remove(frame_path)

    # Format object count and duration as text
    count_text = '\n'.join([f'{label}: {count}' for label, count in object_counts.items() if count > 0])
    duration_text = '\n'.join([f'{label}: {str(duration).split(".")[0]}' for label, duration in object_durations.items() if duration.total_seconds() > 0])

    return output_video_path, count_text, duration_text

# Create the Gradio interface (Gradio 3+ component API; share=True belongs to launch())
inputs = gr.Video(label='Input Video')
outputs = [
    gr.Video(label='Output Video'),
    gr.Textbox(label='Object Count'),
    gr.Textbox(label='Duration')
]

gr.Interface(fn=detect_birds, inputs=inputs, outputs=outputs).launch(share=True)