import gradio as gr
import cv2
import numpy as np
import os
from PIL import Image

# Load YOLO model
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
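# NOTE: 'yolov3.weights' and 'yolov3.cfg' are assumed to be present in the
# working directory (the standard Darknet YOLOv3 release files); readNet
# raises an error if either is missing. 'coco.names' is loaded below.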

# Set backend (CPU)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
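# Optional alternative, assuming OpenCV was built with CUDA support (left
# commented out since the default pip build is CPU-only):
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)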

# Load class names
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f.readlines()]

# Get YOLO output layer names
output_layers_names = net.getUnconnectedOutLayersNames()
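# For YOLOv3 these are the three detection heads (coarse to fine scale); each
# output row is [center_x, center_y, w, h, objectness, 80 class scores].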

def count_people_in_frame(frame):
    """
    Detects people in a single BGR frame and returns the number of
    detections that survive non-maximum suppression.
    """
    height, width, _ = frame.shape

    # Convert frame to YOLO format
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)

    # Forward pass
    layer_outputs = net.forward(output_layers_names)

    # Process detections
    boxes, confidences = [], []
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if classes[class_id] == 'person' and confidence > 0.5:
                center_x, center_y = int(detection[0] * width), int(detection[1] * height)
                w, h = int(detection[2] * width), int(detection[3] * height)
                x, y = int(center_x - w / 2), int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))

    # Apply Non-Maximum Suppression (NMS)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4) if boxes else []

    return len(indexes)
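
# Quick sanity check for count_people_in_frame; 'test.jpg' is a hypothetical
# placeholder filename, not a file shipped with this script:
# frame = cv2.imread('test.jpg')
# if frame is not None:
#     print(count_people_in_frame(frame))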

def analyze_image(image):
    """
    Processes an image and returns the people count as a result string.
    (Returning a single string matches the interface's single Textbox output.)
    """
    if isinstance(image, np.ndarray):
        image_cv = image  # Already a NumPy array (assumed BGR, as read by cv2)
    else:
        # PIL images are RGB; convert to BGR so that swapRB=True in
        # blobFromImage performs the expected BGR -> RGB swap.
        image_cv = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)

    people_count = count_people_in_frame(image_cv)
    return f"People in Image: {people_count}"

def analyze_video(video_file):
    """
    Processes a video and counts people in every frame.
    """
    # gr.File may provide a tempfile-like object (with .name) or a plain
    # path string, depending on the Gradio version.
    video_path = video_file.name if hasattr(video_file, "name") else video_file

    if not os.path.exists(video_path):
        return "Error: Video file could not be loaded."

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return "Error: Unable to open video file."

    frame_count = 0
    people_per_frame = []

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Count people in the frame
        people_count = count_people_in_frame(frame)
        people_per_frame.append(people_count)

        frame_count += 1

    cap.release()

    return f"Max People Detected in Video: {max(people_per_frame) if people_per_frame else 0}"

def process_input(input_file):
    """
    Determines whether the input is an image or a video and dispatches to
    the appropriate handler.
    """
    file_path = input_file.name if hasattr(input_file, "name") else input_file
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension in [".jpg", ".jpeg", ".png", ".bmp"]:
        image = Image.open(file_path)
        return analyze_image(image)
    elif file_extension in [".mp4", ".avi", ".mov", ".mkv"]:
        return analyze_video(input_file)
    else:
        return "Error: Unsupported file format."

# Gradio Interface for Image and Video Processing
app = gr.Interface(
    fn=process_input,
    inputs=gr.File(label="Upload Image or Video"),  # Use File to handle both types
    outputs=[gr.Textbox(label="People Counting Results")],
    title="YOLO People Counter (Image & Video)",
    description="Upload an image or video to detect and count people using YOLOv3."
)

# Launch app
if __name__ == "__main__":
    app.launch()
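
    # Hypothetical alternative: app.launch(share=True) would also expose a
    # temporary public URL through Gradio's share tunnel.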