File size: 2,827 Bytes
9e147b4
 
 
99a40c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ef8707
2543827
99a40c7
 
 
9e147b4
99a40c7
 
 
9e147b4
2543827
99a40c7
 
9e147b4
 
 
99a40c7
9e147b4
99a40c7
 
2543827
9e147b4
99a40c7
2543827
9e147b4
99a40c7
2543827
99a40c7
9e147b4
 
 
 
 
99a40c7
2543827
99a40c7
 
 
 
9e147b4
 
99a40c7
 
 
 
2543827
99a40c7
 
2543827
99a40c7
9e147b4
99a40c7
 
9e147b4
99a40c7
 
 
9e147b4
 
99a40c7
2543827
 
99a40c7
2543827
99a40c7
9e147b4
2543827
9e147b4
2543827
 
 
9e147b4
 
99a40c7
2543827
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gradio as gr
import cv2
import numpy as np
import os

# Load the pretrained YOLOv3 network; weights and config are expected to sit
# next to the script (cv2.dnn.readNet raises if either file is missing).
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

# Enable GPU only when OpenCV was actually built with CUDA and a device is
# present. Unconditionally requesting the CUDA backend on a CPU-only build
# makes net.forward() fail at inference time; this guard falls back to the
# default (CPU) backend instead.
if cv2.cuda.getCudaEnabledDeviceCount() > 0:
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

# COCO class labels, one per line; index order must match the model's outputs.
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f]

# Names of the unconnected (output) layers, passed to net.forward() each frame.
output_layers_names = net.getUnconnectedOutLayersNames()

def count_people(video_path, conf_threshold=0.5, nms_threshold=0.4):
    """Count people per frame of a video using the module-level YOLO net.

    Parameters
    ----------
    video_path : str or None
        Path to the input video file. ``None``/empty (e.g. no upload) is
        treated the same as a missing file.
    conf_threshold : float, optional
        Minimum 'person' class confidence for a detection to be kept,
        also used as the NMS score threshold (default 0.5).
    nms_threshold : float, optional
        IoU threshold for non-maximum suppression (default 0.4).

    Returns
    -------
    dict or str
        Summary statistics on success, or an error message string.
    """
    # Guard against None/empty input as well as a missing path —
    # os.path.exists(None) would raise TypeError.
    if not video_path or not os.path.exists(video_path):
        return "Error: Video file not found."

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return "Error: Unable to open video file."

    frame_count = 0
    people_per_frame = []

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            height, width = frame.shape[:2]

            # YOLOv3 expects a 416x416 RGB blob scaled to [0, 1].
            blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
            net.setInput(blob)

            # Single forward pass over all YOLO output layers.
            layer_outputs = net.forward(output_layers_names)

            boxes, confidences = [], []
            for output in layer_outputs:
                for detection in output:
                    # Per-class scores start at index 5 (after box + objectness).
                    scores = detection[5:]
                    class_id = int(np.argmax(scores))
                    confidence = scores[class_id]

                    if classes[class_id] == 'person' and confidence > conf_threshold:
                        # Box coords are center/size, normalized to frame dims.
                        center_x, center_y = int(detection[0] * width), int(detection[1] * height)
                        w, h = int(detection[2] * width), int(detection[3] * height)
                        x, y = int(center_x - w / 2), int(center_y - h / 2)

                        boxes.append([x, y, w, h])
                        confidences.append(float(confidence))

            # NMS collapses overlapping boxes that cover the same person.
            indexes = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold) if boxes else []

            people_per_frame.append(len(indexes))
            frame_count += 1
    finally:
        # Release the capture even if decoding/inference raises mid-loop.
        cap.release()

    return {
        "Total Frames Processed": frame_count,
        "Max People in a Single Frame": int(np.max(people_per_frame)) if people_per_frame else 0,
        "Avg People Per Frame": round(float(np.mean(people_per_frame)), 2) if people_per_frame else 0,
    }

# Gradio UI function
def analyze_video(video_file):
    """Gradio callback: run people counting and format the result as text.

    Parameters
    ----------
    video_file : str or None
        Path to the uploaded video; Gradio passes None when nothing is uploaded.

    Returns
    -------
    str
        One "key: value" line per statistic, or an error message.
    """
    # Gradio hands the callback None when the user submits without a file.
    if video_file is None:
        return "Error: No video uploaded."

    result = count_people(video_file)

    # count_people returns a plain error string on failure; pass it through
    # instead of crashing on .items() (strings have no .items()).
    if isinstance(result, str):
        return result

    return "\n".join(f"{key}: {value}" for key, value in result.items())

# Gradio Interface: single video input -> plain-text results box.
interface = gr.Interface(
    fn=analyze_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=gr.Textbox(label="People Counting Results"),
    title="YOLO-based People Counter",
    description="Upload a video to detect and count people using YOLOv3."
)

# Launch app (blocks here serving the local web UI).
interface.launch()