Spaces:

z-uo
/

yolo_volley_detection_video

Build error

File size: 3,100 Bytes

import gradio as gr
import numpy as np
import cv2

# Darknet YOLO network: layer layout comes from the .cfg file and the
# trained parameters from the .weights file.
cfg = r'volleyball_test.cfg'
weights = r'volleyball_final.weights'
net = cv2.dnn.readNetFromDarknet(cfg, weights)

# Class labels, one per line of the names file; a label's position in the
# list is the class id used to look it up.
with open("classes.names", 'r') as f:
    classes = f.read().splitlines()


def predict_img(img_bgr):
    """Run the YOLO network on one frame and draw its detections.

    The frame is resized to 700x700, fed through the module-level ``net``,
    and every detection that passes the confidence threshold and non-max
    suppression is drawn as a green rectangle with its class label and score.

    Args:
        img_bgr: Image array accepted by ``cv2.resize``. Assumed to be in BGR
            channel order (the blob is built with ``swapRB=True``).

    Returns:
        The resized 700x700 image with boxes and labels drawn on it.
    """
    img = cv2.resize(img_bgr, (700, 700))
    height, width = img.shape[:2]

    # blobFromImage expects its size argument as (width, height).
    blob = cv2.dnn.blobFromImage(
        img, 1/255, (width, height), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)

    # Forward pass through every unconnected (detection) output layer.
    output_layers_names = net.getUnconnectedOutLayersNames()
    layer_outputs = net.forward(output_layers_names)

    conf_threshold = 0.5
    boxes = []
    confidences = []
    class_ids = []

    for output in layer_outputs:
        # Each row holds the normalised box centre and size in columns 0-3
        # and the per-class scores from column 5 onwards.
        scores = output[:, 5:]
        best_ids = np.argmax(scores, axis=1)
        best_scores = scores[np.arange(scores.shape[0]), best_ids]

        # Pick the confident rows with NumPy so the Python loop only visits
        # actual detections instead of every candidate row.
        for row in np.flatnonzero(best_scores > conf_threshold):
            detection = output[row]
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Convert centre/size to the top-left corner NMSBoxes expects.
            x = int(center_x - w/2)
            y = int(center_y - h/2)
            boxes.append([x, y, w, h])
            confidences.append(float(best_scores[row]))
            class_ids.append(best_ids[row])

    # Non-max suppression removes overlapping boxes for the same object.
    indexes = cv2.dnn.NMSBoxes(
        boxes, confidences, conf_threshold, nms_threshold=0.3)

    # Depending on the OpenCV version the result is a flat array, an (N, 1)
    # array, or an empty tuple; flatten it so each index is a plain int.
    for i in np.asarray(indexes, dtype=int).reshape(-1).tolist():
        x, y, w, h = boxes[i]
        cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
        conf_value = str(round(confidences[i], 2))
        label = str(classes[class_ids[i]])
        cv2.putText(img, label + " " + conf_value, (x, y-10),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)

    return img

def predict(inp):
    """Annotate every frame of a video and write the result to an MP4 file.

    Each frame read from ``inp`` is passed through ``predict_img`` and the
    annotated 700x700 frame is appended to the output video.

    Args:
        inp: Video source handed to ``cv2.VideoCapture``.

    Returns:
        The path of the written video, always ``"./outpy.mp4"``.
    """
    # NOTE: the output path is fixed, so overlapping calls overwrite each
    # other's result.
    output_path = "./outpy.mp4"

    vidcap = cv2.VideoCapture(inp)
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        # Sources without usable metadata report 0 (or NaN); a writer opened
        # with such a rate cannot produce a playable file.
        if not fps > 0:
            fps = 25.0

        # Lowercase 'mp4v' is the tag the MP4 container accepts directly.
        outcap = cv2.VideoWriter(
            output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (700, 700))
        try:
            success, image = vidcap.read()
            while success:
                outcap.write(predict_img(image))
                success, image = vidcap.read()
                print('Read a new frame: ', success)
        finally:
            # Releasing the writer finalises the file before it is returned.
            outcap.release()
    finally:
        vidcap.release()

    return output_path

# Build the web UI: one uploaded video in, one annotated video out.
gr.Interface(
    fn=predict,
    inputs=[
        gr.inputs.Video()  # you can have many inputs
    ],
    outputs=[
        # The result must be declared with an output component, not an
        # input one.
        gr.outputs.Video()  # you can have many outputs
    ],
    title="Volley classification and detection",
    description="This project use a yolov3 and pretrained model from [this](https://github.com/lalchhabi/Volleyball_Position_Detection_System) project",
    examples=[
        "test.mp4",
    ]
).launch()