File size: 5,014 Bytes
c814e57
 
 
 
 
a30dae1
c814e57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb5912f
 
 
 
 
 
c814e57
 
2f10eff
 
 
 
 
 
 
 
 
c814e57
 
 
 
 
e169367
c814e57
 
e169367
 
 
 
 
 
 
c814e57
 
 
 
 
 
 
a30dae1
 
 
2f10eff
c814e57
2f10eff
 
c814e57
2f10eff
c814e57
 
a30dae1
c814e57
a30dae1
c814e57
 
e169367
 
 
2f10eff
 
 
a30dae1
c814e57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f10eff
 
 
 
 
 
 
 
a30dae1
2f10eff
 
 
 
 
 
a30dae1
2f10eff
c814e57
2f10eff
 
 
a30dae1
 
2f10eff
 
a30dae1
2f10eff
 
 
c814e57
2f10eff
a30dae1
2f10eff
 
 
a30dae1
 
2f10eff
c814e57
 
a30dae1
e169367
2f10eff
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import os
import tempfile
import time

import cv2
import numpy as np
import streamlit as st
from ultralytics import YOLO

# Detector class ids of interest, mapped to the label used in on-frame
# alert text. The keys are also what the model call is filtered to.
alerting_classes = {
    0: "People",
    2: "Car",
    7: "Truck",
    24: "Backpack",
    65: "Suspicious handheld device",
    26: "Handbag",
    28: "Suitcase",
}

# A single uint8 pixel with value (0, 0, 255); frames are shown with
# channels="BGR", so this is pure red. It is stretched to frame size later.
red_tint = np.zeros((1, 1, 3), dtype=np.uint8)
red_tint[0, 0, 2] = 255

# Detection model, loaded once per script run.
model1 = YOLO("yolov8n.pt")

# --- Page header -----------------------------------------------------------
app_description = """
This web app performs object detection and recognition on a video using YOLOv8. 
It detects various objects, such as people, cars, trucks, backpacks, suspicious handheld devices, handbags, and suitcases. 
The processed video is displayed with alerts highlighted, and you can stop the inference at any time.
"""
st.title("Object Detection and Recognition")
st.write(app_description)

# --- Input widget and output slot -------------------------------------------
video_file = st.file_uploader("Choose a video file", type=["mp4"])

# Image element that is overwritten frame by frame during playback.
video_placeholder = st.image([])
# Most recent model output; stays None until the first inference runs.
results = None

# Footer markup, rendered at the very end of the script.
centered_text = """
        <div style="text-align: center;">
            Built with ❤️ by Unnati
        </div>
        """

if video_file is not None:
    # cv2.VideoCapture needs a filesystem path, so spill the upload to a
    # temporary file. Leaving the `with` block closes the handle, which
    # flushes all buffered bytes to disk *before* OpenCV opens the path
    # (and avoids holding the file open twice, which fails on Windows).
    with tempfile.NamedTemporaryFile(delete=False) as tfile:
        tfile.write(video_file.read())
    video_path = tfile.name

    # Alert policy: any detection of a non-people class raises an alert;
    # people (class id 0) only alert when more than `crowd_threshold` of
    # them are detected in a single inferred frame.
    people_class_id = 0
    crowd_threshold = 5
    inference_stride = 4  # run the model on every 4th frame only
    detect_classes = list(alerting_classes)
    alert_set = set(alerting_classes) - {people_class_id}

    # Banner text metrics never change, so compute them once up front.
    text = 'ALERT!'
    font = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.75
    thickness = 2
    (_, text_height), _ = cv2.getTextSize(text, font, font_scale, thickness)
    line_step = text_height + 10  # vertical distance between text lines

    # Streamlit re-runs the script on click, so this value is fixed for the
    # duration of one run: pressing the button starts a run that skips the
    # processing loop entirely.
    stop_button = st.button("Stop Inference")
    progress_bar_processing = st.progress(0)

    frames = []  # annotated frames, buffered for playback after processing
    frame_counter = 0
    alert_flag = False
    alert_reason = []  # (class id, count) pairs from the latest inference
    red_tinted_overlay = red_tint  # grown to frame size on first alert

    cap = cv2.VideoCapture(video_path)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        while cap.isOpened() and not stop_button:
            success, frame = cap.read()
            if not success:
                # End of stream (or an unreadable frame): stop processing.
                break

            if frame_counter % inference_stride == 0:
                results = model1(frame, conf=0.35, verbose=False, classes=detect_classes)

                class_ids = [int(c) for c in results[0].boxes.cls.tolist()]
                class_counts = {cls: class_ids.count(cls) for cls in set(class_ids)}

                alert_reason = [
                    (cls, class_counts[cls]) for cls in alert_set if cls in class_counts
                ]
                people_count = class_counts.get(people_class_id, 0)
                if people_count > crowd_threshold:
                    alert_reason.append((people_class_id, people_count))
                alert_flag = bool(alert_reason)

            # Draw the most recent detections onto the *current* frame.
            # Without `img=` the plot would reuse the frame the model last
            # saw, so playback would show every inferred frame four times.
            img = results[0].plot(img=frame)

            if alert_flag:
                # Stretch the overlay only when its size does not already
                # match the frame; afterwards it is reused as-is.
                if red_tinted_overlay.shape != img.shape:
                    red_tinted_overlay = cv2.resize(
                        red_tinted_overlay, (img.shape[1], img.shape[0])
                    )
                img = cv2.addWeighted(img, 0.7, red_tinted_overlay, 0.3, 0)

                # Banner on the first line, then one line per alert reason.
                y = 2 + text_height
                cv2.putText(img, text, (0, y), font, font_scale, (0, 0, 0), thickness)
                for cls, count in alert_reason:
                    y += line_step
                    alert_text = f'{count} {alerting_classes[cls]}'
                    cv2.putText(img, alert_text, (0, y), font, font_scale, (0, 0, 0), thickness)

            frames.append(img)

            # The reported frame count can be zero or an underestimate, so
            # skip the update when it is unusable and clamp the ratio to
            # the [0.0, 1.0] range the progress bar accepts.
            if total_frames > 0:
                current_frame_processing = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
                progress_bar_processing.progress(
                    min(current_frame_processing / total_frames, 1.0)
                )

            frame_counter += 1
    finally:
        # Always free the capture and drop the last model output, then
        # remove the temporary copy of the upload (created with
        # delete=False, so nothing else will clean it up).
        cap.release()
        results = None
        try:
            os.unlink(video_path)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not break the app.
            pass

    # Pace playback at the source frame rate, falling back to 24 fps when
    # the container does not report one.
    frame_delay = 1 / fps if fps > 0 else 1 / 24

    # Replay the buffered frames through the placeholder as a video.
    for frame in frames:
        video_placeholder.image(frame, channels="BGR", caption="YOLOv8 Inference")
        time.sleep(frame_delay)

# Page footer: a horizontal rule followed by the centered credit line.
# Both snippets are raw HTML, so HTML rendering must be enabled explicitly.
for footer_html in ("<hr>", centered_text):
    st.markdown(footer_html, unsafe_allow_html=True)