import os
import tempfile
import time

import cv2
import numpy as np
import streamlit as st
from ultralytics import YOLO
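
# COCO class IDs that trigger alerts. Class 65 is COCO's "remote",
# relabelled here as a generic suspicious handheld device.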
alerting_classes = {
    0: 'People',
    2: 'Car',
    7: 'Truck',
    24: 'Backpack',
    65: 'Suspicious handheld device',
    26: 'Handbag',
    28: 'Suitcase',
}
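
# A single red pixel (BGR order); tiled/resized later into a full-frame overlay.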
red_tint = np.array([[[0, 0, 255]]], dtype=np.uint8)

model1 = YOLO('yolov8n.pt')

st.title("Object Detection and Recognition")
st.write("""
This web app performs object detection and recognition on a video using YOLOv8.
It detects objects such as people, cars, trucks, backpacks, suspicious handheld
devices, handbags, and suitcases. The processed video is displayed with alerts
highlighted, and you can stop the inference at any time.
""")

video_file = st.file_uploader("Choose a video file", type=["mp4"])

# Placeholder that each processed frame is written into during playback.
video_placeholder = st.empty()
results = None

centered_text = """
<div style="text-align: center;">
Built with ❤️ by Unnati
</div>
"""

if video_file is not None:
    # Persist the upload to disk so OpenCV can open it by file path.
    tfile = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    tfile.write(video_file.read())

    cap = cv2.VideoCapture(tfile.name)
    original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
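
    # Downscale before inference: 65% for widths up to 1920 px, 50% beyond.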
    scale = 0.65 if original_width <= 1920 else 0.5
    target_width = int(original_width * scale)
    target_height = int(original_height * scale)
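
    # Any detection of a non-person alerting class raises an alert; people
    # (class 0) are handled separately and only alert in groups of six or more.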
    alert_set = set(alerting_classes.keys())
    alert_set.remove(0)

    red_tinted_overlay = np.tile(red_tint, (target_height, target_width, 1))

    stop_button = st.button("Stop Inference")
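
    # Note: clicking the button triggers a Streamlit rerun rather than flipping
    # the flag mid-loop, so the while guard below takes effect on that rerun.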
    frames = []
    frame_counter = 0

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    progress_bar_processing = st.progress(0)

    while cap.isOpened() and not stop_button:
        success, frame = cap.read()
        if not success:
            break

        resized_frame = cv2.resize(frame, (target_width, target_height))
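
        # Run YOLO on every 4th frame only; inferring on the downscaled frame
        # keeps processing fast, though it may trade some small-object accuracy.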
        if frame_counter % 4 == 0:
            alert_flag = False
            alert_reason = []

            results = model1(resized_frame, conf=0.35, verbose=False,
                             classes=list(alerting_classes.keys()))

            class_ids = [int(c) for c in results[0].boxes.cls.tolist()]
            class_counts = {cls: class_ids.count(cls) for cls in set(class_ids)}

            for cls in alert_set:
                if class_counts.get(cls, 0) > 0:
                    alert_flag = True
                    alert_reason.append((cls, class_counts[cls]))

            # More than five people in frame also counts as an alert.
            if class_counts.get(0, 0) > 5:
                alert_flag = True
                alert_reason.append((0, class_counts[0]))

            text = 'ALERT!'
            font = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.75
            thickness = 2

            size = cv2.getTextSize(text, font, font_scale, thickness)
            x = 0
            y = 2 + size[0][1]

            img = results[0].plot()
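
            # On alert, blend a 30% red tint over the annotated frame and
            # stamp the alert reasons in the top-left corner.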
            if alert_flag:
                red_tinted_overlay = cv2.resize(red_tinted_overlay, (img.shape[1], img.shape[0]))
                img = cv2.addWeighted(img, 0.7, red_tinted_overlay, 0.3, 0)
                cv2.putText(img, text, (x, y), font, font_scale, (0, 0, 0), thickness)
                y += size[0][1] + 10

                for cls, count in alert_reason:
                    alert_text = f'{count} {alerting_classes[cls]}'
                    cv2.putText(img, alert_text, (x, y), font, font_scale, (0, 0, 0), thickness)
                    y += size[0][1] + 10

            frames.append(img)

        current_frame_processing = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
        progress_bar_processing.progress(min(current_frame_processing / max(total_frames, 1), 1.0))

        frame_counter += 1
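
    # Only every 4th frame was kept, so stretch the per-frame delay to keep
    # playback close to the source duration.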
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_delay = 4 / fps if fps > 0 else 4 / 24

    del results
    cap.release()
    tfile.close()
    os.unlink(tfile.name)

    # Play the processed frames back through the placeholder.
    for frame in frames:
        video_placeholder.image(frame, channels="BGR", caption="YOLOv8 Inference")
        time.sleep(frame_delay)

st.markdown("<hr>", unsafe_allow_html=True)
st.markdown(centered_text, unsafe_allow_html=True)
|