import os
import tempfile
import time

import cv2
import numpy as np
import streamlit as st
from ultralytics import YOLO
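
# COCO class IDs that trigger alerts. Class 65 is COCO's "remote",
# relabelled here as a generic suspicious handheld device.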
alerting_classes = {
    0: 'People',
    2: 'Car',
    7: 'Truck',
    24: 'Backpack',
    65: 'Suspicious handheld device',
    26: 'Handbag',
    28: 'Suitcase',
}
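
# A single red pixel (BGR order); tiled/resized later into a full-frame overlay.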
red_tint = np.array([[[0, 0, 255]]], dtype=np.uint8)

model1 = YOLO('yolov8n.pt')

st.title("Object Detection and Recognition")
st.write("""
This web app performs object detection and recognition on a video using YOLOv8.
It detects objects such as people, cars, trucks, backpacks, suspicious handheld
devices, handbags, and suitcases. The processed video is displayed with alerts
highlighted, and you can stop the inference at any time.
""")

video_file = st.file_uploader("Choose a video file", type=["mp4"])

# Placeholder that each processed frame is written into during playback.
video_placeholder = st.empty()
results = None

centered_text = """
<div style="text-align: center;">
Built with ❤️ by Unnati
</div>
"""

if video_file is not None:
    # Persist the upload to disk so OpenCV can open it by file path.
    tfile = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    tfile.write(video_file.read())

    cap = cv2.VideoCapture(tfile.name)
    original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
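
    # Downscale before inference: 65% for widths up to 1920 px, 50% beyond.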
    scale = 0.65 if original_width <= 1920 else 0.5
    target_width = int(original_width * scale)
    target_height = int(original_height * scale)
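
    # Any detection of a non-person alerting class raises an alert; people
    # (class 0) are handled separately and only alert in groups of six or more.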
    alert_set = set(alerting_classes.keys())
    alert_set.remove(0)

    red_tinted_overlay = np.tile(red_tint, (target_height, target_width, 1))

    stop_button = st.button("Stop Inference")
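
    # Note: clicking the button triggers a Streamlit rerun rather than flipping
    # the flag mid-loop, so the while guard below takes effect on that rerun.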
    frames = []
    frame_counter = 0

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    progress_bar_processing = st.progress(0)

    while cap.isOpened() and not stop_button:
        success, frame = cap.read()
        if not success:
            break

        resized_frame = cv2.resize(frame, (target_width, target_height))
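
        # Run YOLO on every 4th frame only; inferring on the downscaled frame
        # keeps processing fast, though it may trade some small-object accuracy.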
        if frame_counter % 4 == 0:
            alert_flag = False
            alert_reason = []

            results = model1(resized_frame, conf=0.35, verbose=False,
                             classes=list(alerting_classes.keys()))

            class_ids = [int(c) for c in results[0].boxes.cls.tolist()]
            class_counts = {cls: class_ids.count(cls) for cls in set(class_ids)}

            for cls in alert_set:
                if class_counts.get(cls, 0) > 0:
                    alert_flag = True
                    alert_reason.append((cls, class_counts[cls]))

            # More than five people in frame also counts as an alert.
            if class_counts.get(0, 0) > 5:
                alert_flag = True
                alert_reason.append((0, class_counts[0]))

            text = 'ALERT!'
            font = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.75
            thickness = 2

            size = cv2.getTextSize(text, font, font_scale, thickness)
            x = 0
            y = 2 + size[0][1]

            img = results[0].plot()
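
            # On alert, blend a 30% red tint over the annotated frame and
            # stamp the alert reasons in the top-left corner.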
            if alert_flag:
                red_tinted_overlay = cv2.resize(red_tinted_overlay, (img.shape[1], img.shape[0]))
                img = cv2.addWeighted(img, 0.7, red_tinted_overlay, 0.3, 0)
                cv2.putText(img, text, (x, y), font, font_scale, (0, 0, 0), thickness)
                y += size[0][1] + 10

                for cls, count in alert_reason:
                    alert_text = f'{count} {alerting_classes[cls]}'
                    cv2.putText(img, alert_text, (x, y), font, font_scale, (0, 0, 0), thickness)
                    y += size[0][1] + 10

            frames.append(img)

        current_frame_processing = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
        progress_bar_processing.progress(min(current_frame_processing / max(total_frames, 1), 1.0))

        frame_counter += 1
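
    # Only every 4th frame was kept, so stretch the per-frame delay to keep
    # playback close to the source duration.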
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_delay = 4 / fps if fps > 0 else 4 / 24

    del results
    cap.release()
    tfile.close()
    os.unlink(tfile.name)

    # Play the processed frames back through the placeholder.
    for frame in frames:
        video_placeholder.image(frame, channels="BGR", caption="YOLOv8 Inference")
        time.sleep(frame_delay)

st.markdown("<hr>", unsafe_allow_html=True)
st.markdown(centered_text, unsafe_allow_html=True)
|