|
import gradio as gr |
|
import cv2 |
|
from transformers import YolosImageProcessor, YolosForObjectDetection |
|
from PIL import Image |
|
import torch |
|
|
|
|
|
# Pretrained YOLOS-tiny checkpoint from the Hugging Face hub: the processor
# converts frames into model inputs, the model performs object detection.
image_processor = YolosImageProcessor.from_pretrained("hustvl/yolos-tiny")
model = YolosForObjectDetection.from_pretrained("hustvl/yolos-tiny")
|
|
|
def process_frame(frame, size=(640, 360), threshold=0.9):
    """Run YOLOS object detection on one BGR frame and draw the detections.

    Args:
        frame: a BGR image (numpy array) as produced by cv2.
        size: (width, height) the frame is resized to before inference.
            Defaults to (640, 360), matching the original behavior.
        threshold: minimum confidence score for a detection to be drawn.

    Returns:
        The resized BGR frame with bounding boxes and score labels drawn on it.
    """
    frame = cv2.resize(frame, size)

    # The model expects RGB input; cv2 decodes frames as BGR.
    image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    inputs = image_processor(images=image, return_tensors="pt")

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # image.size is (width, height); post-processing expects (height, width).
    target_sizes = torch.tensor([image.size[::-1]])
    results = image_processor.post_process_object_detection(
        outputs, threshold=threshold, target_sizes=target_sizes)[0]

    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        box = [round(coord, 2) for coord in box.tolist()]
        x0, y0, x1, y1 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
        cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
        # Label text is drawn just above the top-left corner of the box.
        cv2.putText(frame, f"{model.config.id2label[label.item()]}: {round(score.item(), 2)}",
                    (x0, y0 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return frame
|
|
|
def video_object_detection(video):
    """Run object detection on every frame of a video and write an annotated copy.

    Frames are processed and streamed straight into the output writer instead
    of being buffered in memory, so long videos do not exhaust RAM.

    Args:
        video: path to the input video file (as supplied by gradio).

    Returns:
        Path to the annotated output video ('/tmp/output.mp4').

    Raises:
        ValueError: if the video cannot be opened or contains no readable frames.
    """
    cap = cv2.VideoCapture(video)
    writer = None
    try:
        # Preserve the source frame rate; fall back to 20 fps when the
        # container does not report one (cap.get returns 0 in that case).
        fps = cap.get(cv2.CAP_PROP_FPS) or 20

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            processed = process_frame(frame)

            # Create the writer lazily so its dimensions match the
            # (resized) processed frames rather than the raw input.
            if writer is None:
                height, width, _ = processed.shape
                writer = cv2.VideoWriter('/tmp/output.mp4',
                                         cv2.VideoWriter_fourcc(*'mp4v'),
                                         fps, (width, height))

            writer.write(processed)
    finally:
        # Release resources even if processing a frame raises.
        cap.release()
        if writer is not None:
            writer.release()

    # The original code crashed with IndexError here; fail with a clear message.
    if writer is None:
        raise ValueError("Could not read any frames from the input video")

    return '/tmp/output.mp4'
|
|
|
|
|
# Wire the detector into a simple web UI: upload a video, get back the
# annotated video. live=True re-runs the function whenever the input changes.
iface = gr.Interface(
    fn=video_object_detection,
    inputs="video",
    outputs="video",
    title="YOLOs-Tiny Video Detection",
    live=True,
)
iface.launch()
|
|