Spaces:

hadiyya
/

MC1-Gradio-Assignment

Runtime error

App Files Files Community

MC1-Gradio-Assignment / app.py

hadiyya

Run requirements.txt from app.py

d3ff2ec verified 7 months ago

raw

history blame contribute delete

3.43 kB

	import subprocess
	import sys

	# Install everything listed in requirements.txt with the interpreter that is
	# running this script, before the third-party imports below are executed.
	# check_call raises CalledProcessError if pip exits with a non-zero status.
	subprocess.check_call(
	    [
	        sys.executable,
	        "-m",
	        "pip",
	        "install",
	        "-r",
	        "requirements.txt",
	    ]
	)

	import spaces
	import cv2
	from PIL import Image
	import torch
	import time
	import numpy as np
	import uuid

	from draw_boxes import draw_bounding_boxes
	from transformers import AutoImageProcessor, AutoModelForObjectDetection # Added import

	# Only one out of every SUBSAMPLE decoded frames is kept for detection; the
	# output frame rate is the source frame rate divided by the same factor.
	SUBSAMPLE = 2

	# The image processor and the detection model are loaded from the same
	# checkpoint; the model is moved to the CUDA device at import time.
	_CHECKPOINT = "PekingU/rtdetr_r101vd_coco_o365"
	image_processor = AutoImageProcessor.from_pretrained(_CHECKPOINT)
	model = AutoModelForObjectDetection.from_pretrained(_CHECKPOINT)
	model = model.to("cuda")

	def _write_annotated_chunk(frames, conf_threshold, fps, size):
	    """Run detection on a batch of frames and save the annotated result as an mp4.

	    Args:
	        frames: Non-empty list of RGB frames (numpy arrays), each ``size`` pixels.
	        conf_threshold: Score threshold passed both to the processor's
	            post-processing step and to ``draw_bounding_boxes``.
	        fps: Frame rate the chunk is written with.
	        size: ``(width, height)`` of every frame in ``frames``.

	    Returns:
	        The file name of the newly written ``output_<uuid>.mp4`` chunk.
	    """
	    width, height = size
	    inputs = image_processor(images=frames, return_tensors="pt").to("cuda")

	    with torch.no_grad():
	        outputs = model(**inputs)

	    # target_sizes is given per image as (height, width).
	    detections = image_processor.post_process_object_detection(
	        outputs,
	        target_sizes=torch.tensor([(height, width)] * len(frames)),
	        threshold=conf_threshold,
	    )

	    chunk_name = f"output_{uuid.uuid4()}.mp4"
	    video_codec = cv2.VideoWriter_fourcc(*"mp4v")  # type: ignore
	    writer = cv2.VideoWriter(chunk_name, video_codec, fps, size)  # type: ignore
	    try:
	        for rgb_frame, detection in zip(frames, detections):
	            # draw_bounding_boxes is imported from draw_boxes; its result is
	            # converted with np.array, so it is presumably a PIL image.
	            annotated = draw_bounding_boxes(
	                Image.fromarray(rgb_frame), detection, model, conf_threshold
	            )
	            # Convert RGB to BGR, the channel order the writer is fed with.
	            writer.write(np.array(annotated)[:, :, ::-1].copy())
	    finally:
	        # Always finalize the file, even if drawing or writing fails.
	        writer.release()
	    return chunk_name


	@spaces.GPU
	def stream_object_detection(video, conf_threshold):
	    """Yield annotated mp4 chunks of ``video`` as object detection progresses.

	    The input is read frame by frame. Every ``SUBSAMPLE``-th frame is scaled
	    to half the source resolution, converted to RGB and collected into a
	    batch. Each time a batch holds ``2 * desired_fps`` frames it is run
	    through the detector, written to its own mp4 file and that file's name is
	    yielded. Frames left over at the end of the video are flushed as a final,
	    shorter chunk, so clips shorter than one full batch still produce output.

	    Args:
	        video: Path of the input video as accepted by ``cv2.VideoCapture``.
	            ``None`` is accepted and produces no output.
	        conf_threshold: Minimum detection score for a box to be kept.

	    Yields:
	        File names of the written ``output_<uuid>.mp4`` chunks, in order.
	    """
	    if video is None:
	        # Nothing to process (presumably the input component was cleared).
	        return

	    cap = cv2.VideoCapture(video)
	    try:
	        fps = int(cap.get(cv2.CAP_PROP_FPS))
	        # Clamp to 1: a missing or very low FPS value would otherwise give 0,
	        # which is unusable as a writer frame rate and would make the chunk
	        # length 0, a length a non-empty batch can never equal.
	        desired_fps = max(1, fps // SUBSAMPLE)
	        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) // 2
	        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) // 2
	        frames_per_chunk = 2 * desired_fps

	        batch = []
	        n_frames = 0
	        iterating, frame = cap.read()
	        while iterating:
	            if n_frames % SUBSAMPLE == 0:
	                # Resize to the exact writer size rather than by a 0.5 factor,
	                # so odd source dimensions cannot produce frames whose size
	                # differs from the one the writer was opened with. Skipped
	                # frames are not resized or converted at all.
	                frame = cv2.resize(frame, (width, height))
	                batch.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

	            if len(batch) == frames_per_chunk:
	                yield _write_annotated_chunk(
	                    batch, conf_threshold, desired_fps, (width, height)
	                )
	                batch = []

	            iterating, frame = cap.read()
	            n_frames += 1

	        if batch:
	            # Flush the frames that did not fill a whole chunk.
	            yield _write_annotated_chunk(
	                batch, conf_threshold, desired_fps, (width, height)
	            )
	    finally:
	        # Runs on normal exhaustion, on errors, and when the consumer closes
	        # the generator early.
	        cap.release()

	import gradio as gr

	# User interface: a title, then one row with two columns. The first column
	# holds the source video and the confidence slider, the second the streamed
	# result. Changing the source video starts stream_object_detection.
	with gr.Blocks() as app:
	    gr.HTML(
	        """
	<h1 style='text-align: center'>
	Video Object Detection with <a href='https://huggingface.co/PekingU/rtdetr_r101vd_coco_o365' target='_blank'>RT-DETR</a>
	</h1>
	"""
	    )

	    with gr.Row():
	        with gr.Column():
	            video = gr.Video(label="Video Source")
	            conf_threshold = gr.Slider(
	                minimum=0.0, maximum=1.0, step=0.05, value=0.30,
	                label="Confidence Threshold",
	            )
	        with gr.Column():
	            output_video = gr.Video(
	                label="Processed Video",
	                streaming=True,
	                autoplay=True,
	            )

	    # Each file name yielded by the generator is sent to the output player.
	    video.change(
	        stream_object_detection,
	        inputs=[video, conf_threshold],
	        outputs=[output_video],
	    )