import os

# Upgrade/force-reinstall onnxruntime before importing it, so the freshly
# installed build is the one that actually gets loaded in this process
# (a Hugging Face Spaces startup workaround).
os.system('pip install --upgrade --force-reinstall onnxruntime')

import cv2
import time
import numpy as np
import onnxruntime

# Ref: https://github.com/liruoteng/OpticalFlowToolkit/blob/5cf87b947a0032f58c922bbc22c0afb30b90c418/lib/flowlib.py#L249
# Flow values with a magnitude above this threshold are treated as unknown.
UNKNOWN_FLOW_THRESH = 1e7
def make_color_wheel():
"""
Generate color wheel according Middlebury color code
:return: Color wheel
"""
RY = 15
YG = 6
GC = 4
CB = 11
BM = 13
MR = 6
ncols = RY + YG + GC + CB + BM + MR
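    # 55 bins in total, split across the six transitions of the color circle
    # (red -> yellow -> green -> cyan -> blue -> magenta -> red).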
colorwheel = np.zeros([ncols, 3])
col = 0
# RY
colorwheel[0:RY, 0] = 255
colorwheel[0:RY, 1] = np.transpose(np.floor(255*np.arange(0, RY) / RY))
col += RY
# YG
colorwheel[col:col+YG, 0] = 255 - np.transpose(np.floor(255*np.arange(0, YG) / YG))
colorwheel[col:col+YG, 1] = 255
col += YG
# GC
colorwheel[col:col+GC, 1] = 255
colorwheel[col:col+GC, 2] = np.transpose(np.floor(255*np.arange(0, GC) / GC))
col += GC
# CB
colorwheel[col:col+CB, 1] = 255 - np.transpose(np.floor(255*np.arange(0, CB) / CB))
colorwheel[col:col+CB, 2] = 255
col += CB
# BM
colorwheel[col:col+BM, 2] = 255
colorwheel[col:col+BM, 0] = np.transpose(np.floor(255*np.arange(0, BM) / BM))
    col += BM
# MR
colorwheel[col:col+MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR))
colorwheel[col:col+MR, 0] = 255
return colorwheel
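# Build the wheel once at import time; compute_color() reuses it every call.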
colorwheel = make_color_wheel()
def compute_color(u, v):
"""
compute optical flow color map
:param u: optical flow horizontal map
:param v: optical flow vertical map
:return: optical flow in color code
"""
[h, w] = u.shape
img = np.zeros([h, w, 3])
nanIdx = np.isnan(u) | np.isnan(v)
u[nanIdx] = 0
v[nanIdx] = 0
ncols = np.size(colorwheel, 0)
rad = np.sqrt(u**2+v**2)
a = np.arctan2(-v, -u) / np.pi
fk = (a+1) / 2 * (ncols - 1) + 1
k0 = np.floor(fk).astype(int)
k1 = k0 + 1
k1[k1 == ncols+1] = 1
f = fk - k0
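    # fk maps the flow angle onto a fractional wheel position; k0 and k1 are
    # the two neighboring hue bins and f is the interpolation weight.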
    for i in range(np.size(colorwheel, 1)):
        tmp = colorwheel[:, i]
        col0 = tmp[k0 - 1] / 255
        col1 = tmp[k1 - 1] / 255
        col = (1 - f) * col0 + f * col1
        idx = rad <= 1
        # Blend toward white as the magnitude shrinks; dim out-of-range pixels.
        col[idx] = 1 - rad[idx] * (1 - col[idx])
        notidx = np.logical_not(idx)
        col[notidx] *= 0.75
        img[:, :, i] = np.uint8(np.floor(255 * col * (1 - nanIdx)))
return img
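
# In the resulting map, flow direction selects the hue on the wheel while flow
# magnitude sets the saturation: small magnitudes (rad <= 1) fade toward white.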
def flow_to_image(flow):
"""
Convert flow into middlebury color code image
:param flow: optical flow map
:return: optical flow image in middlebury color
"""
u = flow[:, :, 0]
v = flow[:, :, 1]
    # Flag pixels whose flow magnitude marks them as unknown, and zero them.
    idxUnknown = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH)
    u[idxUnknown] = 0
    v[idxUnknown] = 0

    # Normalize by the maximum flow magnitude so colors span the full range.
    rad = np.sqrt(u ** 2 + v ** 2)
    maxrad = max(-1, np.max(rad))
    u = u / (maxrad + np.finfo(float).eps)
    v = v / (maxrad + np.finfo(float).eps)
img = compute_color(u, v)
    # Black out the unknown-flow pixels in the final image.
    idx = np.repeat(idxUnknown[:, :, np.newaxis], 3, axis=2)
    img[idx] = 0
return np.uint8(img)
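
# Minimal usage sketch (assuming `flow` is any (H, W, 2) float array):
#   flow = np.random.randn(240, 320, 2).astype(np.float32)
#   color = flow_to_image(flow)  # (240, 320, 3) uint8 RGB visualization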
class Raft():
def __init__(self, model_path):
# Initialize model
self.initialize_model(model_path)
def __call__(self, img1, img2):
return self.estimate_flow(img1, img2)
def initialize_model(self, model_path):
        self.session = onnxruntime.InferenceSession(
            model_path, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
# Get model info
self.get_input_details()
self.get_output_details()
def estimate_flow(self, img1, img2):
input_tensor1 = self.prepare_input(img1)
input_tensor2 = self.prepare_input(img2)
outputs = self.inference(input_tensor1, input_tensor2)
self.flow_map = self.process_output(outputs)
return self.flow_map
    def prepare_input(self, img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.img_height, self.img_width = img.shape[:2]
        img_input = cv2.resize(img, (self.input_width, self.input_height))
        # Normalization is intentionally skipped here:
        # img_input = img_input/255
        # Reorder HWC -> NCHW: (H, W, C) -> (1, C, H, W).
        img_input = img_input.transpose(2, 0, 1)
        img_input = img_input[np.newaxis, :, :, :]
        return img_input.astype(np.float32)
def inference(self, input_tensor1, input_tensor2):
# start = time.time()
outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor1,
self.input_names[1]: input_tensor2})
# print(time.time() - start)
return outputs
    def process_output(self, output):
        # output[1] is the final flow prediction of this export, shaped
        # (1, 2, H, W); take the first batch element and move channels last.
        flow_map = output[1][0].transpose(1, 2, 0)
        return flow_map
def draw_flow(self):
# Convert flow to image
flow_img = flow_to_image(self.flow_map)
# Convert to BGR
flow_img = cv2.cvtColor(flow_img, cv2.COLOR_RGB2BGR)
        # Resize the flow image to match the input image shape
        return cv2.resize(flow_img, (self.img_width, self.img_height))
def get_input_details(self):
model_inputs = self.session.get_inputs()
self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
self.input_shape = model_inputs[0].shape
self.input_height = self.input_shape[2]
self.input_width = self.input_shape[3]
def get_output_details(self):
model_outputs = self.session.get_outputs()
self.output_names = [model_outputs[i].name for i in range(len(model_outputs))]
self.output_shape = model_outputs[0].shape
self.output_height = self.output_shape[2]
self.output_width = self.output_shape[3]
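
# Note: the export has a fixed input resolution (240x320 for
# raft_small_iter10_240x320.onnx); prepare_input() resizes frames to match it
# and draw_flow() resizes the visualization back to the original frame size.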
if __name__ == '__main__':
from imread_from_url import imread_from_url
# Initialize model
    model_path = 'raft_small_iter10_240x320.onnx'
flow_estimator = Raft(model_path)
# Read inference image
img1 = imread_from_url("https://github.com/princeton-vl/RAFT/blob/master/demo-frames/frame_0016.png?raw=true")
img2 = imread_from_url("https://github.com/princeton-vl/RAFT/blob/master/demo-frames/frame_0025.png?raw=true")
# Estimate flow and colorize it
flow_map = flow_estimator(img1, img2)
flow_img = flow_estimator.draw_flow()
combined_img = np.hstack((img1, img2, flow_img))
#cv2.namedWindow("Estimated flow", cv2.WINDOW_NORMAL)
#cv2.imshow("Estimated flow", combined_img)
#cv2.waitKey(0)
import gradio as gr
import yt_dlp
def download_youtube_video(youtube_url, output_filename):
ydl_opts = {
'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
'outtmpl': output_filename,
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
ydl.download([youtube_url])
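
# Note: 'outtmpl' pins the download filename, and the format string prefers an
# mp4 video + m4a audio pair, falling back to the best single mp4, then to the
# best available format (standard yt-dlp format-selection syntax).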
def process_video(youtube_url, start_time, flow_frame_offset):
model_path = 'models/raft_small_iter10_240x320.onnx'
flow_estimator = Raft(model_path)
output_filename = 'downloaded_video.mp4'
processed_output = 'processed_video.mp4'
# Download video
if os.path.exists(output_filename):
os.remove(output_filename)
download_youtube_video(youtube_url, output_filename)
cap = cv2.VideoCapture(output_filename)
if not cap.isOpened():
return "Error: Could not open video."
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
    # Use an MP4-friendly codec; 'XVID' inside an .mp4 container often
    # produces files that players and browsers refuse to decode.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(processed_output, fourcc, fps, (frame_width, frame_height))
    # Seek to the requested start time, expressed in frames.
    cap.set(cv2.CAP_PROP_POS_FRAMES, int(start_time * fps))
frame_list = []
frame_num = 0
while cap.isOpened():
ret, prev_frame = cap.read()
if not ret:
break
frame_list.append(prev_frame)
frame_num += 1
        # Buffer frames until the window spans `flow_frame_offset` frames, then
        # estimate flow between the oldest and newest frames in the window.
        if frame_num <= flow_frame_offset:
            continue
        flow_map = flow_estimator(frame_list[0], frame_list[-1])
flow_img = flow_estimator.draw_flow()
        # Blend the original frame 50/50 with the flow visualization.
        alpha = 0.5
        combined_img = cv2.addWeighted(frame_list[0], alpha, flow_img, 1 - alpha, 0)
if combined_img is None:
break
out.write(combined_img)
frame_list.pop(0)
cap.release()
out.release()
return processed_output
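
# process_video() can also be exercised outside Gradio, e.g.:
#   out_path = process_video('https://www.youtube.com/watch?v=is38pqgbj6A', 5, 50)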
# Each example supplies the three inputs: URL, start time, flow frame offset.
# (gr.Examples requires one value per input component.)
examples = [
    ["https://www.youtube.com/watch?v=is38pqgbj6A", 5, 50],
    ["https://www.youtube.com/watch?v=AdbrfoxiAtk", 0, 60],
    ["https://www.youtube.com/watch?v=vWGg0iPmI8k", 13, 70],
]
with gr.Blocks() as app:
gr.Markdown("### Optical Flow Video Processing\n"
"Enter a YouTube URL, set the start time and flow frame offset, "
"then click 'Process Video' to see the optical flow processing.")
with gr.Row():
with gr.Column():
youtube_url = gr.Textbox(label="YouTube URL", placeholder="Enter YouTube Video URL Here")
start_time = gr.Slider(minimum=0, maximum=60, label="Start Time (seconds)", step=1)
flow_frame_offset = gr.Slider(minimum=1, maximum=100, label="Flow Frame Offset", step=1)
submit_button = gr.Button("Process Video")
with gr.Column():
output_video = gr.Video(label="Processed Video")
submit_button.click(
fn=process_video,
inputs=[youtube_url, start_time, flow_frame_offset],
outputs=output_video
)
gr.Examples(examples=examples,
inputs=[youtube_url, start_time, flow_frame_offset],
fn=process_video,
outputs=output_video,
cache_examples=False)
app.launch()