Ryan Lee

Fix OpenCV version assertions in ViTTrack & RAFT and text offset (#234)

44fe25c over 1 year ago

9.19 kB

	import argparse

	import cv2 as cv
	import numpy as np

	from raft import Raft

	# Check OpenCV version
	def _version_tuple(version):
	    """Convert a dotted version string into a tuple of three integers.

	    Only the leading digits of the first three dot-separated components are
	    used, so suffixes such as "-dev" are ignored and missing components
	    count as 0.

	    Args:
	        version (str): Version string, e.g. "4.10.0" or "4.9.0-dev".

	    Returns:
	        tuple: (major, minor, patch) as integers.
	    """
	    numbers = []
	    for piece in version.split(".")[:3]:
	        digits = ""
	        for char in piece:
	            if not char.isdigit():
	                break
	            digits += char
	        numbers.append(int(digits) if digits else 0)
	    # Pad so that e.g. "4.9" compares equal to (4, 9, 0)
	    while len(numbers) < 3:
	        numbers.append(0)
	    return tuple(numbers)

	# Compare numerically: as plain strings "4.10.0" sorts before "4.9.0",
	# which would wrongly reject newer OpenCV releases.
	assert _version_tuple(cv.__version__) >= (4, 9, 0), \
	    "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"

	# Command-line interface of the demo.
	parser = argparse.ArgumentParser(description='RAFT (https://github.com/princeton-vl/RAFT)')

	# Each entry is (option flags, keyword arguments for add_argument).
	# The options are registered in this order so the --help output is unchanged.
	_CLI_OPTIONS = (
	    (('--input1', '-i1'),
	     dict(type=str,
	          help='Usage: Set input1 path to first image, omit if using camera or video.')),
	    (('--input2', '-i2'),
	     dict(type=str,
	          help='Usage: Set input2 path to second image, omit if using camera or video.')),
	    (('--video', '-vid'),
	     dict(type=str,
	          help='Usage: Set video path to desired input video, omit if using camera or two image inputs.')),
	    (('--model', '-m'),
	     dict(type=str, default='optical_flow_estimation_raft_2023aug.onnx',
	          help='Usage: Set model path, defaults to optical_flow_estimation_raft_2023aug.onnx.')),
	    (('--save', '-s'),
	     dict(action='store_true',
	          help='Usage: Specify to save a file with results. Invalid in case of camera input.')),
	    (('--visual', '-vis'),
	     dict(action='store_true',
	          help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')),
	)
	for _flags, _options in _CLI_OPTIONS:
	    parser.add_argument(*_flags, **_options)

	# Parsed once at module level; the rest of the script reads `args`.
	args = parser.parse_args()

	# Flow components whose absolute value exceeds this threshold are treated
	# as unknown and zeroed out in flow_to_image().
	UNKNOWN_FLOW_THRESH = 1e7

	def make_color_wheel():
	    """Build the Middlebury color wheel used to color-code optical flow.

	    The wheel is made of six consecutive segments (red-yellow, yellow-green,
	    green-cyan, cyan-blue, blue-magenta, magenta-red). Within a segment one
	    RGB channel is held at 255 while another ramps linearly up or down.

	    Returns:
	        numpy.ndarray: Float array of shape (55, 3) with one RGB color
	        (values in 0..255) per row.
	    """
	    # (segment length, channel held at 255, ramping channel, ramp goes up?)
	    segments = (
	        (15, 0, 1, True),    # RY: red -> yellow
	        (6, 1, 0, False),    # YG: yellow -> green
	        (4, 1, 2, True),     # GC: green -> cyan
	        (11, 2, 1, False),   # CB: cyan -> blue
	        (13, 2, 0, True),    # BM: blue -> magenta
	        (6, 0, 2, False),    # MR: magenta -> red
	    )

	    total_rows = sum(length for length, _, _, _ in segments)
	    wheel = np.zeros([total_rows, 3])

	    start = 0
	    for length, full_channel, ramp_channel, rising in segments:
	        stop = start + length
	        ramp = np.floor(255 * np.arange(0, length) / length)
	        wheel[start:stop, full_channel] = 255
	        wheel[start:stop, ramp_channel] = ramp if rising else 255 - ramp
	        start = stop

	    return wheel

	# Module-level color wheel table, built once; compute_color() reads it.
	colorwheel = make_color_wheel()

	def compute_color(u, v):
	    """Compute optical flow color map

	    The flow direction selects a color on the module-level color wheel
	    (linearly interpolated between the two nearest wheel entries) and the
	    flow magnitude controls how far that color is from white.

	    Args:
	        u(numpy.ndarray): Optical flow horizontal map (2-D)
	        v(numpy.ndarray): Optical flow vertical map (same shape as u)

	    Returns:
	        img (numpy.ndarray): Optical flow in color code, shape (h, w, 3),
	        values in 0..255. Pixels where u or v is NaN are set to 0.
	        The input arrays are not modified.
	    """
	    height, width = u.shape
	    img = np.zeros([height, width, 3])

	    # Replace NaN flow by zero on copies so the caller's arrays stay intact
	    nan_mask = np.isnan(u) | np.isnan(v)
	    u = np.where(nan_mask, 0, u)
	    v = np.where(nan_mask, 0, v)

	    ncols = np.size(colorwheel, 0)

	    # Flow magnitude and direction (direction normalized to [-1, 1])
	    rad = np.sqrt(u**2 + v**2)
	    a = np.arctan2(-v, -u) / np.pi

	    # Fractional 1-based position on the color wheel, in [1, ncols]
	    fk = (a + 1) / 2 * (ncols - 1) + 1

	    # Neighbouring wheel entries and the interpolation weight between them
	    k0 = np.floor(fk).astype(int)
	    k1 = k0 + 1
	    k1[k1 == ncols + 1] = 1  # wrap around past the last wheel entry
	    f = fk - k0

	    for i in range(np.size(colorwheel, 1)):
	        channel = colorwheel[:, i]
	        col0 = channel[k0 - 1] / 255
	        col1 = channel[k1 - 1] / 255
	        col = (1 - f) * col0 + f * col1

	        # Magnitudes up to 1 fade the color towards white as they shrink
	        idx = rad <= 1
	        col[idx] = 1 - rad[idx] * (1 - col[idx])

	        # Magnitudes above 1 are darkened
	        notidx = np.logical_not(idx)
	        col[notidx] *= 0.75

	        # Pixels that had NaN flow are forced to 0
	        img[:, :, i] = np.uint8(np.floor(255 * col * (1 - nan_mask)))

	    return img

	def flow_to_image(flow):
	    """Convert flow into middlebury color code image

	    Flow components larger in absolute value than UNKNOWN_FLOW_THRESH are
	    treated as unknown: they are ignored when normalizing and the
	    corresponding pixels are black in the result. The flow is normalized by
	    its largest magnitude before being color coded.

	    Args:
	        flow (np.ndarray): The computed flow map, indexed as
	            flow[:, :, 0] (horizontal) and flow[:, :, 1] (vertical).
	            It is not modified.

	    Returns:
	        (np.ndarray): uint8 image of shape (h, w, 3) corresponding to the flow map.
	    """
	    u = flow[:, :, 0]
	    v = flow[:, :, 1]

	    # Zero out unknown flow on copies; u and v above are views into `flow`,
	    # so assigning in place would silently alter the caller's array.
	    unknown = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH)
	    u = np.where(unknown, 0, u)
	    v = np.where(unknown, 0, v)

	    # Normalize by the largest flow magnitude; eps avoids division by zero
	    rad = np.sqrt(u**2 + v**2)
	    maxrad = max(-1, np.max(rad))
	    eps = np.finfo(float).eps
	    u = u / (maxrad + eps)
	    v = v / (maxrad + eps)

	    img = compute_color(u, v)

	    # Paint unknown pixels black in all three channels
	    img[unknown] = 0

	    return np.uint8(img)


	def draw_flow(flow_map, img_width, img_height):
	    """Render a flow map as a BGR image of the requested size

	    Args:
	        flow_map (np.ndarray): The computed flow map
	        img_width (int): Width of the output image (width of the first input photo)
	        img_height (int): Height of the output image (height of the first input photo)

	    Returns:
	        (np.ndarray): Image corresponding to the flow map.
	    """
	    # Color-code the flow (flow_to_image output is treated as RGB here)
	    rgb_flow = flow_to_image(flow_map)
	    # OpenCV works with BGR channel order
	    bgr_flow = cv.cvtColor(rgb_flow, cv.COLOR_RGB2BGR)
	    # Resize the flow image to match the input image shape
	    target_size = (img_width, img_height)
	    return cv.resize(bgr_flow, target_size)


	def visualize(image1, image2, flow_img):
	    """
	    Show both input images and the flow image side by side in one window

	    The call returns only after a key is pressed in the window
	    (cv.waitKey(0)).

	    Args:
	        image1 (np.ndarray): The first input image.
	        image2 (np.ndarray): The second input image.
	        flow_img (np.ndarray): The output flow map drawn as an image

	    Returns:
	        combined_img (np.ndarray): The visualized result.
	    """
	    window_name = "Estimated flow"
	    panels = (image1, image2, flow_img)

	    # Stack the three panels horizontally
	    side_by_side = np.hstack(panels)

	    cv.namedWindow(window_name, cv.WINDOW_NORMAL)
	    cv.imshow(window_name, side_by_side)
	    cv.waitKey(0)
	    return side_by_side


	if __name__ == '__main__':
	    # Three input modes, chosen from the command line:
	    #   * --input1 and --input2: flow between two still images
	    #   * --video: flow overlaid on frames of a video file
	    #   * neither: flow between consecutive frames of the default camera

	    # Instantiate RAFT
	    model = Raft(modelPath=args.model)

	    if args.input1 is not None and args.input2 is not None:
	        # Read images; cv.imread returns None when a file cannot be read
	        image1 = cv.imread(args.input1)
	        image2 = cv.imread(args.input2)
	        if image1 is None:
	            raise SystemExit('Could not read image: {}'.format(args.input1))
	        if image2 is None:
	            raise SystemExit('Could not read image: {}'.format(args.input2))
	        img_height, img_width = image1.shape[:2]

	        # Inference
	        result = model.infer(image1, image2)

	        # Create flow image based on the result flow map
	        flow_image = draw_flow(result, img_width, img_height)

	        # Save results if save is true
	        if args.save:
	            print('Results saved to result.jpg\n')
	            cv.imwrite('result.jpg', flow_image)

	        # Visualize results in a new window
	        if args.visual:
	            visualize(image1, image2, flow_image)

	    elif args.video is not None:
	        cap = cv.VideoCapture(args.video)
	        FLOW_FRAME_OFFSET = 3  # Number of frame difference to estimate the optical flow

	        if args.visual:
	            cv.namedWindow("Estimated flow", cv.WINDOW_NORMAL)

	        frame_list = []  # the most recent FLOW_FRAME_OFFSET + 1 frames
	        img_array = []   # blended output frames, collected only when saving
	        frame_num = 0
	        while cap.isOpened():
	            try:
	                # Read frame from the video
	                ret, frame = cap.read()
	            except cv.error:
	                # Skip a frame OpenCV failed on; other exceptions
	                # (including KeyboardInterrupt) are no longer swallowed
	                continue
	            if not ret:
	                break

	            # Keep a bounded window so the two frames compared below are
	            # always exactly FLOW_FRAME_OFFSET frames apart and memory use
	            # does not grow with the length of the video.
	            frame_list.append(frame)
	            del frame_list[:-(FLOW_FRAME_OFFSET + 1)]

	            # Run inference once every FLOW_FRAME_OFFSET + 1 frames
	            frame_num += 1
	            if frame_num <= FLOW_FRAME_OFFSET:
	                continue
	            frame_num = 0

	            oldest_frame = frame_list[0]
	            result = model.infer(oldest_frame, frame_list[-1])
	            img_height, img_width = oldest_frame.shape[:2]
	            flow_img = draw_flow(result, img_width, img_height)

	            # Blend the flow image over the older of the two frames
	            alpha = 0.6
	            combined_img = cv.addWeighted(oldest_frame, alpha, flow_img, (1 - alpha), 0)

	            if args.visual:
	                cv.imshow("Estimated flow", combined_img)
	            if args.save:
	                img_array.append(combined_img)

	            # Press key q to stop
	            if cv.waitKey(1) == ord('q'):
	                break

	        cap.release()

	        if args.save:
	            if img_array:
	                fourcc = cv.VideoWriter_fourcc(*'mp4v')
	                height, width = img_array[0].shape[:2]
	                video = cv.VideoWriter('result.mp4', fourcc, 30.0, (width, height), isColor=True)
	                for img in img_array:
	                    video.write(img)
	                video.release()
	            else:
	                # Nothing was processed (unreadable or too short video)
	                print('No frames were processed, result.mp4 was not written.')

	        cv.destroyAllWindows()

	    else:  # Omit input to call default camera
	        deviceId = 0
	        cap = cv.VideoCapture(deviceId)
	        w = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
	        h = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))

	        # Inference time is measured below but not reported anywhere
	        tm = cv.TickMeter()
	        while cv.waitKey(30) < 0:
	            hasFrame1, frame1 = cap.read()
	            if not hasFrame1:
	                print('First frame was not grabbed!')
	                break

	            hasFrame2, frame2 = cap.read()
	            if not hasFrame2:
	                print('Second frame was not grabbed!')
	                break

	            # Inference
	            tm.start()
	            result = model.infer(frame1, frame2)
	            tm.stop()
	            result = draw_flow(result, w, h)

	            # Show both frames and the flow side by side.
	            # NOTE: visualize() waits for a key press before returning.
	            visualize(frame1, frame2, result)

	            tm.reset()

	        # Release the camera and close the window once the loop ends
	        cap.release()
	        cv.destroyAllWindows()