# TRI-VIDAR - Copyright 2022 Toyota Research Institute. All rights reserved.
import csv
import os
from collections import OrderedDict

import cv2
import numpy as np

from vidar.datasets.BaseDataset import BaseDataset
from vidar.datasets.utils.FolderTree import FolderTree
from vidar.datasets.utils.misc import \
convert_ontology, initialize_ontology, stack_sample, make_relative_pose
from vidar.utils.data import dict_remove_nones
from vidar.utils.decorators import iterate1
from vidar.utils.read import read_image


def make_tree(path, sub_folder, camera, mode, context):
"""
Create a folder tree for a certain task
Parameters
----------
path : String
Data path
sub_folder : String
Subfolder path
camera : Int
Camera index
    mode : String
        Which variation of the dataset is used (e.g. clone)
context : list[Int]
Context samples
Returns
-------
tree : FolderTree
Folder tree containing task data
"""
path = os.path.join(path, sub_folder)
sub_folders = '{}/frames/{}/Camera_{}'.format(mode, sub_folder, camera)
return FolderTree(path, sub_folders=sub_folders, context=context)
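

# A hedged illustration (not part of the original file) of the folder layout
# make_tree assumes, matching the official VKITTI2 per-modality archives:
# for path='/data/VKITTI2', sub_folder='rgb', camera=0 and mode='clone',
# FolderTree is rooted at '/data/VKITTI2/rgb' and, for every scene, matches
#   /data/VKITTI2/rgb/Scene01/clone/frames/rgb/Camera_0/rgb_00000.jpg
# via sub_folders='clone/frames/rgb/Camera_0'.
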
def semantic_color_to_id(semantic_color, ontology):
"""
Convert semantic color to semantic ID
Parameters
----------
semantic_color : numpy.Array
Matrix with semantic colors [H, W, 3]
ontology : Dict
Ontology dictionary, with {id: color}
Returns
-------
semantic_id : numpy.Array
Matrix with semantic IDs [H, W]
"""
# Create semantic ID map
semantic_id = np.zeros(semantic_color.shape[:2])
# Loop over every ontology item and assign ID to color
for key, val in ontology.items():
idx = (semantic_color[:, :, 0] == val['color'][0]) & \
(semantic_color[:, :, 1] == val['color'][1]) & \
(semantic_color[:, :, 2] == val['color'][2])
semantic_id[idx] = key
# Return semantic ID map
return semantic_id
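

# A minimal, self-contained sketch (not in the original file) showing how
# semantic_color_to_id resolves colors against an ontology in the
# {id: {'name', 'color'}} format produced by VKITTI2Dataset._get_ontology
# below; the toy ontology and colors are made up for illustration.
def _example_semantic_color_to_id():
    toy_ontology = {
        0: {'name': 'sky', 'color': np.array([90, 200, 255])},
        1: {'name': 'road', 'color': np.array([100, 60, 100])},
    }
    # Paint the top row as 'sky' and the bottom row as 'road'
    semantic_color = np.zeros((2, 2, 3), dtype=np.uint8)
    semantic_color[0] = toy_ontology[0]['color']
    semantic_color[1] = toy_ontology[1]['color']
    # Each pixel is mapped back to the ID whose color it matches
    semantic_id = semantic_color_to_id(semantic_color, toy_ontology)
    assert semantic_id.tolist() == [[0, 0], [1, 1]]
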
class VKITTI2Dataset(BaseDataset):
"""
VKITTI2 dataset class
Parameters
----------
path : String
Path to the dataset
split : String {'train', 'val', 'test'}
Which dataset split to use
ontology : String
Which ontology should be used
    return_ontology : Bool
        If True, ontology information is returned in the sample
    data_transform : Function
        Transformations applied to the sample
    tag : String
        Dataset tag, used to identify its samples ('vkitti2' if None)
    """

    def __init__(self, split, tag=None, **kwargs):
super().__init__(**kwargs)
self.tag = 'vkitti2' if tag is None else tag
# Store variables
self.split = split
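        # Only the 'clone' variation is used; VKITTI2 also ships fog, rain,
        # morning, overcast, sunset and 15/30-degree rotated variants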
self.mode = 'clone'
# Initialize ontology
if self.with_semantic:
self.ontology, self.ontology_convert = initialize_ontology('vkitti2', self.ontology)
# Create RGB tree
self.rgb_tree = make_tree(
self.path, 'rgb', 0, self.mode, self.context)
# Create semantic tree
if self.with_semantic:
self.semantic_tree = make_tree(
self.path, 'classSegmentation', 0, self.mode, self.context)
# Create instance tree
if self.with_instance:
self.instance_tree = make_tree(
self.path, 'instanceSegmentation', 0, self.mode, self.context)

    def __len__(self):
"""Dataset length"""
return len(self.rgb_tree)

    @staticmethod
@iterate1
def _get_depth(filename):
"""Get depth map from filename"""
filename = filename.replace('rgb', 'depth').replace('jpg', 'png')
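        # Depth is stored as 16-bit PNGs in centimeters; convert to meters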
return cv2.imread(filename, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) / 100.

    @staticmethod
@iterate1
def _get_intrinsics(filename, camera, mode):
"""Get intrinsics from filename"""
# Get sample number in the scene
number = int(filename.split('/')[-1].replace('rgb_', '').replace('.jpg', ''))
# Get intrinsic filename
filename_idx = filename.rfind(mode) + len(mode)
filename_intrinsics = os.path.join(filename[:filename_idx].replace(
'/rgb/', '/textgt/'), 'intrinsic.txt')
# Open intrinsic file
with open(filename_intrinsics, 'r') as f:
# Get intrinsic parameters
lines = list(csv.reader(f, delimiter=' '))[1:]
params = [float(p) for p in lines[number * 2 + camera][2:]]
# Build intrinsics matrix
intrinsics = np.array([[params[0], 0.0, params[2]],
[0.0, params[1], params[3]],
[0.0, 0.0, 1.0]]).astype(np.float32)
# Return intrinsics
return intrinsics
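
    # A hedged sketch of the intrinsic.txt layout assumed above (values are
    # illustrative): a header line, then one row per (frame, camera) pair,
    # which is why row `number * 2 + camera` is read:
    #   frame cameraID K[0,0] K[1,1] K[0,2] K[1,2]
    #   0 0 725.0087 725.0087 620.5 187.0
    #   0 1 725.0087 725.0087 620.5 187.0
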
@staticmethod
@iterate1
def _get_pose(filename, camera, mode):
"""Get pose from filename"""
# Get sample number in the scene
number = int(filename.split('/')[-1].replace('rgb_', '').replace('.jpg', ''))
        # Get extrinsic filename
filename_idx = filename.rfind(mode) + len(mode)
filename_pose = os.path.join(filename[:filename_idx].replace(
'/rgb/', '/textgt/'), 'extrinsic.txt')
        # Open extrinsic file
with open(filename_pose, 'r') as f:
# Get pose parameters
lines = list(csv.reader(f, delimiter=' '))[1:]
pose = np.array([float(p) for p in lines[number * 2 + camera][2:]]).reshape(4, 4)
# Return pose
return pose
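
    # A hedged note (an assumption inferred from the reshape above): each
    # extrinsic.txt row after the header holds `frame cameraID` followed by
    # 16 values, i.e. a flattened 4x4 world-to-camera pose matrix, again
    # with two rows (one per camera) per frame.
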
@staticmethod
def _get_ontology(filename, mode):
"""Get ontology from filename"""
# Get ontology filename
filename_idx = filename.rfind(mode) + len(mode)
filename_ontology = os.path.join(filename[:filename_idx].replace(
'/classSegmentation/', '/textgt/'), 'colors.txt')
# Open ontology file
with open(filename_ontology, 'r') as f:
# Get ontology parameters
lines = list(csv.reader(f, delimiter=' '))[1:]
            ontology = OrderedDict()
for i, line in enumerate(lines):
ontology[i] = {
'name': line[0],
'color': np.array([int(clr) for clr in line[1:]])
}
return ontology

    def _get_semantic(self, filename):
"""Get semantic from filename"""
# Get semantic color map
semantic_color = {key: np.array(val) for key, val in read_image(filename).items()}
# Return semantic id map
semantic_id = {key: semantic_color_to_id(val, self.ontology) for key, val in semantic_color.items()}
return convert_ontology(semantic_id, self.ontology_convert)

    @staticmethod
def _get_instance(filename):
"""Get instance from filename"""
# Get instance id map
return np.array(read_image(filename))

    @staticmethod
    def _get_bbox3d(filename):
        """Get 3D bounding boxes from filename"""
        bboxes3d_dim = []
        bboxes3d_pos = []
        bboxes3d_rot = []
        bboxes3d_idx = []
        # Get sample number in the scene
        k = int(filename.split('/')[-1][4:-4])
        # Get bounding box (pose.txt) filename
        bb = '/'.join(filename.replace('/rgb/', '/textgt/').split('/')[:-4])
        bb += '/pose.txt'
        with open(bb, 'r') as file:
            for i, f in enumerate(file):
                # Skip header line
                if i == 0:
                    continue
                line = [float(a) for a in f.split(' ')]
                # Keep only boxes from frame k, seen from camera 0
                if line[0] == k and line[1] == 0:
                    bboxes3d_dim.append(np.array([line[6], line[5], line[4]]))
                    bboxes3d_pos.append(np.array(line[13:16]))
                    # bboxes3d_rot.append(np.array([line[18], line[17], line[16]]))
                    bboxes3d_rot.append(np.array([line[17], line[16], line[18]]))
                    bboxes3d_idx.append(np.array([line[2]]))
        return {
            'dim': np.stack(bboxes3d_dim, 0),
            'pos': np.stack(bboxes3d_pos, 0),
            'rot': np.stack(bboxes3d_rot, 0),
            'idx': np.stack(bboxes3d_idx, 0),
        }

@staticmethod
@iterate1
def _get_optical_flow(filename, mode):
"""
Get optical flow from filename. Code obtained here:
https://europe.naverlabs.com/research/computer-vision-research-naver-labs-europe/proxy-virtual-worlds-vkitti-2/
"""
# Get filename
if mode == 'bwd':
filename = filename.replace('rgb', 'backwardFlow')
elif mode == 'fwd':
filename = filename.replace('/rgb/', '/forwardFlow/').replace('rgb_', 'flow_')
else:
raise ValueError('Invalid optical flow mode')
filename = filename.replace('jpg', 'png')
# Return None if file does not exist
if not os.path.exists(filename):
return None
else:
# Get optical flow
optical_flow = cv2.imread(filename, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
h, w = optical_flow.shape[:2]
# Get invalid optical flow pixels
invalid = optical_flow[..., 0] == 0
# Normalize and scale optical flow values
optical_flow = 2.0 / (2 ** 16 - 1.0) * optical_flow[..., 2:0:-1].astype('f4') - 1.
optical_flow[..., 0] *= w - 1
optical_flow[..., 1] *= h - 1
# Remove invalid pixels
optical_flow[invalid] = 0
return optical_flow
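
    # A worked example of the decoding above (illustrative, not from the
    # original file): with image width w and raw 16-bit channel value v,
    #   u = (2.0 / (2**16 - 1) * v - 1.0) * (w - 1)
    # so v = 0 maps to -(w - 1) pixels, v = 2**16 - 1 maps to +(w - 1), and
    # a zero in the first (validity) channel marks the pixel as flow-less.
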
@staticmethod
@iterate1
def _get_scene_flow(filename, mode):
"""Get scene flow from filename. Code obtained here:
https://europe.naverlabs.com/research/computer-vision-research-naver-labs-europe/proxy-virtual-worlds-vkitti-2/
"""
# Get filename
if mode == 'bwd':
filename = filename.replace('rgb', 'backwardSceneFlow')
elif mode == 'fwd':
filename = filename.replace('/rgb/', '/forwardSceneFlow/').replace('rgb_', 'sceneFlow_')
else:
raise ValueError('Invalid scene flow mode')
filename = filename.replace('jpg', 'png')
# Return None if file does not exist
if not os.path.exists(filename):
return None
else:
# Get scene flow
scene_flow = cv2.imread(filename, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
            # Return normalized and scaled scene flow (-10m to 10m)
return (scene_flow[:, :, ::-1] * 2. / 65535. - 1.) * 10.
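
    # A worked example of the mapping above (illustrative): a raw 16-bit
    # value of 0 decodes to -10m, 32767.5 to 0m, and 65535 to +10m of
    # displacement along the corresponding axis.
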
def __getitem__(self, idx):
"""Get dataset sample"""
samples = []
for camera in self.cameras:
# Get filename
filename = self.rgb_tree.get_item(idx)
filename = {key: val.replace('Camera_0', 'Camera_{}'.format(camera))
for key, val in filename.items()}
# Base sample
sample = {
'idx': idx,
'tag': self.tag,
'filename': self.relative_path(filename),
'splitname': '%s_%010d' % (self.split, idx),
}
# Image and intrinsics
sample.update({
'rgb': read_image(filename),
'intrinsics': self._get_intrinsics(filename, camera, self.mode),
})
# If returning pose
if self.with_pose:
sample['pose'] = self._get_pose(filename, camera, self.mode)
# If returning depth
if self.with_depth:
sample['depth'] = self._get_depth(filename)
# If returning input depth
if self.with_input_depth:
sample['input_depth'] = self._get_depth(filename)
            # If returning semantic
            if self.with_semantic:
                filename_semantic = self.semantic_tree.get_item(idx)
                sample.update({'semantic': self._get_semantic(filename_semantic)})
                # If returning ontology
                if self.return_ontology:
                    sample.update({'ontology': self._get_ontology(filename_semantic, self.mode)})
            # If returning instance
            if self.with_instance:
                filename_instance = self.instance_tree.get_item(idx)
                sample.update({'instance': self._get_instance(filename_instance)})
            # If returning 3D bounding boxes
            if self.with_bbox3d:
                sample.update({
                    'bboxes3d': self._get_bbox3d(self.rgb_tree.get_item(idx))
                })
# If returning optical flow
if self.with_optical_flow:
sample['bwd_optical_flow'] = \
dict_remove_nones(self._get_optical_flow(filename, 'bwd'))
sample['fwd_optical_flow'] = \
dict_remove_nones(self._get_optical_flow(filename, 'fwd'))
# If returning scene flow
if self.with_scene_flow:
sample['bwd_scene_flow'] = \
dict_remove_nones(self._get_scene_flow(filename, 'bwd'))
sample['fwd_scene_flow'] = \
dict_remove_nones(self._get_scene_flow(filename, 'fwd'))
# If returning context information
if self.with_context:
# Get context filenames
filename_context = self.rgb_tree.get_context(idx)
filename_context = {key: val.replace('Camera_0', 'Camera_{}'.format(camera))
for key, val in filename_context.items()}
# Get RGB context
sample['rgb'].update(read_image(filename_context))
# Get pose context
if self.with_pose:
sample['pose'].update(self._get_pose(filename_context, camera, self.mode))
# Get depth context
if self.with_depth_context:
sample['depth'].update(self._get_depth(filename_context))
# Get input depth context
if self.with_input_depth_context:
sample['input_depth'].update(self._get_depth(filename_context))
# Get semantic context
if self.with_semantic_context:
sample['semantic'].update(self._get_semantic(self.semantic_tree.get_context(idx)))
# Get optical flow context
if self.with_optical_flow_context:
sample['bwd_optical_flow'].update(
dict_remove_nones(self._get_optical_flow(filename_context, 'bwd')))
sample['fwd_optical_flow'].update(
dict_remove_nones(self._get_optical_flow(filename_context, 'fwd')))
# Get scene flow context
if self.with_scene_flow_context:
sample['bwd_scene_flow'].update(
dict_remove_nones(self._get_scene_flow(filename_context, 'bwd')))
sample['fwd_scene_flow'].update(
dict_remove_nones(self._get_scene_flow(filename_context, 'fwd')))
            # Add sample to list
samples.append(sample)
# Make relative poses
samples = make_relative_pose(samples)
# Transform data
if self.data_transform:
samples = self.data_transform(samples)
# Return stacked sample
return stack_sample(samples)
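

# A minimal usage sketch (hypothetical, not part of the original file). The
# constructor keywords are consumed by BaseDataset, so the names below are
# assumptions inferred from how this class uses them (self.path,
# self.context, self.cameras, ...):
#
#   dataset = VKITTI2Dataset(
#       split='train', path='/data/VKITTI2',
#       context=[-1, 1], cameras=[0],
#   )
#   sample = dataset[0]
#   sample['rgb']         # images keyed by context index
#   sample['intrinsics']  # 3x3 intrinsics per context index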