define-hf-demo / vidar /datasets /ScanNetTemporalDataset.py
Jiading Fang
add define
fc16538
import os
import numpy as np
import cv2
from vidar.datasets.BaseDataset import BaseDataset
from vidar.datasets.utils.FolderTree import FolderTree
from vidar.datasets.utils.misc import stack_sample
from vidar.utils.read import read_image
from vidar.datasets.BaseDataset import BaseDataset
from vidar.datasets.utils.misc import invert_pose, stack_sample, make_relative_pose
from vidar.utils.read import read_image
class ScanNetTemporalDataset(BaseDataset):
def __init__(self, tag=None, single_folder=False, split=None, stride=1, **kwargs):
super().__init__(**kwargs)
self.tag = 'scannet_temporal' if tag is None else tag
if split is None or split == '':
split = ('', )
self.rgb_tree = FolderTree(
os.path.join(self.path, split),
context=self.context, sub_folders=['color'], stride=stride,
single_folder=single_folder, suffix='.jpg')
def __len__(self):
"""Dataset length"""
return len(self.rgb_tree)
@staticmethod
def get_intrinsics(rgb):
"""Return dummy intrinsics"""
return np.array([[rgb.size[0] / 2., 0., rgb.size[0] / 2.],
[0., rgb.size[1], rgb.size[1] / 2.],
[0., 0., 1.]])
@staticmethod
def load_intrinsics(filename):
filename_intrinsics = {key: '/'.join(val.split('/')[:-2]) + '/intrinsic/intrinsic_depth.txt'
for key, val in filename.items()}
return {key: np.genfromtxt(val).astype(np.float32).reshape((4, 4))[:3, :3]
for key, val in filename_intrinsics.items()}
@staticmethod
def load_depth(filename):
try:
filename_depth = {key: val.replace('color', 'depth').replace('.jpg', '.png')
for key, val in filename.items()}
return {key: (cv2.imread(val, cv2.IMREAD_ANYDEPTH).astype(np.float32) / 1000.0)
for key, val in filename_depth.items()}
except:
filename_depth = {key: val.replace('color', 'depth').replace('.jpg', '.npy')
for key, val in filename.items()}
return {key: (np.load(val) / 1000.0).astype(np.float32)
for key, val in filename_depth.items()}
@staticmethod
def load_pose(filename):
filename_pose = {key: val.replace('color', 'pose').replace('.jpg', '.txt')
for key, val in filename.items()}
return {key: invert_pose(np.genfromtxt(val).astype(np.float32).reshape((4, 4)))
for key, val in filename_pose.items()}
def __getitem__(self, idx):
"""Get dataset sample given an index."""
samples = []
for _ in self.cameras:
# Filename
filename = self.rgb_tree.get_item(idx)
# Base sample
sample = {
'idx': idx,
'tag': self.tag,
'filename': self.relative_path(filename),
'splitname': '%010d' % idx
}
# Image
sample['rgb'] = read_image(filename)
# Intrinsics
sample['intrinsics'] = self.load_intrinsics(filename)
if self.with_depth:
sample['depth'] = self.load_depth(filename)
if self.with_pose:
sample['pose'] = self.load_pose(filename)
# If with context
if self.with_context:
filename_context = self.rgb_tree.get_context(idx)
sample['rgb'].update(read_image(filename_context))
if self.with_depth:
sample['depth'].update(self.load_depth(filename_context))
if self.with_pose:
sample['pose'].update(self.load_pose(filename_context))
# Stack sample
samples.append(sample)
# Make relative poses
samples = make_relative_pose(samples)
# Transform data
if self.data_transform:
samples = self.data_transform(samples)
# Return stacked sample
return stack_sample(samples)