# Align3R: dust3r/datasets/habitat.py
# Copyright (C) 2024-present Naver Corporation. All rights reserved.
# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
#
# --------------------------------------------------------
# Dataloader for the preprocessed Habitat dataset
# (https://github.com/facebookresearch/habitat-sim/blob/main/DATASETS.md)
# See datasets_preprocess/habitat for more details.
# --------------------------------------------------------
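#
# Expected on-disk layout (a sketch inferred from the loading code below, not an
# official spec): ROOT contains a scene list 'Habitat_{size}_scenes_{split}.txt'
# with one scene entry per line. Each entry is a relative path; its directory
# part holds the per-view files, named after its basename (the 'key'):
# '{key}_{i}.jpeg' (RGB), '{key}_{i}_depth.exr' (float depth) and
# '{key}_{i}_camera_params.json' (with 'camera_intrinsics', 'R_cam2world' and
# 't_cam2world'), where the view index i is 1-based.
# --------------------------------------------------------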
import os.path as osp
import os
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" # noqa
import cv2 # noqa
import numpy as np
from PIL import Image
import json
from dust3r.datasets.base.base_stereo_view_dataset import BaseStereoViewDataset


class Habitat(BaseStereoViewDataset):
    def __init__(self, size, *args, ROOT, **kwargs):
        self.ROOT = ROOT
        super().__init__(*args, **kwargs)
        assert self.split is not None
        # loading list of scenes
        with open(osp.join(self.ROOT, f'Habitat_{size}_scenes_{self.split}.txt')) as f:
            self.scenes = f.read().splitlines()
        self.instances = list(range(1, 5))

    def filter_scene(self, label, instance=None):
        if instance:
            subscene, instance = instance.split('_')
            label += '/' + subscene
            self.instances = [int(instance) - 1]
        valid = np.bool_([scene.startswith(label) for scene in self.scenes])
        assert sum(valid), f'no scene was selected for {label=} {instance=}'
        self.scenes = [scene for i, scene in enumerate(self.scenes) if valid[i]]

    def _get_views(self, idx, resolution, rng):
        scene = self.scenes[idx]
        data_path, key = osp.split(osp.join(self.ROOT, scene))
        views = []
        two_random_views = [0, rng.choice(self.instances)]  # view 0 is connected with all other views
        for view_index in two_random_views:
            # load the view (and use the next one if this one's broken)
            for ii in range(view_index, view_index + 5):
                image, depthmap, intrinsics, camera_pose = self._load_one_view(data_path, key, ii % 5, resolution, rng)
                if np.isfinite(camera_pose).all():
                    break
            views.append(dict(
                img=image,
                depthmap=depthmap,
                camera_pose=camera_pose,  # cam2world
                camera_intrinsics=intrinsics,
                dataset='Habitat',
                label=osp.relpath(data_path, self.ROOT),
                instance=f"{key}_{view_index}"))
        return views

    def _load_one_view(self, data_path, key, view_index, resolution, rng):
        view_index += 1  # file indices start at 1
        impath = osp.join(data_path, f"{key}_{view_index}.jpeg")
        image = Image.open(impath)

        # depth is stored as a single-channel float EXR (requires OPENCV_IO_ENABLE_OPENEXR, set above)
        depthmap_filename = osp.join(data_path, f"{key}_{view_index}_depth.exr")
        depthmap = cv2.imread(depthmap_filename, cv2.IMREAD_GRAYSCALE | cv2.IMREAD_ANYDEPTH)

        # camera intrinsics and the cam2world pose come from the per-view JSON file
        camera_params_filename = osp.join(data_path, f"{key}_{view_index}_camera_params.json")
        with open(camera_params_filename, 'r') as f:
            camera_params = json.load(f)

        intrinsics = np.float32(camera_params['camera_intrinsics'])
        camera_pose = np.eye(4, dtype=np.float32)
        camera_pose[:3, :3] = camera_params['R_cam2world']
        camera_pose[:3, 3] = camera_params['t_cam2world']

        image, depthmap, intrinsics = self._crop_resize_if_necessary(
            image, depthmap, intrinsics, resolution, rng, info=impath)

        return image, depthmap, intrinsics, camera_pose


if __name__ == "__main__":
    from dust3r.datasets.base.base_stereo_view_dataset import view_name
    from dust3r.viz import SceneViz, auto_cam_size
    from dust3r.utils.image import rgb

    dataset = Habitat(1_000_000, split='train', ROOT="data/habitat_processed",
                      resolution=224, aug_crop=16)

    for idx in np.random.permutation(len(dataset)):
        views = dataset[idx]
        assert len(views) == 2
        print(view_name(views[0]), view_name(views[1]))
        viz = SceneViz()
        poses = [views[view_idx]['camera_pose'] for view_idx in [0, 1]]
        cam_size = max(auto_cam_size(poses), 0.001)
        for view_idx in [0, 1]:
            pts3d = views[view_idx]['pts3d']
            valid_mask = views[view_idx]['valid_mask']
            colors = rgb(views[view_idx]['img'])
            viz.add_pointcloud(pts3d, colors, valid_mask)
            viz.add_camera(pose_c2w=views[view_idx]['camera_pose'],
                           focal=views[view_idx]['camera_intrinsics'][0, 0],
                           color=(view_idx * 255, (1 - view_idx) * 255, 0),  # red/green for the two views
                           image=colors,
                           cam_size=cam_size)
        viz.show()
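

# A minimal usage sketch of scene filtering (the ROOT path and the "scene_000"
# label below are placeholders, not guaranteed to exist in any given dump):
#
#   dataset = Habitat(1_000_000, split='train', ROOT="data/habitat_processed",
#                     resolution=224)
#   dataset.filter_scene("scene_000")   # keep only scenes whose name starts with this label
#   views = dataset[0]                  # one pair of views (img, depthmap, pose, intrinsics, ...)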