Spaces:
Running
Running
import os.path as osp | |
import numpy as np | |
import torch | |
import torch.nn.functional as F | |
from torch.utils.data import Dataset | |
from loguru import logger | |
from src.utils.dataset import read_megadepth_gray, read_megadepth_depth | |
class MegaDepthDataset(Dataset):
    """Image-pair dataset backed by one MegaDepth scene index file.

    Each item is a dict describing one image pair of the scene: the two
    grayscale images, (optionally) their depth maps, camera intrinsics,
    relative poses and bookkeeping fields. See ``__getitem__`` for the keys.
    """

    def __init__(
        self,
        root_dir,
        npz_path,
        mode="train",
        min_overlap_score=0.4,
        img_resize=None,
        df=None,
        img_padding=False,
        depth_padding=False,
        augment_fn=None,
        **kwargs
    ):
        """
        Manage one scene(npz_path) of MegaDepth dataset.

        Args:
            root_dir (str): megadepth root directory that has `phoenix`.
            npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
            mode (str): options are ['train', 'val', 'test']
            min_overlap_score (float): how much a pair should have in common. In range of [0, 1]. Set to 0 when testing.
            img_resize (int, optional): the longer edge of resized images. None for no resize. 640 is recommended.
                                        This is useful during training with batches and testing with memory intensive algorithms.
            df (int, optional): image size division factor. NOTE: this will change the final image size after img_resize.
            img_padding (bool): If set to 'True', zero-pad the image to squared size. This is useful during training.
            depth_padding (bool): If set to 'True', zero-pad depthmap to (2000, 2000). This is useful during training.
            augment_fn (callable, optional): augments images with pre-defined visual effects.
            **kwargs: extra options. Only ``coarse_scale`` (default 0.125) is
                read; it is the scale factor used to downsample the padding
                masks in ``__getitem__``. A falsy value disables mask output.

        Raises:
            AssertionError: in 'train' mode, if `img_resize` is None or either
                `img_padding` or `depth_padding` is falsy.
        """
        super().__init__()
        self.root_dir = root_dir
        self.mode = mode
        # NOTE(review): this keeps everything before the FIRST "." of the whole
        # path (directories included), not just the file extension. Kept as-is
        # because consumers of `scene_id` may rely on the current format.
        self.scene_id = npz_path.split(".")[0]

        # prepare scene_info and pair_info
        if mode == "test" and min_overlap_score != 0:
            logger.warning(
                "You are using `min_overlap_score`!=0 in test mode. Set to 0."
            )
            min_overlap_score = 0

        # NOTE(security): allow_pickle=True unpickles arbitrary objects; only
        # load index files that come from a trusted source.
        loaded = np.load(npz_path, allow_pickle=True)
        try:
            # Materialise into a plain dict: an NpzFile is read-only (item
            # deletion raises TypeError), re-reads the archive on every key
            # access and keeps a file handle open.
            self.scene_info = dict(loaded)
        finally:
            close = getattr(loaded, "close", None)
            if callable(close):
                close()

        # Pair infos are kept separately (filtered) and removed from scene_info.
        all_pair_infos = self.scene_info.pop("pair_infos")
        self.pair_infos = [
            pair_info
            for pair_info in all_pair_infos
            if pair_info[1] > min_overlap_score
        ]

        # parameters for image resizing, padding and depthmap padding
        if mode == "train":
            assert img_resize is not None and img_padding and depth_padding
        self.img_resize = img_resize
        self.df = df
        self.img_padding = img_padding
        # 2000 is the upperbound of depthmaps size in megadepth.
        self.depth_max_size = 2000 if depth_padding else None

        # for training LoFTR
        self.augment_fn = augment_fn if mode == "train" else None
        # `kwargs` is a dict, so the option must be looked up by key
        # (getattr on a dict would always fall back to the default).
        self.coarse_scale = kwargs.get("coarse_scale", 0.125)

    def __len__(self):
        """Return the number of image pairs that passed the overlap filter."""
        return len(self.pair_infos)

    def __getitem__(self, idx):
        """Load the `idx`-th image pair of the scene.

        Returns:
            dict with keys "image0"/"image1", "depth0"/"depth1" (empty tensors
            outside 'train'/'val' mode), "T_0to1", "T_1to0", "K0", "K1",
            "scale0", "scale1", "dataset_name", "scene_id", "pair_id" and
            "pair_names"; plus "mask0"/"mask1" when the image reader returned
            masks and `coarse_scale` is truthy.
        """
        (idx0, idx1), _overlap_score, _central_matches = self.pair_infos[idx]
        image_paths = self.scene_info["image_paths"]

        # read grayscale image and mask. (1, h, w) and (h, w)
        img_name0 = osp.join(self.root_dir, image_paths[idx0])
        img_name1 = osp.join(self.root_dir, image_paths[idx1])

        # TODO: Support augmentation & handle seeds for each worker correctly.
        # Until then no augmentation function is forwarded to the reader
        # (`self.augment_fn` is intentionally unused here).
        image0, mask0, scale0 = read_megadepth_gray(
            img_name0, self.img_resize, self.df, self.img_padding, None
        )
        image1, mask1, scale1 = read_megadepth_gray(
            img_name1, self.img_resize, self.df, self.img_padding, None
        )

        # read depth. shape: (h, w)
        if self.mode in ["train", "val"]:
            depth_paths = self.scene_info["depth_paths"]
            depth0 = read_megadepth_depth(
                osp.join(self.root_dir, depth_paths[idx0]),
                pad_to=self.depth_max_size,
            )
            depth1 = read_megadepth_depth(
                osp.join(self.root_dir, depth_paths[idx1]),
                pad_to=self.depth_max_size,
            )
        else:
            # No depth in test mode: both keys share one empty placeholder.
            depth0 = depth1 = torch.tensor([])

        # read intrinsics of original size
        intrinsics = self.scene_info["intrinsics"]
        K_0 = torch.tensor(intrinsics[idx0].copy(), dtype=torch.float).reshape(3, 3)
        K_1 = torch.tensor(intrinsics[idx1].copy(), dtype=torch.float).reshape(3, 3)

        # read and compute relative poses
        poses = self.scene_info["poses"]
        T0 = poses[idx0]
        T1 = poses[idx1]
        T_0to1 = torch.tensor(np.matmul(T1, np.linalg.inv(T0)), dtype=torch.float)[
            :4, :4
        ]  # (4, 4)
        T_1to0 = T_0to1.inverse()

        data = {
            "image0": image0,  # (1, h, w)
            "depth0": depth0,  # (h, w)
            "image1": image1,
            "depth1": depth1,
            "T_0to1": T_0to1,  # (4, 4)
            "T_1to0": T_1to0,
            "K0": K_0,  # (3, 3)
            "K1": K_1,
            "scale0": scale0,  # [scale_w, scale_h]
            "scale1": scale1,
            "dataset_name": "MegaDepth",
            "scene_id": self.scene_id,
            "pair_id": idx,
            "pair_names": (image_paths[idx0], image_paths[idx1]),
        }

        # for LoFTR training
        if mask0 is not None:  # img_padding is True
            if self.coarse_scale:
                # Downsample both masks together: stack to (1, 2, h, w),
                # resize with nearest-neighbour, then split again.
                [ts_mask_0, ts_mask_1] = F.interpolate(
                    torch.stack([mask0, mask1], dim=0)[None].float(),
                    scale_factor=self.coarse_scale,
                    mode="nearest",
                    recompute_scale_factor=False,
                )[0].bool()
                data.update({"mask0": ts_mask_0, "mask1": ts_mask_1})

        return data