Spaces:
Sleeping
Sleeping
File size: 3,580 Bytes
e4bf056 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import os
import cv2
import numpy as np
import os.path as osp
from collections import deque
from dust3r.utils.image import imread_cv2
from .base_many_view_dataset import BaseManyViewDataset
class Demo(BaseManyViewDataset):
def __init__(self, num_seq=1, num_frames=5,
min_thresh=10, max_thresh=100,
full_video=True, kf_every=1,
*args, ROOT, **kwargs):
self.ROOT = ROOT
super().__init__(*args, **kwargs)
self.num_seq = num_seq
self.num_frames = num_frames
self.max_thresh = max_thresh
self.min_thresh = min_thresh
self.full_video = full_video
self.kf_every = kf_every
def __len__(self):
return self.num_seq
def _get_views(self, idx, resolution, rng):
img_idxs = sorted(os.listdir(self.ROOT))
valid_extensions = {'.jpg', '.jpeg', '.png', '.heic'}
img_idxs = [idx for idx in img_idxs
if idx.lower().endswith(tuple(valid_extensions)) and 'depth' not in idx.lower()]
img_idxs = self.sample_frame_idx(img_idxs, rng, full_video=self.full_video)
# pseudo intrinsics
fx, fy = 525, 525
views = []
imgs_idxs = deque(img_idxs)
while len(imgs_idxs) > 0:
im_idx = imgs_idxs.popleft()
impath = osp.join(self.ROOT, im_idx)
if not osp.exists(impath):
raise FileNotFoundError(f"Image not found: {impath}")
print(f'Loading image: {impath}')
if 'heic' in impath.lower():
from PIL import Image
rgb_image = Image.open(impath)
if rgb_image.mode != 'RGB':
rgb_image = rgb_image.convert('RGB')
rgb_image = np.array(rgb_image)
else:
rgb_image = imread_cv2(impath)
depth_path = impath.split('.')[0] + '_depth.png'
meta_data_path = impath.split('.')[0] + '.npz'
if osp.exists(meta_data_path):
input_metadata = np.load(meta_data_path)
camera_pose = input_metadata['camera_pose'].astype(np.float32)
intrinsics = input_metadata['camera_intrinsics'].astype(np.float32)
else:
cx, cy = rgb_image.shape[1]//2, rgb_image.shape[0]//2
intrinsics = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32)
# pseudo camera pose
camera_pose = np.eye(4).astype(np.float32)
if not osp.exists(depth_path):
depthmap = np.ones((rgb_image.shape[0], rgb_image.shape[1])).astype(np.float32)
else:
depthmap = imread_cv2(depth_path, cv2.IMREAD_UNCHANGED)
depthmap = (depthmap.astype(np.float32) / 65535) * np.nan_to_num(input_metadata['maximum_depth'])
# resize rgb to the same size as depth
rgb_image = cv2.resize(rgb_image, (depthmap.shape[1], depthmap.shape[0]))
rgb_image, depthmap, intrinsics = self._crop_resize_if_necessary(
rgb_image, depthmap, intrinsics, resolution, rng=rng, info=impath)
views.append(dict(
img=rgb_image,
depthmap=depthmap,
camera_pose=camera_pose,
camera_intrinsics=intrinsics,
dataset='demo',
label=osp.join(self.ROOT, im_idx),
instance=osp.split(impath)[1],
))
return views
|