V3D / recon /utils /camera_utils.py
heheyas
init
cfb7702
#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact george.drettakis@inria.fr
#
from pathlib import Path
from mediapy import read_video, write_video
from scene.cameras import Camera
import numpy as np
from utils.general_utils import PILtoTorch
from utils.graphics_utils import fov2focal
WARNED = False
def loadCam(args, id, cam_info, resolution_scale):
orig_w, orig_h = cam_info.image.size
if args.resolution in [1, 2, 4, 8]:
resolution = round(orig_w / (resolution_scale * args.resolution)), round(
orig_h / (resolution_scale * args.resolution)
)
else: # should be a type that converts to float
if args.resolution == -1:
if orig_w > 1600:
global WARNED
if not WARNED:
print(
"[ INFO ] Encountered quite large input images (>1.6K pixels width), rescaling to 1.6K.\n "
"If this is not desired, please explicitly specify '--resolution/-r' as 1"
)
WARNED = True
global_down = orig_w / 1600
else:
global_down = 1
else:
global_down = orig_w / args.resolution
scale = float(global_down) * float(resolution_scale)
resolution = (int(orig_w / scale), int(orig_h / scale))
resized_image_rgb = PILtoTorch(cam_info.image, resolution)
gt_image = resized_image_rgb[:3, ...]
loaded_mask = None
if resized_image_rgb.shape[1] == 4:
loaded_mask = resized_image_rgb[3:4, ...]
return Camera(
colmap_id=cam_info.uid,
R=cam_info.R,
T=cam_info.T,
FoVx=cam_info.FovX,
FoVy=cam_info.FovY,
image=gt_image,
gt_alpha_mask=loaded_mask,
image_name=cam_info.image_name,
uid=id,
data_device=args.data_device,
)
def cameraList_from_camInfos(cam_infos, resolution_scale, args):
camera_list = []
for id, c in enumerate(cam_infos):
camera_list.append(loadCam(args, id, c, resolution_scale))
return camera_list
def camera_to_JSON(id, camera: Camera):
Rt = np.zeros((4, 4))
Rt[:3, :3] = camera.R.transpose()
Rt[:3, 3] = camera.T
Rt[3, 3] = 1.0
W2C = np.linalg.inv(Rt)
pos = W2C[:3, 3]
rot = W2C[:3, :3]
serializable_array_2d = [x.tolist() for x in rot]
camera_entry = {
"id": id,
"img_name": camera.image_name,
"width": camera.width,
"height": camera.height,
"position": pos.tolist(),
"rotation": serializable_array_2d,
"fy": fov2focal(camera.FovY, camera.height),
"fx": fov2focal(camera.FovX, camera.width),
}
return camera_entry
def get_c2w_from_up_and_look_at(
up,
look_at,
pos,
opengl=False,
):
up = up / np.linalg.norm(up)
z = look_at - pos
z = z / np.linalg.norm(z)
y = -up
x = np.cross(y, z)
x /= np.linalg.norm(x)
y = np.cross(z, x)
c2w = np.zeros([4, 4], dtype=np.float32)
c2w[:3, 0] = x
c2w[:3, 1] = y
c2w[:3, 2] = z
c2w[:3, 3] = pos
c2w[3, 3] = 1.0
# opencv to opengl
if opengl:
c2w[..., 1:3] *= -1
return c2w
def get_uniform_poses(num_frames, radius, elevation, opengl=False):
T = num_frames
azimuths = np.deg2rad(np.linspace(0, 360, T + 1)[:T])
elevations = np.full_like(azimuths, np.deg2rad(elevation))
cam_dists = np.full_like(azimuths, radius)
campos = np.stack(
[
cam_dists * np.cos(elevations) * np.cos(azimuths),
cam_dists * np.cos(elevations) * np.sin(azimuths),
cam_dists * np.sin(elevations),
],
axis=-1,
)
center = np.array([0, 0, 0], dtype=np.float32)
up = np.array([0, 0, 1], dtype=np.float32)
poses = []
for t in range(T):
poses.append(get_c2w_from_up_and_look_at(up, center, campos[t], opengl=opengl))
return np.stack(poses, axis=0)