Spaces:

heheyas
/

V3D

Running on Zero

V3D / recon /utils /camera_utils.py

heheyas

init

cfb7702 3 months ago

No virus

4.17 kB

	#
	# Copyright (C) 2023, Inria
	# GRAPHDECO research group, https://team.inria.fr/graphdeco
	# All rights reserved.
	#
	# This software is free for non-commercial, research and evaluation use
	# under the terms of the LICENSE.md file.
	#
	# For inquiries contact george.drettakis@inria.fr
	#

	from pathlib import Path
	from mediapy import read_video, write_video
	from scene.cameras import Camera
	import numpy as np
	from utils.general_utils import PILtoTorch
	from utils.graphics_utils import fov2focal

	# Module-level flag: loadCam sets this to True after printing its
	# "large input images" rescale notice, so the notice is shown only once.
	WARNED = False


	def loadCam(args, id, cam_info, resolution_scale):
	    """Create a ``Camera`` for one ``cam_info`` entry, resizing its image.

	    The target resolution is chosen from ``args.resolution``:

	    * 1, 2, 4 or 8: both image sides are divided by
	      ``resolution_scale * args.resolution`` and rounded.
	    * -1: the original size is kept unless the image is wider than 1600
	      pixels, in which case it is scaled down to a width of 1600 (a notice
	      is printed the first time this happens).
	    * any other value: used as the desired image width in pixels.

	    In the last two cases the size is additionally divided by
	    ``resolution_scale`` and truncated to integers.

	    Args:
	        args: Options object; ``resolution`` and ``data_device`` are read.
	        id: Value forwarded to ``Camera`` as ``uid``.
	        cam_info: Source record; ``image`` (whose ``.size`` unpacks to
	            width, height), ``uid``, ``R``, ``T``, ``FovX``, ``FovY`` and
	            ``image_name`` are read.
	        resolution_scale: Extra down-scaling factor applied on top of
	            ``args.resolution``.

	    Returns:
	        The constructed ``Camera``. ``gt_alpha_mask`` is the fourth channel
	        of the resized image when it has one, otherwise ``None``.
	    """
	    global WARNED

	    orig_w, orig_h = cam_info.image.size

	    if args.resolution in [1, 2, 4, 8]:
	        divisor = resolution_scale * args.resolution
	        resolution = round(orig_w / divisor), round(orig_h / divisor)
	    else:  # should be a type that converts to float
	        if args.resolution == -1:
	            if orig_w > 1600:
	                if not WARNED:
	                    print(
	                        "[ INFO ] Encountered quite large input images (>1.6K pixels width), rescaling to 1.6K.\n "
	                        "If this is not desired, please explicitly specify '--resolution/-r' as 1"
	                    )
	                    WARNED = True
	                global_down = orig_w / 1600
	            else:
	                global_down = 1
	        else:
	            global_down = orig_w / args.resolution

	        scale = float(global_down) * float(resolution_scale)
	        resolution = (int(orig_w / scale), int(orig_h / scale))

	    resized_image_rgb = PILtoTorch(cam_info.image, resolution)

	    # Channels are sliced on axis 0 below, so the RGBA test must look at
	    # axis 0 as well. The previous test used axis 1, which compared a
	    # spatial dimension with 4 and left the alpha mask unused for RGBA input.
	    # NOTE(review): assumes PILtoTorch returns a channel-first tensor, as
	    # the slicing implies - confirm against utils.general_utils.
	    gt_image = resized_image_rgb[:3, ...]
	    loaded_mask = None
	    if resized_image_rgb.shape[0] == 4:
	        loaded_mask = resized_image_rgb[3:4, ...]

	    return Camera(
	        colmap_id=cam_info.uid,
	        R=cam_info.R,
	        T=cam_info.T,
	        FoVx=cam_info.FovX,
	        FoVy=cam_info.FovY,
	        image=gt_image,
	        gt_alpha_mask=loaded_mask,
	        image_name=cam_info.image_name,
	        uid=id,
	        data_device=args.data_device,
	    )


	def cameraList_from_camInfos(cam_infos, resolution_scale, args):
	    """Return one ``loadCam`` result per entry of ``cam_infos``, in order.

	    Each entry's position in ``cam_infos`` is passed to ``loadCam`` as its
	    ``id`` argument.
	    """
	    return [
	        loadCam(args, index, info, resolution_scale)
	        for index, info in enumerate(cam_infos)
	    ]


	def camera_to_JSON(id, camera: Camera):
	    """Return a JSON-serialisable dict describing ``camera``.

	    A 4x4 matrix is assembled from the transpose of ``camera.R`` and from
	    ``camera.T``, then inverted. The translation column of the inverse is
	    stored under "position" and its upper-left 3x3 block under "rotation".
	    "fx" and "fy" come from ``fov2focal`` applied to the camera's field of
	    view and its width / height respectively.
	    """
	    # Start from the identity so the bottom row is already [0, 0, 0, 1].
	    extrinsic = np.eye(4)
	    extrinsic[:3, :3] = camera.R.transpose()
	    extrinsic[:3, 3] = camera.T

	    inverse = np.linalg.inv(extrinsic)

	    return {
	        "id": id,
	        "img_name": camera.image_name,
	        "width": camera.width,
	        "height": camera.height,
	        "position": inverse[:3, 3].tolist(),
	        "rotation": inverse[:3, :3].tolist(),
	        "fy": fov2focal(camera.FovY, camera.height),
	        "fx": fov2focal(camera.FovX, camera.width),
	    }


	def get_c2w_from_up_and_look_at(
	    up,
	    look_at,
	    pos,
	    opengl=False,
	):
	    """Build a 4x4 float32 camera-to-world matrix for a look-at camera.

	    The camera z axis points from ``pos`` towards ``look_at``. The x axis
	    is the normalised cross product of the negated, normalised ``up`` with
	    z, and the y axis is the cross product of z with x. Columns 0-2 of the
	    result hold x, y and z; column 3 holds ``pos``.

	    Args:
	        up: Up direction (need not be unit length).
	        look_at: Point the camera looks at.
	        pos: Camera position.
	        opengl: When true, columns 1 and 2 (y and z) are negated, i.e. the
	            matrix is converted from the OpenCV to the OpenGL convention.

	    Returns:
	        ``np.ndarray`` of shape (4, 4) and dtype float32.

	    No check is made for ``pos == look_at`` or for a view direction
	    parallel to ``up``; both lead to a division by a zero norm.
	    """
	    unit_up = up / np.linalg.norm(up)

	    forward = look_at - pos
	    forward = forward / np.linalg.norm(forward)

	    right = np.cross(-unit_up, forward)
	    right = right / np.linalg.norm(right)

	    # Recompute y from z and x so the three axes are mutually orthogonal.
	    down = np.cross(forward, right)

	    c2w = np.eye(4, dtype=np.float32)
	    c2w[:3, 0] = right
	    c2w[:3, 1] = down
	    c2w[:3, 2] = forward
	    c2w[:3, 3] = pos

	    if opengl:
	        # opencv to opengl: flip the y and z columns
	        c2w[:, 1:3] *= -1

	    return c2w


	def get_uniform_poses(num_frames, radius, elevation, opengl=False):
	    """Return ``num_frames`` camera-to-world matrices on a circle around the origin.

	    Azimuths are evenly spaced over a full turn (the 360 degree end point is
	    excluded). Every camera sits at distance ``radius`` from the origin at
	    the given ``elevation`` (degrees), looks at the origin and uses +z as
	    its up direction.

	    Args:
	        num_frames: Number of poses to generate.
	        radius: Distance of each camera from the origin.
	        elevation: Elevation angle in degrees.
	        opengl: Forwarded to ``get_c2w_from_up_and_look_at``.

	    Returns:
	        Array of the per-frame 4x4 matrices stacked along axis 0.
	    """
	    azimuths = np.deg2rad(np.linspace(0, 360, num_frames + 1)[:num_frames])
	    elevations = np.full_like(azimuths, np.deg2rad(elevation))
	    cam_dists = np.full_like(azimuths, radius)

	    # Distance from the z axis, shared by the x and y coordinates.
	    planar = cam_dists * np.cos(elevations)
	    campos = np.stack(
	        (
	            planar * np.cos(azimuths),
	            planar * np.sin(azimuths),
	            cam_dists * np.sin(elevations),
	        ),
	        axis=-1,
	    )

	    origin = np.array([0, 0, 0], dtype=np.float32)
	    world_up = np.array([0, 0, 1], dtype=np.float32)

	    poses = [
	        get_c2w_from_up_and_look_at(world_up, origin, position, opengl=opengl)
	        for position in campos
	    ]
	    return np.stack(poses, axis=0)