| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| | |
| |
|
| | import numpy as np |
| | import os |
| | import enum |
| | import types |
| | from typing import List, Mapping, Optional, Text, Tuple, Union |
| | import copy |
| | from PIL import Image |
| | |
| | from matplotlib import cm |
| | from tqdm import tqdm |
| |
|
| | import torch |
| |
|
| |
|
def normalize(x: np.ndarray) -> np.ndarray:
    """Scale `x` by the reciprocal of its norm.

    Args:
        x: array to rescale.

    Returns:
        `x` divided by `np.linalg.norm(x)`. No guard is applied for a zero
        norm, so an all-zero input yields NaNs (plus a NumPy warning).
    """
    length = np.linalg.norm(x)
    return x / length
| |
|
| |
|
def pad_poses(p: np.ndarray) -> np.ndarray:
    """Append the homogeneous row [0, 0, 0, 1] to [..., 3, 4] pose matrices.

    Args:
        p: array whose last two axes hold the pose matrices; only the first
            three rows and four columns are used.

    Returns:
        Array of shape [..., 4, 4] whose last row is [0, 0, 0, 1].
    """
    upper = p[..., :3, :4]
    # One homogeneous row per pose, shaped like a single-row slice of `p`.
    row_shape = p[..., :1, :4].shape
    homogeneous_row = np.broadcast_to([0, 0, 0, 1.], row_shape)
    return np.concatenate([upper, homogeneous_row], axis=-2)
| |
|
| |
|
def unpad_poses(p: np.ndarray) -> np.ndarray:
    """Drop the homogeneous bottom row from [..., 4, 4] pose matrices.

    Args:
        p: array whose last two axes hold the pose matrices.

    Returns:
        The leading three rows and four columns of each matrix, i.e. an
        array of shape [..., 3, 4] (a basic-slice view of `p`).
    """
    keep_rows = slice(None, 3)
    keep_cols = slice(None, 4)
    return p[..., keep_rows, keep_cols]
| |
|
| |
|
def recenter_poses(poses: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Re-express poses relative to their average pose.

    Args:
        poses: stack of pose matrices, indexed as (N, 3+, 4) by the helpers
            used here.

    Returns:
        A tuple `(poses, transform)`: the poses after left-multiplication by
        `transform` (homogeneous row removed again), and `transform` itself,
        the 4x4 inverse of the padded average pose.
    """
    mean_pose = pad_poses(average_pose(poses))
    transform = np.linalg.inv(mean_pose)
    recentered = transform @ pad_poses(poses)
    return unpad_poses(recentered), transform
| |
|
| |
|
def average_pose(poses: np.ndarray) -> np.ndarray:
    """Build one pose from the mean position, z-axis and up vector of `poses`.

    Args:
        poses: array indexed as (N, 3+, 4); column 3 is read as the
            position, column 2 as the z-axis and column 1 as the up vector.

    Returns:
        The matrix produced by `viewmatrix` from those three means.
    """
    mean_position, mean_up, mean_z = (
        poses[:, :3, column].mean(0) for column in (3, 1, 2))
    return viewmatrix(mean_z, mean_up, mean_position)
| |
|
| |
|
def viewmatrix(lookdir: np.ndarray, up: np.ndarray,
               position: np.ndarray) -> np.ndarray:
    """Construct a look-at view matrix.

    Args:
        lookdir: direction that becomes the (normalized) third column.
        up: approximate up vector; it only needs to be non-parallel to
            `lookdir`, since the second column is re-derived below.
        position: vector stored unchanged as the fourth column.

    Returns:
        The four vectors stacked as columns (a 3x4 matrix for 3-vectors).
    """
    z_axis = normalize(lookdir)
    # x is perpendicular to both the up hint and the viewing direction.
    x_axis = normalize(np.cross(up, z_axis))
    # Recompute y so that the three axes are mutually orthogonal.
    y_axis = normalize(np.cross(z_axis, x_axis))
    columns = [x_axis, y_axis, z_axis, position]
    return np.stack(columns, axis=1)
| |
|
| |
|
def focus_point_fn(poses: np.ndarray) -> np.ndarray:
    """Calculate the point nearest to all focal axes in `poses`.

    Each pose contributes a line through its origin (column 3) along its
    direction (column 2). With M_i = I - d_i d_i^T, the result is
    inv(mean(M_i^T M_i)) @ mean(M_i^T M_i o_i).

    Args:
        poses: array indexed as (N, 3+, 4).

    Returns:
        A length-3 vector, the focus point.
    """
    axes = poses[:, :3, 2:3]     # (N, 3, 1) column vectors
    centers = poses[:, :3, 3:4]  # (N, 3, 1) column vectors
    axes_row = np.transpose(axes, [0, 2, 1])
    # Broadcasting (N, 3, 1) * (N, 1, 3) forms the outer product d d^T.
    perp = np.eye(3) - axes * axes_row
    perp_sq = np.transpose(perp, [0, 2, 1]) @ perp
    lhs = perp_sq.mean(0)
    rhs = (perp_sq @ centers).mean(0)[:, 0]
    return np.linalg.inv(lhs) @ rhs
| |
|
| |
|
def transform_poses_pca(poses: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Transforms poses so principal components lie on XYZ axes.

    Args:
        poses: a (N, 3, 4) array containing the cameras' camera to world
            transforms.

    Returns:
        A tuple (poses, transform): the transformed (N, 3, 4) poses and the
        (4, 4) matrix that was applied to them.
    """
    centers = poses[:, :3, 3]
    centroid = centers.mean(axis=0)
    centered = centers - centroid

    # Eigen-decomposition of the 3x3 scatter matrix of the camera centers.
    eigenvalues, eigenvectors = np.linalg.eig(centered.T @ centered)

    # Order the axes from largest to smallest eigenvalue.
    order = np.argsort(eigenvalues)[::-1]
    rot = eigenvectors[:, order].T
    if np.linalg.det(rot) < 0:
        # Negate the last axis so the determinant is positive (no mirroring).
        rot = np.diag(np.array([1, 1, -1])) @ rot

    # [R | -R c]: move the centroid to the origin, then rotate.
    transform = np.concatenate([rot, rot @ (-centroid[:, None])], -1)
    poses_recentered = unpad_poses(transform @ pad_poses(poses))
    transform = np.concatenate([transform, np.eye(4)[3:]], axis=0)

    # Element [2, 1] of the mean pose is the z component of the average
    # camera y-axis; when it is negative, negate the y and z rows of both
    # the poses and the transform.
    if poses_recentered.mean(axis=0)[2, 1] < 0:
        poses_recentered = np.diag(np.array([1, -1, -1])) @ poses_recentered
        transform = np.diag(np.array([1, -1, -1, 1])) @ transform

    return poses_recentered, transform
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| |
|
| | |
| |
|
| |
|
def generate_ellipse_path(poses: np.ndarray,
                          n_frames: int = 120,
                          const_speed: bool = True,
                          z_variation: float = 0.,
                          z_phase: float = 0.) -> np.ndarray:
    """Generate an elliptical render path based on the given poses.

    Args:
        poses: array indexed as (N, 3+, 4); column 3 holds the camera
            positions and column 1 the up vectors.
        n_frames: number of poses on the returned path.
        const_speed: accepted for interface compatibility; it is not read
            anywhere in this function.
        z_variation: scale applied to the z coordinate of the path. The
            default of 0 keeps every path position at z = 0.
        z_phase: phase of the z oscillation, as a fraction of a full turn.

    Returns:
        An (n_frames, 3, 4) stack of poses, each placed on the ellipse and
        looking along the direction from the focus point to its position.
    """
    cam_positions = poses[:, :3, 3]

    # Focus point of the input cameras; the path is centered on its x/y.
    center = focus_point_fn(poses)
    offset = np.array([center[0], center[1], 0])

    # Ellipse half-extents: 90th percentile of |position - offset| per axis.
    sc = np.percentile(np.abs(cam_positions - offset), 90, axis=0)
    low = offset - sc
    high = offset + sc

    # Height range: 10th to 90th percentile of the camera positions.
    z_low = np.percentile(cam_positions, 10, axis=0)
    z_high = np.percentile(cam_positions, 90, axis=0)

    theta = np.linspace(0, 2. * np.pi, n_frames + 1, endpoint=True)

    # Interpolate between the bounds with cos/sin remapped into [0, 1].
    xs = low[0] + (high - low)[0] * (np.cos(theta) * .5 + .5)
    ys = low[1] + (high - low)[1] * (np.sin(theta) * .5 + .5)
    zs = z_variation * (
        z_low[2] + (z_high - z_low)[2] *
        (np.cos(theta + 2 * np.pi * z_phase) * .5 + .5))

    # The last sample (theta = 2*pi) duplicates the first one; drop it.
    positions = np.stack([xs, ys, zs], -1)[:-1]

    # Snap the mean up vector to the signed coordinate axis it is closest to.
    mean_up = poses[:, :3, 1].mean(0)
    mean_up = mean_up / np.linalg.norm(mean_up)
    dominant = np.argmax(np.abs(mean_up))
    up = np.eye(3)[dominant] * np.sign(mean_up[dominant])

    return np.stack([viewmatrix(p - center, up, p) for p in positions])
| |
|
| |
|
def generate_path(viewpoint_cameras, n_frames=480):
    """Build a list of cameras that follow an elliptical path.

    Args:
        viewpoint_cameras: sequence of camera objects exposing
            `world_view_transform` (a torch tensor), `projection_matrix`,
            `image_height` and `image_width`. The first camera is
            deep-copied once per generated frame.
        n_frames: number of cameras to generate.

    Returns:
        A list of `n_frames` camera copies whose `world_view_transform`,
        `full_proj_transform` and `camera_center` are set for the new poses
        and whose image height/width are rounded down to even numbers.
    """
    # Right-multiplying by this negates the camera y and z axes.
    flip_yz = np.diag([1, -1, -1, 1])

    # NOTE(review): the transform is transposed before inversion, so it is
    # presumably stored transposed on the camera - confirm with the Camera
    # class.
    cam_to_world = []
    for cam in viewpoint_cameras:
        world_to_cam = np.asarray((cam.world_view_transform.T).cpu().numpy())
        cam_to_world.append(np.linalg.inv(world_to_cam))
    c2ws = np.array(cam_to_world)

    pose = c2ws[:, :3, :] @ flip_yz
    pose_recenter, colmap_to_world_transform = transform_poses_pca(pose)

    # Generate the path in the PCA-aligned frame ...
    new_poses = generate_ellipse_path(poses=pose_recenter, n_frames=n_frames)
    # ... then map it back with the inverse of the PCA transform.
    new_poses = np.linalg.inv(colmap_to_world_transform) @ pad_poses(new_poses)

    traj = []
    for c2w in new_poses:
        # Undo the axis flip applied to the input poses above.
        c2w = c2w @ flip_yz
        cam = copy.deepcopy(viewpoint_cameras[0])
        # Round both image dimensions down to the nearest even number.
        cam.image_height = int(cam.image_height / 2) * 2
        cam.image_width = int(cam.image_width / 2) * 2
        cam.world_view_transform = torch.from_numpy(
            np.linalg.inv(c2w).T).float().cuda()
        cam.full_proj_transform = (
            cam.world_view_transform.unsqueeze(0).bmm(
                cam.projection_matrix.unsqueeze(0))).squeeze(0)
        cam.camera_center = cam.world_view_transform.inverse()[3, :3]
        traj.append(cam)

    return traj
| |
|
| |
|
def generate_video_render_path(c2ws, n_frames=480):
    """Generate an elliptical path of 4x4 camera-to-world matrices.

    Args:
        c2ws: array indexed as (N, 3+, 4) of camera-to-world matrices.
        n_frames: number of poses to generate.

    Returns:
        A list of `n_frames` (4, 4) arrays, expressed in the same axis
        convention as the input (the y/z flip is applied going in and again
        coming out).
    """
    # Right-multiplying by this negates the camera y and z axes.
    flip_yz = np.diag([1, -1, -1, 1])

    pose = c2ws[:, :3, :] @ flip_yz
    pose_recenter, colmap_to_world_transform = transform_poses_pca(pose)

    # Generate the path in the PCA-aligned frame ...
    new_poses = generate_ellipse_path(poses=pose_recenter, n_frames=n_frames)
    # ... then map it back with the inverse of the PCA transform.
    new_poses = np.linalg.inv(colmap_to_world_transform) @ pad_poses(new_poses)

    return [c2w @ flip_yz for c2w in new_poses]
| |
|
| |
|
def load_img(pth: str) -> np.ndarray:
    """Read the image file at `pth` and return its pixels as float32.

    Args:
        pth: path of the image file; it is opened in binary mode and
            decoded with PIL.

    Returns:
        The decoded image as a float32 NumPy array. Values are cast only,
        not rescaled.
    """
    with open(pth, 'rb') as handle:
        return np.array(Image.open(handle), dtype=np.float32)
| |
|
| |
|
def _turbo_colormap():
    """Return matplotlib's 'turbo' colormap on both old and new releases."""
    try:
        # The colormap registry is available from matplotlib 3.5 onwards.
        from matplotlib import colormaps
    except ImportError:
        # Older releases only provide cm.get_cmap (removed in matplotlib 3.9).
        return cm.get_cmap('turbo')
    return colormaps['turbo']


def _frame_path(input_dir, tag, frame_name, file_ext):
    """Return the path of one saved frame for the given output tag."""
    if tag == 'color':
        return os.path.join(input_dir, 'renders', f'{frame_name}.{file_ext}')
    return os.path.join(input_dir, 'vis', f'{tag}_{frame_name}.{file_ext}')


def create_videos(base_dir, input_dir, out_name, num_frames=480):
    """Creates videos out of the images saved to disk.

    One video is written per tag in ('depth', 'normal', 'color'), named
    `<out_name>_<tag>.mp4` inside `base_dir`. Color frames are read from
    `<input_dir>/renders/<idx>.png`, normal frames from
    `<input_dir>/vis/normal_<idx>.png` and depth frames from
    `<input_dir>/vis/depth_<idx>.tiff`, with `<idx>` zero-padded to at least
    five digits. A tag whose first frame is missing is skipped with a
    message.

    Args:
        base_dir: output directory; created if it does not exist.
        input_dir: directory holding the `renders` and `vis` sub-folders.
        out_name: filename prefix of the generated videos.
        num_frames: number of consecutive frames (starting at 0) per video.

    Raises:
        FileNotFoundError: if the first depth frame is missing, or if a
            frame is missing after the first one of its tag was found.
    """
    # NOTE(review): `media` is not imported in the visible part of this file
    # (presumably `import mediapy as media`) - confirm it is in scope.
    video_prefix = f'{out_name}'

    zpad = max(5, len(str(num_frames - 1)))

    def idx_to_str(idx):
        return str(idx).zfill(zpad)

    os.makedirs(base_dir, exist_ok=True)
    render_dist_curve_fn = np.log

    # The first depth frame fixes the video shape and the depth display
    # range (3rd to 97th percentile, mapped through the log curve).
    depth_file = os.path.join(input_dir, 'vis', f'depth_{idx_to_str(0)}.tiff')
    depth_frame = load_img(depth_file)
    shape = depth_frame.shape
    p = 3
    distance_limits = np.percentile(depth_frame.flatten(), [p, 100 - p])
    lo, hi = [render_dist_curve_fn(x) for x in distance_limits]
    print(f'Video shape is {shape[:2]}')

    video_kwargs = {
        'shape': shape[:2],
        'codec': 'h264',
        'fps': 60,
        'crf': 18,
    }

    # Resolve the colormap once instead of once per depth frame.
    turbo = _turbo_colormap()

    for k in ['depth', 'normal', 'color']:
        video_file = os.path.join(base_dir, f'{video_prefix}_{k}.mp4')
        input_format = 'gray' if k == 'alpha' else 'rgb'
        file_ext = 'png' if k in ['color', 'normal'] else 'tiff'

        file0 = _frame_path(input_dir, k, idx_to_str(0), file_ext)
        if not os.path.exists(file0):
            print(f'Images missing for tag {k}')
            continue
        print(f'Making video {video_file}...')
        with media.VideoWriter(
                video_file, **video_kwargs, input_format=input_format) as writer:
            for idx in tqdm(range(num_frames)):
                img_file = _frame_path(input_dir, k, idx_to_str(idx), file_ext)
                if not os.path.exists(img_file):
                    # The original built this exception without raising it.
                    # FileNotFoundError matches what open() would raise on
                    # the next line, so callers see the same exception type.
                    raise FileNotFoundError(
                        f'Image file {img_file} does not exist.')
                img = load_img(img_file)
                if k in ['color', 'normal']:
                    img = img / 255.
                elif k.startswith('depth'):
                    # Log-map the depth, rescale to [0, 1] using the limits
                    # from the first frame, then colorize (alpha dropped).
                    img = render_dist_curve_fn(img)
                    img = np.clip((img - np.minimum(lo, hi)) /
                                  np.abs(hi - lo), 0, 1)
                    img = turbo(img)[..., :3]

                frame = (np.clip(np.nan_to_num(img), 0., 1.)
                         * 255.).astype(np.uint8)
                writer.add_image(frame)
| |
|
| |
|
def save_img_u8(img, pth):
    """Save an image (probably RGB) in [0, 1] to disk as a uint8 PNG.

    Args:
        img: array of values nominally in [0, 1]. NaNs are replaced via
            `np.nan_to_num` and values are clipped to [0, 1] before being
            scaled by 255 and cast to uint8.
        pth: destination path, opened for binary writing.
    """
    with open(pth, 'wb') as out_file:
        unit_range = np.clip(np.nan_to_num(img), 0., 1.)
        as_bytes = (unit_range * 255.).astype(np.uint8)
        Image.fromarray(as_bytes).save(out_file, 'PNG')
| |
|
| |
|
def save_img_f32(depthmap, pth):
    """Save an image (probably a depthmap) to disk as a float32 TIFF.

    Args:
        depthmap: array to store. NaNs are replaced via `np.nan_to_num`
            before the cast to float32.
        pth: destination path, opened for binary writing.
    """
    with open(pth, 'wb') as out_file:
        as_float = np.nan_to_num(depthmap).astype(np.float32)
        Image.fromarray(as_float).save(out_file, 'TIFF')
| |
|