|
from src.utils.typing_utils import * |
|
|
|
import os |
|
import numpy as np |
|
from PIL import Image |
|
import trimesh |
|
from trimesh.transformations import rotation_matrix |
|
import pyrender |
|
from diffusers.utils import export_to_video |
|
from diffusers.utils.loading_utils import load_video |
|
import torch |
|
from torchvision.utils import make_grid |
|
|
|
os.environ['PYOPENGL_PLATFORM'] = 'egl' |
|
|
|
def render(
    scene: pyrender.Scene,
    renderer: pyrender.Renderer,
    camera: pyrender.Camera,
    pose: np.ndarray,
    light: Optional[pyrender.Light] = None,
    normalize_depth: bool = False,
    flags: int = pyrender.constants.RenderFlags.NONE,
    return_type: Literal['pil', 'ndarray'] = 'pil'
) -> Union[Tuple[np.ndarray, np.ndarray], Tuple[Image.Image, Image.Image]]:
    """Render one view of `scene` from a camera (and optional light) at `pose`.

    The camera and light nodes are added temporarily and always removed,
    so the scene is left unchanged even if rendering raises.

    Args:
        scene: Scene to render into.
        renderer: Offscreen renderer to use.
        camera: Camera intrinsics.
        pose: 4x4 camera-to-world transform; the light (if any) shares it.
        light: Optional light co-located with the camera.
        normalize_depth: If True, min-max scale depth to [0, 255].
        flags: pyrender render flags.
        return_type: 'pil' returns PIL images (depth normalized to uint8);
            'ndarray' returns raw numpy arrays.

    Returns:
        (image, depth) pair, as PIL images or numpy arrays per `return_type`.
    """
    camera_node = scene.add(camera, pose=pose)
    light_node = scene.add(light, pose=pose) if light is not None else None
    try:
        image, depth = renderer.render(scene, flags=flags)
    finally:
        # Always restore the scene, even when the renderer raises.
        scene.remove_node(camera_node)
        if light_node is not None:
            scene.remove_node(light_node)
    if normalize_depth or return_type == 'pil':
        depth_min = depth.min()
        depth_range = depth.max() - depth_min
        if depth_range > 0:
            depth = (depth - depth_min) / depth_range * 255.0
        else:
            # Constant depth (e.g. nothing visible): avoid division by zero.
            depth = np.zeros_like(depth)
    if return_type == 'pil':
        image = Image.fromarray(image)
        depth = Image.fromarray(depth.astype(np.uint8))
    return image, depth
|
|
|
def rotation_matrix_from_vectors(vec1, vec2):
    """Return the 3x3 rotation matrix that rotates `vec1` onto `vec2`.

    Uses the Rodrigues formula built from the cross product. Both inputs
    are normalized internally, so only their directions matter.

    Args:
        vec1: Source direction, shape (3,).
        vec2: Target direction, shape (3,).

    Returns:
        A proper rotation matrix R (det(R) == +1) with
        R @ (vec1 / |vec1|) == vec2 / |vec2|.
    """
    a, b = vec1 / np.linalg.norm(vec1), vec2 / np.linalg.norm(vec2)
    v = np.cross(a, b)
    c = np.dot(a, b)
    s = np.linalg.norm(v)
    if s == 0:
        if c > 0:
            # Parallel: no rotation needed.
            return np.eye(3)
        # Antiparallel: -I is a reflection (det = -1), not a rotation.
        # Instead rotate 180 degrees about any axis orthogonal to `a`.
        axis = np.cross(a, np.array([1.0, 0.0, 0.0]))
        if np.linalg.norm(axis) < 1e-8:
            # `a` is (anti)parallel to x; use y to get a valid axis.
            axis = np.cross(a, np.array([0.0, 1.0, 0.0]))
        axis = axis / np.linalg.norm(axis)
        # 180-degree rotation about unit axis u: R = 2*u*u^T - I.
        return 2.0 * np.outer(axis, axis) - np.eye(3)
    kmat = np.array([
        [0, -v[2], v[1]],
        [v[2], 0, -v[0]],
        [-v[1], v[0], 0]
    ])
    return np.eye(3) + kmat + kmat @ kmat * ((1 - c) / (s ** 2))
|
|
|
def create_circular_camera_positions( |
|
num_views: int, |
|
radius: float, |
|
axis: np.ndarray = np.array([0.0, 1.0, 0.0]) |
|
) -> List[np.ndarray]: |
|
|
|
|
|
positions = [] |
|
axis = axis / np.linalg.norm(axis) |
|
for i in range(num_views): |
|
theta = 2 * np.pi * i / num_views |
|
position = np.array([ |
|
np.sin(theta) * radius, |
|
0.0, |
|
np.cos(theta) * radius |
|
]) |
|
if not np.allclose(axis, np.array([0.0, 1.0, 0.0])): |
|
R = rotation_matrix_from_vectors(np.array([0.0, 1.0, 0.0]), axis) |
|
position = R @ position |
|
positions.append(position) |
|
return positions |
|
|
|
def create_circular_camera_poses(
    num_views: int,
    radius: float,
    axis: np.ndarray = np.array([0.0, 1.0, 0.0])
) -> List[np.ndarray]:
    """Build `num_views` camera-to-world poses evenly spaced on a circle.

    Each pose is the canonical camera (at [0, 0, radius], looking toward
    the origin along -Z) rotated about `axis` through the origin by an
    angle of 2*pi*i/num_views.

    Args:
        num_views: Number of poses to generate.
        radius: Distance of the camera from the rotation axis origin.
        axis: Rotation axis passed to trimesh's `rotation_matrix`.

    Returns:
        List of 4x4 pose matrices.
    """
    # Canonical pose: identity orientation, translated to z = radius.
    base_pose = np.eye(4)
    base_pose[2, 3] = radius
    return [
        rotation_matrix(
            angle=2 * np.pi * view_idx / num_views,
            direction=axis,
            point=[0, 0, 0]
        ) @ base_pose
        for view_idx in range(num_views)
    ]
|
|
|
def render_views_around_mesh(
    mesh: Union[trimesh.Trimesh, trimesh.Scene],
    num_views: int = 36,
    radius: float = 3.5,
    axis: np.ndarray = np.array([0.0, 1.0, 0.0]),
    image_size: tuple = (512, 512),
    fov: float = 40.0,
    light_intensity: Optional[float] = 5.0,
    znear: float = 0.1,
    zfar: float = 10.0,
    normalize_depth: bool = False,
    flags: int = pyrender.constants.RenderFlags.NONE,
    return_depth: bool = False,
    return_type: Literal['pil', 'ndarray'] = 'pil'
) -> Union[
    List[Image.Image],
    List[np.ndarray],
    Tuple[List[Image.Image], List[Image.Image]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """Render a turntable sequence of views around a mesh.

    Cameras are placed on a circle of `radius` around `axis` and each view
    is rendered with a camera-mounted directional light (if
    `light_intensity` is not None).

    Args:
        mesh: Mesh or scene to render.
        num_views: Number of evenly spaced views.
        radius: Camera distance from the origin.
        axis: Turntable rotation axis.
        image_size: (width, height) of the rendered images.
        fov: Vertical field of view in degrees.
        light_intensity: Directional light intensity; None disables the light.
        znear: Near clipping plane.
        zfar: Far clipping plane.
        normalize_depth: Min-max scale depth maps to [0, 255].
        flags: pyrender render flags.
        return_depth: Also return the depth maps.
        return_type: 'pil' or 'ndarray' outputs.

    Returns:
        List of images, or (images, depths) if `return_depth` is True.

    Raises:
        ValueError: If `mesh` is not a trimesh.Trimesh or trimesh.Scene.
    """
    if not isinstance(mesh, (trimesh.Trimesh, trimesh.Scene)):
        raise ValueError("mesh must be a trimesh.Trimesh or trimesh.Scene object")
    if isinstance(mesh, trimesh.Trimesh):
        mesh = trimesh.Scene(mesh)

    scene = pyrender.Scene.from_trimesh_scene(mesh)
    light = pyrender.DirectionalLight(
        color=np.ones(3),
        intensity=light_intensity
    ) if light_intensity is not None else None
    camera = pyrender.PerspectiveCamera(
        yfov=np.deg2rad(fov),
        aspectRatio=image_size[0] / image_size[1],
        znear=znear,
        zfar=zfar
    )
    renderer = pyrender.OffscreenRenderer(*image_size)
    # Guarantee the offscreen GL context is released even if a render fails;
    # leaked EGL contexts otherwise accumulate across calls.
    try:
        camera_poses = create_circular_camera_poses(
            num_views,
            radius,
            axis=axis
        )
        images, depths = [], []
        for pose in camera_poses:
            image, depth = render(
                scene, renderer, camera, pose, light,
                normalize_depth=normalize_depth,
                flags=flags,
                return_type=return_type
            )
            images.append(image)
            depths.append(depth)
    finally:
        renderer.delete()

    if return_depth:
        return images, depths
    return images
|
|
|
def render_normal_views_around_mesh(
    mesh: Union[trimesh.Trimesh, trimesh.Scene],
    num_views: int = 36,
    radius: float = 3.5,
    axis: np.ndarray = np.array([0.0, 1.0, 0.0]),
    image_size: tuple = (512, 512),
    fov: float = 40.0,
    light_intensity: Optional[float] = 5.0,
    znear: float = 0.1,
    zfar: float = 10.0,
    normalize_depth: bool = False,
    flags: int = pyrender.constants.RenderFlags.NONE,
    return_depth: bool = False,
    return_type: Literal['pil', 'ndarray'] = 'pil'
) -> Union[
    List[Image.Image],
    List[np.ndarray],
    Tuple[List[Image.Image], List[Image.Image]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """Render turntable views of a mesh colored by its vertex normals.

    The mesh's visual is replaced by per-vertex colors that encode the
    unit normals, mapped from [-1, 1] to [0, 255]; rendering is then
    delegated to `render_views_around_mesh` with the same parameters.

    Raises:
        ValueError: If `mesh` is not a trimesh.Trimesh or trimesh.Scene.
    """
    if not isinstance(mesh, (trimesh.Trimesh, trimesh.Scene)):
        raise ValueError("mesh must be a trimesh.Trimesh or trimesh.Scene object")
    geometry = mesh.to_geometry() if isinstance(mesh, trimesh.Scene) else mesh
    # Encode each normal component as an RGB byte: [-1, 1] -> [0, 255].
    vertex_colors = ((geometry.vertex_normals + 1.0) / 2.0 * 255).astype(np.uint8)
    geometry.visual = trimesh.visual.ColorVisuals(
        mesh=geometry,
        vertex_colors=vertex_colors
    )
    return render_views_around_mesh(
        trimesh.Scene(geometry), num_views, radius, axis,
        image_size, fov, light_intensity, znear, zfar,
        normalize_depth, flags,
        return_depth, return_type
    )
|
|
|
def create_camera_pose_on_sphere(
    azimuth: float = 0.0,
    elevation: float = 0.0,
    radius: float = 3.5,
) -> np.ndarray:
    """Build a camera-to-world pose on a sphere around the origin.

    The camera sits at spherical coordinates (azimuth, elevation) on a
    sphere of `radius` (y-up convention) and looks toward the origin.
    Azimuth 0 / elevation 0 corresponds to the +Z axis.

    Args:
        azimuth: Horizontal angle in degrees around +Y.
        elevation: Vertical angle in degrees above the XZ plane.
        radius: Distance from the origin.

    Returns:
        4x4 pose matrix.
    """
    az = np.deg2rad(azimuth)
    el = np.deg2rad(elevation)
    # Unit direction from the origin toward the camera position.
    direction = np.array([
        np.cos(el) * np.sin(az),
        np.sin(el),
        np.cos(el) * np.cos(az),
    ])
    # Rotate the canonical camera (+Z, looking at the origin) onto `direction`.
    world_from_canonical = np.eye(4)
    world_from_canonical[:3, :3] = rotation_matrix_from_vectors(
        np.array([0.0, 0.0, 1.0]),
        direction
    )
    canonical = np.eye(4)
    canonical[2, 3] = radius
    return world_from_canonical @ canonical
|
|
|
def render_single_view(
    mesh: Union[trimesh.Trimesh, trimesh.Scene],
    azimuth: float = 0.0,
    elevation: float = 0.0,
    radius: float = 3.5,
    image_size: tuple = (512, 512),
    fov: float = 40.0,
    light_intensity: Optional[float] = 5.0,
    num_env_lights: int = 0,
    znear: float = 0.1,
    zfar: float = 10.0,
    normalize_depth: bool = False,
    flags: int = pyrender.constants.RenderFlags.NONE,
    return_depth: bool = False,
    return_type: Literal['pil', 'ndarray'] = 'pil'
) -> Union[
    Image.Image,
    np.ndarray,
    Tuple[Image.Image, Image.Image],
    Tuple[np.ndarray, np.ndarray]
]:
    """Render a single view of a mesh from a camera on a sphere.

    Args:
        mesh: Mesh or scene to render.
        azimuth: Camera azimuth in degrees.
        elevation: Camera elevation in degrees.
        radius: Camera distance from the origin.
        image_size: (width, height) of the rendered image.
        fov: Vertical field of view in degrees.
        light_intensity: Directional light intensity; None disables the
            camera-mounted light.
        num_env_lights: If > 0, replace the camera light with this many
            directional lights arranged on a circle around the object.
        znear: Near clipping plane.
        zfar: Far clipping plane.
        normalize_depth: Min-max scale the depth map to [0, 255].
        flags: pyrender render flags.
        return_depth: Also return the depth map.
        return_type: 'pil' or 'ndarray' outputs.

    Returns:
        The image, or (image, depth) if `return_depth` is True.

    Raises:
        ValueError: If `mesh` is not a trimesh.Trimesh or trimesh.Scene.
    """
    if not isinstance(mesh, (trimesh.Trimesh, trimesh.Scene)):
        raise ValueError("mesh must be a trimesh.Trimesh or trimesh.Scene object")
    if isinstance(mesh, trimesh.Trimesh):
        mesh = trimesh.Scene(mesh)

    scene = pyrender.Scene.from_trimesh_scene(mesh)
    light = pyrender.DirectionalLight(
        color=np.ones(3),
        intensity=light_intensity
    ) if light_intensity is not None else None
    camera = pyrender.PerspectiveCamera(
        yfov=np.deg2rad(fov),
        aspectRatio=image_size[0] / image_size[1],
        znear=znear,
        zfar=zfar
    )
    renderer = pyrender.OffscreenRenderer(*image_size)
    # Guarantee the offscreen GL context is released even if rendering fails.
    try:
        camera_pose = create_camera_pose_on_sphere(
            azimuth,
            elevation,
            radius
        )

        if num_env_lights > 0:
            # Surround the object with a ring of static directional lights
            # and drop the camera-mounted light. Fall back to the default
            # intensity so env lighting works with light_intensity=None
            # (previously this crashed with intensity=None).
            env_intensity = light_intensity if light_intensity is not None else 5.0
            env_light_poses = create_circular_camera_poses(
                num_env_lights,
                radius,
                axis=np.array([0.0, 1.0, 0.0])
            )
            for pose in env_light_poses:
                scene.add(pyrender.DirectionalLight(
                    color=np.ones(3),
                    intensity=env_intensity
                ), pose=pose)
            light = None

        image, depth = render(
            scene, renderer, camera, camera_pose, light,
            normalize_depth=normalize_depth,
            flags=flags,
            return_type=return_type
        )
    finally:
        renderer.delete()

    if return_depth:
        return image, depth
    return image
|
|
|
def render_normal_single_view(
    mesh: Union[trimesh.Trimesh, trimesh.Scene],
    azimuth: float = 0.0,
    elevation: float = 0.0,
    radius: float = 3.5,
    image_size: tuple = (512, 512),
    fov: float = 40.0,
    light_intensity: Optional[float] = 5.0,
    znear: float = 0.1,
    zfar: float = 10.0,
    normalize_depth: bool = False,
    flags: int = pyrender.constants.RenderFlags.NONE,
    return_depth: bool = False,
    return_type: Literal['pil', 'ndarray'] = 'pil'
) -> Union[
    Image.Image,
    np.ndarray,
    Tuple[Image.Image, Image.Image],
    Tuple[np.ndarray, np.ndarray]
]:
    """Render a single view of a mesh colored by its vertex normals.

    The mesh's visual is replaced by per-vertex colors that encode the
    unit normals, mapped from [-1, 1] to [0, 255]; rendering is then
    delegated to `render_single_view` with the same parameters.

    Raises:
        ValueError: If `mesh` is not a trimesh.Trimesh or trimesh.Scene.
    """
    if not isinstance(mesh, (trimesh.Trimesh, trimesh.Scene)):
        raise ValueError("mesh must be a trimesh.Trimesh or trimesh.Scene object")
    geometry = mesh.to_geometry() if isinstance(mesh, trimesh.Scene) else mesh
    # Encode each normal component as an RGB byte: [-1, 1] -> [0, 255].
    vertex_colors = ((geometry.vertex_normals + 1.0) / 2.0 * 255).astype(np.uint8)
    geometry.visual = trimesh.visual.ColorVisuals(
        mesh=geometry,
        vertex_colors=vertex_colors
    )
    return render_single_view(
        trimesh.Scene(geometry), azimuth, elevation, radius,
        image_size, fov, light_intensity, znear, zfar,
        normalize_depth, flags,
        return_depth, return_type
    )
|
|
|
def export_renderings( |
|
images: List[Image.Image], |
|
export_path: str, |
|
fps: int = 36, |
|
loop: int = 0 |
|
): |
|
export_type = export_path.split('.')[-1] |
|
if export_type == 'mp4': |
|
export_to_video( |
|
images, |
|
export_path, |
|
fps=fps, |
|
) |
|
elif export_type == 'gif': |
|
duration = 1000 / fps |
|
images[0].save( |
|
export_path, |
|
save_all=True, |
|
append_images=images[1:], |
|
duration=duration, |
|
loop=loop |
|
) |
|
else: |
|
raise ValueError(f'Unknown export type: {export_type}') |
|
|
|
def make_grid_for_images_or_videos(
    images_or_videos: Union[List[Image.Image], List[List[Image.Image]]],
    nrow: int = 4,
    padding: int = 0,
    pad_value: int = 0,
    image_size: tuple = (512, 512),
    return_type: Literal['pil', 'ndarray'] = 'pil'
) -> Union[Image.Image, List[Image.Image], np.ndarray]:
    """Tile a list of images — or, per frame, a list of videos — into a grid.

    For a list of PIL images, each is resized to `image_size`, converted to
    RGB, and tiled with torchvision's `make_grid`. For a list of videos
    (each a list of PIL frames), one grid is built per frame index across
    all videos.

    Args:
        images_or_videos: Images, or videos of equal frame count.
        nrow: Images per grid row.
        padding: Pixel padding between tiles.
        pad_value: Fill value for the padding pixels.
        image_size: (width, height) each image is resized to.
        return_type: 'pil' for PIL output, 'ndarray' for numpy (CHW grid,
            or stacked TCHW for videos).

    Returns:
        A single grid for images, or a list/stack of per-frame grids for
        videos.

    Raises:
        ValueError: If the input is neither images nor lists of images.
    """
    if isinstance(images_or_videos[0], Image.Image):
        images = [np.array(image.resize(image_size).convert('RGB')) for image in images_or_videos]
        images = np.stack(images, axis=0).transpose(0, 3, 1, 2)  # NHWC -> NCHW
        images = torch.from_numpy(images)
        image_grid = make_grid(
            images,
            nrow=nrow,
            padding=padding,
            pad_value=pad_value,
            normalize=False
        )
        image_grid = image_grid.cpu().numpy()
        if return_type == 'pil':
            image_grid = Image.fromarray(image_grid.transpose(1, 2, 0))
        return image_grid
    elif isinstance(images_or_videos[0], list) and isinstance(images_or_videos[0][0], Image.Image):
        image_grids = []
        for i in range(len(images_or_videos[0])):
            # Gather the i-th frame of every video and grid them together.
            images = [video[i] for video in images_or_videos]
            image_grid = make_grid_for_images_or_videos(
                images,
                nrow=nrow,
                padding=padding,
                # Bug fix: pad_value and image_size were previously not
                # forwarded, so video grids ignored the caller's values.
                pad_value=pad_value,
                image_size=image_size,
                return_type=return_type
            )
            image_grids.append(image_grid)
        if return_type == 'ndarray':
            image_grids = np.stack(image_grids, axis=0)
        return image_grids
    else:
        raise ValueError(f'Unknown input type: {type(images_or_videos[0])}')