Spaces:
Sleeping
Sleeping
| from src.utils.typing_utils import * | |
| import os | |
| import numpy as np | |
| from PIL import Image | |
| import trimesh | |
| from trimesh.transformations import rotation_matrix | |
| import pyrender | |
| from diffusers.utils import export_to_video | |
| from diffusers.utils.loading_utils import load_video | |
| import torch | |
| from torchvision.utils import make_grid | |
| os.environ['PYOPENGL_PLATFORM'] = 'egl' | |
def render(
    scene: pyrender.Scene,
    renderer: pyrender.Renderer,
    camera: pyrender.Camera,
    pose: np.ndarray,
    light: Optional[pyrender.Light] = None,
    normalize_depth: bool = False,
    flags: int = pyrender.constants.RenderFlags.NONE,
    return_type: Literal['pil', 'ndarray'] = 'pil'
) -> Union[Tuple[np.ndarray, np.ndarray], Tuple[Image.Image, Image.Image]]:
    """Render one view of `scene` from the given camera pose.

    A camera node (and optionally a light node sharing the same pose) is
    temporarily added to the scene, the frame is rendered, and the nodes
    are removed again so the scene is left unchanged.

    Args:
        scene: pyrender scene to render.
        renderer: offscreen renderer to use.
        camera: camera intrinsics.
        pose: 4x4 camera-to-world pose matrix.
        light: optional light attached at the camera pose (headlight).
        normalize_depth: min-max normalize depth to [0, 255]. Always done
            when return_type is 'pil' (PIL needs uint8 data).
        flags: pyrender render flags.
        return_type: 'pil' for PIL images, 'ndarray' for raw arrays.

    Returns:
        (image, depth) as PIL images or numpy arrays.
    """
    camera_node = scene.add(camera, pose=pose)
    light_node = scene.add(light, pose=pose) if light is not None else None
    try:
        image, depth = renderer.render(scene, flags=flags)
    finally:
        # Always detach the temporary nodes, even if rendering fails,
        # so repeated calls do not accumulate cameras/lights in the scene.
        scene.remove_node(camera_node)
        if light_node is not None:
            scene.remove_node(light_node)
    if normalize_depth or return_type == 'pil':
        # NOTE(review): background pixels render with depth 0 in pyrender,
        # so they participate in the min-max range — confirm this is intended.
        depth_range = depth.max() - depth.min()
        if depth_range > 0:
            depth = (depth - depth.min()) / depth_range * 255.0
        else:
            # Constant depth (e.g. empty render): avoid division by zero.
            depth = np.zeros_like(depth)
    if return_type == 'pil':
        image = Image.fromarray(image)
        depth = Image.fromarray(depth.astype(np.uint8))
    return image, depth
def rotation_matrix_from_vectors(vec1, vec2):
    """Return the 3x3 rotation matrix rotating unit(vec1) onto unit(vec2).

    Uses the Rodrigues rotation formula for the general case, with explicit
    handling of the two degenerate cases (parallel and anti-parallel).

    Args:
        vec1: source direction (any nonzero 3-vector).
        vec2: target direction (any nonzero 3-vector).

    Returns:
        A proper rotation matrix R (det(R) == +1) with R @ unit(vec1) == unit(vec2).
    """
    a, b = vec1 / np.linalg.norm(vec1), vec2 / np.linalg.norm(vec2)
    v = np.cross(a, b)
    c = np.dot(a, b)
    s = np.linalg.norm(v)
    if s == 0:
        if c > 0:
            # Parallel vectors: nothing to rotate.
            return np.eye(3)
        # Anti-parallel vectors. The previous code returned -eye(3), which
        # has determinant -1 and is a reflection, not a rotation. Instead,
        # rotate 180 degrees about any axis perpendicular to `a`:
        # R = 2*u*u^T - I for a unit axis u with u . a == 0.
        axis = np.cross(a, np.array([1.0, 0.0, 0.0]))
        if np.linalg.norm(axis) < 1e-8:
            # `a` is (anti)parallel to x; use y to get a valid perpendicular.
            axis = np.cross(a, np.array([0.0, 1.0, 0.0]))
        axis = axis / np.linalg.norm(axis)
        return 2.0 * np.outer(axis, axis) - np.eye(3)
    kmat = np.array([
        [0, -v[2], v[1]],
        [v[2], 0, -v[0]],
        [-v[1], v[0], 0]
    ])
    return np.eye(3) + kmat + kmat @ kmat * ((1 - c) / (s ** 2))
def create_circular_camera_positions(
    num_views: int,
    radius: float,
    axis: np.ndarray = np.array([0.0, 1.0, 0.0])
) -> List[np.ndarray]:
    """Return `num_views` positions evenly spaced on a circle of `radius`.

    The circle lies in the plane perpendicular to `axis` through the
    origin, starting at (0, 0, radius) for the default Y-up axis.

    Args:
        num_views: number of positions to generate.
        radius: circle radius.
        axis: rotation axis of the circle (normalized internally).

    Returns:
        List of 3-vector positions.
    """
    axis = axis / np.linalg.norm(axis)
    # The rotation aligning the default up-axis with `axis` is loop
    # invariant — compute it once instead of once per view.
    R = None
    if not np.allclose(axis, np.array([0.0, 1.0, 0.0])):
        R = rotation_matrix_from_vectors(np.array([0.0, 1.0, 0.0]), axis)
    positions = []
    for i in range(num_views):
        theta = 2 * np.pi * i / num_views
        position = np.array([
            np.sin(theta) * radius,
            0.0,
            np.cos(theta) * radius
        ])
        if R is not None:
            position = R @ position
        positions.append(position)
    return positions
def create_circular_camera_poses(
    num_views: int,
    radius: float,
    axis: np.ndarray = np.array([0.0, 1.0, 0.0])
) -> List[np.ndarray]:
    """Build `num_views` camera-to-world poses on a circular orbit.

    Each pose is the canonical camera at (0, 0, radius) looking at the
    origin, rotated about `axis` through the origin by evenly spaced
    angles in [0, 2*pi).

    Args:
        num_views: number of poses to generate.
        radius: orbit radius.
        axis: orbit axis passed to trimesh's rotation_matrix.

    Returns:
        List of 4x4 pose matrices.
    """
    # Canonical camera: identity orientation, translated to +Z by `radius`.
    base_pose = np.eye(4)
    base_pose[2, 3] = radius
    return [
        rotation_matrix(
            angle=2 * np.pi * i / num_views,
            direction=axis,
            point=[0, 0, 0]
        ) @ base_pose
        for i in range(num_views)
    ]
def render_views_around_mesh(
    mesh: Union[trimesh.Trimesh, trimesh.Scene],
    num_views: int = 36,
    radius: float = 3.5,
    axis: np.ndarray = np.array([0.0, 1.0, 0.0]),
    image_size: tuple = (512, 512),
    fov: float = 40.0,
    light_intensity: Optional[float] = 5.0,
    znear: float = 0.1,
    zfar: float = 10.0,
    normalize_depth: bool = False,
    flags: int = pyrender.constants.RenderFlags.NONE,
    return_depth: bool = False,
    return_type: Literal['pil', 'ndarray'] = 'pil'
) -> Union[
    List[Image.Image],
    List[np.ndarray],
    Tuple[List[Image.Image], List[Image.Image]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """Render `num_views` images of `mesh` from a circular camera orbit.

    Cameras are evenly spaced on a circle of `radius` around `axis`, each
    looking at the origin. A directional headlight follows the camera
    unless `light_intensity` is None.

    Args:
        mesh: trimesh mesh or scene to render.
        num_views: number of frames around the orbit.
        radius: orbit radius.
        axis: orbit axis.
        image_size: (width, height) of each rendered frame.
        fov: vertical field of view in degrees.
        light_intensity: headlight intensity, or None for no light.
        znear, zfar: camera clipping planes.
        normalize_depth: min-max normalize depth maps to [0, 255].
        flags: pyrender render flags.
        return_depth: also return the per-view depth maps.
        return_type: 'pil' or 'ndarray' frames.

    Returns:
        List of images, or (images, depths) when `return_depth` is True.

    Raises:
        ValueError: if `mesh` is not a trimesh Trimesh/Scene.
    """
    if not isinstance(mesh, (trimesh.Trimesh, trimesh.Scene)):
        raise ValueError("mesh must be a trimesh.Trimesh or trimesh.Scene object")

    tm_scene = trimesh.Scene(mesh) if isinstance(mesh, trimesh.Trimesh) else mesh
    scene = pyrender.Scene.from_trimesh_scene(tm_scene)

    light = None
    if light_intensity is not None:
        light = pyrender.DirectionalLight(color=np.ones(3), intensity=light_intensity)
    camera = pyrender.PerspectiveCamera(
        yfov=np.deg2rad(fov),
        aspectRatio=image_size[0] / image_size[1],
        znear=znear,
        zfar=zfar
    )
    renderer = pyrender.OffscreenRenderer(*image_size)

    images, depths = [], []
    for pose in create_circular_camera_poses(num_views, radius, axis=axis):
        frame, depth_map = render(
            scene, renderer, camera, pose, light,
            normalize_depth=normalize_depth,
            flags=flags,
            return_type=return_type
        )
        images.append(frame)
        depths.append(depth_map)
    renderer.delete()

    return (images, depths) if return_depth else images
def render_normal_views_around_mesh(
    mesh: Union[trimesh.Trimesh, trimesh.Scene],
    num_views: int = 36,
    radius: float = 3.5,
    axis: np.ndarray = np.array([0.0, 1.0, 0.0]),
    image_size: tuple = (512, 512),
    fov: float = 40.0,
    light_intensity: Optional[float] = 5.0,
    znear: float = 0.1,
    zfar: float = 10.0,
    normalize_depth: bool = False,
    flags: int = pyrender.constants.RenderFlags.NONE,
    return_depth: bool = False,
    return_type: Literal['pil', 'ndarray'] = 'pil'
) -> Union[
    List[Image.Image],
    List[np.ndarray],
    Tuple[List[Image.Image], List[Image.Image]],
    Tuple[List[np.ndarray], List[np.ndarray]]
]:
    """Render circular views of `mesh` shaded by its vertex normals.

    Per-vertex normals are mapped from [-1, 1] to RGB vertex colors in
    [0, 255], then the colored mesh is rendered via
    render_views_around_mesh with the same parameters.

    Returns:
        Same as render_views_around_mesh.

    Raises:
        ValueError: if `mesh` is not a trimesh Trimesh/Scene.
    """
    if not isinstance(mesh, (trimesh.Trimesh, trimesh.Scene)):
        raise ValueError("mesh must be a trimesh.Trimesh or trimesh.Scene object")

    # Collapse a scene into a single geometry so per-vertex colors apply.
    geometry = mesh.to_geometry() if isinstance(mesh, trimesh.Scene) else mesh
    # Map normal components from [-1, 1] into RGB bytes.
    vertex_rgb = ((geometry.vertex_normals + 1.0) / 2.0 * 255).astype(np.uint8)
    geometry.visual = trimesh.visual.ColorVisuals(
        mesh=geometry,
        vertex_colors=vertex_rgb
    )
    return render_views_around_mesh(
        trimesh.Scene(geometry),
        num_views, radius, axis,
        image_size, fov, light_intensity, znear, zfar,
        normalize_depth, flags,
        return_depth, return_type
    )
def create_camera_pose_on_sphere(
    azimuth: float = 0.0,  # in degrees
    elevation: float = 0.0,  # in degrees
    radius: float = 3.5,
) -> np.ndarray:
    """Camera-to-world pose at (azimuth, elevation) on a sphere of `radius`.

    The camera looks at the origin. Azimuth rotates about +Y starting from
    +Z; elevation tilts toward +Y.

    NOTE(review): the original comment claims the up vector is always
    [0, 1, 0]; rotation_matrix_from_vectors applies the *minimal* rotation
    from +Z to the view direction, which does not strictly pin the up
    vector — confirm if exact up-alignment matters.

    Returns:
        4x4 camera-to-world pose matrix.
    """
    az = np.deg2rad(azimuth)
    el = np.deg2rad(elevation)
    # Unit direction from the origin to the camera position.
    eye_dir = np.array([
        np.cos(el) * np.sin(az),
        np.sin(el),
        np.cos(el) * np.cos(az),
    ])
    # Canonical camera: identity orientation at (0, 0, radius), looking at
    # the origin along -Z.
    base_pose = np.eye(4)
    base_pose[2, 3] = radius
    # Rotate the canonical camera so its position lands on `eye_dir * radius`.
    rot = np.eye(4)
    rot[:3, :3] = rotation_matrix_from_vectors(
        np.array([0.0, 0.0, 1.0]),
        eye_dir
    )
    return rot @ base_pose
def render_single_view(
    mesh: Union[trimesh.Trimesh, trimesh.Scene],
    azimuth: float = 0.0,  # in degrees
    elevation: float = 0.0,  # in degrees
    radius: float = 3.5,
    image_size: tuple = (512, 512),
    fov: float = 40.0,
    light_intensity: Optional[float] = 5.0,
    num_env_lights: int = 0,
    znear: float = 0.1,
    zfar: float = 10.0,
    normalize_depth: bool = False,
    flags: int = pyrender.constants.RenderFlags.NONE,
    return_depth: bool = False,
    return_type: Literal['pil', 'ndarray'] = 'pil'
) -> Union[
    Image.Image,
    np.ndarray,
    Tuple[Image.Image, Image.Image],
    Tuple[np.ndarray, np.ndarray]
]:
    """Render one view of `mesh` from a spherical camera position.

    The camera sits at (azimuth, elevation) on a sphere of `radius` and
    looks at the origin. By default a single directional headlight follows
    the camera; when `num_env_lights` > 0, a ring of directional lights
    around the Y axis replaces the headlight.

    Args:
        mesh: trimesh mesh or scene to render.
        azimuth, elevation: camera angles in degrees.
        radius: camera distance from the origin.
        image_size: (width, height) of the rendered frame.
        fov: vertical field of view in degrees.
        light_intensity: light intensity, or None for no headlight.
        num_env_lights: size of the environment light ring (0 disables it).
        znear, zfar: camera clipping planes.
        normalize_depth: min-max normalize the depth map to [0, 255].
        flags: pyrender render flags.
        return_depth: also return the depth map.
        return_type: 'pil' or 'ndarray'.

    Returns:
        The image, or (image, depth) when `return_depth` is True.

    Raises:
        ValueError: if `mesh` is not a trimesh Trimesh/Scene.
    """
    if not isinstance(mesh, (trimesh.Trimesh, trimesh.Scene)):
        raise ValueError("mesh must be a trimesh.Trimesh or trimesh.Scene object")

    tm_scene = trimesh.Scene(mesh) if isinstance(mesh, trimesh.Trimesh) else mesh
    scene = pyrender.Scene.from_trimesh_scene(tm_scene)

    light = None
    if light_intensity is not None:
        light = pyrender.DirectionalLight(color=np.ones(3), intensity=light_intensity)
    camera = pyrender.PerspectiveCamera(
        yfov=np.deg2rad(fov),
        aspectRatio=image_size[0] / image_size[1],
        znear=znear,
        zfar=zfar
    )
    renderer = pyrender.OffscreenRenderer(*image_size)
    camera_pose = create_camera_pose_on_sphere(azimuth, elevation, radius)

    if num_env_lights > 0:
        # Environment lighting: fixed ring of directional lights around the
        # object; the camera headlight is disabled in this mode.
        ring_poses = create_circular_camera_poses(
            num_env_lights,
            radius,
            axis=np.array([0.0, 1.0, 0.0])
        )
        for env_pose in ring_poses:
            scene.add(
                pyrender.DirectionalLight(color=np.ones(3), intensity=light_intensity),
                pose=env_pose
            )
        light = None

    image, depth = render(
        scene, renderer, camera, camera_pose, light,
        normalize_depth=normalize_depth,
        flags=flags,
        return_type=return_type
    )
    renderer.delete()

    return (image, depth) if return_depth else image
def render_normal_single_view(
    mesh: Union[trimesh.Trimesh, trimesh.Scene],
    azimuth: float = 0.0,  # in degrees
    elevation: float = 0.0,  # in degrees
    radius: float = 3.5,
    image_size: tuple = (512, 512),
    fov: float = 40.0,
    light_intensity: Optional[float] = 5.0,
    znear: float = 0.1,
    zfar: float = 10.0,
    normalize_depth: bool = False,
    flags: int = pyrender.constants.RenderFlags.NONE,
    return_depth: bool = False,
    return_type: Literal['pil', 'ndarray'] = 'pil'
) -> Union[
    Image.Image,
    np.ndarray,
    Tuple[Image.Image, Image.Image],
    Tuple[np.ndarray, np.ndarray]
]:
    """Render a single normal-map view of `mesh`.

    Per-vertex normals are mapped from [-1, 1] to RGB vertex colors in
    [0, 255], then the colored mesh is rendered via render_single_view.

    Returns:
        The image, or (image, depth) when `return_depth` is True.

    Raises:
        ValueError: if `mesh` is not a trimesh Trimesh/Scene.
    """
    if not isinstance(mesh, (trimesh.Trimesh, trimesh.Scene)):
        raise ValueError("mesh must be a trimesh.Trimesh or trimesh.Scene object")
    if isinstance(mesh, trimesh.Scene):
        mesh = mesh.to_geometry()
    normals = mesh.vertex_normals
    # Map normal components from [-1, 1] into RGB bytes.
    colors = ((normals + 1.0) / 2.0 * 255).astype(np.uint8)
    mesh.visual = trimesh.visual.ColorVisuals(
        mesh=mesh,
        vertex_colors=colors
    )
    mesh = trimesh.Scene(mesh)
    # BUG FIX: render_single_view takes `num_env_lights` between
    # `light_intensity` and `znear`. The previous purely positional call
    # omitted it, shifting every later argument by one slot (znear=0.1 was
    # consumed as num_env_lights, 'pil' as return_depth, ...), which made
    # this function crash/misbehave. Keyword arguments keep them aligned.
    return render_single_view(
        mesh,
        azimuth=azimuth,
        elevation=elevation,
        radius=radius,
        image_size=image_size,
        fov=fov,
        light_intensity=light_intensity,
        znear=znear,
        zfar=zfar,
        normalize_depth=normalize_depth,
        flags=flags,
        return_depth=return_depth,
        return_type=return_type
    )
def export_renderings(
    images: List[Image.Image],
    export_path: str,
    fps: int = 36,
    loop: int = 0
):
    """Export a sequence of frames to a video (.mp4) or animation (.gif).

    Args:
        images: non-empty list of PIL frames.
        export_path: output path; format is chosen by its file extension.
        fps: playback frame rate (GIF frame duration is derived as 1000/fps ms).
        loop: GIF loop count (0 = loop forever); ignored for mp4.

    Raises:
        ValueError: if `images` is empty or the extension is unsupported.
    """
    if not images:
        # Fail early with a clear message instead of an IndexError below.
        raise ValueError('images must be a non-empty list of frames')
    # Derive the format from the extension, case-insensitively ('.MP4' works).
    export_type = os.path.splitext(export_path)[1].lstrip('.').lower()
    if export_type == 'mp4':
        export_to_video(
            images,
            export_path,
            fps=fps,
        )
    elif export_type == 'gif':
        duration = 1000 / fps  # per-frame duration in milliseconds
        images[0].save(
            export_path,
            save_all=True,
            append_images=images[1:],
            duration=duration,
            loop=loop
        )
    else:
        raise ValueError(f'Unknown export type: {export_type}')
def make_grid_for_images_or_videos(
    images_or_videos: Union[List[Image.Image], List[List[Image.Image]]],
    nrow: int = 4,
    padding: int = 0,
    pad_value: int = 0,
    image_size: tuple = (512, 512),
    return_type: Literal['pil', 'ndarray'] = 'pil'
) -> Union[Image.Image, List[Image.Image], np.ndarray]:
    """Tile images — or, frame by frame, videos — into a grid.

    Args:
        images_or_videos: either a flat list of PIL images, or a list of
            videos where each video is a list of PIL frames (all videos are
            assumed to have the same length as the first).
        nrow: images per grid row (torchvision make_grid semantics).
        padding: spacing in pixels between tiles.
        pad_value: fill value for the padding.
        image_size: each tile is resized to this (width, height) first.
        return_type: 'pil' for PIL output, 'ndarray' for numpy.

    Returns:
        For images: one grid (PIL image or [C, H', W'] ndarray).
        For videos: a list of per-frame grids, stacked into one ndarray
        when return_type is 'ndarray'.

    Raises:
        ValueError: if the first element is neither an image nor a list of
            images.
    """
    if isinstance(images_or_videos[0], Image.Image):
        images = [np.array(image.resize(image_size).convert('RGB')) for image in images_or_videos]
        images = np.stack(images, axis=0).transpose(0, 3, 1, 2)  # [N, C, H, W]
        images = torch.from_numpy(images)
        image_grid = make_grid(
            images,
            nrow=nrow,
            padding=padding,
            pad_value=pad_value,
            normalize=False
        )  # [C, H', W']
        image_grid = image_grid.cpu().numpy()
        if return_type == 'pil':
            image_grid = Image.fromarray(image_grid.transpose(1, 2, 0))
        return image_grid
    elif isinstance(images_or_videos[0], list) and isinstance(images_or_videos[0][0], Image.Image):
        image_grids = []
        for i in range(len(images_or_videos[0])):
            frame_images = [video[i] for video in images_or_videos]
            # BUG FIX: forward pad_value and image_size to the recursive
            # call — they were previously dropped, so video grids silently
            # used the defaults regardless of what the caller passed.
            image_grid = make_grid_for_images_or_videos(
                frame_images,
                nrow=nrow,
                padding=padding,
                pad_value=pad_value,
                image_size=image_size,
                return_type=return_type
            )
            image_grids.append(image_grid)
        if return_type == 'ndarray':
            image_grids = np.stack(image_grids, axis=0)
        return image_grids
    else:
        raise ValueError(f'Unknown input type: {type(images_or_videos[0])}')