# Spaces: IsshikiHugh / HSMR
# Runtime error
# File size: 13,132 Bytes
# 5ac1897

import os
if 'PYOPENGL_PLATFORM' not in os.environ:
    os.environ['PYOPENGL_PLATFORM'] = 'egl'
import torch
import numpy as np
import trimesh
import pyrender

from typing import List, Optional, Union, Tuple
from pathlib import Path

from lib.utils.vis import ColorPalette
from lib.utils.data import to_numpy
from lib.utils.media import save_img

from .utils import *


def render_mesh_overlay_img(
    faces       : Union[torch.Tensor, np.ndarray],
    verts       : torch.Tensor,
    K4          : List,
    img         : np.ndarray,
    output_fn   : Optional[Union[str, Path]] = None,
    device      : str = 'cuda',
    resize      : float = 1.0,
    Rt          : Optional[Tuple[torch.Tensor]] = None,
    mesh_color : Optional[Union[List[float], str]] = 'green',
):
    '''
    Render a single mesh on top of a single image.

    The image and the vertices each get a leading length-1 axis, the pair is
    handed to `render_mesh_overlay_video`, and the only resulting frame is used.

    ### Args
    - faces: Union[torch.Tensor, np.ndarray], (F, 3)
    - verts: torch.Tensor, (V, 3)
    - K4: List
        - [fx, fy, cx, cy], the components of intrinsic camera matrix.
    - img: np.ndarray, (H, W, 3)
    - output_fn: Union[str, Path] or None
        - The output file path, if None, return the rendered img.
    - device: str, default 'cuda'
        - Forwarded unchanged to `render_mesh_overlay_video`.
    - resize: float, default 1.0
        - Forwarded unchanged to `render_mesh_overlay_video`.
    - Rt: Tuple of Tensor, default None
        - The extrinsic camera matrix, in the form of (R, t).
    - mesh_color: List[float] or str, default 'green'
        - Forwarded unchanged to `render_mesh_overlay_video`.

    ### Returns
    - The rendered frame when `output_fn` is None. Otherwise the frame is
      passed to `save_img` and nothing is returned.
    '''
    # Wrap the single image / vertex set as a one-frame sequence.
    rendered_seq = render_mesh_overlay_video(
        faces      = faces,
        verts      = verts[None],
        K4         = K4,
        frames     = img[None],
        device     = device,
        resize     = resize,
        Rt         = Rt,
        mesh_color = mesh_color,
    )
    overlay = rendered_seq[0]

    if output_fn is not None:
        save_img(overlay, output_fn)
        return None
    return overlay


def render_mesh_overlay_video(
    faces      : Union[torch.Tensor, np.ndarray],
    verts      : Union[torch.Tensor, np.ndarray],
    K4         : List,
    frames     : np.ndarray,
    output_fn  : Optional[Union[str, Path]] = None,
    fps        : int = 30,
    device     : str = 'cuda',
    resize     : float = 1.0,
    Rt         : Tuple = None,
    mesh_color : Optional[Union[List[float], str]] = 'green',
):
    '''
    Render one mesh sequence on top of the input video frames.

    ### Args
    - faces: Union[torch.Tensor, np.ndarray], (F, 3)
    - verts: Union[torch.Tensor, np.ndarray], (L, V, 3)
    - K4: List
        - [fx, fy, cx, cy], the components of intrinsic camera matrix.
    - frames: np.ndarray, (L, H, W, 3)
    - output_fn: unused, only for compatibility.
    - fps: unused, only for compatibility.
    - device: unused, only for compatibility.
    - resize: unused, only for compatibility.
    - Rt: Tuple, default None
        - The extrinsic camera matrix, in the form of (R, t).
    - mesh_color: List[float] or str, default 'green'
        - A str is looked up in `ColorPalette.presets_float`; otherwise the
          components are used as the material base color with alpha 1.0.

    ### Returns
    - np.ndarray, (L, H, W, 3), uint8
        - The frames with the rendered mesh composited on top.
    '''
    faces, verts = to_numpy(faces), to_numpy(verts)
    assert len(K4) == 4, 'K4 must be a list of 4 elements.'
    assert frames.shape[0] == verts.shape[0], 'The length of frames and verts must be the same.'
    assert frames.shape[-1] == 3, 'The last dimension of frames must be 3.'
    if isinstance(mesh_color, str):
        mesh_color = ColorPalette.presets_float[mesh_color]

    # Prepare the data.
    L = len(frames)
    frame_w, frame_h = frames.shape[-2], frames.shape[-3]

    renderer = pyrender.OffscreenRenderer(
            viewport_width  = frame_w,
            viewport_height = frame_h,
            point_size      = 1.0
        )

    # Everything after the renderer exists runs under try/finally so that the
    # offscreen renderer is released even when scene building or rendering fails.
    try:
        # Camera
        camera, cam_pose = create_camera(K4, Rt)

        # Scene.
        material = pyrender.MetallicRoughnessMaterial(
                metallicFactor  = 0.0,
                alphaMode       = 'OPAQUE',
                baseColorFactor = (*mesh_color, 1.0)
            )

        # Light.
        light_nodes = create_raymond_lights()

        # The same 180-degree rotation about the x-axis is applied to every frame,
        # so build it once. NOTE(review): presumably this converts the mesh from
        # the input camera convention to the renderer's convention -- confirm.
        flip_x = trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0])

        results = []
        for i in range(L):
            mesh = trimesh.Trimesh(verts[i].copy(), faces.copy())
            mesh.apply_transform(flip_x)
            mesh = pyrender.Mesh.from_trimesh(mesh, material=material)

            # Fully transparent background, so the alpha channel marks mesh pixels.
            scene = pyrender.Scene(
                    bg_color      = [0.0, 0.0, 0.0, 0.0],
                    ambient_light = (0.3, 0.3, 0.3),
                )

            scene.add(mesh, 'mesh')
            scene.add(camera, pose=cam_pose)

            # Light.
            for node in light_nodes:
                scene.add_node(node)

            # Render.
            result_rgba, _ = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)

            # Alpha-blend the rendering over the original frame.
            valid_mask = result_rgba.astype(np.float32)[:, :, [-1]] / 255.0  # (H, W, 1)
            bg = frames[i]  # (H, W, 3)
            final = result_rgba[:, :, :3] * valid_mask + bg * (1 - valid_mask)
            final = final.astype(np.uint8)  # (H, W, 3)
            results.append(final)

        return np.stack(results, axis=0)  # (L, H, W, 3)
    finally:
        renderer.delete()



def render_meshes_overlay_img(
    faces_all   : Union[torch.Tensor, np.ndarray],
    verts_all   : Union[torch.Tensor, np.ndarray],
    cam_t_all   : Union[torch.Tensor, np.ndarray],
    K4          : List,
    img         : np.ndarray,
    output_fn   : Optional[Union[str, Path]] = None,
    device      : str = 'cuda',
    resize      : float = 1.0,
    Rt          : Optional[Tuple[torch.Tensor]] = None,
    mesh_color : Optional[Union[List[float], str]] = 'green',
    view       : str = 'front',
    ret_rgba   : bool = False,
):
    '''
    Render one or several meshes on top of a single image.

    The inputs get a length-1 frame axis and are handed to
    `render_meshes_overlay_video`; the only resulting frame is used.

    ### Args
    - faces_all: Union[torch.Tensor, np.ndarray], ((Nm,) F, 3)
    - verts_all: Union[torch.Tensor, np.ndarray], ((Nm,) V, 3)
    - cam_t_all: Union[torch.Tensor, np.ndarray], ((Nm,) 3)
    - K4: List
        - [fx, fy, cx, cy], the components of intrinsic camera matrix.
    - img: np.ndarray, (H, W, 3)
    - output_fn: Union[str, Path] or None
        - The output file path, if None, return the rendered img.
    - device: str, default 'cuda'
        - Forwarded unchanged to `render_meshes_overlay_video`.
    - resize: float, default 1.0
        - Forwarded unchanged to `render_meshes_overlay_video`.
    - Rt: Tuple of Tensor, default None
        - The extrinsic camera matrix, in the form of (R, t).
    - mesh_color: List[float] or str, default 'green'
        - Forwarded unchanged to `render_meshes_overlay_video`.
    - view: str, default 'front', {'front', 'side90d', 'side60d', 'top90d'}
    - ret_rgba: bool, default False
        - If True, return rgba images, otherwise return rgb images.
        - For view is not 'front', the background will become transparent.

    ### Returns
    - The rendered frame when `output_fn` is None. Otherwise the frame is
      passed to `save_img` and nothing is returned.

    ### Raises
    - ValueError: if `verts_all` is not 2-D/3-D or `cam_t_all` is not 1-D/2-D.
    '''
    # Insert the frame axis (L = 1) expected by the video renderer.
    if len(verts_all.shape) == 2:
        verts_all = verts_all[None] # (1, V, 3)
    elif len(verts_all.shape) == 3:
        verts_all = verts_all[:, None]  # ((Nm,) 1, V, 3)
    else:
        raise ValueError('The shape of verts_all is not correct.')
    if len(cam_t_all.shape) == 1:
        cam_t_all = cam_t_all[None] # (1, 3)
    elif len(cam_t_all.shape) == 2:
        cam_t_all = cam_t_all[:, None]  # ((Nm,) 1, 3)
    else:
        # This used to report 'verts_all', which pointed at the wrong argument.
        raise ValueError('The shape of cam_t_all is not correct.')

    frame = render_meshes_overlay_video(
            faces_all  = faces_all,
            verts_all  = verts_all,
            cam_t_all  = cam_t_all,
            K4         = K4,
            frames     = img[None],
            device     = device,
            resize     = resize,
            Rt         = Rt,
            mesh_color = mesh_color,
            view       = view,
            ret_rgba   = ret_rgba,
        )[0]

    if output_fn is None:
        return frame
    else:
        save_img(frame, output_fn)


def render_meshes_overlay_video(
    faces_all  : Union[torch.Tensor, np.ndarray],
    verts_all  : Union[torch.Tensor, np.ndarray],
    cam_t_all   : Union[torch.Tensor, np.ndarray],
    K4         : List,
    frames     : np.ndarray,
    output_fn  : Optional[Union[str, Path]] = None,
    fps        : int = 30,
    device     : str = 'cuda',
    resize     : float = 1.0,
    Rt         : Tuple = None,
    mesh_color : Optional[Union[List[float], str]] = 'green',
    view       : str = 'front',
    ret_rgba   : bool = False,
):
    '''
    Render one or several mesh sequences on top of the input video frames.

    ### Args
    - faces_all: Union[torch.Tensor, np.ndarray], ((Nm,) F, 3)
        - A single (F, 3) face array is shared by all Nm meshes.
    - verts_all: Union[torch.Tensor, np.ndarray], ((Nm,) L, V, 3)
    - cam_t_all: Union[torch.Tensor, np.ndarray], ((Nm,) L, 3)
        - Per-mesh, per-frame translation applied to the mesh.
    - K4: List
        - [fx, fy, cx, cy], the components of intrinsic camera matrix.
    - frames: np.ndarray, (L, H, W, 3)
    - output_fn: unused, only for compatibility.
    - fps: unused, only for compatibility.
    - device: unused, only for compatibility.
    - resize: unused, only for compatibility.
    - Rt: Tuple, default None
        - The extrinsic camera matrix, in the form of (R, t).
    - mesh_color: List[float] or str, default 'green'
        - A str is looked up in `ColorPalette.presets_float`; otherwise the
          components are used as the material base color with alpha 1.0.
    - view: str, default 'front', {'front', 'side90d', 'side60d', 'top90d'}
    - ret_rgba: bool, default False
        - If True, return rgba images, otherwise return rgb images.
        - For view is not 'front', the background will become transparent.

    ### Returns
    - np.ndarray, uint8, (L, H, W, 3), or (L, H, W, 4) when `ret_rgba` is True.

    ### Raises
    - ValueError: if `view` is not one of the supported names. This is checked
      before the offscreen renderer is created.
    '''
    faces_all, verts_all = to_numpy(faces_all), to_numpy(verts_all)
    # Convert the translations once instead of once per mesh and frame.
    cam_t_all = to_numpy(cam_t_all)
    if len(verts_all.shape) == 3:
        verts_all = verts_all[None]  # (1, L, V, 3)
    if len(cam_t_all.shape) == 2:
        cam_t_all = cam_t_all[None]  # (1, L, 3)
    Nm, L, _, _ = verts_all.shape
    if len(faces_all.shape) == 2:
        faces_all = faces_all[None].repeat(Nm, axis=0)  # (Nm, F, 3)

    assert len(K4) == 4, 'K4 must be a list of 4 elements.'
    assert frames.shape[0] == L, 'The length of frames and verts must be the same.'
    assert frames.shape[-1] == 3, 'The last dimension of frames must be 3.'
    assert len(verts_all.shape) == 4, 'The shape of verts_all must be (Nm, L, V, 3).'
    assert len(faces_all.shape) == 3, 'The shape of faces_all must be (Nm, V, 3).'
    if isinstance(mesh_color, str):
        mesh_color = ColorPalette.presets_float[mesh_color]

    # Resolve the view before any renderer exists, so an unsupported view
    # cannot leave an offscreen renderer behind.
    # Each entry is (angle in degrees, rotation axis); None means no extra rotation.
    view_rotations = {
        'front'   : None,
        'side90d' : (-90, [0, 1, 0]),
        'side60d' : (-60, [0, 1, 0]),
        'top90d'  : (90, [1, 0, 0]),
    }
    if view not in view_rotations:
        raise ValueError('The view is not supported.')
    view_spec = view_rotations[view]
    if view_spec is None:
        view_rot = None
    else:
        view_rot = trimesh.transformations.rotation_matrix(np.radians(view_spec[0]), view_spec[1])
    is_front = (view == 'front')

    # Applied to every mesh after the translation. NOTE(review): presumably this
    # converts from the input camera convention to the renderer's -- confirm.
    flip_x = trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0])

    # Prepare the data.
    frame_w, frame_h = frames.shape[-2], frames.shape[-3]

    renderer = pyrender.OffscreenRenderer(
            viewport_width  = frame_w,
            viewport_height = frame_h,
            point_size      = 1.0
        )

    # Everything after the renderer exists runs under try/finally so that the
    # offscreen renderer is released even when scene building or rendering fails.
    try:
        # Camera
        camera, cam_pose = create_camera(K4, Rt)

        # Scene.
        material = pyrender.MetallicRoughnessMaterial(
                metallicFactor  = 0.0,
                alphaMode       = 'OPAQUE',
                baseColorFactor = (*mesh_color, 1.0)
            )

        # Light.
        light_nodes = create_raymond_lights()

        results = []
        for i in range(L):
            # Fully transparent background, so the alpha channel marks mesh pixels.
            scene = pyrender.Scene(
                    bg_color      = [0.0, 0.0, 0.0, 0.0],
                    ambient_light = (0.3, 0.3, 0.3),
                )

            for mid in range(Nm):
                mesh = trimesh.Trimesh(verts_all[mid][i].copy(), faces_all[mid].copy())
                # Order matters: view rotation first, then translation, then flip.
                if view_rot is not None:
                    mesh.apply_transform(view_rot)
                trans = trimesh.transformations.translation_matrix(cam_t_all[mid][i])
                mesh.apply_transform(trans)
                mesh.apply_transform(flip_x)
                mesh = pyrender.Mesh.from_trimesh(mesh, material=material)

                scene.add(mesh, f'mesh_{mid}')
            scene.add(camera, pose=cam_pose)

            # Light.
            for node in light_nodes:
                scene.add_node(node)

            # Render.
            result_rgba, _ = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)

            valid_mask = result_rgba.astype(np.float32)[:, :, [-1]] / 255.0  # (H, W, 1)
            if is_front:
                bg = frames[i]  # (H, W, 3)
            else:
                # Non-front views do not line up with the photo; use a white canvas.
                bg = np.ones_like(frames[i]) * 255  # (H, W, 3)
            if ret_rgba:
                if is_front:
                    bg_alpha = np.ones_like(bg[..., [0]]) * 255  # (H, W, 1), opaque
                else:
                    bg_alpha = np.zeros_like(bg[..., [0]])  # (H, W, 1), transparent
                bg = np.concatenate([bg, bg_alpha], axis=-1)  # (H, W, 4)
                final = result_rgba * valid_mask + bg * (1 - valid_mask)  # (H, W, 4)
            else:
                final = result_rgba[:, :, :3] * valid_mask + bg * (1 - valid_mask)  # (H, W, 3)
            final = final.astype(np.uint8)
            results.append(final)

        return np.stack(results, axis=0)  # (L, H, W, 3) or (L, H, W, 4)
    finally:
        renderer.delete()