Spaces:

IsshikiHugh
/

HSMR

Runtime error

App Files Files Community

HSMR / lib /utils /vis /py_renderer /__init__.py

IsshikiHugh

feat: CPU demo

5ac1897 8 months ago

raw

history blame contribute delete

13.1 kB

	import os
	# Default PyOpenGL to the 'egl' platform, but never override a value the
	# caller already exported. Kept ahead of the `pyrender` import below so the
	# setting is already in place when that module is loaded.
	os.environ.setdefault('PYOPENGL_PLATFORM', 'egl')
	import torch
	import numpy as np
	import trimesh
	import pyrender

	from typing import List, Optional, Union, Tuple
	from pathlib import Path

	from lib.utils.vis import ColorPalette
	from lib.utils.data import to_numpy
	from lib.utils.media import save_img

	from .utils import *


	def render_mesh_overlay_img(
	    faces: Union[torch.Tensor, np.ndarray],
	    verts: torch.Tensor,
	    K4: List,
	    img: np.ndarray,
	    output_fn: Optional[Union[str, Path]] = None,
	    device: str = 'cuda',
	    resize: float = 1.0,
	    Rt: Optional[Tuple[torch.Tensor]] = None,
	    mesh_color: Optional[Union[List[float], str]] = 'green',
	):
	    '''
	    Render a single mesh on top of a single image.

	    Single-frame convenience wrapper around `render_mesh_overlay_video`:
	    `verts` and `img` each get a leading axis of length 1, the video routine
	    is called, and the only rendered frame is taken back out.

	    ### Args
	    - faces: Union[torch.Tensor, np.ndarray]
	        - Mesh faces, forwarded unchanged.
	    - verts: torch.Tensor, (V, 3)
	    - K4: List
	        - [fx, fy, cx, cy], the components of intrinsic camera matrix.
	    - img: np.ndarray, (H, W, 3)
	    - output_fn: Union[str, Path] or None
	        - Where to save the result; if None, the rendered image is returned.
	    - device: str, default 'cuda'
	        - Forwarded unchanged.
	    - resize: float, default 1.0
	        - Forwarded unchanged.
	    - Rt: Tuple of Tensor, default None
	        - The extrinsic camera matrix, in the form of (R, t).
	    - mesh_color: List[float] or str, default 'green'
	        - Forwarded unchanged.

	    ### Returns
	    - The rendered frame when `output_fn` is None; otherwise the frame is
	      handed to `save_img` and None is returned.
	    '''
	    rendered = render_mesh_overlay_video(
	        faces=faces,
	        verts=verts[None],   # add the frame axis the video routine expects
	        K4=K4,
	        frames=img[None],    # a one-frame "video"
	        device=device,
	        resize=resize,
	        Rt=Rt,
	        mesh_color=mesh_color,
	    )
	    frame = rendered[0]

	    # Guard clause: write to disk only when a destination was given.
	    if output_fn is not None:
	        save_img(frame, output_fn)
	        return None
	    return frame


	def render_mesh_overlay_video(
	    faces: Union[torch.Tensor, np.ndarray],
	    verts: Union[torch.Tensor, np.ndarray],
	    K4: List,
	    frames: np.ndarray,
	    output_fn: Optional[Union[str, Path]] = None,
	    fps: int = 30,
	    device: str = 'cuda',
	    resize: float = 1.0,
	    Rt: Optional[Tuple] = None,
	    mesh_color: Optional[Union[List[float], str]] = 'green',
	):
	    '''
	    Render one mesh per frame and composite it over the matching video frame.

	    ### Args
	    - faces: Union[torch.Tensor, np.ndarray], (F, 3)
	        - Triangle indices, shared by every frame.
	    - verts: Union[torch.Tensor, np.ndarray], (L, V, 3)
	    - K4: List
	        - [fx, fy, cx, cy], the components of intrinsic camera matrix.
	    - frames: np.ndarray, (L, H, W, 3)
	    - output_fn: unused, only for compatibility.
	    - fps: unused, only for compatibility.
	    - device: unused, only for compatibility.
	    - resize: unused, only for compatibility.
	    - Rt: Tuple, default None
	        - The extrinsic camera matrix, in the form of (R, t).
	    - mesh_color: List[float] or str, default 'green'
	        - Either a key of `ColorPalette.presets_float` or the color components
	          themselves; an alpha of 1.0 is appended to form the base color.

	    ### Returns
	    - np.ndarray, (L, H, W, 3), uint8
	        - The input frames with the rendered mesh blended on top.
	    '''
	    faces, verts = to_numpy(faces), to_numpy(verts)
	    assert len(K4) == 4, 'K4 must be a list of 4 elements.'
	    assert frames.shape[0] == verts.shape[0], 'The length of frames and verts must be the same.'
	    assert frames.shape[-1] == 3, 'The last dimension of frames must be 3.'
	    if isinstance(mesh_color, str):
	        mesh_color = ColorPalette.presets_float[mesh_color]

	    # Prepare the data.
	    L = len(frames)
	    frame_w, frame_h = frames.shape[-2], frames.shape[-3]

	    # The same 180-degree rotation about the x-axis is applied to every mesh,
	    # so build the matrix once instead of once per frame.
	    # NOTE(review): presumably this converts the mesh into the axis convention
	    # expected by the camera from `create_camera` -- confirm.
	    flip_x = trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0])

	    renderer = pyrender.OffscreenRenderer(
	        viewport_width=frame_w,
	        viewport_height=frame_h,
	        point_size=1.0,
	    )
	    # Everything after the renderer exists runs under try/finally so the
	    # offscreen renderer is released even when rendering raises.
	    try:
	        # Camera.
	        camera, cam_pose = create_camera(K4, Rt)

	        # Material shared by all frames.
	        material = pyrender.MetallicRoughnessMaterial(
	            metallicFactor=0.0,
	            alphaMode='OPAQUE',
	            baseColorFactor=(*mesh_color, 1.0),
	        )

	        # Light.
	        light_nodes = create_raymond_lights()

	        results = []
	        for i in range(L):
	            mesh = trimesh.Trimesh(verts[i].copy(), faces.copy())
	            mesh.apply_transform(flip_x)
	            mesh = pyrender.Mesh.from_trimesh(mesh, material=material)

	            # Fully transparent background, so the alpha channel of the render
	            # marks exactly the pixels covered by the mesh.
	            scene = pyrender.Scene(
	                bg_color=[0.0, 0.0, 0.0, 0.0],
	                ambient_light=(0.3, 0.3, 0.3),
	            )
	            scene.add(mesh, 'mesh')
	            scene.add(camera, pose=cam_pose)
	            for node in light_nodes:
	                scene.add_node(node)

	            # Render; the depth map is not needed.
	            result_rgba, _ = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)

	            # Alpha-blend the render over the original frame.
	            valid_mask = result_rgba.astype(np.float32)[:, :, [-1]] / 255.0  # (H, W, 1)
	            bg = frames[i]  # (H, W, 3)
	            final = result_rgba[:, :, :3] * valid_mask + bg * (1 - valid_mask)
	            results.append(final.astype(np.uint8))  # (H, W, 3)

	        return np.stack(results, axis=0)  # (L, H, W, 3)
	    finally:
	        renderer.delete()



	def render_meshes_overlay_img(
	    faces_all: Union[torch.Tensor, np.ndarray],
	    verts_all: Union[torch.Tensor, np.ndarray],
	    cam_t_all: Union[torch.Tensor, np.ndarray],
	    K4: List,
	    img: np.ndarray,
	    output_fn: Optional[Union[str, Path]] = None,
	    device: str = 'cuda',
	    resize: float = 1.0,
	    Rt: Optional[Tuple[torch.Tensor]] = None,
	    mesh_color: Optional[Union[List[float], str]] = 'green',
	    view: str = 'front',
	    ret_rgba: bool = False,
	):
	    '''
	    Render one or several meshes on top of a single image.

	    Single-frame wrapper around `render_meshes_overlay_video`: a frame axis of
	    length 1 is inserted into `verts_all`, `cam_t_all` and `img`, the video
	    routine is called, and its only frame is taken back out.

	    ### Args
	    - faces_all: Union[torch.Tensor, np.ndarray], ((Nm,) F, 3)
	    - verts_all: Union[torch.Tensor, np.ndarray], ((Nm,) V, 3)
	    - cam_t_all: Union[torch.Tensor, np.ndarray], ((Nm,) 3)
	    - K4: List
	        - [fx, fy, cx, cy], the components of intrinsic camera matrix.
	    - img: np.ndarray, (H, W, 3)
	    - output_fn: Union[str, Path] or None
	        - The output file path, if None, return the rendered img.
	    - device: str, default 'cuda'
	        - Forwarded unchanged.
	    - resize: float, default 1.0
	        - Forwarded unchanged.
	    - Rt: Tuple of Tensor, default None
	        - The extrinsic camera matrix, in the form of (R, t).
	    - mesh_color: List[float] or str, default 'green'
	        - Forwarded unchanged.
	    - view: str, default 'front', {'front', 'side90d', 'side60d', 'top90d'}
	    - ret_rgba: bool, default False
	        - If True, return rgba images, otherwise return rgb images.

	    ### Returns
	    - The rendered frame when `output_fn` is None; otherwise the frame is
	      handed to `save_img` and None is returned.

	    ### Raises
	    - ValueError: if `verts_all` is not 2-D/3-D or `cam_t_all` is not 1-D/2-D.
	    '''
	    # Insert the frame axis: right after the optional leading mesh axis.
	    if verts_all.ndim == 2:
	        verts_all = verts_all[None]  # (1, V, 3)
	    elif verts_all.ndim == 3:
	        verts_all = verts_all[:, None]  # (Nm, 1, V, 3)
	    else:
	        raise ValueError('The shape of verts_all is not correct.')

	    if cam_t_all.ndim == 1:
	        cam_t_all = cam_t_all[None]  # (1, 3)
	    elif cam_t_all.ndim == 2:
	        cam_t_all = cam_t_all[:, None]  # (Nm, 1, 3)
	    else:
	        # Name the argument that is actually malformed (this message used to
	        # blame verts_all).
	        raise ValueError('The shape of cam_t_all is not correct.')

	    frame = render_meshes_overlay_video(
	        faces_all=faces_all,
	        verts_all=verts_all,
	        cam_t_all=cam_t_all,
	        K4=K4,
	        frames=img[None],  # a one-frame "video"
	        device=device,
	        resize=resize,
	        Rt=Rt,
	        mesh_color=mesh_color,
	        view=view,
	        ret_rgba=ret_rgba,
	    )[0]

	    if output_fn is None:
	        return frame
	    save_img(frame, output_fn)
	    return None


	def render_meshes_overlay_video(
	    faces_all: Union[torch.Tensor, np.ndarray],
	    verts_all: Union[torch.Tensor, np.ndarray],
	    cam_t_all: Union[torch.Tensor, np.ndarray],
	    K4: List,
	    frames: np.ndarray,
	    output_fn: Optional[Union[str, Path]] = None,
	    fps: int = 30,
	    device: str = 'cuda',
	    resize: float = 1.0,
	    Rt: Optional[Tuple] = None,
	    mesh_color: Optional[Union[List[float], str]] = 'green',
	    view: str = 'front',
	    ret_rgba: bool = False,
	):
	    '''
	    Render several meshes per frame and composite them over a background.

	    ### Args
	    - faces_all: Union[torch.Tensor, np.ndarray], ((Nm,) F, 3)
	        - If the mesh axis is missing, the same faces are used for every mesh.
	    - verts_all: Union[torch.Tensor, np.ndarray], ((Nm,) L, V, 3)
	    - cam_t_all: Union[torch.Tensor, np.ndarray], ((Nm,) L, 3)
	        - Per-mesh, per-frame translation applied to the mesh.
	    - K4: List
	        - [fx, fy, cx, cy], the components of intrinsic camera matrix.
	    - frames: np.ndarray, (L, H, W, 3)
	    - output_fn: unused, only for compatibility.
	    - fps: unused, only for compatibility.
	    - device: unused, only for compatibility.
	    - resize: unused, only for compatibility.
	    - Rt: Tuple, default None
	        - The extrinsic camera matrix, in the form of (R, t).
	    - mesh_color: List[float] or str, default 'green'
	        - Either a key of `ColorPalette.presets_float` or the color components
	          themselves; an alpha of 1.0 is appended to form the base color.
	    - view: str, default 'front', {'front', 'side90d', 'side60d', 'top90d'}
	        - 'front' blends over the input frames; any other view rotates the
	          meshes and uses a white background instead of the frames.
	    - ret_rgba: bool, default False
	        - If True, return rgba images, otherwise return rgb images.
	        - With rgba output the background alpha is 255 for 'front' and 0
	          (transparent) for every other view.

	    ### Returns
	    - np.ndarray, uint8, (L, H, W, 4) if `ret_rgba` else (L, H, W, 3)

	    ### Raises
	    - ValueError: if `view` is not one of the supported names.
	    '''
	    faces_all, verts_all = to_numpy(faces_all), to_numpy(verts_all)
	    if len(verts_all.shape) == 3:
	        verts_all = verts_all[None]  # (1, L, V, 3)
	    if len(cam_t_all.shape) == 2:
	        cam_t_all = cam_t_all[None]  # (1, L, 3)
	    Nm, L, _, _ = verts_all.shape
	    if len(faces_all.shape) == 2:
	        faces_all = faces_all[None].repeat(Nm, axis=0)  # (Nm, F, 3)

	    assert len(K4) == 4, 'K4 must be a list of 4 elements.'
	    assert frames.shape[0] == L, 'The length of frames and verts must be the same.'
	    assert frames.shape[-1] == 3, 'The last dimension of frames must be 3.'
	    assert len(verts_all.shape) == 4, 'The shape of verts_all must be (Nm, L, V, 3).'
	    assert len(faces_all.shape) == 3, 'The shape of faces_all must be (Nm, V, 3).'
	    if isinstance(mesh_color, str):
	        mesh_color = ColorPalette.presets_float[mesh_color]

	    # Resolve the view before any renderer exists, so an unsupported name
	    # fails fast instead of in the middle of the render loop.
	    # Each entry is (angle in degrees, rotation axis); None means no rotation.
	    view_rotations = {
	        'front': None,
	        'side90d': (-90, [0, 1, 0]),
	        'side60d': (-60, [0, 1, 0]),
	        'top90d': (90, [1, 0, 0]),
	    }
	    if view not in view_rotations:
	        raise ValueError('The view is not supported.')
	    is_front = view == 'front'

	    # Both rotations are identical for every mesh and frame: build them once.
	    view_rot = None
	    if view_rotations[view] is not None:
	        angle_deg, axis = view_rotations[view]
	        view_rot = trimesh.transformations.rotation_matrix(np.radians(angle_deg), axis)
	    # NOTE(review): presumably this 180-degree turn about x converts the mesh
	    # into the axis convention expected by the camera from `create_camera`
	    # -- confirm.
	    flip_x = trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0])

	    # Prepare the data.
	    frame_w, frame_h = frames.shape[-2], frames.shape[-3]

	    renderer = pyrender.OffscreenRenderer(
	        viewport_width=frame_w,
	        viewport_height=frame_h,
	        point_size=1.0,
	    )
	    # Everything after the renderer exists runs under try/finally so the
	    # offscreen renderer is released even when rendering raises.
	    try:
	        # Camera.
	        camera, cam_pose = create_camera(K4, Rt)

	        # Material shared by all meshes.
	        material = pyrender.MetallicRoughnessMaterial(
	            metallicFactor=0.0,
	            alphaMode='OPAQUE',
	            baseColorFactor=(*mesh_color, 1.0),
	        )

	        # Light.
	        light_nodes = create_raymond_lights()

	        results = []
	        for i in range(L):
	            # Fully transparent background, so the alpha channel of the render
	            # marks exactly the pixels covered by a mesh.
	            scene = pyrender.Scene(
	                bg_color=[0.0, 0.0, 0.0, 0.0],
	                ambient_light=(0.3, 0.3, 0.3),
	            )

	            for mid in range(Nm):
	                mesh = trimesh.Trimesh(verts_all[mid][i].copy(), faces_all[mid].copy())
	                # Order matters: view rotation, then translation, then flip.
	                if view_rot is not None:
	                    mesh.apply_transform(view_rot)
	                trans = trimesh.transformations.translation_matrix(to_numpy(cam_t_all[mid][i]))
	                mesh.apply_transform(trans)
	                mesh.apply_transform(flip_x)
	                mesh = pyrender.Mesh.from_trimesh(mesh, material=material)
	                scene.add(mesh, f'mesh_{mid}')

	            scene.add(camera, pose=cam_pose)
	            for node in light_nodes:
	                scene.add_node(node)

	            # Render; the depth map is not needed.
	            result_rgba, _ = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)

	            valid_mask = result_rgba.astype(np.float32)[:, :, [-1]] / 255.0  # (H, W, 1)
	            if is_front:
	                bg = frames[i]  # (H, W, 3)
	            else:
	                bg = np.ones_like(frames[i]) * 255  # (H, W, 3), white

	            if ret_rgba:
	                if is_front:
	                    bg_alpha = np.ones_like(bg[..., [0]]) * 255  # (H, W, 1), opaque
	                else:
	                    bg_alpha = np.zeros_like(bg[..., [0]])  # (H, W, 1), transparent
	                bg = np.concatenate([bg, bg_alpha], axis=-1)  # (H, W, 4)
	                final = result_rgba * valid_mask + bg * (1 - valid_mask)  # (H, W, 4)
	            else:
	                final = result_rgba[:, :, :3] * valid_mask + bg * (1 - valid_mask)  # (H, W, 3)
	            results.append(final.astype(np.uint8))

	        return np.stack(results, axis=0)  # (L, H, W, 3) or (L, H, W, 4)
	    finally:
	        renderer.delete()