# AniMer / app.py
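"""Gradio demo for AniMer: Animal Pose and Shape Estimation Using Family Aware Transformer.

The app takes a (cropped) animal image, predicts SMAL mesh vertices with the AMR model,
renders the mesh over the input image, and exports the mesh as an OBJ file.
"""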
from pathlib import Path
import torch
import os
import cv2
import numpy as np
import tempfile
from tqdm import tqdm
import torch.utils
import trimesh
import torch.utils.data
import gradio as gr
from typing import Union, List, Tuple, Dict
from amr.models import AMR
from amr.configs import get_config
from amr.utils import recursive_to
from amr.datasets.vitdet_dataset import ViTDetDataset, DEFAULT_MEAN, DEFAULT_STD
from amr.utils.renderer import Renderer, cam_crop_to_full
from huggingface_hub import snapshot_download
LIGHT_BLUE = (0.65098039, 0.74117647, 0.85882353)
# Load model config
path_model_cfg = 'config/config.yaml'
model_cfg = get_config(path_model_cfg)
# Load model
repo_id = "luoxue-star/AniMer"
local_dir = snapshot_download(repo_id=repo_id)
# local_dir = "./checkpoints"
PATH_CHECKPOINT = os.path.join(local_dir, "checkpoint.ckpt")
model = AMR.load_from_checkpoint(checkpoint_path=PATH_CHECKPOINT, map_location="cpu",
                                 cfg=model_cfg, strict=False, weights_only=True)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = model.to(device)
model.eval()
# Setup the renderer
renderer = Renderer(model_cfg, faces=model.smal.faces)
# Make output directory if it does not exist
OUTPUT_FOLDER = "demo_out"
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
def predict(im):
    # Return the composited (edited) image from the Gradio ImageEditor payload
    return im["composite"]
def inference(img: Dict) -> Tuple[Union[np.ndarray, None], Union[str, List]]:
    # The ImageEditor payload stores the edited image under "composite"; drop its alpha channel
    img = np.array(img["composite"])[:, :, :-1]
    # Treat the whole (cropped) image as a single detection box: x1, y1, x2, y2
    boxes = np.array([[0, 0, img.shape[1], img.shape[0]]])

    # Run AniMer on the cropped image
    dataset = ViTDetDataset(model_cfg, img, boxes)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)
    all_verts = []
    all_cam_t = []
    temp_name = next(tempfile._get_candidate_names())

    for batch in tqdm(dataloader):
        batch = recursive_to(batch, device)
        with torch.no_grad():
            out = model(batch)

        pred_cam = out['pred_cam']
        box_center = batch["box_center"].float()
        box_size = batch["box_size"].float()
        img_size = batch["img_size"].float()
        scaled_focal_length = model_cfg.EXTRA.FOCAL_LENGTH / model_cfg.MODEL.IMAGE_SIZE * img_size.max()
        # Convert the crop-space camera prediction to a translation in the full-image frame
        pred_cam_t_full = cam_crop_to_full(pred_cam, box_center, box_size, img_size,
                                           scaled_focal_length).detach().cpu().numpy()

        # Collect per-instance vertices and camera translations for rendering
        batch_size = batch['img'].shape[0]
        for n in range(batch_size):
            person_id = int(batch['personid'][n])
            input_patch = (batch['img'][n].cpu() * 255 * (DEFAULT_STD[:, None, None]) +
                           (DEFAULT_MEAN[:, None, None])) / 255.
            input_patch = input_patch.permute(1, 2, 0).numpy()
            verts = out['pred_vertices'][n].detach().cpu().numpy()
            cam_t = pred_cam_t_full[n]
            all_verts.append(verts)
            all_cam_t.append(cam_t)
    # Render the mesh onto the original image
    if len(all_verts):
        misc_args = dict(
            mesh_base_color=LIGHT_BLUE,
            scene_bg_color=(1, 1, 1),
            focal_length=scaled_focal_length,
        )
        cam_view = renderer.render_rgba_multiple(all_verts, cam_t=all_cam_t,
                                                 render_res=img_size[n], **misc_args)

        # Overlay the rendering on the input image using its alpha channel
        input_img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR).astype(np.float32)[:, :, ::-1] / 255.0
        input_img = np.concatenate([input_img, np.ones_like(input_img[:, :, :1])], axis=2)  # Add alpha channel
        input_img_overlay = input_img[:, :, :3] * (1 - cam_view[:, :, 3:]) + cam_view[:, :, :3] * cam_view[:, :, 3:]
        output_img = (255 * input_img_overlay[:, :, ::-1]).astype(np.uint8)[:, :, [2, 1, 0]]

        # Join the per-instance meshes and save them to a single OBJ file
        trimeshes = [renderer.vertices_to_trimesh(vvv, ttt.copy(), LIGHT_BLUE)
                     for vvv, ttt in zip(all_verts, all_cam_t)]
        mesh = trimesh.util.concatenate(trimeshes)
        mesh_name = os.path.join(OUTPUT_FOLDER, next(tempfile._get_candidate_names()) + '.obj')
        trimesh.exchange.export.export_mesh(mesh, mesh_name)
        return output_img, mesh_name
    else:
        return None, []
demo = gr.Interface(
    fn=inference,
    analytics_enabled=False,
    inputs=gr.ImageEditor(label="Input image", sources=["upload", "clipboard"], type='pil',
                          brush=False, eraser=False, layers=False, transforms="crop",
                          interactive=True),
    outputs=[
        gr.Image(label="Overlay image"),
        gr.Model3D(display_mode="wireframe", label="3D Mesh"),
    ],
    title="AniMer: Animal Pose and Shape Estimation Using Family Aware Transformer",
    description="""
Project page: https://luoxue-star.github.io/AniMer_project_page/
## Steps for Use
1. **Input**: Select an example image or upload your own.
2. **Crop**: Crop the animal in the image (otherwise the result may be poor).
3. **Output**:
    - Overlay image
    - 3D mesh
""",
    examples=[
        'example_data/000000015956_horse.png',
        'example_data/n02101388_1188.png',
        'example_data/n02412080_12159.png',
        'example_data/000000101684_zebra.png',
    ],
)
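# Illustrative sketch for calling `inference` directly (e.g. local testing): it expects the
# ImageEditor payload, i.e. a dict whose "composite" entry is the edited PIL image.
# The RGBA conversion mirrors what the editor returns, so the alpha-drop slice above still applies.
# from PIL import Image
# overlay, mesh_path = inference({"composite": Image.open("example_data/000000015956_horse.png").convert("RGBA")})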
demo.launch()