"""Command-line runner: load a TSR model, preprocess the input images,
run the model on each one, optionally render views, and export a mesh."""

import argparse
import logging
import os
import time
from typing import Optional

import numpy as np
import rembg
import torch
from PIL import Image

from tsr.system import TSR
from tsr.utils import remove_background, resize_foreground, save_video


class Timer:
    """Named stopwatch that logs how long each labelled stage took.

    Elapsed times are scaled by ``time_scale`` and reported with
    ``time_unit`` (milliseconds by default).
    """

    def __init__(self):
        # Maps a stage name to the clock reading taken in ``start``.
        self.items = {}
        self.time_scale = 1000.0  # ms
        self.time_unit = "ms"

    def start(self, name: str) -> None:
        """Record the start of stage *name* and log that it has begun."""
        # Synchronize first so previously queued CUDA work is not billed
        # to the stage that is about to start.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments.
        self.items[name] = time.perf_counter()
        logging.info(f"{name} ...")

    def end(self, name: str) -> Optional[float]:
        """Finish stage *name*, log its duration and return it.

        Returns:
            The elapsed time scaled by ``time_scale``, or ``None`` when
            *name* was never started (or has already been ended).
        """
        if name not in self.items:
            return None
        # Synchronize so the measurement includes CUDA work launched
        # during the stage.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start_time = self.items.pop(name)
        elapsed = (time.perf_counter() - start_time) * self.time_scale
        logging.info(f"{name} finished in {elapsed:.2f}{self.time_unit}.")
        return elapsed


timer = Timer()


logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
)

parser = argparse.ArgumentParser()
parser.add_argument("image", type=str, nargs="+", help="Path to input image(s).")
parser.add_argument(
    "--device",
    default="cuda:0",
    type=str,
    help="Device to use. If no CUDA-compatible device is found, will fallback to 'cpu'. Default: 'cuda:0'",
)
parser.add_argument(
    "--pretrained-model-name-or-path",
    default="stabilityai/TripoSR",
    type=str,
    help="Path to the pretrained model. Could be either a huggingface model id is or a local path. Default: 'stabilityai/TripoSR'",
)
parser.add_argument(
    "--chunk-size",
    default=8192,
    type=int,
    help="Evaluation chunk size for surface extraction and rendering. Smaller chunk size reduces VRAM usage but increases computation time. 0 for no chunking. Default: 8192",
)
parser.add_argument(
    "--mc-resolution",
    default=256,
    type=int,
    help="Marching cubes grid resolution. Default: 256",
)
parser.add_argument(
    "--no-remove-bg",
    action="store_true",
    help="If specified, the background will NOT be automatically removed from the input image, and the input image should be an RGB image with gray background and properly-sized foreground. Default: false",
)
parser.add_argument(
    "--foreground-ratio",
    default=0.85,
    type=float,
    help="Ratio of the foreground size to the image size. Only used when --no-remove-bg is not specified. Default: 0.85",
)
parser.add_argument(
    "--output-dir",
    default="output/",
    type=str,
    help="Output directory to save the results. Default: 'output/'",
)
parser.add_argument(
    "--model-save-format",
    default="obj",
    type=str,
    choices=["obj", "glb"],
    help="Format to save the extracted mesh. Default: 'obj'",
)
parser.add_argument(
    "--render",
    action="store_true",
    help="If specified, save a NeRF-rendered video. Default: false",
)
args = parser.parse_args()

output_dir = args.output_dir
os.makedirs(output_dir, exist_ok=True)

# Fall back to the CPU when CUDA is unavailable, whatever --device says.
device = args.device
if not torch.cuda.is_available():
    device = "cpu"

timer.start("Initializing model")
model = TSR.from_pretrained(
    args.pretrained_model_name_or_path,
    config_name="config.yaml",
    weight_name="model.ckpt",
)
model.renderer.set_chunk_size(args.chunk_size)
model.to(device)
timer.end("Initializing model")

timer.start("Processing images")
images = []

# A background-removal session is only needed when removal is enabled.
if args.no_remove_bg:
    rembg_session = None
else:
    rembg_session = rembg.new_session()

for i, image_path in enumerate(args.image):
    # Every input gets its own numbered output directory. Create it up
    # front so the later render/mesh writes always have a destination.
    image_dir = os.path.join(output_dir, str(i))
    os.makedirs(image_dir, exist_ok=True)

    if args.no_remove_bg:
        image = np.array(Image.open(image_path).convert("RGB"))
    else:
        image = remove_background(Image.open(image_path), rembg_session)
        image = resize_foreground(image, args.foreground_ratio)
        image = np.array(image).astype(np.float32) / 255.0
        # Blend the first three channels over a 0.5 gray background,
        # weighted by the fourth channel (presumably alpha).
        image = image[:, :, :3] * image[:, :, 3:4] + (1 - image[:, :, 3:4]) * 0.5
        image = Image.fromarray((image * 255.0).astype(np.uint8))
        image.save(os.path.join(image_dir, "input.png"))
    images.append(image)
timer.end("Processing images")

for i, image in enumerate(images):
    logging.info(f"Running image {i + 1}/{len(images)} ...")
    image_dir = os.path.join(output_dir, str(i))

    timer.start("Running model")
    with torch.no_grad():
        scene_codes = model([image], device=device)
    timer.end("Running model")

    if args.render:
        timer.start("Rendering")
        render_images = model.render(scene_codes, n_views=30, return_type="pil")
        # One scene was submitted, so only the first entry is used.
        for ri, render_image in enumerate(render_images[0]):
            render_image.save(os.path.join(image_dir, f"render_{ri:03d}.png"))
        save_video(render_images[0], os.path.join(image_dir, "render.mp4"), fps=30)
        timer.end("Rendering")

    timer.start("Exporting mesh")
    meshes = model.extract_mesh(scene_codes, resolution=args.mc_resolution)
    meshes[0].export(os.path.join(image_dir, f"mesh.{args.model_save_format}"))
    timer.end("Exporting mesh")