michaelj committed
Commit
a12b8d1
1 Parent(s): 23f5383

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ examples/captured.jpeg filter=lfs diff=lfs merge=lfs -text
37
+ examples/iso_house.png filter=lfs diff=lfs merge=lfs -text
38
+ figures/comparison800.gif filter=lfs diff=lfs merge=lfs -text
39
+ figures/teaser800.gif filter=lfs diff=lfs merge=lfs -text
40
+ figures/visual_comparisons.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,164 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+ # default output directory
163
+ output/
164
+ outputs/
LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Tripo AI & Stability AI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,9 @@
1
- ---
2
  title: TripoSR
3
  emoji: 🐳
4
  colorFrom: gray
5
  colorTo: red
6
  sdk: docker
7
- # sdk_version: 4.19.2
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+
2
  title: TripoSR
3
  emoji: 🐳
4
  colorFrom: gray
5
  colorTo: red
6
  sdk: docker
 
7
  app_file: app.py
8
  pinned: false
9
+ license: mit
 
 
 
app.py CHANGED
@@ -13,14 +13,7 @@ from functools import partial
13
  from tsr.system import TSR
14
  from tsr.utils import remove_background, resize_foreground, to_gradio_3d_orientation
15
 
16
- #HF_TOKEN = os.getenv("HF_TOKEN")
17
-
18
- HEADER = """
19
- **TripoSR** is a state-of-the-art open-source model for **fast** feedforward 3D reconstruction from a single image, developed in collaboration between [Tripo AI](https://www.tripo3d.ai/) and [Stability AI](https://stability.ai/).
20
- **Tips:**
21
- 1. If you find the result is unsatisfied, please try to change the foreground ratio. It might improve the results.
22
- 2. Please disable "Remove Background" option only if your input image is RGBA with transparent background, image contents are centered and occupy more than 70% of image width or height.
23
- """
24
 
25
 
26
  if torch.cuda.is_available():
@@ -28,17 +21,14 @@ if torch.cuda.is_available():
28
  else:
29
  device = "cpu"
30
 
31
- d = os.environ.get("DEVICE", None)
32
- if d != None:
33
- device = d
34
-
35
  model = TSR.from_pretrained(
36
  "stabilityai/TripoSR",
37
  config_name="config.yaml",
38
  weight_name="model.ckpt",
39
- # token=HF_TOKEN
40
  )
41
- model.renderer.set_chunk_size(131072)
 
 
42
  model.to(device)
43
 
44
  rembg_session = rembg.new_session()
@@ -68,23 +58,36 @@ def preprocess(input_image, do_remove_background, foreground_ratio):
68
  return image
69
 
70
 
71
- def generate(image):
72
  scene_codes = model(image, device=device)
73
- mesh = model.extract_mesh(scene_codes, resolution=1024)[0]
74
  mesh = to_gradio_3d_orientation(mesh)
75
- mesh_path = tempfile.NamedTemporaryFile(suffix=".obj", delete=False)
76
- mesh_path2 = tempfile.NamedTemporaryFile(suffix=".glb", delete=False)
77
- mesh.export(mesh_path.name)
78
- mesh.export(mesh_path2.name)
79
- return mesh_path.name, mesh_path2.name
 
 
80
 
81
  def run_example(image_pil):
82
  preprocessed = preprocess(image_pil, False, 0.9)
83
- mesh_name, mesn_name2 = generate(preprocessed)
84
- return preprocessed, mesh_name, mesh_name2
85
-
86
- with gr.Blocks() as demo:
87
- gr.Markdown(HEADER)
88
  with gr.Row(variant="panel"):
89
  with gr.Column():
90
  with gr.Row():
@@ -108,30 +111,51 @@ with gr.Blocks() as demo:
108
  value=0.85,
109
  step=0.05,
110
  )
111
  with gr.Row():
112
  submit = gr.Button("Generate", elem_id="generate", variant="primary")
113
  with gr.Column():
114
- with gr.Tab("obj"):
115
- output_model = gr.Model3D(
116
- label="Output Model",
117
  interactive=False,
118
  )
119
- with gr.Tab("glb"):
120
- output_model2 = gr.Model3D(
121
- label="Output Model",
 
122
  interactive=False,
123
  )
 
124
  with gr.Row(variant="panel"):
125
  gr.Examples(
126
  examples=[
127
- os.path.join("examples", img_name) for img_name in sorted(os.listdir("examples"))
128
  ],
129
  inputs=[input_image],
130
- outputs=[processed_image, output_model, output_model2],
131
- #cache_examples=True,
132
  fn=partial(run_example),
133
  label="Examples",
134
- examples_per_page=20
135
  )
136
  submit.click(fn=check_input_image, inputs=[input_image]).success(
137
  fn=preprocess,
@@ -139,9 +163,11 @@ with gr.Blocks() as demo:
139
  outputs=[processed_image],
140
  ).success(
141
  fn=generate,
142
- inputs=[processed_image],
143
- outputs=[output_model, output_model2],
144
  )
145
 
 
 
146
  demo.queue(max_size=10)
147
- demo.launch()
 
13
  from tsr.system import TSR
14
  from tsr.utils import remove_background, resize_foreground, to_gradio_3d_orientation
15
 
16
+ import argparse
17
 
18
 
19
  if torch.cuda.is_available():
 
21
  else:
22
  device = "cpu"
23
 
 
 
 
 
24
  model = TSR.from_pretrained(
25
  "stabilityai/TripoSR",
26
  config_name="config.yaml",
27
  weight_name="model.ckpt",
 
28
  )
29
+
30
+ # adjust the chunk size to balance between speed and memory usage
31
+ model.renderer.set_chunk_size(8192)
32
  model.to(device)
33
 
34
  rembg_session = rembg.new_session()
 
58
  return image
59
 
60
 
61
+ def generate(image, mc_resolution, formats=["obj", "glb"]):
62
  scene_codes = model(image, device=device)
63
+ mesh = model.extract_mesh(scene_codes, resolution=mc_resolution)[0]
64
  mesh = to_gradio_3d_orientation(mesh)
65
+ rv = []
66
+ for format in formats:
67
+ mesh_path = tempfile.NamedTemporaryFile(suffix=f".{format}", delete=False)
68
+ mesh.export(mesh_path.name)
69
+ rv.append(mesh_path.name)
70
+ return rv
71
+
72
 
73
  def run_example(image_pil):
74
  preprocessed = preprocess(image_pil, False, 0.9)
75
+ mesh_name_obj, mesh_name_glb = generate(preprocessed, 256, ["obj", "glb"])
76
+ return preprocessed, mesh_name_obj, mesh_name_glb
77
+
78
+
79
+ with gr.Blocks(title="TripoSR") as demo:
80
+ gr.Markdown(
81
+ """
82
+ # TripoSR Demo
83
+ [TripoSR](https://github.com/VAST-AI-Research/TripoSR) is a state-of-the-art open-source model for **fast** feedforward 3D reconstruction from a single image, collaboratively developed by [Tripo AI](https://www.tripo3d.ai/) and [Stability AI](https://stability.ai/).
84
+
85
+ **Tips:**
86
+ 1. If you find the result unsatisfactory, please try changing the foreground ratio. It might improve the results.
87
+ 2. It's better to disable "Remove Background" for the provided examples (except for the last one) since they have already been preprocessed.
88
+ 3. Otherwise, disable the "Remove Background" option only if your input image is RGBA with a transparent background, and the image contents are centered and occupy more than 70% of the image width or height.
89
+ """
90
+ )
91
  with gr.Row(variant="panel"):
92
  with gr.Column():
93
  with gr.Row():
 
111
  value=0.85,
112
  step=0.05,
113
  )
114
+ mc_resolution = gr.Slider(
115
+ label="Marching Cubes Resolution",
116
+ minimum=32,
117
+ maximum=1024,
118
+ value=256,
119
+ step=32
120
+ )
121
  with gr.Row():
122
  submit = gr.Button("Generate", elem_id="generate", variant="primary")
123
  with gr.Column():
124
+ with gr.Tab("OBJ"):
125
+ output_model_obj = gr.Model3D(
126
+ label="Output Model (OBJ Format)",
127
  interactive=False,
128
  )
129
+ gr.Markdown("Note: The model shown here is flipped. Download to get correct results.")
130
+ with gr.Tab("GLB"):
131
+ output_model_glb = gr.Model3D(
132
+ label="Output Model (GLB Format)",
133
  interactive=False,
134
  )
135
+ gr.Markdown("Note: The model shown here has a darker appearance. Download to get correct results.")
136
  with gr.Row(variant="panel"):
137
  gr.Examples(
138
  examples=[
139
+ "examples/hamburger.png",
140
+ "examples/poly_fox.png",
141
+ "examples/robot.png",
142
+ "examples/teapot.png",
143
+ "examples/tiger_girl.png",
144
+ "examples/horse.png",
145
+ "examples/flamingo.png",
146
+ "examples/unicorn.png",
147
+ "examples/chair.png",
148
+ "examples/iso_house.png",
149
+ "examples/marble.png",
150
+ "examples/police_woman.png",
151
+ "examples/captured.jpeg",
152
  ],
153
  inputs=[input_image],
154
+ outputs=[processed_image, output_model_obj, output_model_glb],
155
+ cache_examples=False,
156
  fn=partial(run_example),
157
  label="Examples",
158
+ examples_per_page=20,
159
  )
160
  submit.click(fn=check_input_image, inputs=[input_image]).success(
161
  fn=preprocess,
 
163
  outputs=[processed_image],
164
  ).success(
165
  fn=generate,
166
+ inputs=[processed_image, mc_resolution],
167
+ outputs=[output_model_obj, output_model_glb],
168
  )
169
 
170
+
171
+
172
  demo.queue(max_size=10)
173
+ demo.launch()
examples/captured.jpeg ADDED

Git LFS Details

  • SHA256: c6eb2768703a0e3d6034daa7fd5e0b286450b1077a90f36da8110749bb1cb8a8
  • Pointer size: 132 Bytes
  • Size of remote file: 5.94 MB
examples/chair.png ADDED
examples/flamingo.png ADDED
examples/hamburger.png ADDED
examples/horse.png ADDED
examples/iso_house.png ADDED

Git LFS Details

  • SHA256: b6063cbbc55b9aa4a4785ddbfcd13ca86fb07eca5a4ea7f9dda5eebcf7c17765
  • Pointer size: 132 Bytes
  • Size of remote file: 1.26 MB
examples/marble.png ADDED
examples/police_woman.png ADDED
examples/poly_fox.png ADDED
examples/robot.png ADDED
examples/stripes.png ADDED
examples/teapot.png ADDED
examples/tiger_girl.png ADDED
examples/unicorn.png ADDED
figures/comparison800.gif ADDED

Git LFS Details

  • SHA256: 887e69297e4446f122801ff2cc39962eda0933906d7ed7be7abf659e721914be
  • Pointer size: 132 Bytes
  • Size of remote file: 8.87 MB
figures/scatter-comparison.png ADDED
figures/teaser800.gif ADDED

Git LFS Details

  • SHA256: 52ecc6ff24e008b0d28236425a1b59718931841f6fb9f5e6f8471829fc9bc292
  • Pointer size: 132 Bytes
  • Size of remote file: 3.84 MB
figures/visual_comparisons.jpg ADDED

Git LFS Details

  • SHA256: 019235d716d8832aaa659acd31cf17267af94df6b5a9beca2a7002b41d59c8db
  • Pointer size: 133 Bytes
  • Size of remote file: 10.3 MB
requirements.txt CHANGED
@@ -1,10 +1,10 @@
1
  omegaconf==2.3.0
2
  Pillow==10.1.0
3
  einops==0.7.0
4
- #git+https://github.com/tatsy/torchmcubes.git
5
- git+https://github.com/cocktailpeanut/torchmcubes.git
6
  transformers==4.35.0
7
  trimesh==4.0.5
8
  rembg
9
  huggingface-hub
 
10
  gradio
 
1
  omegaconf==2.3.0
2
  Pillow==10.1.0
3
  einops==0.7.0
4
+ git+https://github.com/tatsy/torchmcubes.git
 
5
  transformers==4.35.0
6
  trimesh==4.0.5
7
  rembg
8
  huggingface-hub
9
+ imageio[ffmpeg]
10
  gradio
run.py ADDED
@@ -0,0 +1,162 @@
1
+ import argparse
2
+ import logging
3
+ import os
4
+ import time
5
+
6
+ import numpy as np
7
+ import rembg
8
+ import torch
9
+ from PIL import Image
10
+
11
+ from tsr.system import TSR
12
+ from tsr.utils import remove_background, resize_foreground, save_video
13
+
14
+
15
+ class Timer:
16
+ def __init__(self):
17
+ self.items = {}
18
+ self.time_scale = 1000.0 # ms
19
+ self.time_unit = "ms"
20
+
21
+ def start(self, name: str) -> None:
22
+ if torch.cuda.is_available():
23
+ torch.cuda.synchronize()
24
+ self.items[name] = time.time()
25
+ logging.info(f"{name} ...")
26
+
27
+ def end(self, name: str) -> float:
28
+ if name not in self.items:
29
+ return
30
+ if torch.cuda.is_available():
31
+ torch.cuda.synchronize()
32
+ start_time = self.items.pop(name)
33
+ delta = time.time() - start_time
34
+ t = delta * self.time_scale
35
+ logging.info(f"{name} finished in {t:.2f}{self.time_unit}.")
36
+
37
+
38
+ timer = Timer()
39
+
40
+
41
+ logging.basicConfig(
42
+ format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
43
+ )
44
+ parser = argparse.ArgumentParser()
45
+ parser.add_argument("image", type=str, nargs="+", help="Path to input image(s).")
46
+ parser.add_argument(
47
+ "--device",
48
+ default="cuda:0",
49
+ type=str,
50
+ help="Device to use. If no CUDA-compatible device is found, will fallback to 'cpu'. Default: 'cuda:0'",
51
+ )
52
+ parser.add_argument(
53
+ "--pretrained-model-name-or-path",
54
+ default="stabilityai/TripoSR",
55
+ type=str,
56
+ help="Path to the pretrained model. Could be either a huggingface model id is or a local path. Default: 'stabilityai/TripoSR'",
57
+ )
58
+ parser.add_argument(
59
+ "--chunk-size",
60
+ default=8192,
61
+ type=int,
62
+ help="Evaluation chunk size for surface extraction and rendering. Smaller chunk size reduces VRAM usage but increases computation time. 0 for no chunking. Default: 8192",
63
+ )
64
+ parser.add_argument(
65
+ "--mc-resolution",
66
+ default=256,
67
+ type=int,
68
+ help="Marching cubes grid resolution. Default: 256"
69
+ )
70
+ parser.add_argument(
71
+ "--no-remove-bg",
72
+ action="store_true",
73
+ help="If specified, the background will NOT be automatically removed from the input image, and the input image should be an RGB image with gray background and properly-sized foreground. Default: false",
74
+ )
75
+ parser.add_argument(
76
+ "--foreground-ratio",
77
+ default=0.85,
78
+ type=float,
79
+ help="Ratio of the foreground size to the image size. Only used when --no-remove-bg is not specified. Default: 0.85",
80
+ )
81
+ parser.add_argument(
82
+ "--output-dir",
83
+ default="output/",
84
+ type=str,
85
+ help="Output directory to save the results. Default: 'output/'",
86
+ )
87
+ parser.add_argument(
88
+ "--model-save-format",
89
+ default="obj",
90
+ type=str,
91
+ choices=["obj", "glb"],
92
+ help="Format to save the extracted mesh. Default: 'obj'",
93
+ )
94
+ parser.add_argument(
95
+ "--render",
96
+ action="store_true",
97
+ help="If specified, save a NeRF-rendered video. Default: false",
98
+ )
99
+ args = parser.parse_args()
100
+
101
+ output_dir = args.output_dir
102
+ os.makedirs(output_dir, exist_ok=True)
103
+
104
+ device = args.device
105
+ if not torch.cuda.is_available():
106
+ device = "cpu"
107
+
108
+ timer.start("Initializing model")
109
+ model = TSR.from_pretrained(
110
+ args.pretrained_model_name_or_path,
111
+ config_name="config.yaml",
112
+ weight_name="model.ckpt",
113
+ )
114
+ model.renderer.set_chunk_size(args.chunk_size)
115
+ model.to(device)
116
+ timer.end("Initializing model")
117
+
118
+ timer.start("Processing images")
119
+ images = []
120
+
121
+ if args.no_remove_bg:
122
+ rembg_session = None
123
+ else:
124
+ rembg_session = rembg.new_session()
125
+
126
+ for i, image_path in enumerate(args.image):
127
+ if args.no_remove_bg:
128
+ image = np.array(Image.open(image_path).convert("RGB"))
129
+ else:
130
+ image = remove_background(Image.open(image_path), rembg_session)
131
+ image = resize_foreground(image, args.foreground_ratio)
132
+ image = np.array(image).astype(np.float32) / 255.0
133
+ image = image[:, :, :3] * image[:, :, 3:4] + (1 - image[:, :, 3:4]) * 0.5
134
+ image = Image.fromarray((image * 255.0).astype(np.uint8))
135
+ if not os.path.exists(os.path.join(output_dir, str(i))):
136
+ os.makedirs(os.path.join(output_dir, str(i)))
137
+ image.save(os.path.join(output_dir, str(i), f"input.png"))
138
+ images.append(image)
139
+ timer.end("Processing images")
140
+
141
+ for i, image in enumerate(images):
142
+ logging.info(f"Running image {i + 1}/{len(images)} ...")
143
+
144
+ timer.start("Running model")
145
+ with torch.no_grad():
146
+ scene_codes = model([image], device=device)
147
+ timer.end("Running model")
148
+
149
+ if args.render:
150
+ timer.start("Rendering")
151
+ render_images = model.render(scene_codes, n_views=30, return_type="pil")
152
+ for ri, render_image in enumerate(render_images[0]):
153
+ render_image.save(os.path.join(output_dir, str(i), f"render_{ri:03d}.png"))
154
+ save_video(
155
+ render_images[0], os.path.join(output_dir, str(i), f"render.mp4"), fps=30
156
+ )
157
+ timer.end("Rendering")
158
+
159
+ timer.start("Exporting mesh")
160
+ meshes = model.extract_mesh(scene_codes, resolution=args.mc_resolution)
161
+ meshes[0].export(os.path.join(output_dir, str(i), f"mesh.{args.model_save_format}"))
162
+ timer.end("Exporting mesh")
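
As a reading aid for the run.py added above, here is a minimal sketch of the same pipeline called directly from Python. It only restates what the script does; the input path "input.png" and output name "mesh.obj" are placeholders, and it assumes the tsr package from this repository is on the import path.

import numpy as np
import rembg
import torch
from PIL import Image

from tsr.system import TSR
from tsr.utils import remove_background, resize_foreground

device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load the pretrained model; a smaller chunk size lowers VRAM use at the cost of speed.
model = TSR.from_pretrained(
    "stabilityai/TripoSR", config_name="config.yaml", weight_name="model.ckpt"
)
model.renderer.set_chunk_size(8192)
model.to(device)

# Preprocess as run.py does: remove the background, resize the foreground,
# then composite the RGBA result onto a 50% gray background.
image = remove_background(Image.open("input.png"), rembg.new_session())
image = resize_foreground(image, 0.85)
rgba = np.array(image).astype(np.float32) / 255.0
rgb = rgba[:, :, :3] * rgba[:, :, 3:4] + (1 - rgba[:, :, 3:4]) * 0.5
image = Image.fromarray((rgb * 255.0).astype(np.uint8))

# Run the model and extract a mesh with marching cubes at resolution 256.
with torch.no_grad():
    scene_codes = model([image], device=device)
mesh = model.extract_mesh(scene_codes, resolution=256, threshold=25.0)[0]
mesh.export("mesh.obj")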
tsr/models/isosurface.py CHANGED
@@ -7,7 +7,7 @@ from torchmcubes import marching_cubes
7
 
8
 
9
  class IsosurfaceHelper(nn.Module):
10
- points_range: Tuple[float, float] = (-1, 1)
11
 
12
  @property
13
  def grid_vertices(self) -> torch.FloatTensor:
@@ -41,8 +41,12 @@ class MarchingCubeHelper(IsosurfaceHelper):
41
  self,
42
  level: torch.FloatTensor,
43
  ) -> Tuple[torch.FloatTensor, torch.LongTensor]:
44
- level = level.view(self.resolution, self.resolution, self.resolution)
45
- v_pos, t_pos_idx = self.mc_func(level.detach(), 0.0)
46
  v_pos = v_pos[..., [2, 1, 0]]
47
- v_pos = v_pos * 2.0 / (self.resolution - 1.0) - 1.0
48
  return v_pos.to(level.device), t_pos_idx.to(level.device)
 
7
 
8
 
9
  class IsosurfaceHelper(nn.Module):
10
+ points_range: Tuple[float, float] = (0, 1)
11
 
12
  @property
13
  def grid_vertices(self) -> torch.FloatTensor:
 
41
  self,
42
  level: torch.FloatTensor,
43
  ) -> Tuple[torch.FloatTensor, torch.LongTensor]:
44
+ level = -level.view(self.resolution, self.resolution, self.resolution)
45
+ try:
46
+ v_pos, t_pos_idx = self.mc_func(level.detach(), 0.0)
47
+ except AttributeError:
48
+ print("torchmcubes was not compiled with CUDA support, use CPU version instead.")
49
+ v_pos, t_pos_idx = self.mc_func(level.detach().cpu(), 0.0)
50
  v_pos = v_pos[..., [2, 1, 0]]
51
+ v_pos = v_pos / (self.resolution - 1.0)
52
  return v_pos.to(level.device), t_pos_idx.to(level.device)
tsr/models/nerf_renderer.py CHANGED
@@ -1,9 +1,10 @@
1
- from dataclasses import dataclass, field
2
  from typing import Dict, Optional
3
 
4
  import torch
5
  import torch.nn.functional as F
6
  from einops import rearrange, reduce
 
7
 
8
  from ..utils import (
9
  BaseModule,
@@ -37,73 +38,79 @@ class TriplaneNeRFRenderer(BaseModule):
37
  chunk_size >= 0
38
  ), "chunk_size must be a non-negative integer (0 for no chunking)."
39
  self.chunk_size = chunk_size
40
- def make_step_grid(self,device, resolution: int, chunk_size: int = 32):
41
- coords = torch.linspace(-1.0, 1.0, resolution, device = device)
42
- x, y, z = torch.meshgrid(coords[0:chunk_size], coords, coords, indexing="ij")
43
- x = x.reshape(-1, 1)
44
- y = y.reshape(-1, 1)
45
- z = z.reshape(-1, 1)
46
- verts = torch.cat([x, y, z], dim = -1).view(-1, 3)
47
- indices2D: torch.Tensor = torch.stack(
48
- (verts[..., [0, 1]], verts[..., [0, 2]], verts[..., [1, 2]]),
49
- dim=-3,
50
- )
51
- return indices2D
52
 
53
- def query_triplane_volume_density(self, decoder: torch.nn.Module, triplane: torch.Tensor, resolution: int, sample_count: int = 1024 * 1024 * 4) -> torch.Tensor:
54
- layer_count = sample_count // (resolution * resolution)
55
- out_list = self.do_query_triplane_volume_density(decoder, triplane, resolution, layer_count)
56
- return get_activation(self.cfg.density_activation)(
57
- out_list.view([resolution * resolution * resolution, 1]) + self.cfg.density_bias
58
- )
59
- def do_query_triplane_volume_density(self, decoder: torch.nn.Module, triplane: torch.Tensor, resolution: int, layer_count: int) -> torch.Tensor:
60
- step = 2.0 * layer_count / (resolution - 1)
61
- indices2D = self.make_step_grid(triplane.device, resolution, layer_count)
62
-
63
- out_list = torch.zeros([resolution, resolution * resolution, 1], device = triplane.device
64
- )
65
- for i in range(0, resolution, layer_count):
66
- if i + layer_count > resolution:
67
- layer_count = resolution - i
68
- indices2D = indices2D[..., :resolution * resolution * layer_count, :]
69
- density_step = self.sample_step_triplane_volume_density(decoder, triplane, indices2D)
70
- # todo directly march cube
71
- out_list[i:i + layer_count] = density_step.view([layer_count, resolution * resolution, 1])
72
- #out_list.append(net_out['density'])
73
- indices2D.transpose(1, 2)[0, 0] += step
74
- indices2D.transpose(1, 2)[1, 0] += step
75
-
76
- return out_list
77
- def sample_step_triplane_volume_density(self, decoder, triplane, indices2D):
78
- out: torch.Tensor = F.grid_sample(
79
- rearrange(triplane, "Np Cp Hp Wp -> Np Cp Hp Wp", Np=3),
80
- rearrange(indices2D, "Np N Nd -> Np () N Nd", Np=3),
81
- align_corners=False,
82
- mode="bilinear",
83
- )
84
- if self.cfg.feature_reduction == "concat":
85
- out = rearrange(out, "Np Cp () N -> N (Np Cp)", Np=3)
86
- elif self.cfg.feature_reduction == "mean":
87
- out = reduce(out, "Np Cp () N -> N Cp", Np=3, reduction="mean")
88
- else:
89
- raise NotImplementedError
90
 
91
- net_out: Dict[str, torch.Tensor] = decoder(out)
92
- return net_out['density']
93
  def query_triplane(
94
  self,
95
  decoder: torch.nn.Module,
96
  positions: torch.Tensor,
97
  triplane: torch.Tensor,
 
98
  ) -> Dict[str, torch.Tensor]:
99
  input_shape = positions.shape[:-1]
100
  positions = positions.view(-1, 3)
101
 
102
  # positions in (-radius, radius)
103
  # normalized to (-1, 1) for grid sample
104
- #positions = scale_tensor(
105
- # positions, (-self.cfg.radius, self.cfg.radius), (-1, 1)
106
- #)
 
107
 
108
  def _query_chunk(x):
109
  indices2D: torch.Tensor = torch.stack(
 
1
+ from dataclasses import dataclass
2
  from typing import Dict, Optional
3
 
4
  import torch
5
  import torch.nn.functional as F
6
  from einops import rearrange, reduce
7
+ from torchmcubes import marching_cubes
8
 
9
  from ..utils import (
10
  BaseModule,
 
38
  chunk_size >= 0
39
  ), "chunk_size must be a non-negative integer (0 for no chunking)."
40
  self.chunk_size = chunk_size
41
 
42
+ def interpolate_triplane(self, triplane: torch.Tensor, resolution: int):
43
+ coords = torch.linspace(-1.0, 1.0, resolution, device = triplane.device)
44
+ x, y = torch.meshgrid(coords, coords, indexing="ij")
45
+ verts2D = torch.cat([x.view(resolution, resolution,1), y.view(resolution, resolution,1)], dim = -1)
46
+ verts2D = verts2D.expand(3, -1, -1, -1)
47
+ return F.grid_sample(triplane, verts2D, align_corners=False,mode="bilinear") # [3 40 H W] xy xz yz
48
+
49
+ def block_based_marchingcube(self, decoder: torch.nn.Module, triplane: torch.Tensor, resolution: int, threshold, block_resolution = 128) -> torch.Tensor:
50
+ resolution += 1 # sample 1 more line of density, 1024 + 1 == 1025; index 0 maps to -1.0f, 512 to 0.0f, 1024 to 1.0f, for better floating point precision.
51
+ block_size = 2.0 * block_resolution / (resolution - 1)
52
+ voxel_size = block_size / block_resolution
53
+ interpolated = self.interpolate_triplane(triplane, resolution)
54
+
55
+ pos_list = []
56
+ indices_list = []
57
+ for x in range(0, resolution - 1, block_resolution):
58
+ size_x = resolution - x if x + block_resolution >= resolution else block_resolution + 1 # sample 1 more line of density, so marching cubes resolution match block_resolution
59
+ for y in range(0, resolution - 1, block_resolution):
60
+ size_y = resolution - y if y + block_resolution >= resolution else block_resolution + 1
61
+ for z in range(0, resolution - 1, block_resolution):
62
+ size_z = resolution - z if z + block_resolution >= resolution else block_resolution + 1
63
+ xyplane = interpolated[0:1, :, x:x+size_x, y:y+size_y].expand(size_z, -1, -1, -1, -1).permute(3, 4, 0, 1, 2)
64
+ xzplane = interpolated[1:2, :, x:x+size_x, z:z+size_z].expand(size_y, -1, -1, -1, -1).permute(3, 0, 4, 1, 2)
65
+ yzplane = interpolated[2:3, :, y:y+size_y, z:z+size_z].expand(size_x, -1, -1, -1, -1).permute(0, 3, 4, 1, 2)
66
+ sz = size_x * size_y * size_z
67
+ out = torch.cat([xyplane, xzplane, yzplane], dim=3).view(sz, 3, -1)
68
+
69
+ if self.cfg.feature_reduction == "concat":
70
+ out = out.view(sz, -1)
71
+ elif self.cfg.feature_reduction == "mean":
72
+ out = reduce(out, "N Np Cp -> N Cp", Np=3, reduction="mean")
73
+ else:
74
+ raise NotImplementedError
75
+ net_out = decoder(out)
76
+ out = None # discard samples
77
+ density = net_out["density"]
78
+ net_out = None # discard colors
79
+ density = get_activation(self.cfg.density_activation)(density + self.cfg.density_bias).view(size_x, size_y, size_z)
80
+ try: # now do the marching cube
81
+ v_pos, indices = marching_cubes(density.detach(), threshold)
82
+ except AttributeError:
83
+ print("torchmcubes was not compiled with CUDA support, use CPU version instead.")
84
+ v_pos, indices = self.mc_func(density.detach().cpu(), 0.0)
85
+ offset = torch.tensor([x * voxel_size - 1.0, y * voxel_size - 1.0, z * voxel_size - 1.0], device = triplane.device)
86
+ v_pos = v_pos[..., [2, 1, 0]] * voxel_size + offset
87
+
88
+ indices_list.append(indices)
89
+ pos_list.append(v_pos)
90
+
91
+ vertex_count = 0
92
+ for i in range(0, len(pos_list)):
93
+ indices_list[i] += vertex_count
94
+ vertex_count += pos_list[i].size(0)
95
+
96
+ return torch.cat(pos_list), torch.cat(indices_list)
97
 
 
 
98
  def query_triplane(
99
  self,
100
  decoder: torch.nn.Module,
101
  positions: torch.Tensor,
102
  triplane: torch.Tensor,
103
+ scale_pos = True
104
  ) -> Dict[str, torch.Tensor]:
105
  input_shape = positions.shape[:-1]
106
  positions = positions.view(-1, 3)
107
 
108
  # positions in (-radius, radius)
109
  # normalized to (-1, 1) for grid sample
110
+ if scale_pos:
111
+ positions = scale_tensor(
112
+ positions, (-self.cfg.radius, self.cfg.radius), (-1, 1)
113
+ )
114
 
115
  def _query_chunk(x):
116
  indices2D: torch.Tensor = torch.stack(
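
One detail worth calling out in the block_based_marchingcube addition above: each block runs marching cubes in its own local index space, so the per-block face indices must be shifted by the number of vertices accumulated so far before the blocks are concatenated (the vertex_count loop near the end). A tiny standalone illustration of that bookkeeping, using toy tensors rather than real decoder output:

import torch

# Two toy "blocks", each with local vertices and faces indexing into those vertices.
pos_list = [torch.rand(4, 3), torch.rand(5, 3)]
indices_list = [
    torch.tensor([[0, 1, 2], [1, 2, 3]]),
    torch.tensor([[0, 1, 2], [2, 3, 4]]),
]

# Shift each block's face indices by the running vertex count so they point at
# the right rows of the concatenated vertex buffer.
vertex_count = 0
for i in range(len(pos_list)):
    indices_list[i] = indices_list[i] + vertex_count
    vertex_count += pos_list[i].size(0)

vertices = torch.cat(pos_list)   # shape (9, 3)
faces = torch.cat(indices_list)  # second block's faces now reference rows 4..8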
tsr/models/network_utils.py CHANGED
@@ -1,4 +1,4 @@
1
- from dataclasses import dataclass, field
2
  from typing import Optional
3
 
4
  import torch
 
1
+ from dataclasses import dataclass
2
  from typing import Optional
3
 
4
  import torch
tsr/models/tokenizers/image.py CHANGED
@@ -1,5 +1,4 @@
1
  from dataclasses import dataclass
2
- from typing import Optional
3
 
4
  import torch
5
  import torch.nn as nn
 
1
  from dataclasses import dataclass
 
2
 
3
  import torch
4
  import torch.nn as nn
tsr/models/transformer/attention.py CHANGED
@@ -11,6 +11,31 @@
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
  from typing import Optional
15
 
16
  import torch
 
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
+ #
15
+ # --------
16
+ #
17
+ # Modified 2024 by the Tripo AI and Stability AI Team.
18
+ #
19
+ # Copyright (c) 2024 Tripo AI & Stability AI
20
+ #
21
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
22
+ # of this software and associated documentation files (the "Software"), to deal
23
+ # in the Software without restriction, including without limitation the rights
24
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25
+ # copies of the Software, and to permit persons to whom the Software is
26
+ # furnished to do so, subject to the following conditions:
27
+ #
28
+ # The above copyright notice and this permission notice shall be included in all
29
+ # copies or substantial portions of the Software.
30
+ #
31
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37
+ # SOFTWARE.
38
+
39
  from typing import Optional
40
 
41
  import torch
tsr/models/transformer/basic_transformer_block.py CHANGED
@@ -11,8 +11,32 @@
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
15
- from typing import Any, Dict, Optional
16
 
17
  import torch
18
  import torch.nn.functional as F
@@ -32,8 +56,6 @@ class BasicTransformerBlock(nn.Module):
32
  dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
33
  cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention.
34
  activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward.
35
- num_embeds_ada_norm (:
36
- obj: `int`, *optional*): The number of diffusion steps used during training. See `Transformer2DModel`.
37
  attention_bias (:
38
  obj: `bool`, *optional*, defaults to `False`): Configure if the attentions should contain a bias parameter.
39
  only_cross_attention (`bool`, *optional*):
@@ -48,8 +70,6 @@ class BasicTransformerBlock(nn.Module):
48
  The normalization layer to use. Can be `"layer_norm"`, `"ada_norm"` or `"ada_norm_zero"`.
49
  final_dropout (`bool` *optional*, defaults to False):
50
  Whether to apply a final dropout after the last feed-forward layer.
51
- attention_type (`str`, *optional*, defaults to `"default"`):
52
- The type of attention to use. Can be `"default"` or `"gated"` or `"gated-text-image"`.
53
  """
54
 
55
  def __init__(
@@ -95,9 +115,9 @@ class BasicTransformerBlock(nn.Module):
95
 
96
  self.attn2 = Attention(
97
  query_dim=dim,
98
- cross_attention_dim=cross_attention_dim
99
- if not double_self_attention
100
- else None,
101
  heads=num_attention_heads,
102
  dim_head=attention_head_dim,
103
  dropout=dropout,
@@ -139,9 +159,9 @@ class BasicTransformerBlock(nn.Module):
139
 
140
  attn_output = self.attn1(
141
  norm_hidden_states,
142
- encoder_hidden_states=encoder_hidden_states
143
- if self.only_cross_attention
144
- else None,
145
  attention_mask=attention_mask,
146
  )
147
 
 
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
+ #
15
+ # --------
16
+ #
17
+ # Modified 2024 by the Tripo AI and Stability AI Team.
18
+ #
19
+ # Copyright (c) 2024 Tripo AI & Stability AI
20
+ #
21
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
22
+ # of this software and associated documentation files (the "Software"), to deal
23
+ # in the Software without restriction, including without limitation the rights
24
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25
+ # copies of the Software, and to permit persons to whom the Software is
26
+ # furnished to do so, subject to the following conditions:
27
+ #
28
+ # The above copyright notice and this permission notice shall be included in all
29
+ # copies or substantial portions of the Software.
30
+ #
31
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37
+ # SOFTWARE.
38
 
39
+ from typing import Optional
40
 
41
  import torch
42
  import torch.nn.functional as F
 
56
  dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
57
  cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention.
58
  activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward.
 
 
59
  attention_bias (:
60
  obj: `bool`, *optional*, defaults to `False`): Configure if the attentions should contain a bias parameter.
61
  only_cross_attention (`bool`, *optional*):
 
70
  The normalization layer to use. Can be `"layer_norm"`, `"ada_norm"` or `"ada_norm_zero"`.
71
  final_dropout (`bool` *optional*, defaults to False):
72
  Whether to apply a final dropout after the last feed-forward layer.
 
 
73
  """
74
 
75
  def __init__(
 
115
 
116
  self.attn2 = Attention(
117
  query_dim=dim,
118
+ cross_attention_dim=(
119
+ cross_attention_dim if not double_self_attention else None
120
+ ),
121
  heads=num_attention_heads,
122
  dim_head=attention_head_dim,
123
  dropout=dropout,
 
159
 
160
  attn_output = self.attn1(
161
  norm_hidden_states,
162
+ encoder_hidden_states=(
163
+ encoder_hidden_states if self.only_cross_attention else None
164
+ ),
165
  attention_mask=attention_mask,
166
  )
167
 
tsr/models/transformer/transformer_1d.py CHANGED
@@ -1,5 +1,43 @@
1
- from dataclasses import dataclass, field
2
- from typing import Any, Dict, Optional
3
 
4
  import torch
5
  import torch.nn.functional as F
@@ -10,28 +48,6 @@ from .basic_transformer_block import BasicTransformerBlock
10
 
11
 
12
  class Transformer1D(BaseModule):
13
- """
14
- A 1D Transformer model for sequence data.
15
-
16
- Parameters:
17
- num_attention_heads (`int`, *optional*, defaults to 16): The number of heads to use for multi-head attention.
18
- attention_head_dim (`int`, *optional*, defaults to 88): The number of channels in each head.
19
- in_channels (`int`, *optional*):
20
- The number of channels in the input and output (specify if the input is **continuous**).
21
- num_layers (`int`, *optional*, defaults to 1): The number of layers of Transformer blocks to use.
22
- dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
23
- cross_attention_dim (`int`, *optional*): The number of `encoder_hidden_states` dimensions to use.
24
- activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to use in feed-forward.
25
- num_embeds_ada_norm ( `int`, *optional*):
26
- The number of diffusion steps used during training. Pass if at least one of the norm_layers is
27
- `AdaLayerNorm`. This is fixed during training since it is used to learn a number of embeddings that are
28
- added to the hidden states.
29
-
30
- During inference, you can denoise for up to but not more steps than `num_embeds_ada_norm`.
31
- attention_bias (`bool`, *optional*):
32
- Configure if the `TransformerBlocks` attention should contain a bias parameter.
33
- """
34
-
35
  @dataclass
36
  class Config(BaseModule.Config):
37
  num_attention_heads: int = 16
@@ -119,15 +135,6 @@ class Transformer1D(BaseModule):
119
  encoder_hidden_states ( `torch.FloatTensor` of shape `(batch size, sequence len, embed dims)`, *optional*):
120
  Conditional embeddings for cross attention layer. If not given, cross-attention defaults to
121
  self-attention.
122
- timestep ( `torch.LongTensor`, *optional*):
123
- Used to indicate denoising step. Optional timestep to be applied as an embedding in `AdaLayerNorm`.
124
- class_labels ( `torch.LongTensor` of shape `(batch size, num classes)`, *optional*):
125
- Used to indicate class labels conditioning. Optional class labels to be applied as an embedding in
126
- `AdaLayerZeroNorm`.
127
- cross_attention_kwargs ( `Dict[str, Any]`, *optional*):
128
- A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
129
- `self.processor` in
130
- [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
131
  attention_mask ( `torch.Tensor`, *optional*):
132
  An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. If `1` the mask
133
  is kept, otherwise if `0` it is discarded. Mask will be converted into a bias, which adds large
@@ -140,13 +147,9 @@ class Transformer1D(BaseModule):
140
 
141
  If `ndim == 2`: will be interpreted as a mask, then converted into a bias consistent with the format
142
  above. This bias will be added to the cross-attention scores.
143
- return_dict (`bool`, *optional*, defaults to `True`):
144
- Whether or not to return a [`~models.unet_2d_condition.UNet2DConditionOutput`] instead of a plain
145
- tuple.
146
 
147
  Returns:
148
- If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a
149
- `tuple` where the first element is the sample tensor.
150
  """
151
  # ensure attention_mask is a bias, and give it a singleton query_tokens dimension.
152
  # we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward.
 
1
+ # Copyright 2023 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # --------
16
+ #
17
+ # Modified 2024 by the Tripo AI and Stability AI Team.
18
+ #
19
+ # Copyright (c) 2024 Tripo AI & Stability AI
20
+ #
21
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
22
+ # of this software and associated documentation files (the "Software"), to deal
23
+ # in the Software without restriction, including without limitation the rights
24
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25
+ # copies of the Software, and to permit persons to whom the Software is
26
+ # furnished to do so, subject to the following conditions:
27
+ #
28
+ # The above copyright notice and this permission notice shall be included in all
29
+ # copies or substantial portions of the Software.
30
+ #
31
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37
+ # SOFTWARE.
38
+
39
+ from dataclasses import dataclass
40
+ from typing import Optional
41
 
42
  import torch
43
  import torch.nn.functional as F
 
48
 
49
 
50
  class Transformer1D(BaseModule):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  @dataclass
52
  class Config(BaseModule.Config):
53
  num_attention_heads: int = 16
 
135
  encoder_hidden_states ( `torch.FloatTensor` of shape `(batch size, sequence len, embed dims)`, *optional*):
136
  Conditional embeddings for cross attention layer. If not given, cross-attention defaults to
137
  self-attention.
138
  attention_mask ( `torch.Tensor`, *optional*):
139
  An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. If `1` the mask
140
  is kept, otherwise if `0` it is discarded. Mask will be converted into a bias, which adds large
 
147
 
148
  If `ndim == 2`: will be interpreted as a mask, then converted into a bias consistent with the format
149
  above. This bias will be added to the cross-attention scores.
 
 
 
150
 
151
  Returns:
152
+ torch.FloatTensor
 
153
  """
154
  # ensure attention_mask is a bias, and give it a singleton query_tokens dimension.
155
  # we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward.
tsr/system.py CHANGED
@@ -13,7 +13,6 @@ from huggingface_hub import hf_hub_download
13
  from omegaconf import OmegaConf
14
  from PIL import Image
15
 
16
- from .models.isosurface import MarchingCubeHelper
17
  from .utils import (
18
  BaseModule,
19
  ImagePreprocessor,
@@ -50,17 +49,17 @@ class TSR(BaseModule):
50
 
51
  @classmethod
52
  def from_pretrained(
53
- cls, pretrained_model_name_or_path: str, config_name: str, weight_name: str, token=None
54
  ):
55
  if os.path.isdir(pretrained_model_name_or_path):
56
  config_path = os.path.join(pretrained_model_name_or_path, config_name)
57
  weight_path = os.path.join(pretrained_model_name_or_path, weight_name)
58
  else:
59
  config_path = hf_hub_download(
60
- repo_id=pretrained_model_name_or_path, filename=config_name, token=token
61
  )
62
  weight_path = hf_hub_download(
63
- repo_id=pretrained_model_name_or_path, filename=weight_name, token=token
64
  )
65
 
66
  cfg = OmegaConf.load(config_path)
@@ -160,36 +159,20 @@ class TSR(BaseModule):
160
 
161
  return images
162
 
163
- def set_marching_cubes_resolution(self, resolution: int):
164
- if (
165
- self.isosurface_helper is not None
166
- and self.isosurface_helper.resolution == resolution
167
- ):
168
- return
169
- self.isosurface_helper = MarchingCubeHelper(resolution)
170
-
171
  def extract_mesh(self, scene_codes, resolution: int = 256, threshold: float = 25.0):
172
- self.set_marching_cubes_resolution(resolution)
173
  meshes = []
174
  for scene_code in scene_codes:
175
  with torch.no_grad():
176
- density = self.renderer.query_triplane_volume_density(
177
- self.decoder.to(scene_codes.device),
178
- scene_code,
179
- resolution
180
- ) - threshold
181
- v_pos, t_pos_idx = self.isosurface_helper(density)
182
- density = None
183
- v_pos = v_pos.to(scene_codes.device)
184
- color = self.renderer.query_triplane(
185
- self.decoder.to(scene_codes.device),
186
- v_pos,
187
  scene_code,
188
- )["color"]
 
 
 
189
  v_pos = scale_tensor(
190
  v_pos,
191
- self.isosurface_helper.points_range,
192
- (-self.renderer.cfg.radius, self.renderer.cfg.radius),
193
  )
194
  mesh = trimesh.Trimesh(
195
  vertices=v_pos.cpu().numpy(),
 
13
  from omegaconf import OmegaConf
14
  from PIL import Image
15
 
 
16
  from .utils import (
17
  BaseModule,
18
  ImagePreprocessor,
 
49
 
50
  @classmethod
51
  def from_pretrained(
52
+ cls, pretrained_model_name_or_path: str, config_name: str, weight_name: str
53
  ):
54
  if os.path.isdir(pretrained_model_name_or_path):
55
  config_path = os.path.join(pretrained_model_name_or_path, config_name)
56
  weight_path = os.path.join(pretrained_model_name_or_path, weight_name)
57
  else:
58
  config_path = hf_hub_download(
59
+ repo_id=pretrained_model_name_or_path, filename=config_name
60
  )
61
  weight_path = hf_hub_download(
62
+ repo_id=pretrained_model_name_or_path, filename=weight_name
63
  )
64
 
65
  cfg = OmegaConf.load(config_path)
 
159
 
160
  return images
161
 
 
 
 
 
 
 
 
 
162
  def extract_mesh(self, scene_codes, resolution: int = 256, threshold: float = 25.0):
 
163
  meshes = []
164
  for scene_code in scene_codes:
165
  with torch.no_grad():
166
+ v_pos, t_pos_idx = self.renderer.block_based_marchingcube(self.decoder.to(scene_codes.device),
167
  scene_code,
168
+ resolution,
169
+ threshold
170
+ )
171
+ color = self.renderer.query_triplane(self.decoder.to(scene_codes.device), v_pos.to(scene_codes.device), scene_code, False)["color"]
172
  v_pos = scale_tensor(
173
  v_pos,
174
+ (-1.0, 1.0),
175
+ (-self.renderer.cfg.radius, self.renderer.cfg.radius)
176
  )
177
  mesh = trimesh.Trimesh(
178
  vertices=v_pos.cpu().numpy(),
tsr/utils.py CHANGED
@@ -300,7 +300,6 @@ def get_rays(
300
  directions,
301
  c2w,
302
  keepdim=False,
303
- noise_scale=0.0,
304
  normalize=False,
305
  ) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
306
  # Rotate ray directions from camera coordinate to the world coordinate
@@ -331,12 +330,6 @@ def get_rays(
331
  ) # (B, H, W, 3)
332
  rays_o = c2w[:, None, None, :3, 3].expand(rays_d.shape)
333
 
334
- # add camera noise to avoid grid-like artifect
335
- # https://github.com/ashawkey/stable-dreamfusion/blob/49c3d4fa01d68a4f027755acf94e1ff6020458cc/nerf/utils.py#L373
336
- if noise_scale > 0:
337
- rays_o = rays_o + torch.randn(3, device=rays_o.device) * noise_scale
338
- rays_d = rays_d + torch.randn(3, device=rays_d.device) * noise_scale
339
-
340
  if normalize:
341
  rays_d = F.normalize(rays_d, dim=-1)
342
  if not keepdim:
@@ -477,6 +470,5 @@ def save_video(
477
 
478
  def to_gradio_3d_orientation(mesh):
479
  mesh.apply_transform(trimesh.transformations.rotation_matrix(-np.pi/2, [1, 0, 0]))
480
- mesh.apply_scale([1, 1, -1])
481
  mesh.apply_transform(trimesh.transformations.rotation_matrix(np.pi/2, [0, 1, 0]))
482
  return mesh
 
300
  directions,
301
  c2w,
302
  keepdim=False,
 
303
  normalize=False,
304
  ) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
305
  # Rotate ray directions from camera coordinate to the world coordinate
 
330
  ) # (B, H, W, 3)
331
  rays_o = c2w[:, None, None, :3, 3].expand(rays_d.shape)
332
 
 
 
 
 
 
 
333
  if normalize:
334
  rays_d = F.normalize(rays_d, dim=-1)
335
  if not keepdim:
 
470
 
471
  def to_gradio_3d_orientation(mesh):
472
  mesh.apply_transform(trimesh.transformations.rotation_matrix(-np.pi/2, [1, 0, 0]))
 
473
  mesh.apply_transform(trimesh.transformations.rotation_matrix(np.pi/2, [0, 1, 0]))
474
  return mesh
upload.py ADDED
@@ -0,0 +1,8 @@
1
+ from huggingface_hub import HfApi
2
+ api = HfApi()
3
+
4
+ api.upload_folder(
5
+ folder_path="/workspaces/TripoSR",
6
+ repo_id="michaelj/TripoSR",
7
+ repo_type="space",
8
+ )
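
A note on upload.py: HfApi.upload_folder needs write access to the target repo, so in practice credentials come from a prior huggingface-cli login or are passed when constructing the client. A minimal sketch of the same call with an explicit (placeholder) token and an optional commit message:

from huggingface_hub import HfApi

# "hf_xxx" is a placeholder; a real token would normally come from
# `huggingface-cli login` or an environment variable, not be hard-coded.
api = HfApi(token="hf_xxx")
api.upload_folder(
    folder_path="/workspaces/TripoSR",
    repo_id="michaelj/TripoSR",
    repo_type="space",
    commit_message="Upload folder using huggingface_hub",
)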