dylanebert
/

InstantMesh

Image-to-3D

Diffusers

Safetensors

InstantMeshPipeline

Model card Files and versions Community

dylanebert committed on Oct 24, 2024

Commit

b788c50

1 Parent(s): f16d4f3

no texture option

Browse files

Files changed (3) hide show

README.md +1 -0
lrm/lrm.py +1 -1
pipeline.py +61 -94

README.md CHANGED Viewed

@@ -1,6 +1,7 @@
 ---
 library_name: diffusers
 pipeline_tag: image-to-3d
 ---
 Rehosted weights for [InstantMesh](https://huggingface.co/TencentARC/InstantMesh) using the diffusers pipeline.

 ---
 library_name: diffusers
 pipeline_tag: image-to-3d
+license: mit
 ---
 Rehosted weights for [InstantMesh](https://huggingface.co/TencentARC/InstantMesh) using the diffusers pipeline.

lrm/lrm.py CHANGED Viewed

@@ -3358,7 +3358,7 @@ class LRM(ModelMixin, ConfigMixin):
             )
             vertices_colors = (vertices_colors * 255).astype(np.uint8)
-            return vertices.cpu().numpy(), faces.cpu().numpy(), vertices_colors
         uvs, mesh_tex_idx, gb_pos, tex_hard_mask = xatlas_uvmap(
             self.geometry.renderer.ctx, vertices, faces, resolution=texture_resolution

             )
             vertices_colors = (vertices_colors * 255).astype(np.uint8)
+            return vertices, faces, vertices_colors
         uvs, mesh_tex_idx, gb_pos, tex_hard_mask = xatlas_uvmap(
             self.geometry.renderer.ctx, vertices, faces, resolution=texture_resolution

pipeline.py CHANGED Viewed

@@ -97,100 +97,67 @@ class InstantMeshPipeline(DiffusionPipeline):
         self.register_modules(lrm=self.lrm)
     @torch.no_grad()
-    def __call__(self, images: torch.Tensor):
-        """if remove_bg:
-            image = rembg.remove(image)
-        image = np.array(image)
-        alpha = np.where(image[..., 3] > 0)
-        y1, y2, x1, x2 = (
-            alpha[0].min(),
-            alpha[0].max(),
-            alpha[1].min(),
-            alpha[1].max(),
-        )
-        fg = image[y1:y2, x1:x2]
-        size = max(fg.shape[0], fg.shape[1])
-        ph0, pw0 = (size - fg.shape[0]) // 2, (size - fg.shape[1]) // 2
-        ph1, pw1 = size - fg.shape[0] - ph0, size - fg.shape[1] - pw0
-        image = np.pad(
-            fg,
-            ((ph0, ph1), (pw0, pw1), (0, 0)),
-            mode="constant",
-            constant_values=((0, 0), (0, 0), (0, 0)),
-        )
-        new_size = int(image.shape[0] / 0.85)
-        ph0, pw0 = (new_size - size) // 2, (new_size - size) // 2
-        ph1, pw1 = new_size - size - ph0, new_size - size - pw0
-        image = np.pad(
-            image,
-            ((ph0, ph1), (pw0, pw1), (0, 0)),
-            mode="constant",
-            constant_values=((0, 0), (0, 0), (0, 0)),
-        )
-        image = Image.fromarray(image)
-        self.multi_view_diffusion = self.multi_view_diffusion.to(self._execution_device)
-        images = self.multi_view_diffusion(image).images[0]
-        images = np.asarray(images, dtype=np.float32) / 255.0
-        images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float()
-        n, m = 3, 2
-        c, h, w = images.shape
-        images = (
-            images.view(c, n, h // n, m, w // m).permute(1, 3, 0, 2, 4).contiguous()
-        )
-        images = images.view(n * m, c, h // n, w // m)
-        images = images.unsqueeze(0)
-        images = v2.functional.resize(
-            images, 320, interpolation=3, antialias=True
-        ).clamp(0, 1)"""
         self.lrm.init_flexicubes_geometry(self._execution_device, fovy=30.0)
         cameras = get_zero123plus_input_cameras().to(self._execution_device)
         planes = self.lrm.forward_planes(images, cameras)
-        mesh_out = self.lrm.extract_mesh(
-            planes,
-            use_texture_map=True,
-            texture_resolution=1024,
-        )
-        vertices, vertex_indices, uvs, uv_indices, texture = mesh_out
-        vertices = vertices.cpu().numpy()
-        vertex_indices = vertex_indices.cpu().numpy()
-        uvs = uvs.cpu().numpy()
-        uv_indices = uv_indices.cpu().numpy()
-        texture = texture.permute(1, 2, 0).cpu().numpy()
-        vertex_indices_flat = vertex_indices.reshape(-1)
-        uv_indices_flat = uv_indices.reshape(-1)
-        vertex_uv_pairs = np.stack([vertex_indices_flat, uv_indices_flat], axis=1)
-        unique_pairs, unique_indices = np.unique(
-            vertex_uv_pairs, axis=0, return_inverse=True
-        )
-        vertices = vertices[unique_pairs[:, 0]]
-        uvs = uvs[unique_pairs[:, 1]]
-        faces = unique_indices.reshape(-1, 3)
-        lo, hi = 0, 1
-        img = np.asarray(texture, dtype=np.float32)
-        img = (img - lo) * (255 / (hi - lo))
-        img = img.clip(0, 255)
-        mask = np.sum(img.astype(np.float32), axis=-1, keepdims=True)
-        mask = (mask <= 3.0).astype(np.float32)
-        kernel = np.ones((3, 3), "uint8")
-        dilate_img = cv2.dilate(img, kernel, iterations=1)
-        img = img * (1 - mask) + dilate_img * mask
-        img = img.clip(0, 255).astype(np.uint8)
-        texture = np.ascontiguousarray(img[::-1, :, :])
-        return InstantMeshPipelineOutput(
-            vertices=vertices,
-            faces=faces,
-            uvs=uvs,
-            texture=texture,
-        )

         self.register_modules(lrm=self.lrm)
     @torch.no_grad()
+    def __call__(self, images: torch.Tensor, generate_texture: bool = False):
         self.lrm.init_flexicubes_geometry(self._execution_device, fovy=30.0)
         cameras = get_zero123plus_input_cameras().to(self._execution_device)
         planes = self.lrm.forward_planes(images, cameras)
+        if generate_texture:
+            mesh_out = self.lrm.extract_mesh(
+                planes,
+                use_texture_map=True,
+                texture_resolution=1024,
+            )
+            vertices, vertex_indices, uvs, uv_indices, texture = mesh_out
+            vertices = vertices.cpu().numpy()
+            vertex_indices = vertex_indices.cpu().numpy()
+            uvs = uvs.cpu().numpy()
+            uv_indices = uv_indices.cpu().numpy()
+            texture = texture.permute(1, 2, 0).cpu().numpy()
+            vertex_indices_flat = vertex_indices.reshape(-1)
+            uv_indices_flat = uv_indices.reshape(-1)
+            vertex_uv_pairs = np.stack([vertex_indices_flat, uv_indices_flat], axis=1)
+            unique_pairs, unique_indices = np.unique(
+                vertex_uv_pairs, axis=0, return_inverse=True
+            )
+            vertices = vertices[unique_pairs[:, 0]]
+            uvs = uvs[unique_pairs[:, 1]]
+            faces = unique_indices.reshape(-1, 3)
+            lo, hi = 0, 1
+            img = np.asarray(texture, dtype=np.float32)
+            img = (img - lo) * (255 / (hi - lo))
+            img = img.clip(0, 255)
+            mask = np.sum(img.astype(np.float32), axis=-1, keepdims=True)
+            mask = (mask <= 3.0).astype(np.float32)
+            kernel = np.ones((3, 3), "uint8")
+            dilate_img = cv2.dilate(img, kernel, iterations=1)
+            img = img * (1 - mask) + dilate_img * mask
+            img = img.clip(0, 255).astype(np.uint8)
+            texture = np.ascontiguousarray(img[::-1, :, :])
+            return InstantMeshPipelineOutput(
+                vertices=vertices,
+                faces=faces,
+                uvs=uvs,
+                texture=texture,
+            )
+        else:
+            mesh_out = self.lrm.extract_mesh(
+                planes,
+                use_texture_map=False,
+            )
+            vertices, faces, _ = mesh_out
+            vertices = vertices.cpu().numpy()
+            faces = faces.cpu().numpy()
+            return InstantMeshPipelineOutput(
+                vertices=vertices,
+                faces=faces,
+                uvs=None,
+                texture=None,
+            )