Spaces:

VAST-AI
/

MIDI-3D

Running on Zero

App Files Files Community

Update app.py

by ameerazam08 - opened Mar 11

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+111

-78

Files changed (3) hide show

Dockerfile +58 -0
README.md +2 -3
app.py +51 -75

Dockerfile ADDED Viewed

	@@ -0,0 +1,58 @@

+# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+#
+# Builds the container for this Space: CUDA 12.1 devel base image, system
+# libraries, an unprivileged user, the Python dependencies, and a uvicorn
+# entry point listening on port 7860.
+FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
+# Set compute capability for nerfacc and tiny-cuda-nn
+# See https://developer.nvidia.com/cuda-gpus and limit number to speed-up build
+ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+ENV TCNN_CUDA_ARCHITECTURES=90;89;86;80;75;70;61;60
+# Speed-up build for RTX 30xx
+# ENV TORCH_CUDA_ARCH_LIST="8.6"
+# ENV TCNN_CUDA_ARCHITECTURES=86
+# Speed-up build for RTX 40xx
+# ENV TORCH_CUDA_ARCH_LIST="8.9"
+# ENV TCNN_CUDA_ARCHITECTURES=89
+# apt install by root user
+# (build toolchain, git/git-lfs, ffmpeg, OpenGL/EGL and X11 libraries, Python 3.10 + pip).
+# The apt package lists are removed in the same layer so they do not stay in the image.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    cmake \
+    git \
+    git-lfs \
+    ffmpeg \
+    libegl1-mesa-dev \
+    libgl1-mesa-dev \
+    libgles2-mesa-dev \
+    libglib2.0-0 \
+    libgl1-mesa-glx \
+    libsm6 \
+    libxext6 \
+    libxrender1 \
+    python-is-python3 \
+    python3.10-dev \
+    python3-pip \
+    rsync \
+    wget \
+    && rm -rf /var/lib/apt/lists/*
+# Everything below runs as an unprivileged user with UID 1000
+# (see the Spaces Docker guide linked at the top of this file).
+RUN useradd -m -u 1000 user
+USER user
+# Expose the CUDA toolkit to compilers and to the dynamic loader, and put the
+# user-level pip script directory (/home/user/.local/bin) on PATH.
+ENV CUDA_HOME=/usr/local/cuda
+ENV PATH=${CUDA_HOME}/bin:/home/user/.local/bin:${PATH}
+ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
+# Link-time search path pointing at the CUDA stub libraries.
+ENV LIBRARY_PATH=${CUDA_HOME}/lib64/stubs:${LIBRARY_PATH}
+# NOTE(review): /app is created by WORKDIR after switching to `user`; confirm the
+# application can write there at runtime if it creates files next to app.py.
+WORKDIR /app
+# Install the cu121 PyTorch wheels first. --no-cache-dir keeps pip's download
+# cache out of the image layer, consistent with the other pip installs below.
+RUN pip install --no-cache-dir torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121
+RUN pip install --no-cache-dir datasets "huggingface-hub>=0.19" "hf-transfer>=0.1.4" "protobuf<4" "click<8.1" "pydantic~=1.0"
+# The extras specifier is quoted so the shell never treats [oauth] as a glob pattern.
+RUN pip install --no-cache-dir "gradio[oauth]==4.44.1" "uvicorn>=0.14.0" spaces
+# Copy requirements.txt on its own first so the dependency layer is reused
+# from the build cache when only application sources change.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+# Serve the ASGI object named `app` from app.py on all interfaces, port 7860.
+# NOTE(review): requires app.py to define a module-level `app` - confirm.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,9 @@
 ---
 title: MIDI 3D
 emoji: 📚
-colorFrom: purple
 colorTo: red
-sdk: gradio
-sdk_version: 4.44.1
 app_file: app.py
 pinned: false
 license: apache-2.0

 ---
 title: MIDI 3D
 emoji: 📚
+colorFrom: gray
 colorTo: red
+sdk: docker
 app_file: app.py
 pinned: false
 license: apache-2.0

app.py CHANGED Viewed

@@ -6,20 +6,18 @@ from typing import Any, List, Union
 import gradio as gr
 import numpy as np
-import spaces
 import torch
-import trimesh
 from gradio_image_prompter import ImagePrompter
 from gradio_litmodel3d import LitModel3D
 from huggingface_hub import snapshot_download
 from PIL import Image
-from skimage import measure
 from transformers import AutoModelForMaskGeneration, AutoProcessor
 from midi.pipelines.pipeline_midi import MIDIPipeline
-from midi.utils.smoothing import smooth_gpu
 from scripts.grounding_sam import plot_segmentation, segment
-from scripts.inference_midi import preprocess_image, split_rgb_mask
 # Constants
 MAX_SEED = np.iinfo(np.int32).max
@@ -30,7 +28,7 @@ REPO_ID = "VAST-AI/MIDI-3D"
 MARKDOWN = """
 ## Image to 3D Scene with [MIDI-3D](https://huanngzh.github.io/MIDI-Page/)
-<b>Important!</b> Please check out our [instruction video](https://github.com/user-attachments/assets/814c046e-f5c3-47cf-bb56-60154be8374c)!
 1. Upload an image, and draw bounding boxes for each instance by holding and dragging the mouse. Then click "Run Segmentation" to generate the segmentation result. <b>Ensure instances are not too small and that bounding boxes fit snugly around each instance.</b>
 2. <b>Check "Do image padding" in "Generation Settings" if instances in your image are too close to the image border.</b> Then click "Run Generation" to generate a 3D scene from the image and segmentation result.
 3. If you find the generated 3D scene satisfactory, download it by clicking the "Download GLB" button.
@@ -39,9 +37,9 @@ MARKDOWN = """
 EXAMPLES = [
     [
         {
-            "image": "assets/example_data/Cartoon-Style/03_rgb.png",
         },
-        "assets/example_data/Cartoon-Style/03_seg.png",
         42,
         False,
         False,
@@ -57,39 +55,39 @@ EXAMPLES = [
     ],
     [
         {
-            "image": "assets/example_data/Realistic-Style/02_rgb.png",
         },
-        "assets/example_data/Realistic-Style/02_seg.png",
         42,
         False,
         False,
     ],
     [
         {
-            "image": "assets/example_data/Cartoon-Style/00_rgb.png",
         },
-        "assets/example_data/Cartoon-Style/00_seg.png",
         42,
         False,
-        False,
     ],
     [
         {
-            "image": "assets/example_data/Realistic-Style/00_rgb.png",
         },
-        "assets/example_data/Realistic-Style/00_seg.png",
         42,
         False,
         True,
     ],
     [
         {
-            "image": "assets/example_data/Realistic-Style/01_rgb.png",
         },
-        "assets/example_data/Realistic-Style/01_seg.png",
         42,
         False,
-        True,
     ],
     [
         {
@@ -127,10 +125,38 @@ pipe.init_custom_adapter(
 # Utils
-def get_random_hex():
-    random_bytes = os.urandom(8)
-    random_hex = random_bytes.hex()
-    return random_hex
 @spaces.GPU()
@@ -164,37 +190,7 @@ def run_segmentation(image_prompts: Any, polygon_refinement: bool) -> Image.Imag
     return seg_map_pil
-@torch.no_grad()
-def run_midi(
-    pipe: Any,
-    rgb_image: Union[str, Image.Image],
-    seg_image: Union[str, Image.Image],
-    seed: int,
-    num_inference_steps: int = 50,
-    guidance_scale: float = 7.0,
-    do_image_padding: bool = False,
-) -> trimesh.Scene:
-    if do_image_padding:
-        rgb_image, seg_image = preprocess_image(rgb_image, seg_image)
-    instance_rgbs, instance_masks, scene_rgbs = split_rgb_mask(rgb_image, seg_image)
-    num_instances = len(instance_rgbs)
-    outputs = pipe(
-        image=instance_rgbs,
-        mask=instance_masks,
-        image_scene=scene_rgbs,
-        attention_kwargs={"num_instances": num_instances},
-        generator=torch.Generator(device=pipe.device).manual_seed(seed),
-        num_inference_steps=num_inference_steps,
-        guidance_scale=guidance_scale,
-        decode_progressive=True,
-        return_dict=False,
-    )
-    return outputs
-@spaces.GPU(duration=180)
 @torch.no_grad()
 @torch.autocast(device_type=DEVICE, dtype=torch.bfloat16)
 def run_generation(
@@ -212,7 +208,7 @@ def run_generation(
     if not isinstance(rgb_image, Image.Image) and "image" in rgb_image:
         rgb_image = rgb_image["image"]
-    outputs = run_midi(
         pipe,
         rgb_image,
         seg_image,
@@ -222,27 +218,7 @@ def run_generation(
         do_image_padding,
     )
-    # marching cubes
-    trimeshes = []
-    for _, (logits_, grid_size, bbox_size, bbox_min, bbox_max) in enumerate(
-        zip(*outputs)
-    ):
-        grid_logits = logits_.view(grid_size)
-        grid_logits = smooth_gpu(grid_logits, method="gaussian", sigma=1)
-        torch.cuda.empty_cache()
-        vertices, faces, normals, _ = measure.marching_cubes(
-            grid_logits.float().cpu().numpy(), 0, method="lewiner"
-        )
-        vertices = vertices / grid_size * bbox_size + bbox_min
-        # Trimesh
-        mesh = trimesh.Trimesh(vertices.astype(np.float32), np.ascontiguousarray(faces))
-        trimeshes.append(mesh)
-    # compose the output meshes
-    scene = trimesh.Scene(trimeshes)
-    tmp_path = os.path.join(TMP_DIR, f"midi3d_{get_random_hex()}.glb")
     scene.export(tmp_path)
     torch.cuda.empty_cache()

 import gradio as gr
 import numpy as np
 import torch
 from gradio_image_prompter import ImagePrompter
 from gradio_litmodel3d import LitModel3D
 from huggingface_hub import snapshot_download
 from PIL import Image
 from transformers import AutoModelForMaskGeneration, AutoProcessor
 from midi.pipelines.pipeline_midi import MIDIPipeline
 from scripts.grounding_sam import plot_segmentation, segment
+from scripts.inference_midi import run_midi
+import spaces
 # Constants
 MAX_SEED = np.iinfo(np.int32).max
 MARKDOWN = """
 ## Image to 3D Scene with [MIDI-3D](https://huanngzh.github.io/MIDI-Page/)
+<b>Important!</b> Please check out our [instruction video](https://github.com/user-attachments/assets/4fc8aea4-010f-40c7-989d-6b1d9d3e3e09)!
 1. Upload an image, and draw bounding boxes for each instance by holding and dragging the mouse. Then click "Run Segmentation" to generate the segmentation result. <b>Ensure instances are not too small and that bounding boxes fit snugly around each instance.</b>
 2. <b>Check "Do image padding" in "Generation Settings" if instances in your image are too close to the image border.</b> Then click "Run Generation" to generate a 3D scene from the image and segmentation result.
 3. If you find the generated 3D scene satisfactory, download it by clicking the "Download GLB" button.
 EXAMPLES = [
     [
         {
+            "image": "assets/example_data/Cartoon-Style/00_rgb.png",
         },
+        "assets/example_data/Cartoon-Style/00_seg.png",
         42,
         False,
         False,
     ],
     [
         {
+            "image": "assets/example_data/Cartoon-Style/03_rgb.png",
         },
+        "assets/example_data/Cartoon-Style/03_seg.png",
         42,
         False,
         False,
     ],
     [
         {
+            "image": "assets/example_data/Realistic-Style/00_rgb.png",
         },
+        "assets/example_data/Realistic-Style/00_seg.png",
         42,
         False,
+        True,
     ],
     [
         {
+            "image": "assets/example_data/Realistic-Style/01_rgb.png",
         },
+        "assets/example_data/Realistic-Style/01_seg.png",
         42,
         False,
         True,
     ],
     [
         {
+            "image": "assets/example_data/Realistic-Style/02_rgb.png",
         },
+        "assets/example_data/Realistic-Style/02_seg.png",
         42,
         False,
+        False,
     ],
     [
         {
 # Utils
+def split_rgb_mask(rgb_image, seg_image):
+    if isinstance(rgb_image, str):
+        rgb_image = Image.open(rgb_image)
+    if isinstance(seg_image, str):
+        seg_image = Image.open(seg_image)
+    rgb_image = rgb_image.convert("RGB")
+    seg_image = seg_image.convert("L")
+    rgb_array = np.array(rgb_image)
+    seg_array = np.array(seg_image)
+    label_ids = np.unique(seg_array)
+    label_ids = label_ids[label_ids > 0]
+    instance_rgbs, instance_masks, scene_rgbs = [], [], []
+    for segment_id in sorted(label_ids):
+        # Here we set the background to white
+        white_background = np.ones_like(rgb_array) * 255
+        mask = np.zeros_like(seg_array, dtype=np.uint8)
+        mask[seg_array == segment_id] = 255
+        segment_rgb = white_background.copy()
+        segment_rgb[mask == 255] = rgb_array[mask == 255]
+        segment_rgb_image = Image.fromarray(segment_rgb)
+        segment_mask_image = Image.fromarray(mask)
+        instance_rgbs.append(segment_rgb_image)
+        instance_masks.append(segment_mask_image)
+        scene_rgbs.append(rgb_image)
+    return instance_rgbs, instance_masks, scene_rgbs
 @spaces.GPU()
     return seg_map_pil
+# @spaces.GPU()
 @torch.no_grad()
 @torch.autocast(device_type=DEVICE, dtype=torch.bfloat16)
 def run_generation(
     if not isinstance(rgb_image, Image.Image) and "image" in rgb_image:
         rgb_image = rgb_image["image"]
+    scene = run_midi(
         pipe,
         rgb_image,
         seg_image,
         do_image_padding,
     )
+    _, tmp_path = tempfile.mkstemp(suffix=".glb", prefix="midi3d_", dir=TMP_DIR)
     scene.export(tmp_path)
     torch.cuda.empty_cache()