Spaces:

Tohru127
/

3d-model-GLPN

Sleeping

App Files Files Community

Tohru127 committed on Oct 3

Commit

254216d

verified ·

1 Parent(s): 5f9b409

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -53

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os, time, tempfile
 from pathlib import Path
 import numpy as np
 from PIL import Image
@@ -7,21 +7,25 @@ import torch
 from transformers import GLPNForDepthEstimation, GLPNImageProcessor
 import gradio as gr
-# Keep Spaces stable
 os.environ.setdefault("OMP_NUM_THREADS", "1")
-# Device pick
 DEVICE = torch.device(
     "cuda" if torch.cuda.is_available()
     else ("mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else "cpu")
 )
-# Load GLPN once (same family as your main.py)
 PROC = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
 MODEL = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu").to(DEVICE).eval()
-# Import Open3D here so the Space fails fast if wheel isn’t available
-import open3d as o3d  # requires the pinned wheel below
 def _resize_h480_m32(pil_img: Image.Image):
     h = min(pil_img.height, 480)
@@ -29,7 +33,7 @@ def _resize_h480_m32(pil_img: Image.Image):
     w = max(1, int(h * pil_img.width / max(1, pil_img.height)))
     return pil_img.resize((w, h), Image.BILINEAR)
-def _infer_depth(pil_img: Image.Image):
     t0 = time.time()
     img_proc = _resize_h480_m32(pil_img)
     inputs = PROC(images=img_proc, return_tensors="pt")
@@ -39,18 +43,19 @@ def _infer_depth(pil_img: Image.Image):
     pred = getattr(out, "predicted_depth", None)
     if pred is None:
         pred = out[0] if isinstance(out, (tuple, list)) else next(iter(out.values()))
-    if pred.dim() == 3:  # (B,H,W) → (B,1,H,W)
         pred = pred.unsqueeze(1)
-    # Upsample back to original size
     pred = torch.nn.functional.interpolate(
         pred, size=pil_img.size[::-1], mode="bicubic", align_corners=False
     ).squeeze(0).squeeze(0)
     depth = pred.detach().cpu().float().numpy()
-    return depth, time.time() - t0
 def _depth_preview(depth: np.ndarray) -> Image.Image:
     d = depth - float(depth.min())
-    d /= (float(d.max()) + 1e-8)
     return Image.fromarray((d * 255).astype(np.uint8))
 def _to_u16(depth: np.ndarray) -> np.ndarray:
@@ -71,61 +76,109 @@ def _rgbd_intrinsics(rgb: np.ndarray, depth_u16: np.ndarray, fx, fy):
     intr.set_intrinsics(w, h, fx, fy, w/2.0, h/2.0)
     return rgbd, intr
-def _make_pointcloud(rgbd, intr, nb_neighbors=20, std_ratio=20.0):
     pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intr)
-    # Clean outliers (stable on CPU)
-    _, ind = pcd.remove_statistical_outlier(nb_neighbors=nb_neighbors, std_ratio=std_ratio)
-    return pcd.select_by_index(ind)
-def _make_mesh(pcd, poisson_depth=10):
     pcd.estimate_normals()
     pcd.orient_normals_to_align_with_direction()
-    mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
-        pcd, depth=int(poisson_depth), n_threads=1
-    )[0]
-    # Rotate like your script (np.pi, 0, 0)
-    R = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
-    mesh.rotate(R, center=(0, 0, 0))
-    return mesh
 def run(pil_img: Image.Image, fx: int, fy: int, poisson_depth: int, down_voxel: float, verbose: bool):
-    log = []
     try:
         # 1) Depth
-        depth, t = _infer_depth(pil_img)
-        log += [f"Device: {DEVICE}", f"Depth: {depth.shape}  time={t:.2f}s"]
         depth_prev = _depth_preview(depth)
-        # 2) RGBD + Intrinsics
         rgb = np.array(pil_img.convert("RGB"))
         depth_u16 = _to_u16(depth)
         rgbd, intr = _rgbd_intrinsics(rgb, depth_u16, fx, fy)
         # 3) Point cloud
-        t0 = time.time()
-        pcd = _make_pointcloud(rgbd, intr, nb_neighbors=20, std_ratio=20.0)
-        if down_voxel > 0:
-            pcd = pcd.voxel_down_sample(voxel_size=float(down_voxel))
-        npts = np.asarray(pcd.points).shape[0]
-        log.append(f"PointCloud: {npts} pts  time={time.time()-t0:.2f}s")
-        # 4) Mesh
-        t0 = time.time()
-        mesh = _make_mesh(pcd, poisson_depth=poisson_depth)
-        v = np.asarray(mesh.vertices).shape[0]
-        f = np.asarray(mesh.triangles).shape[0]
-        log.append(f"Mesh: V={v}  F={f}  time={time.time()-t0:.2f}s")
-        # 5) Save artifacts
-        work = Path(tempfile.mkdtemp(prefix="recon_"))
-        depth_png = work / "depth_preview.png"
-        pcd_ply = work / "pointcloud.ply"
-        mesh_ply = work / "mesh.ply"
         depth_prev.save(depth_png)
         o3d.io.write_point_cloud(str(pcd_ply), pcd, write_ascii=False)
         o3d.io.write_triangle_mesh(str(mesh_ply), mesh, write_ascii=False)
-        log_txt = "\n".join(log if verbose else log[-12:])
         return (
             depth_prev,           # preview image
             str(pcd_ply),         # for Model3D viewer
@@ -136,19 +189,19 @@ def run(pil_img: Image.Image, fx: int, fy: int, poisson_depth: int, down_voxel:
             log_txt
         )
     except Exception as e:
-        log.append(f"ERROR: {e}")
-        return None, None, None, None, None, None, "\n".join(log)
 with gr.Blocks(title="Room 3D Reconstruction (GLPN + Open3D)") as demo:
-    gr.Markdown("### Room 3D Reconstruction — GLPN → RGB-D → Point Cloud → Poisson Mesh\nUpload a room photo. Get depth, PCD, and mesh with downloads.")
     with gr.Row():
         with gr.Column():
             inp = gr.Image(type="pil", label="Input room image")
             fx = gr.Slider(200, 1200, value=500, step=10, label="fx (px)")
             fy = gr.Slider(200, 1200, value=500, step=10, label="fy (px)")
-            pdepth = gr.Slider(7, 12, value=10, step=1, label="Poisson depth")
-            down = gr.Slider(0.0, 0.02, value=0.0, step=0.002, label="Voxel downsample (m)")
-            verbose = gr.Checkbox(value=False, label="Verbose logs")
             btn = gr.Button("Reconstruct 3D", variant="primary")
         with gr.Column():
             depth_img = gr.Image(label="Depth preview", interactive=False)
@@ -158,7 +211,7 @@ with gr.Blocks(title="Room 3D Reconstruction (GLPN + Open3D)") as demo:
         depth_file = gr.File(label="Download depth (PNG)")
         pcd_file = gr.File(label="Download point cloud (.ply)")
         mesh_file = gr.File(label="Download mesh (.ply)")
-    logs = gr.Textbox(label="Logs", max_lines=28, lines=16)
     btn.click(
         run,

+import os, time, datetime
 from pathlib import Path
 import numpy as np
 from PIL import Image
 from transformers import GLPNForDepthEstimation, GLPNImageProcessor
 import gradio as gr
+# ---- Keep Spaces stable (CPU-safe; quiet threading) ----
+os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
+os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
 os.environ.setdefault("OMP_NUM_THREADS", "1")
+os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")
 DEVICE = torch.device(
     "cuda" if torch.cuda.is_available()
     else ("mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else "cpu")
 )
 PROC = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
 MODEL = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu").to(DEVICE).eval()
+# Import Open3D (fail fast if missing)
+import open3d as o3d
+OUT_DIR = Path("outputs")
+OUT_DIR.mkdir(parents=True, exist_ok=True)
 def _resize_h480_m32(pil_img: Image.Image):
     h = min(pil_img.height, 480)
     w = max(1, int(h * pil_img.width / max(1, pil_img.height)))
     return pil_img.resize((w, h), Image.BILINEAR)
+def _infer_depth(pil_img: Image.Image, logs):
     t0 = time.time()
     img_proc = _resize_h480_m32(pil_img)
     inputs = PROC(images=img_proc, return_tensors="pt")
     pred = getattr(out, "predicted_depth", None)
     if pred is None:
         pred = out[0] if isinstance(out, (tuple, list)) else next(iter(out.values()))
+    if pred.dim() == 3:
         pred = pred.unsqueeze(1)
     pred = torch.nn.functional.interpolate(
         pred, size=pil_img.size[::-1], mode="bicubic", align_corners=False
     ).squeeze(0).squeeze(0)
     depth = pred.detach().cpu().float().numpy()
+    logs.append(f"[Depth] shape={depth.shape} device={DEVICE} time={time.time()-t0:.2f}s")
+    return depth
 def _depth_preview(depth: np.ndarray) -> Image.Image:
     d = depth - float(depth.min())
+    rng = float(d.max()) + 1e-8
+    d /= rng
     return Image.fromarray((d * 255).astype(np.uint8))
 def _to_u16(depth: np.ndarray) -> np.ndarray:
     intr.set_intrinsics(w, h, fx, fy, w/2.0, h/2.0)
     return rgbd, intr
def _make_pointcloud(rgbd, intr, logs, nb_neighbors=20, std_ratio=20.0, down_voxel=0.0):
    """Create a point cloud from an RGB-D image, then filter and downsample it.

    Statistical outlier removal runs only when the raw cloud has more than
    500 points, and its result is kept only when more than 50 points survive.
    Voxel downsampling runs only for a positive ``down_voxel``.

    Args:
        rgbd: RGB-D image passed to Open3D's ``create_from_rgbd_image``.
        intr: Camera intrinsics passed alongside ``rgbd``.
        logs: List that receives human-readable progress messages.
        nb_neighbors: Neighbor count for statistical outlier removal.
        std_ratio: Standard-deviation ratio for statistical outlier removal.
        down_voxel: Voxel size for downsampling; falsy or non-positive skips it.

    Returns:
        The resulting Open3D point cloud.
    """
    started = time.time()
    cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intr)

    raw_count = np.asarray(cloud.points).shape[0]
    if raw_count <= 500:
        # Extremely sparse clouds skip outlier removal entirely.
        logs.append("[PCD] Too few points for outlier removal; skipping.")
    else:
        _, kept = cloud.remove_statistical_outlier(
            nb_neighbors=nb_neighbors, std_ratio=std_ratio
        )
        if len(kept) <= 50:
            # Keep the unfiltered cloud rather than one with almost no points.
            logs.append("[PCD] Outlier removal would drop almost all points; skipping.")
        else:
            cloud = cloud.select_by_index(kept)

    if down_voxel and down_voxel > 0:
        cloud = cloud.voxel_down_sample(voxel_size=float(down_voxel))

    final_count = np.asarray(cloud.points).shape[0]
    elapsed = time.time() - started
    logs.append(f"[PCD] points={final_count} time={elapsed:.2f}s (voxel={down_voxel})")
    return cloud
def _make_mesh_with_fallback(pcd, poisson_depth, logs, method="poisson"):
    """Reconstruct a triangle mesh from a point cloud, retrying on failure.

    With ``method="poisson"`` a Poisson reconstruction is attempted; if any
    exception is raised during reconstruction or post-processing, the failure
    is logged and the function calls itself once more with ``method="ball"``
    (Ball-Pivoting). Any other ``method`` value selects Ball-Pivoting directly,
    and errors in that mode propagate to the caller.

    Args:
        pcd: Open3D point cloud; normals are estimated and oriented on it.
        poisson_depth: Depth passed (as ``int``) to Poisson reconstruction.
        logs: List that receives human-readable progress messages.
        method: ``"poisson"`` for Poisson with fallback, otherwise Ball-Pivoting.

    Returns:
        The cleaned mesh, rotated by pi about the X axis around the origin.

    Raises:
        RuntimeError: If the cloud has fewer than 30 points, or Ball-Pivoting
            has no neighbor distances to work with.
    """
    started = time.time()
    point_total = np.asarray(pcd.points).shape[0]
    if point_total < 30:
        raise RuntimeError("Point cloud too small for meshing.")

    pcd.estimate_normals()
    pcd.orient_normals_to_align_with_direction()

    wants_poisson = method == "poisson"
    try:
        if wants_poisson:
            # n_threads is deliberately not passed: not every Open3D wheel accepts it.
            outcome = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
                pcd, depth=int(poisson_depth)
            )
            mesh = outcome[0]
            used = "Poisson"
        else:
            # Ball-Pivoting radii are derived from the mean nearest-neighbor distance.
            spacing = pcd.compute_nearest_neighbor_distance()
            if not spacing:
                raise RuntimeError("No neighbor distances for Ball-Pivoting.")
            mean_spacing = float(sum(spacing)) / len(spacing)
            radii = o3d.utility.DoubleVector(
                [mean_spacing * factor for factor in (1.5, 2.5)]
            )
            mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_ball_pivoting(
                pcd, radii
            )
            used = "Ball-Pivoting"

        # Post-processing clean-up, applied in this fixed order.
        for cleanup in (
            mesh.remove_duplicated_vertices,
            mesh.remove_duplicated_triangles,
            mesh.remove_degenerate_triangles,
            mesh.remove_non_manifold_edges,
        ):
            cleanup()

        flip = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
        mesh.rotate(flip, center=(0, 0, 0))

        vertex_count = np.asarray(mesh.vertices).shape[0]
        face_count = np.asarray(mesh.triangles).shape[0]
        elapsed = time.time() - started
        logs.append(
            f"[Mesh] method={used} V={vertex_count} F={face_count} time={elapsed:.2f}s"
        )
        return mesh
    except Exception as e:
        if not wants_poisson:
            # Already on the fallback path: nothing left to try.
            raise
        logs.append(f"[Mesh] Poisson failed: {e}. Falling back to Ball-Pivoting…")
        return _make_mesh_with_fallback(pcd, poisson_depth, logs, method="ball")
def _timestamped(name: str, ext: str) -> Path:
    """Return a unique, timestamped output path inside ``OUT_DIR``.

    The file name has the form ``{name}_{YYYYmmdd_HHMMSS_ffffff}_{token}.{ext}``
    with the timestamp taken in UTC. Microseconds and a short random token are
    included so that runs started within the same second do not produce the
    same path and overwrite each other's files.

    Args:
        name: Base name placed at the start of the file name.
        ext: File extension without the leading dot.

    Returns:
        The path under ``OUT_DIR``; the file itself is not created here.
    """
    import uuid  # local import: only needed to build the collision-proof token

    # Timezone-aware replacement for datetime.utcnow(), deprecated in Python 3.12.
    now = datetime.datetime.now(datetime.timezone.utc)
    stamp = now.strftime("%Y%m%d_%H%M%S_%f")
    token = uuid.uuid4().hex[:8]
    return OUT_DIR / f"{name}_{stamp}_{token}.{ext}"
 def run(pil_img: Image.Image, fx: int, fy: int, poisson_depth: int, down_voxel: float, verbose: bool):
+    logs = []
     try:
+        if pil_img is None:
+            return None, None, None, None, None, None, "Upload an image."
         # 1) Depth
+        depth = _infer_depth(pil_img, logs)
         depth_prev = _depth_preview(depth)
+        # 2) RGBD + intrinsics
         rgb = np.array(pil_img.convert("RGB"))
         depth_u16 = _to_u16(depth)
         rgbd, intr = _rgbd_intrinsics(rgb, depth_u16, fx, fy)
         # 3) Point cloud
+        pcd = _make_pointcloud(rgbd, intr, logs, down_voxel=down_voxel)
+        if np.asarray(pcd.points).shape[0] < 30:
+            raise RuntimeError("Got < 30 points after filtering; try lowering outlier removal or increasing voxel size to 0.")
+        # 4) Mesh with fallback
+        mesh = _make_mesh_with_fallback(pcd, poisson_depth, logs)
+        # 5) Save artifacts (persistent + timestamped)
+        depth_png = _timestamped("depth_preview", "png")
+        pcd_ply   = _timestamped("pointcloud", "ply")
+        mesh_ply  = _timestamped("mesh", "ply")
         depth_prev.save(depth_png)
         o3d.io.write_point_cloud(str(pcd_ply), pcd, write_ascii=False)
         o3d.io.write_triangle_mesh(str(mesh_ply), mesh, write_ascii=False)
+        log_txt = "\n".join(logs if verbose else logs[-20:])
         return (
             depth_prev,           # preview image
             str(pcd_ply),         # for Model3D viewer
             log_txt
         )
     except Exception as e:
+        logs.append(f"[ERROR] {type(e).__name__}: {e}")
+        return None, None, None, None, None, None, "\n".join(logs)
 with gr.Blocks(title="Room 3D Reconstruction (GLPN + Open3D)") as demo:
+    gr.Markdown("### Room 3D Reconstruction — GLPN → RGB-D → Point Cloud → Mesh\nUpload a room photo. If Poisson fails, we auto-fallback to Ball-Pivoting.")
     with gr.Row():
         with gr.Column():
             inp = gr.Image(type="pil", label="Input room image")
             fx = gr.Slider(200, 1200, value=500, step=10, label="fx (px)")
             fy = gr.Slider(200, 1200, value=500, step=10, label="fy (px)")
+            pdepth = gr.Slider(6, 11, value=9, step=1, label="Poisson depth (lower = faster/stabler)")
+            down = gr.Slider(0.0, 0.02, value=0.01, step=0.002, label="Voxel downsample (m)")
+            verbose = gr.Checkbox(value=True, label="Verbose logs")
             btn = gr.Button("Reconstruct 3D", variant="primary")
         with gr.Column():
             depth_img = gr.Image(label="Depth preview", interactive=False)
         depth_file = gr.File(label="Download depth (PNG)")
         pcd_file = gr.File(label="Download point cloud (.ply)")
         mesh_file = gr.File(label="Download mesh (.ply)")
+    logs = gr.Textbox(label="Logs", max_lines=48, lines=20)
     btn.click(
         run,