Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import os, time,
|
| 2 |
from pathlib import Path
|
| 3 |
import numpy as np
|
| 4 |
from PIL import Image
|
|
@@ -7,21 +7,25 @@ import torch
|
|
| 7 |
from transformers import GLPNForDepthEstimation, GLPNImageProcessor
|
| 8 |
import gradio as gr
|
| 9 |
|
| 10 |
-
# Keep Spaces stable
|
|
|
|
|
|
|
| 11 |
os.environ.setdefault("OMP_NUM_THREADS", "1")
|
|
|
|
| 12 |
|
| 13 |
-
# Device pick
|
| 14 |
DEVICE = torch.device(
|
| 15 |
"cuda" if torch.cuda.is_available()
|
| 16 |
else ("mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else "cpu")
|
| 17 |
)
|
| 18 |
|
| 19 |
-
# Load GLPN once (same family as your main.py)
|
| 20 |
PROC = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
|
| 21 |
MODEL = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu").to(DEVICE).eval()
|
| 22 |
|
| 23 |
-
# Import Open3D
|
| 24 |
-
import open3d as o3d
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
def _resize_h480_m32(pil_img: Image.Image):
|
| 27 |
h = min(pil_img.height, 480)
|
|
@@ -29,7 +33,7 @@ def _resize_h480_m32(pil_img: Image.Image):
|
|
| 29 |
w = max(1, int(h * pil_img.width / max(1, pil_img.height)))
|
| 30 |
return pil_img.resize((w, h), Image.BILINEAR)
|
| 31 |
|
| 32 |
-
def _infer_depth(pil_img: Image.Image):
|
| 33 |
t0 = time.time()
|
| 34 |
img_proc = _resize_h480_m32(pil_img)
|
| 35 |
inputs = PROC(images=img_proc, return_tensors="pt")
|
|
@@ -39,18 +43,19 @@ def _infer_depth(pil_img: Image.Image):
|
|
| 39 |
pred = getattr(out, "predicted_depth", None)
|
| 40 |
if pred is None:
|
| 41 |
pred = out[0] if isinstance(out, (tuple, list)) else next(iter(out.values()))
|
| 42 |
-
if pred.dim() == 3:
|
| 43 |
pred = pred.unsqueeze(1)
|
| 44 |
-
# Upsample back to original size
|
| 45 |
pred = torch.nn.functional.interpolate(
|
| 46 |
pred, size=pil_img.size[::-1], mode="bicubic", align_corners=False
|
| 47 |
).squeeze(0).squeeze(0)
|
| 48 |
depth = pred.detach().cpu().float().numpy()
|
| 49 |
-
|
|
|
|
| 50 |
|
| 51 |
def _depth_preview(depth: np.ndarray) -> Image.Image:
|
| 52 |
d = depth - float(depth.min())
|
| 53 |
-
|
|
|
|
| 54 |
return Image.fromarray((d * 255).astype(np.uint8))
|
| 55 |
|
| 56 |
def _to_u16(depth: np.ndarray) -> np.ndarray:
|
|
@@ -71,61 +76,109 @@ def _rgbd_intrinsics(rgb: np.ndarray, depth_u16: np.ndarray, fx, fy):
|
|
| 71 |
intr.set_intrinsics(w, h, fx, fy, w/2.0, h/2.0)
|
| 72 |
return rgbd, intr
|
| 73 |
|
| 74 |
-
def _make_pointcloud(rgbd, intr, nb_neighbors=20, std_ratio=20.0):
    """Build a point cloud from an RGB-D image and keep only the inliers.

    The cloud is created with the given intrinsics, then filtered with
    Open3D's statistical outlier removal using ``nb_neighbors`` and
    ``std_ratio``; the returned cloud contains the surviving indices only.
    """
    cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intr)
    # Clean outliers (stable on CPU); only the inlier index list is needed.
    inlier_idx = cloud.remove_statistical_outlier(
        nb_neighbors=nb_neighbors,
        std_ratio=std_ratio,
    )[1]
    return cloud.select_by_index(inlier_idx)
|
| 79 |
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
pcd.estimate_normals()
|
| 82 |
pcd.orient_normals_to_align_with_direction()
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
def run(pil_img: Image.Image, fx: int, fy: int, poisson_depth: int, down_voxel: float, verbose: bool):
|
| 92 |
-
|
| 93 |
try:
|
|
|
|
|
|
|
|
|
|
| 94 |
# 1) Depth
|
| 95 |
-
depth
|
| 96 |
-
log += [f"Device: {DEVICE}", f"Depth: {depth.shape} time={t:.2f}s"]
|
| 97 |
depth_prev = _depth_preview(depth)
|
| 98 |
|
| 99 |
-
# 2) RGBD +
|
| 100 |
rgb = np.array(pil_img.convert("RGB"))
|
| 101 |
depth_u16 = _to_u16(depth)
|
| 102 |
rgbd, intr = _rgbd_intrinsics(rgb, depth_u16, fx, fy)
|
| 103 |
|
| 104 |
# 3) Point cloud
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
#
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
f = np.asarray(mesh.triangles).shape[0]
|
| 117 |
-
log.append(f"Mesh: V={v} F={f} time={time.time()-t0:.2f}s")
|
| 118 |
|
| 119 |
-
# 5) Save artifacts
|
| 120 |
-
work = Path(tempfile.mkdtemp(prefix="recon_"))
|
| 121 |
-
depth_png = work / "depth_preview.png"
|
| 122 |
-
pcd_ply = work / "pointcloud.ply"
|
| 123 |
-
mesh_ply = work / "mesh.ply"
|
| 124 |
depth_prev.save(depth_png)
|
| 125 |
o3d.io.write_point_cloud(str(pcd_ply), pcd, write_ascii=False)
|
| 126 |
o3d.io.write_triangle_mesh(str(mesh_ply), mesh, write_ascii=False)
|
| 127 |
|
| 128 |
-
log_txt = "\n".join(
|
| 129 |
return (
|
| 130 |
depth_prev, # preview image
|
| 131 |
str(pcd_ply), # for Model3D viewer
|
|
@@ -136,19 +189,19 @@ def run(pil_img: Image.Image, fx: int, fy: int, poisson_depth: int, down_voxel:
|
|
| 136 |
log_txt
|
| 137 |
)
|
| 138 |
except Exception as e:
|
| 139 |
-
|
| 140 |
-
return None, None, None, None, None, None, "\n".join(
|
| 141 |
|
| 142 |
with gr.Blocks(title="Room 3D Reconstruction (GLPN + Open3D)") as demo:
|
| 143 |
-
gr.Markdown("### Room 3D Reconstruction — GLPN → RGB-D → Point Cloud →
|
| 144 |
with gr.Row():
|
| 145 |
with gr.Column():
|
| 146 |
inp = gr.Image(type="pil", label="Input room image")
|
| 147 |
fx = gr.Slider(200, 1200, value=500, step=10, label="fx (px)")
|
| 148 |
fy = gr.Slider(200, 1200, value=500, step=10, label="fy (px)")
|
| 149 |
-
pdepth = gr.Slider(
|
| 150 |
-
down = gr.Slider(0.0, 0.02, value=0.
|
| 151 |
-
verbose = gr.Checkbox(value=
|
| 152 |
btn = gr.Button("Reconstruct 3D", variant="primary")
|
| 153 |
with gr.Column():
|
| 154 |
depth_img = gr.Image(label="Depth preview", interactive=False)
|
|
@@ -158,7 +211,7 @@ with gr.Blocks(title="Room 3D Reconstruction (GLPN + Open3D)") as demo:
|
|
| 158 |
depth_file = gr.File(label="Download depth (PNG)")
|
| 159 |
pcd_file = gr.File(label="Download point cloud (.ply)")
|
| 160 |
mesh_file = gr.File(label="Download mesh (.ply)")
|
| 161 |
-
logs = gr.Textbox(label="Logs", max_lines=
|
| 162 |
|
| 163 |
btn.click(
|
| 164 |
run,
|
|
|
|
| 1 |
+
import os, time, datetime
|
| 2 |
from pathlib import Path
|
| 3 |
import numpy as np
|
| 4 |
from PIL import Image
|
|
|
|
| 7 |
from transformers import GLPNForDepthEstimation, GLPNImageProcessor
|
| 8 |
import gradio as gr
|
| 9 |
|
| 10 |
+
# ---- Keep Spaces stable (CPU-safe; quiet threading) ----
|
| 11 |
+
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
|
| 12 |
+
os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
|
| 13 |
os.environ.setdefault("OMP_NUM_THREADS", "1")
|
| 14 |
+
os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")
|
| 15 |
|
|
|
|
| 16 |
DEVICE = torch.device(
|
| 17 |
"cuda" if torch.cuda.is_available()
|
| 18 |
else ("mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else "cpu")
|
| 19 |
)
|
| 20 |
|
|
|
|
| 21 |
PROC = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
|
| 22 |
MODEL = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu").to(DEVICE).eval()
|
| 23 |
|
| 24 |
+
# Import Open3D (fail fast if missing)
|
| 25 |
+
import open3d as o3d
|
| 26 |
+
|
| 27 |
+
OUT_DIR = Path("outputs")
|
| 28 |
+
OUT_DIR.mkdir(parents=True, exist_ok=True)
|
| 29 |
|
| 30 |
def _resize_h480_m32(pil_img: Image.Image):
|
| 31 |
h = min(pil_img.height, 480)
|
|
|
|
| 33 |
w = max(1, int(h * pil_img.width / max(1, pil_img.height)))
|
| 34 |
return pil_img.resize((w, h), Image.BILINEAR)
|
| 35 |
|
| 36 |
+
def _infer_depth(pil_img: Image.Image, logs):
|
| 37 |
t0 = time.time()
|
| 38 |
img_proc = _resize_h480_m32(pil_img)
|
| 39 |
inputs = PROC(images=img_proc, return_tensors="pt")
|
|
|
|
| 43 |
pred = getattr(out, "predicted_depth", None)
|
| 44 |
if pred is None:
|
| 45 |
pred = out[0] if isinstance(out, (tuple, list)) else next(iter(out.values()))
|
| 46 |
+
if pred.dim() == 3:
|
| 47 |
pred = pred.unsqueeze(1)
|
|
|
|
| 48 |
pred = torch.nn.functional.interpolate(
|
| 49 |
pred, size=pil_img.size[::-1], mode="bicubic", align_corners=False
|
| 50 |
).squeeze(0).squeeze(0)
|
| 51 |
depth = pred.detach().cpu().float().numpy()
|
| 52 |
+
logs.append(f"[Depth] shape={depth.shape} device={DEVICE} time={time.time()-t0:.2f}s")
|
| 53 |
+
return depth
|
| 54 |
|
| 55 |
def _depth_preview(depth: np.ndarray) -> Image.Image:
    """Render a depth map as an 8-bit preview image.

    The map is shifted so its minimum becomes zero, divided by the resulting
    maximum (plus a small epsilon so a constant map does not divide by zero),
    scaled by 255 and truncated to ``uint8``.
    """
    shifted = depth - float(depth.min())
    span = float(shifted.max()) + 1e-8
    gray = (shifted / span * 255).astype(np.uint8)
    return Image.fromarray(gray)
|
| 60 |
|
| 61 |
def _to_u16(depth: np.ndarray) -> np.ndarray:
|
|
|
|
| 76 |
intr.set_intrinsics(w, h, fx, fy, w/2.0, h/2.0)
|
| 77 |
return rgbd, intr
|
| 78 |
|
| 79 |
+
def _make_pointcloud(rgbd, intr, logs, nb_neighbors=20, std_ratio=20.0, down_voxel=0.0):
    """Create a point cloud from an RGB-D image, then filter and downsample it.

    Args:
        rgbd: Open3D RGB-D image passed to ``create_from_rgbd_image``.
        intr: Camera intrinsics passed to ``create_from_rgbd_image``.
        logs: List of strings; status messages are appended to it.
        nb_neighbors: Neighbour count for statistical outlier removal.
        std_ratio: Standard-deviation ratio for statistical outlier removal.
        down_voxel: Voxel size for downsampling; falsy or non-positive
            values disable downsampling.

    Returns:
        The resulting point cloud.
    """
    started = time.time()
    cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intr)

    # Outlier removal is only attempted on clouds with more than 500 points,
    # and only applied when more than 50 inliers would remain.
    if np.asarray(cloud.points).shape[0] <= 500:
        logs.append("[PCD] Too few points for outlier removal; skipping.")
    else:
        _, inliers = cloud.remove_statistical_outlier(
            nb_neighbors=nb_neighbors, std_ratio=std_ratio
        )
        if len(inliers) <= 50:
            logs.append("[PCD] Outlier removal would drop almost all points; skipping.")
        else:
            cloud = cloud.select_by_index(inliers)

    if down_voxel and down_voxel > 0:
        cloud = cloud.voxel_down_sample(voxel_size=float(down_voxel))

    count = np.asarray(cloud.points).shape[0]
    logs.append(f"[PCD] points={count} time={time.time()-started:.2f}s (voxel={down_voxel})")
    return cloud
|
| 99 |
+
|
| 100 |
+
def _make_mesh_with_fallback(pcd, poisson_depth, logs, method="poisson"):
    """Reconstruct a triangle mesh from ``pcd``, falling back when Poisson fails.

    Args:
        pcd: Open3D point cloud; its normals are (re)estimated in place.
        poisson_depth: Depth passed to Poisson reconstruction (cast to int).
        logs: List of strings; progress and fallback messages are appended.
        method: ``"poisson"`` (default) tries Poisson and retries with
            Ball-Pivoting if it raises or yields no triangles; any other
            value runs Ball-Pivoting only.

    Returns:
        The cleaned mesh, rotated by pi about the X axis around the origin.

    Raises:
        RuntimeError: If the cloud has fewer than 30 points, or if
            Ball-Pivoting gets no neighbour distances.
        Exception: Any other error from the Ball-Pivoting path is re-raised.
    """
    t0 = time.time()
    if np.asarray(pcd.points).shape[0] < 30:
        raise RuntimeError("Point cloud too small for meshing.")

    pcd.estimate_normals()
    pcd.orient_normals_to_align_with_direction()

    try:
        if method == "poisson":
            # Many Open3D wheels don't support n_threads kwarg; don't pass it.
            mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
                pcd, depth=int(poisson_depth)
            )[0]
            used = "Poisson"
        else:
            # Ball-Pivoting fallback
            distances = pcd.compute_nearest_neighbor_distance()
            if not distances:
                raise RuntimeError("No neighbor distances for Ball-Pivoting.")
            avg = float(sum(distances)) / len(distances)
            radii = [avg * r for r in (1.5, 2.5)]
            mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_ball_pivoting(
                pcd, o3d.utility.DoubleVector(radii)
            )
            used = "Ball-Pivoting"

        # Post clean & orient
        mesh.remove_duplicated_vertices()
        mesh.remove_duplicated_triangles()
        mesh.remove_degenerate_triangles()
        mesh.remove_non_manifold_edges()
        R = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
        mesh.rotate(R, center=(0, 0, 0))

        v = np.asarray(mesh.vertices).shape[0]
        f = np.asarray(mesh.triangles).shape[0]

        if f == 0:
            if method == "poisson":
                # A reconstruction can finish without raising yet leave no
                # faces; treat that as a failure so the fallback below runs
                # instead of returning an empty mesh as a success.
                raise RuntimeError("Poisson produced an empty mesh.")
            # Nothing left to fall back to: keep returning the mesh as
            # before, but make the empty result visible in the logs.
            logs.append("[Mesh] Ball-Pivoting produced no triangles.")

        logs.append(f"[Mesh] method={used} V={v} F={f} time={time.time()-t0:.2f}s")
        return mesh
    except Exception as e:
        if method == "poisson":
            logs.append(f"[Mesh] Poisson failed: {e}. Falling back to Ball-Pivoting…")
            return _make_mesh_with_fallback(pcd, poisson_depth, logs, method="ball")
        raise
|
| 144 |
+
|
| 145 |
+
def _timestamped(name: str, ext: str) -> Path:
    """Return a path inside ``OUT_DIR`` named ``<name>_<UTC timestamp>.<ext>``.

    The timestamp carries microseconds so that files produced by requests
    handled within the same second are unlikely to overwrite each other.
    """
    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC time.
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    ts = now_utc.strftime("%Y%m%d_%H%M%S_%f")
    return OUT_DIR / f"{name}_{ts}.{ext}"
|
| 148 |
|
| 149 |
def run(pil_img: Image.Image, fx: int, fy: int, poisson_depth: int, down_voxel: float, verbose: bool):
|
| 150 |
+
logs = []
|
| 151 |
try:
|
| 152 |
+
if pil_img is None:
|
| 153 |
+
return None, None, None, None, None, None, "Upload an image."
|
| 154 |
+
|
| 155 |
# 1) Depth
|
| 156 |
+
depth = _infer_depth(pil_img, logs)
|
|
|
|
| 157 |
depth_prev = _depth_preview(depth)
|
| 158 |
|
| 159 |
+
# 2) RGBD + intrinsics
|
| 160 |
rgb = np.array(pil_img.convert("RGB"))
|
| 161 |
depth_u16 = _to_u16(depth)
|
| 162 |
rgbd, intr = _rgbd_intrinsics(rgb, depth_u16, fx, fy)
|
| 163 |
|
| 164 |
# 3) Point cloud
|
| 165 |
+
pcd = _make_pointcloud(rgbd, intr, logs, down_voxel=down_voxel)
|
| 166 |
+
if np.asarray(pcd.points).shape[0] < 30:
|
| 167 |
+
raise RuntimeError("Got < 30 points after filtering; try lowering outlier removal or increasing voxel size to 0.")
|
| 168 |
+
|
| 169 |
+
# 4) Mesh with fallback
|
| 170 |
+
mesh = _make_mesh_with_fallback(pcd, poisson_depth, logs)
|
| 171 |
+
|
| 172 |
+
# 5) Save artifacts (persistent + timestamped)
|
| 173 |
+
depth_png = _timestamped("depth_preview", "png")
|
| 174 |
+
pcd_ply = _timestamped("pointcloud", "ply")
|
| 175 |
+
mesh_ply = _timestamped("mesh", "ply")
|
|
|
|
|
|
|
| 176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
depth_prev.save(depth_png)
|
| 178 |
o3d.io.write_point_cloud(str(pcd_ply), pcd, write_ascii=False)
|
| 179 |
o3d.io.write_triangle_mesh(str(mesh_ply), mesh, write_ascii=False)
|
| 180 |
|
| 181 |
+
log_txt = "\n".join(logs if verbose else logs[-20:])
|
| 182 |
return (
|
| 183 |
depth_prev, # preview image
|
| 184 |
str(pcd_ply), # for Model3D viewer
|
|
|
|
| 189 |
log_txt
|
| 190 |
)
|
| 191 |
except Exception as e:
|
| 192 |
+
logs.append(f"[ERROR] {type(e).__name__}: {e}")
|
| 193 |
+
return None, None, None, None, None, None, "\n".join(logs)
|
| 194 |
|
| 195 |
with gr.Blocks(title="Room 3D Reconstruction (GLPN + Open3D)") as demo:
|
| 196 |
+
gr.Markdown("### Room 3D Reconstruction — GLPN → RGB-D → Point Cloud → Mesh\nUpload a room photo. If Poisson fails, we auto-fallback to Ball-Pivoting.")
|
| 197 |
with gr.Row():
|
| 198 |
with gr.Column():
|
| 199 |
inp = gr.Image(type="pil", label="Input room image")
|
| 200 |
fx = gr.Slider(200, 1200, value=500, step=10, label="fx (px)")
|
| 201 |
fy = gr.Slider(200, 1200, value=500, step=10, label="fy (px)")
|
| 202 |
+
pdepth = gr.Slider(6, 11, value=9, step=1, label="Poisson depth (lower = faster/stabler)")
|
| 203 |
+
down = gr.Slider(0.0, 0.02, value=0.01, step=0.002, label="Voxel downsample (m)")
|
| 204 |
+
verbose = gr.Checkbox(value=True, label="Verbose logs")
|
| 205 |
btn = gr.Button("Reconstruct 3D", variant="primary")
|
| 206 |
with gr.Column():
|
| 207 |
depth_img = gr.Image(label="Depth preview", interactive=False)
|
|
|
|
| 211 |
depth_file = gr.File(label="Download depth (PNG)")
|
| 212 |
pcd_file = gr.File(label="Download point cloud (.ply)")
|
| 213 |
mesh_file = gr.File(label="Download mesh (.ply)")
|
| 214 |
+
logs = gr.Textbox(label="Logs", max_lines=48, lines=20)
|
| 215 |
|
| 216 |
btn.click(
|
| 217 |
run,
|