File size: 7,862 Bytes
05ad083 e4b3e88 05ad083 657723f 05ad083 e4b3e88 05ad083 e4b3e88 05ad083 e4b3e88 05ad083 e4b3e88 05ad083 e4b3e88 05ad083 0e5bb40 05ad083 0e5bb40 05ad083 e4b3e88 bc37f18 05ad083 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 |
import os

# Configure threading-related env vars up front, ahead of the heavy imports below.
os.environ.setdefault("OMP_NUM_THREADS", "1")  # silence libgomp spam on HF
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

import io
import tempfile
from pathlib import Path

import numpy as np
from PIL import Image
import torch
from transformers import GLPNForDepthEstimation, GLPNImageProcessor
import open3d as o3d
import gradio as gr
# ----------------------------
# Device & model (load once)
# ----------------------------
# Pick the best available backend: CUDA first, then Apple MPS, else CPU.
# The getattr guard keeps this working on torch builds without an `mps` backend.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device("cpu")

# Loaded once at import time and shared by every request.
PROCESSOR = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
MODEL = (
    GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")
    .to(DEVICE)
    .eval()
)
# ----------------------------
# Helpers (faithful to main.py logic)
# ----------------------------
def _resize_like_main(pil_img: Image.Image, cap_h: int = 480):
    """Downscale an image for the depth model while preserving aspect ratio.

    The target height is ``min(image height, cap_h)`` rounded down to a
    multiple of 32, but never less than 32. The width is scaled by the same
    factor and truncated to an integer (at least 1 pixel).

    Args:
        pil_img: Source image.
        cap_h: Upper bound for the resized height (480 mirrors main.py).

    Returns:
        ``(resized_image, (orig_width, orig_height))``.
    """
    orig_w, orig_h = pil_img.width, pil_img.height

    new_h = min(orig_h, cap_h)
    new_h -= new_h % 32
    new_h = max(new_h, 32)

    # Clamp to 1 px: for very tall, narrow inputs the integer truncation would
    # otherwise produce a zero width, which the resize call cannot handle.
    new_w = max(1, int(new_h * orig_w / orig_h))

    resized = pil_img.resize((new_w, new_h), Image.BILINEAR)
    return resized, (orig_w, orig_h)
@torch.inference_mode()
def estimate_depth_glpn(pil_img: Image.Image) -> np.ndarray:
    """Run GLPN on an image and return a depth map at the original resolution.

    This does not rely on the processor's ``post_process_depth()``; the
    prediction is upsampled back to the input size manually.

    Args:
        pil_img: Input image (the pipeline passes an RGB image).

    Returns:
        A float32 array of shape ``(orig_height, orig_width)``
        (larger = farther).
    """
    resized, (orig_w, orig_h) = _resize_like_main(pil_img)

    inputs = PROCESSOR(images=resized, return_tensors="pt")
    inputs = {name: tensor.to(DEVICE) for name, tensor in inputs.items()}

    pred = MODEL(**inputs).predicted_depth
    # GLPN emits predicted_depth as [B, h, w]. Indexing it with [0, 0] would
    # keep only the first image row, so select by rank and also accept a
    # [B, 1, h, w] layout in case a channel axis is present.
    if pred.dim() == 4:
        pred = pred[:, 0]
    depth = pred[0].float().detach().cpu().numpy()  # (h, w) at the resized size

    # Resize depth back to the original image size for the downstream Open3D steps.
    depth_img = Image.fromarray(depth)
    depth_full = depth_img.resize((orig_w, orig_h), Image.BILINEAR)
    return np.array(depth_full).astype(np.float32)
def depth_vis(depth: np.ndarray) -> Image.Image:
    """Build an 8-bit preview image from a depth map.

    The map is shifted so its minimum becomes 0 and scaled so its maximum
    becomes 255; NaNs are ignored when finding that range. A map whose values
    are all equal renders as all zeros.
    """
    shifted = depth - np.nanmin(depth)
    span = np.nanmax(shifted)
    # Avoid dividing by zero when the map is constant.
    scale = 1.0 if span <= 0 else span
    preview = (255.0 * shifted / scale).astype(np.uint8)
    return Image.fromarray(preview)
def rgbd_from_rgb_depth(rgb: Image.Image, depth_f32: np.ndarray) -> o3d.geometry.RGBDImage:
    """Pair a colour image with its depth map as an Open3D RGBD image.

    As in main.py, the depth is first rescaled to 0..255 and stored as uint8
    before being handed to ``create_from_color_and_depth``. Colour is kept
    as-is (no conversion to intensity).
    """
    # The small epsilon keeps the division defined when the map is all zeros.
    denom = depth_f32.max() + 1e-8
    depth_u8 = (depth_f32 * 255.0 / denom).astype(np.uint8)

    return o3d.geometry.RGBDImage.create_from_color_and_depth(
        o3d.geometry.Image(np.array(rgb)),
        o3d.geometry.Image(depth_u8),
        convert_rgb_to_intensity=False,
    )
def pointcloud_from_rgbd(
    rgbd: o3d.geometry.RGBDImage,
    w: int,
    h: int,
    fx: float = 500.0,
    fy: float = 500.0,
) -> o3d.geometry.PointCloud:
    """Back-project an RGBD image to a point cloud with a simple pinhole camera.

    The principal point is placed at the image centre ``(w / 2, h / 2)``.

    Args:
        rgbd: RGBD image to back-project.
        w: Image width in pixels.
        h: Image height in pixels.
        fx: Focal length along x in pixels (500.0 reproduces main.py).
        fy: Focal length along y in pixels (500.0 reproduces main.py).

    Returns:
        The point cloud created by Open3D from ``rgbd`` and the intrinsics.
    """
    intrinsics = o3d.camera.PinholeCameraIntrinsic()
    intrinsics.set_intrinsics(w, h, fx, fy, w / 2.0, h / 2.0)
    return o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intrinsics)
def filter_pointcloud(pcd: o3d.geometry.PointCloud):
    """Drop statistical outliers and estimate normals on what remains.

    An empty cloud is returned unchanged. Otherwise the points kept by
    ``remove_statistical_outlier`` (20 neighbours, std ratio 2.0) are selected
    into a new cloud, and normals are estimated on it with a hybrid KD-tree
    search (radius 0.05, at most 30 neighbours).
    """
    if len(pcd.points) == 0:
        return pcd

    # Only the inlier indices are needed; the selection is done explicitly below.
    _, inlier_idx = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
    kept = pcd.select_by_index(inlier_idx)

    normal_search = o3d.geometry.KDTreeSearchParamHybrid(radius=0.05, max_nn=30)
    kept.estimate_normals(search_param=normal_search)
    return kept
def poisson_mesh(pcd: o3d.geometry.PointCloud, rotate_up=True) -> o3d.geometry.TriangleMesh:
    """Reconstruct a triangle mesh from a point cloud with Poisson reconstruction.

    An empty cloud yields an empty mesh. Otherwise the reconstruction runs at
    depth 10 on a single thread; when ``rotate_up`` is true the mesh is rotated
    by pi about the X axis around the origin (the flip used in main.py).
    Vertex normals are computed before returning.
    """
    if len(pcd.points) == 0:
        return o3d.geometry.TriangleMesh()

    # The call returns a pair; only the mesh (first element) is used here.
    reconstruction = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
        pcd, depth=10, n_threads=1
    )
    mesh = reconstruction[0]

    if rotate_up:
        flip = mesh.get_rotation_matrix_from_xyz((np.pi, 0.0, 0.0))
        mesh.rotate(flip, center=(0, 0, 0))

    mesh.compute_vertex_normals()
    return mesh
def o3d_to_ply_bytes(geom: o3d.geometry.Geometry) -> bytes:
    """Serialize an Open3D geometry to .ply bytes.

    Point clouds are written with ``write_point_cloud``; anything else is
    written with ``write_triangle_mesh``.

    Args:
        geom: Geometry to serialize.

    Returns:
        The contents of the written .ply file.

    Raises:
        OSError: If Open3D reports that the write failed.
    """
    # Write into a private temporary directory rather than a fixed file name in
    # the working directory: overlapping calls cannot clobber each other, and
    # the file is removed even if writing or reading raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp = Path(tmp_dir) / "tmp_out.ply"
        if isinstance(geom, o3d.geometry.PointCloud):
            written = o3d.io.write_point_cloud(str(tmp), geom)
        else:
            written = o3d.io.write_triangle_mesh(str(tmp), geom)
        if not written:
            raise OSError(
                f"Open3D could not write {type(geom).__name__} to a .ply file"
            )
        return tmp.read_bytes()
def render_point_count(pcd: o3d.geometry.PointCloud) -> str:
    """Return a one-line summary of the cloud's point count, with thousands separators."""
    n_points = len(pcd.points)
    return f"Points: {n_points:,}"
def render_face_count(mesh: o3d.geometry.TriangleMesh) -> str:
    """Return a one-line summary of the mesh's vertex and triangle counts."""
    n_vertices = len(mesh.vertices)
    n_triangles = len(mesh.triangles)
    return f"Vertices: {n_vertices:,} | Triangles: {n_triangles:,}"
# ----------------------------
# Gradio pipeline
# ----------------------------
def pipeline(image: Image.Image):
    """Run the full image → depth → point cloud → mesh reconstruction.

    Args:
        image: Uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        A 7-tuple matching the UI outputs, in order: the RGB image, the depth
        preview image, point-cloud stats text, mesh stats text, the path of
        the point-cloud .ply file, the path of the mesh .ply file, and the
        newline-joined step log.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image of a room.")

    logs = ["Step 1 — Loaded image."]
    image = image.convert("RGB")
    w, h = image.size

    # Depth
    logs.append("Step 2 — Estimating depth with GLPN (vinvino02/glpn-nyu)…")
    depth = estimate_depth_glpn(image)
    depth_preview = depth_vis(depth)

    # RGBD
    logs.append("Step 3 — Creating RGBD image…")
    rgbd = rgbd_from_rgb_depth(image, depth)

    # Point cloud
    logs.append("Step 4 — Back-projecting to point cloud…")
    pcd = pointcloud_from_rgbd(rgbd, w, h)

    logs.append("Step 5 — Filtering noise & estimating normals…")
    pcd_f = filter_pointcloud(pcd)

    # Mesh
    logs.append("Step 6 — Poisson surface reconstruction…")
    mesh = poisson_mesh(pcd_f, rotate_up=True)

    # Downloads: the File outputs take file paths, not (name, bytes) pairs, so
    # each geometry is written to its own file in a fresh temporary directory.
    # The directory is intentionally not removed here because the files must
    # still exist after this function returns.
    logs.append("Step 7 — Preparing downloads…")
    out_dir = Path(tempfile.mkdtemp(prefix="recon3d_"))
    pcd_path = out_dir / "point_cloud.ply"
    mesh_path = out_dir / "mesh.ply"
    pcd_path.write_bytes(o3d_to_ply_bytes(pcd_f))
    mesh_path.write_bytes(o3d_to_ply_bytes(mesh))

    # Small text stats
    pcd_stats = render_point_count(pcd_f)
    mesh_stats = render_face_count(mesh)

    logs.append("Done.")
    return (
        image,  # RGB preview
        depth_preview,  # Depth preview
        pcd_stats,  # point cloud stats
        mesh_stats,  # mesh stats
        str(pcd_path),  # point cloud download
        str(mesh_path),  # mesh download
        "\n".join(logs),
    )
# UI layout: inputs and log on the left, previews, stats and downloads on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 2D → 3D (GLPN → RGBD → Point Cloud → Poisson Mesh)\nUpload a single image to reproduce your main.py workflow.")

    with gr.Row():
        # Left column: upload, trigger and step log.
        with gr.Column():
            inp = gr.Image(type="pil", label="Input Image")
            run = gr.Button("Reconstruct 3D", variant="primary")
            log_box = gr.Textbox(label="Log", lines=14, interactive=False)

        # Right column: results.
        with gr.Column():
            rgb_out = gr.Image(label="RGB Preview", interactive=False)
            depth_out = gr.Image(label="Depth Preview (8-bit normalized)", interactive=False)
            pc_txt = gr.Markdown()
            mesh_txt = gr.Markdown()
            pc_file = gr.File(label="Download Point Cloud (.ply)")
            mesh_file = gr.File(label="Download Mesh (.ply)")

    # Output order must match the tuple returned by `pipeline`.
    run.click(
        fn=pipeline,
        inputs=[inp],
        outputs=[
            rgb_out,
            depth_out,
            pc_txt,
            mesh_txt,
            pc_file,
            mesh_file,
            log_box,
        ],
        api_name="reconstruct",
    )

# NOTE: queue() is called without arguments on purpose; an earlier Spaces error
# came from passing unsupported arguments such as concurrency_count.
demo.queue()  # default queue works across Gradio 4.x
demo.launch()
|