Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -89,12 +89,14 @@ high_level_config = {
|
|
| 89 |
"resolution": 518,
|
| 90 |
}
|
| 91 |
|
| 92 |
-
# GroundingDINO and SAM Configuration
|
| 93 |
-
GROUNDING_DINO_MODEL_ID = "IDEA-Research/grounding-dino-tiny"
|
| 94 |
GROUNDING_DINO_BOX_THRESHOLD = 0.25
|
| 95 |
GROUNDING_DINO_TEXT_THRESHOLD = 0.2
|
| 96 |
|
| 97 |
-
|
|
|
|
|
|
|
| 98 |
|
| 99 |
DEFAULT_TEXT_PROMPT = "chair . table . sofa . bed . desk . cabinet"
|
| 100 |
|
|
@@ -154,8 +156,8 @@ sam_predictor = None
|
|
| 154 |
# Model Loading Functions
|
| 155 |
# ============================================================================
|
| 156 |
|
| 157 |
-
def load_grounding_dino_model(device):
|
| 158 |
-
"""Load GroundingDINO model from HuggingFace"""
|
| 159 |
global grounding_dino_model, grounding_dino_processor
|
| 160 |
|
| 161 |
if grounding_dino_model is not None:
|
|
@@ -165,13 +167,15 @@ def load_grounding_dino_model(device):
|
|
| 165 |
try:
|
| 166 |
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
|
| 167 |
|
| 168 |
-
|
|
|
|
|
|
|
| 169 |
grounding_dino_processor = AutoProcessor.from_pretrained(GROUNDING_DINO_MODEL_ID)
|
| 170 |
grounding_dino_model = AutoModelForZeroShotObjectDetection.from_pretrained(
|
| 171 |
GROUNDING_DINO_MODEL_ID
|
| 172 |
-
).to(
|
| 173 |
|
| 174 |
-
print("✅ GroundingDINO loaded successfully")
|
| 175 |
|
| 176 |
except Exception as e:
|
| 177 |
print(f"❌ GroundingDINO loading failed: {e}")
|
|
@@ -179,8 +183,8 @@ def load_grounding_dino_model(device):
|
|
| 179 |
traceback.print_exc()
|
| 180 |
|
| 181 |
|
| 182 |
-
def load_sam_model(device):
|
| 183 |
-
"""Load
|
| 184 |
global sam_predictor
|
| 185 |
|
| 186 |
if sam_predictor is not None:
|
|
@@ -190,8 +194,12 @@ def load_sam_model(device):
|
|
| 190 |
try:
|
| 191 |
from transformers import SamModel, SamProcessor
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
sam_processor = SamProcessor.from_pretrained(SAM_MODEL_ID)
|
| 196 |
|
| 197 |
# Wrap in a predictor-like interface
|
|
@@ -210,20 +218,22 @@ def load_sam_model(device):
|
|
| 210 |
self.image = Image.fromarray((image * 255).astype(np.uint8))
|
| 211 |
|
| 212 |
def predict(self, box, multimask_output=False):
|
| 213 |
-
"""Predict mask from box"""
|
| 214 |
inputs = self.processor(
|
| 215 |
self.image,
|
| 216 |
input_boxes=[[[box]]],
|
| 217 |
return_tensors="pt"
|
| 218 |
-
)
|
|
|
|
|
|
|
| 219 |
|
| 220 |
with torch.no_grad():
|
| 221 |
outputs = self.model(**inputs)
|
| 222 |
|
| 223 |
masks = self.processor.image_processor.post_process_masks(
|
| 224 |
outputs.pred_masks.cpu(),
|
| 225 |
-
inputs["original_sizes"].cpu(),
|
| 226 |
-
inputs["reshaped_input_sizes"].cpu()
|
| 227 |
)[0].squeeze().numpy()
|
| 228 |
|
| 229 |
if len(masks.shape) == 2:
|
|
@@ -231,8 +241,8 @@ def load_sam_model(device):
|
|
| 231 |
|
| 232 |
return masks, None, None
|
| 233 |
|
| 234 |
-
sam_predictor = SAMPredictor(sam_model, sam_processor,
|
| 235 |
-
print("✅
|
| 236 |
|
| 237 |
except Exception as e:
|
| 238 |
print(f"❌ SAM loading failed: {e}")
|
|
@@ -261,14 +271,14 @@ def generate_distinct_colors(n):
|
|
| 261 |
return colors
|
| 262 |
|
| 263 |
|
| 264 |
-
def run_grounding_dino_detection(image_np, text_prompt, device):
|
| 265 |
-
"""Run GroundingDINO detection"""
|
| 266 |
if grounding_dino_model is None or grounding_dino_processor is None:
|
| 267 |
print("⚠️ GroundingDINO not loaded")
|
| 268 |
return []
|
| 269 |
|
| 270 |
try:
|
| 271 |
-
print(f"🔍 GroundingDINO detection: {text_prompt}")
|
| 272 |
|
| 273 |
# Convert to PIL Image
|
| 274 |
if image_np.dtype == np.uint8:
|
|
@@ -276,9 +286,10 @@ def run_grounding_dino_detection(image_np, text_prompt, device):
|
|
| 276 |
else:
|
| 277 |
pil_image = Image.fromarray((image_np * 255).astype(np.uint8))
|
| 278 |
|
| 279 |
-
# Preprocess
|
|
|
|
| 280 |
inputs = grounding_dino_processor(images=pil_image, text=text_prompt, return_tensors="pt")
|
| 281 |
-
inputs = {k: v.to(
|
| 282 |
|
| 283 |
# Inference
|
| 284 |
with torch.no_grad():
|
|
@@ -705,11 +716,11 @@ def run_model(
|
|
| 705 |
|
| 706 |
model.eval()
|
| 707 |
|
| 708 |
-
# Load segmentation models if enabled
|
| 709 |
if enable_segmentation:
|
| 710 |
-
progress(0.1, desc="🎯
|
| 711 |
-
load_grounding_dino_model(
|
| 712 |
-
load_sam_model(
|
| 713 |
|
| 714 |
# Load images
|
| 715 |
progress(0.15, desc="📷 加载图片...")
|
|
@@ -800,7 +811,7 @@ def run_model(
|
|
| 800 |
else:
|
| 801 |
ref_image_np = ref_image
|
| 802 |
|
| 803 |
-
detections = run_grounding_dino_detection(ref_image_np, text_prompt,
|
| 804 |
|
| 805 |
if len(detections) > 0:
|
| 806 |
boxes = [d['bbox'] for d in detections]
|
|
@@ -820,6 +831,9 @@ def run_model(
|
|
| 820 |
all_view_masks.append([])
|
| 821 |
|
| 822 |
# Match objects across views
|
|
|
|
|
|
|
|
|
|
| 823 |
if any(len(dets) > 0 for dets in all_view_detections):
|
| 824 |
progress(0.85, desc="🔗 匹配跨视图物体...")
|
| 825 |
object_id_map, unique_objects = match_objects_across_views(all_view_detections)
|
|
@@ -830,6 +844,13 @@ def run_model(
|
|
| 830 |
processed_data, all_view_detections, all_view_masks,
|
| 831 |
object_id_map, unique_objects, target_dir
|
| 832 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 833 |
|
| 834 |
# Cleanup
|
| 835 |
progress(0.95, desc="🧹 清理内存...")
|
|
@@ -1174,6 +1195,13 @@ def gradio_demo(
|
|
| 1174 |
)
|
| 1175 |
|
| 1176 |
progress(1.0, desc="✅ 全部完成!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1177 |
|
| 1178 |
return (
|
| 1179 |
glbfile,
|
|
@@ -1652,6 +1680,37 @@ CUSTOM_CSS = GRADIO_CSS + """
|
|
| 1652 |
.tab-content {
|
| 1653 |
min-height: 550px !important;
|
| 1654 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1655 |
"""
|
| 1656 |
|
| 1657 |
with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与物体分割") as demo:
|
|
@@ -1659,6 +1718,9 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
|
|
| 1659 |
processed_data_state = gr.State(value=None)
|
| 1660 |
measure_points_state = gr.State(value=[])
|
| 1661 |
|
|
|
|
|
|
|
|
|
|
| 1662 |
# 顶部标题
|
| 1663 |
gr.HTML("""
|
| 1664 |
<div style="text-align: center; margin: 20px 0;">
|
|
@@ -1674,13 +1736,23 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
|
|
| 1674 |
with gr.Column(scale=1, min_width=300):
|
| 1675 |
gr.Markdown("### 📤 输入")
|
| 1676 |
|
|
|
|
| 1677 |
unified_upload = gr.File(
|
| 1678 |
file_count="multiple",
|
| 1679 |
-
label="
|
| 1680 |
interactive=True,
|
| 1681 |
file_types=["image", "video"],
|
| 1682 |
)
|
| 1683 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1684 |
with gr.Row():
|
| 1685 |
s_time_interval = gr.Slider(
|
| 1686 |
minimum=0.1, maximum=5.0, value=1.0, step=0.1,
|
|
@@ -1701,9 +1773,8 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
|
|
| 1701 |
show_download_button=True, object_fit="contain", preview=True
|
| 1702 |
)
|
| 1703 |
|
| 1704 |
-
|
| 1705 |
clear_uploads_btn = gr.ClearButton(
|
| 1706 |
-
[unified_upload, image_gallery],
|
| 1707 |
value="清空上传",
|
| 1708 |
variant="secondary",
|
| 1709 |
size="sm",
|
|
@@ -1712,7 +1783,7 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
|
|
| 1712 |
with gr.Row():
|
| 1713 |
submit_btn = gr.Button("🚀 开始重建", variant="primary", scale=2)
|
| 1714 |
clear_btn = gr.ClearButton(
|
| 1715 |
-
[unified_upload, target_dir_output, image_gallery],
|
| 1716 |
value="🗑️ 清空", scale=1
|
| 1717 |
)
|
| 1718 |
|
|
@@ -1803,8 +1874,9 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
|
|
| 1803 |
)
|
| 1804 |
|
| 1805 |
gr.Markdown("#### 分割参数")
|
|
|
|
| 1806 |
enable_segmentation = gr.Checkbox(
|
| 1807 |
-
label="启用语义分割", value=False
|
| 1808 |
)
|
| 1809 |
|
| 1810 |
text_prompt = gr.Textbox(
|
|
@@ -1818,6 +1890,8 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
|
|
| 1818 |
with gr.Row():
|
| 1819 |
detect_all_btn = gr.Button("🔍 检测所有", size="sm")
|
| 1820 |
restore_default_btn = gr.Button("↻ 默认", size="sm")
|
|
|
|
|
|
|
| 1821 |
|
| 1822 |
# 示例场景(可折叠)
|
| 1823 |
with gr.Accordion("🖼️ 示例场景", open=False):
|
|
@@ -1873,6 +1947,18 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
|
|
| 1873 |
"✅ 上传完成,点击「开始重建」进行 3D 处理",
|
| 1874 |
)
|
| 1875 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1876 |
def show_resample_button(files):
|
| 1877 |
"""仅当上传的文件包含视频时显示重新采样按钮"""
|
| 1878 |
if not files:
|
|
@@ -1955,6 +2041,13 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
|
|
| 1955 |
outputs=[resample_btn],
|
| 1956 |
)
|
| 1957 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1958 |
# 滑块改变时显示重新采样按钮(仅当已上传文件时)
|
| 1959 |
s_time_interval.change(
|
| 1960 |
fn=show_resample_button,
|
|
@@ -2076,12 +2169,16 @@ with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与
|
|
| 2076 |
)
|
| 2077 |
|
| 2078 |
# 启动信息
|
| 2079 |
-
print("\n" + "="*
|
| 2080 |
print("🚀 MapAnything V2 - 3D重建与物体分割")
|
| 2081 |
-
print("="*
|
| 2082 |
print("📊 核心技术: 自适应DBSCAN聚类 + 多视图融合")
|
| 2083 |
print(f"🔧 质量控制: 置信度≥{MIN_DETECTION_CONFIDENCE} | 面积≥{MIN_MASK_AREA}px")
|
| 2084 |
print(f"🎯 聚类半径: 沙发{DBSCAN_EPS_CONFIG['sofa']}m | 桌子{DBSCAN_EPS_CONFIG['table']}m | 窗户{DBSCAN_EPS_CONFIG['window']}m | 默认{DBSCAN_EPS_CONFIG['default']}m")
|
| 2085 |
-
print("
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2086 |
|
| 2087 |
demo.queue(max_size=20).launch(show_error=True, share=True, ssr_mode=False)
|
|
|
|
| 89 |
"resolution": 518,
|
| 90 |
}
|
| 91 |
|
| 92 |
+
# GroundingDINO and SAM configuration (CPU-friendly versions).
GROUNDING_DINO_MODEL_ID = "IDEA-Research/grounding-dino-tiny" # already the "tiny" variant
# Detection thresholds for GroundingDINO (box score / text score).
GROUNDING_DINO_BOX_THRESHOLD = 0.25
GROUNDING_DINO_TEXT_THRESHOLD = 0.2

# Use MobileSAM, a lightweight SAM variant chosen here for CPU inference.
# NOTE(review): the original comment claimed "60x faster than SAM-huge, only
# 10MB"; neither figure can be verified from this file -- confirm against the
# model card before repeating it in user-facing text.
# NOTE(review): this ID is passed to transformers' SamModel.from_pretrained /
# SamProcessor.from_pretrained in load_sam_model; confirm the repository ships
# transformers-format weights and config, otherwise loading will fail.
SAM_MODEL_ID = "dhkim2810/MobileSAM" # lightweight SAM, intended for CPU
USE_MOBILE_SAM = True # flag marking that MobileSAM is in use

# Default GroundingDINO text prompt: object classes separated by " . ".
DEFAULT_TEXT_PROMPT = "chair . table . sofa . bed . desk . cabinet"
|
| 102 |
|
|
|
|
| 156 |
# Model Loading Functions
|
| 157 |
# ============================================================================
|
| 158 |
|
| 159 |
+
def load_grounding_dino_model(device="cpu"):
|
| 160 |
+
"""Load GroundingDINO model from HuggingFace (CPU优化)"""
|
| 161 |
global grounding_dino_model, grounding_dino_processor
|
| 162 |
|
| 163 |
if grounding_dino_model is not None:
|
|
|
|
| 167 |
try:
|
| 168 |
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
|
| 169 |
|
| 170 |
+
# 强制使用 CPU 进行分割(节省 GPU 资源)
|
| 171 |
+
seg_device = "cpu"
|
| 172 |
+
print(f"📥 Loading GroundingDINO from HuggingFace: {GROUNDING_DINO_MODEL_ID} (使用 {seg_device.upper()})")
|
| 173 |
grounding_dino_processor = AutoProcessor.from_pretrained(GROUNDING_DINO_MODEL_ID)
|
| 174 |
grounding_dino_model = AutoModelForZeroShotObjectDetection.from_pretrained(
|
| 175 |
GROUNDING_DINO_MODEL_ID
|
| 176 |
+
).to(seg_device).eval()
|
| 177 |
|
| 178 |
+
print(f"✅ GroundingDINO loaded successfully on {seg_device.upper()}")
|
| 179 |
|
| 180 |
except Exception as e:
|
| 181 |
print(f"❌ GroundingDINO loading failed: {e}")
|
|
|
|
| 183 |
traceback.print_exc()
|
| 184 |
|
| 185 |
|
| 186 |
+
def load_sam_model(device="cpu"):
|
| 187 |
+
"""Load MobileSAM model from HuggingFace (CPU优化,比SAM快60倍)"""
|
| 188 |
global sam_predictor
|
| 189 |
|
| 190 |
if sam_predictor is not None:
|
|
|
|
| 194 |
try:
|
| 195 |
from transformers import SamModel, SamProcessor
|
| 196 |
|
| 197 |
+
# 强制使用 CPU 进行分割(MobileSAM 专为移动设备/CPU优化)
|
| 198 |
+
seg_device = "cpu"
|
| 199 |
+
print(f"📥 Loading MobileSAM from HuggingFace: {SAM_MODEL_ID} (使用 {seg_device.upper()})")
|
| 200 |
+
print(f" 💡 MobileSAM 是轻量级版本,比 SAM-huge 快60倍,只有10MB,适合CPU运行")
|
| 201 |
+
|
| 202 |
+
sam_model = SamModel.from_pretrained(SAM_MODEL_ID).to(seg_device).eval()
|
| 203 |
sam_processor = SamProcessor.from_pretrained(SAM_MODEL_ID)
|
| 204 |
|
| 205 |
# Wrap in a predictor-like interface
|
|
|
|
| 218 |
self.image = Image.fromarray((image * 255).astype(np.uint8))
|
| 219 |
|
| 220 |
def predict(self, box, multimask_output=False):
|
| 221 |
+
"""Predict mask from box (CPU优化)"""
|
| 222 |
inputs = self.processor(
|
| 223 |
self.image,
|
| 224 |
input_boxes=[[[box]]],
|
| 225 |
return_tensors="pt"
|
| 226 |
+
)
|
| 227 |
+
# 确保在CPU上运行
|
| 228 |
+
inputs = {k: v.to(self.device) for k, v in inputs.items() if isinstance(v, torch.Tensor)}
|
| 229 |
|
| 230 |
with torch.no_grad():
|
| 231 |
outputs = self.model(**inputs)
|
| 232 |
|
| 233 |
masks = self.processor.image_processor.post_process_masks(
|
| 234 |
outputs.pred_masks.cpu(),
|
| 235 |
+
inputs["original_sizes"].cpu() if "original_sizes" in inputs else outputs.pred_masks.new_tensor([[self.image.height, self.image.width]]),
|
| 236 |
+
inputs["reshaped_input_sizes"].cpu() if "reshaped_input_sizes" in inputs else outputs.pred_masks.new_tensor([[self.image.height, self.image.width]])
|
| 237 |
)[0].squeeze().numpy()
|
| 238 |
|
| 239 |
if len(masks.shape) == 2:
|
|
|
|
| 241 |
|
| 242 |
return masks, None, None
|
| 243 |
|
| 244 |
+
sam_predictor = SAMPredictor(sam_model, sam_processor, seg_device)
|
| 245 |
+
print(f"✅ MobileSAM loaded successfully on {seg_device.upper()}")
|
| 246 |
|
| 247 |
except Exception as e:
|
| 248 |
print(f"❌ SAM loading failed: {e}")
|
|
|
|
| 271 |
return colors
|
| 272 |
|
| 273 |
|
| 274 |
+
def run_grounding_dino_detection(image_np, text_prompt, device="cpu"):
|
| 275 |
+
"""Run GroundingDINO detection (CPU优化)"""
|
| 276 |
if grounding_dino_model is None or grounding_dino_processor is None:
|
| 277 |
print("⚠️ GroundingDINO not loaded")
|
| 278 |
return []
|
| 279 |
|
| 280 |
try:
|
| 281 |
+
print(f"🔍 GroundingDINO detection (CPU): {text_prompt}")
|
| 282 |
|
| 283 |
# Convert to PIL Image
|
| 284 |
if image_np.dtype == np.uint8:
|
|
|
|
| 286 |
else:
|
| 287 |
pil_image = Image.fromarray((image_np * 255).astype(np.uint8))
|
| 288 |
|
| 289 |
+
# Preprocess - 强制使用CPU
|
| 290 |
+
seg_device = "cpu"
|
| 291 |
inputs = grounding_dino_processor(images=pil_image, text=text_prompt, return_tensors="pt")
|
| 292 |
+
inputs = {k: v.to(seg_device) for k, v in inputs.items() if isinstance(v, torch.Tensor)}
|
| 293 |
|
| 294 |
# Inference
|
| 295 |
with torch.no_grad():
|
|
|
|
| 716 |
|
| 717 |
model.eval()
|
| 718 |
|
| 719 |
+
# Load segmentation models if enabled (使用CPU节省GPU资源)
|
| 720 |
if enable_segmentation:
|
| 721 |
+
progress(0.1, desc="🎯 加载分割模型 (CPU)...")
|
| 722 |
+
load_grounding_dino_model("cpu") # 分割使用CPU
|
| 723 |
+
load_sam_model("cpu") # MobileSAM在CPU上运行良好
|
| 724 |
|
| 725 |
# Load images
|
| 726 |
progress(0.15, desc="📷 加载图片...")
|
|
|
|
| 811 |
else:
|
| 812 |
ref_image_np = ref_image
|
| 813 |
|
| 814 |
+
detections = run_grounding_dino_detection(ref_image_np, text_prompt, "cpu") # 使用CPU进行检测
|
| 815 |
|
| 816 |
if len(detections) > 0:
|
| 817 |
boxes = [d['bbox'] for d in detections]
|
|
|
|
| 831 |
all_view_masks.append([])
|
| 832 |
|
| 833 |
# Match objects across views
|
| 834 |
+
total_detections = sum(len(dets) for dets in all_view_detections)
|
| 835 |
+
print(f"\n📊 总检测数: {total_detections}")
|
| 836 |
+
|
| 837 |
if any(len(dets) > 0 for dets in all_view_detections):
|
| 838 |
progress(0.85, desc="🔗 匹配跨视图物体...")
|
| 839 |
object_id_map, unique_objects = match_objects_across_views(all_view_detections)
|
|
|
|
| 844 |
processed_data, all_view_detections, all_view_masks,
|
| 845 |
object_id_map, unique_objects, target_dir
|
| 846 |
)
|
| 847 |
+
|
| 848 |
+
if segmented_glb:
|
| 849 |
+
print(f"✅ 分割3D模型已生成: {segmented_glb}")
|
| 850 |
+
else:
|
| 851 |
+
print(f"⚠️ 分割3D模型生成失败")
|
| 852 |
+
else:
|
| 853 |
+
print(f"⚠️ 未检测到任何物体,无法生成分割模型")
|
| 854 |
|
| 855 |
# Cleanup
|
| 856 |
progress(0.95, desc="🧹 清理内存...")
|
|
|
|
| 1195 |
)
|
| 1196 |
|
| 1197 |
progress(1.0, desc="✅ 全部完成!")
|
| 1198 |
+
|
| 1199 |
+
# 添加分割状态信息
|
| 1200 |
+
if enable_segmentation:
|
| 1201 |
+
if segmented_glb:
|
| 1202 |
+
log_msg += f"\n🎨 分割模型已生成"
|
| 1203 |
+
else:
|
| 1204 |
+
log_msg += f"\n⚠️ 未检测到物体,无分割模型"
|
| 1205 |
|
| 1206 |
return (
|
| 1207 |
glbfile,
|
|
|
|
| 1680 |
.tab-content {
|
| 1681 |
min-height: 550px !important;
|
| 1682 |
}
|
| 1683 |
+
|
| 1684 |
+
/* 增强文件上传区域 */
|
| 1685 |
+
.file-upload-enhanced {
|
| 1686 |
+
position: relative;
|
| 1687 |
+
}
|
| 1688 |
+
"""
|
| 1689 |
+
|
| 1690 |
+
# JavaScript for clipboard-paste support.
#
# The snippet listens for `paste` events on the document and forwards every
# pasted image to the first multi-file <input> on the page, then fires a
# bubbling `change` event so the upload component picks the files up.
#
# Fixes over the previous version:
#   * all pasted images are collected into ONE DataTransfer and assigned once;
#     previously a fresh DataTransfer was created per clipboard item, so only
#     the last image survived and `change` fired once per image;
#   * a missing `clipboardData` (or a null `getAsFile()` result) no longer
#     raises a TypeError inside the handler.
#
# NOTE(review): this markup is rendered through gr.HTML. Browsers do not run
# <script> elements inserted via innerHTML, and the Gradio HTML component
# documents that JavaScript is not executed -- confirm the handler actually
# runs in the deployed Gradio version; if not, load it through the `head=` /
# `js=` parameters of gr.Blocks instead.
PASTE_JS = """
<script>
// 添加粘贴板支持
document.addEventListener('paste', function(e) {
    const clipboard = e.clipboardData;
    if (!clipboard || !clipboard.items) {
        return;
    }
    const fileInput = document.querySelector('input[type="file"][multiple]');
    if (!fileInput) {
        return;
    }
    // Gather every pasted image first so a multi-image paste is not truncated.
    const dataTransfer = new DataTransfer();
    for (let i = 0; i < clipboard.items.length; i++) {
        const item = clipboard.items[i];
        if (item.type.indexOf('image') === -1) {
            continue;
        }
        const blob = item.getAsFile();
        if (blob) {
            dataTransfer.items.add(blob);
        }
    }
    if (dataTransfer.files.length === 0) {
        return;
    }
    fileInput.files = dataTransfer.files;
    fileInput.dispatchEvent(new Event('change', { bubbles: true }));
    console.log('✅ 图片已从剪贴板粘贴');
});

// 添加提示信息
console.log('💡 粘贴板功能已启用:使用 Ctrl+V 可直接粘贴截图');
</script>
"""
|
| 1715 |
|
| 1716 |
with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="MapAnything V2 - 3D重建与物体分割") as demo:
|
|
|
|
| 1718 |
processed_data_state = gr.State(value=None)
|
| 1719 |
measure_points_state = gr.State(value=[])
|
| 1720 |
|
| 1721 |
+
# 添加粘贴板支持的 JavaScript
|
| 1722 |
+
gr.HTML(PASTE_JS)
|
| 1723 |
+
|
| 1724 |
# 顶部标题
|
| 1725 |
gr.HTML("""
|
| 1726 |
<div style="text-align: center; margin: 20px 0;">
|
|
|
|
| 1736 |
with gr.Column(scale=1, min_width=300):
|
| 1737 |
gr.Markdown("### 📤 输入")
|
| 1738 |
|
| 1739 |
+
# 统一上传组件(支持文件、拖拽、粘贴板)
|
| 1740 |
unified_upload = gr.File(
|
| 1741 |
file_count="multiple",
|
| 1742 |
+
label="上传视频或图片(支持拖拽、粘贴Ctrl+V📋)",
|
| 1743 |
interactive=True,
|
| 1744 |
file_types=["image", "video"],
|
| 1745 |
)
|
| 1746 |
|
| 1747 |
+
# 摄像头输入(折叠式)
|
| 1748 |
+
with gr.Accordion("📷 使用摄像头拍照", open=False):
|
| 1749 |
+
camera_input = gr.Image(
|
| 1750 |
+
label="拍照后自动添加",
|
| 1751 |
+
sources=["webcam"],
|
| 1752 |
+
type="filepath",
|
| 1753 |
+
interactive=True,
|
| 1754 |
+
)
|
| 1755 |
+
|
| 1756 |
with gr.Row():
|
| 1757 |
s_time_interval = gr.Slider(
|
| 1758 |
minimum=0.1, maximum=5.0, value=1.0, step=0.1,
|
|
|
|
| 1773 |
show_download_button=True, object_fit="contain", preview=True
|
| 1774 |
)
|
| 1775 |
|
|
|
|
| 1776 |
clear_uploads_btn = gr.ClearButton(
|
| 1777 |
+
[unified_upload, camera_input, image_gallery],
|
| 1778 |
value="清空上传",
|
| 1779 |
variant="secondary",
|
| 1780 |
size="sm",
|
|
|
|
| 1783 |
with gr.Row():
|
| 1784 |
submit_btn = gr.Button("🚀 开始重建", variant="primary", scale=2)
|
| 1785 |
clear_btn = gr.ClearButton(
|
| 1786 |
+
[unified_upload, camera_input, target_dir_output, image_gallery],
|
| 1787 |
value="🗑️ 清空", scale=1
|
| 1788 |
)
|
| 1789 |
|
|
|
|
| 1874 |
)
|
| 1875 |
|
| 1876 |
gr.Markdown("#### 分割参数")
|
| 1877 |
+
gr.Markdown("💡 **说明**: 分割使用 CPU 运行(MobileSAM轻量级模型),不占用GPU资源")
|
| 1878 |
enable_segmentation = gr.Checkbox(
|
| 1879 |
+
label="启用语义分割 (CPU)", value=False
|
| 1880 |
)
|
| 1881 |
|
| 1882 |
text_prompt = gr.Textbox(
|
|
|
|
| 1890 |
with gr.Row():
|
| 1891 |
detect_all_btn = gr.Button("🔍 检测所有", size="sm")
|
| 1892 |
restore_default_btn = gr.Button("↻ 默认", size="sm")
|
| 1893 |
+
|
| 1894 |
+
gr.Markdown("📌 **提示**: 启用后会在「分割3D」标签页显示彩色分割模型")
|
| 1895 |
|
| 1896 |
# 示例场景(可折叠)
|
| 1897 |
with gr.Accordion("🖼️ 示例场景", open=False):
|
|
|
|
| 1947 |
"✅ 上传完成,点击「开始重建」进行 3D 处理",
|
| 1948 |
)
|
| 1949 |
|
| 1950 |
+
# Handle a photo captured with the webcam component.
def update_gallery_on_camera(image):
    """Route a single webcam capture through the regular upload pipeline.

    Args:
        image: Value delivered by the camera component (configured with
            ``type="filepath"``), or ``None`` when there is no capture.

    Returns:
        A 3-tuple ``(target_dir, image_paths, status_message)``. All three
        entries are ``None`` when ``image`` is ``None``.
    """
    if image is not None:
        # handle_uploads takes a list of files, so wrap the single capture.
        # NOTE(review): the second argument (1.0) is presumably the video
        # sampling interval and irrelevant for a still image -- confirm.
        upload_dir, gallery_paths = handle_uploads([image], 1.0)
        status = "✅ 摄像头照片已添加,点击「开始重建」进行 3D 处理"
        return upload_dir, gallery_paths, status

    return None, None, None
|
| 1961 |
+
|
| 1962 |
def show_resample_button(files):
|
| 1963 |
"""仅当上传的文件包含视频时显示重新采样按钮"""
|
| 1964 |
if not files:
|
|
|
|
| 2041 |
outputs=[resample_btn],
|
| 2042 |
)
|
| 2043 |
|
| 2044 |
+
# 摄像头拍照事件
|
| 2045 |
+
camera_input.change(
|
| 2046 |
+
fn=update_gallery_on_camera,
|
| 2047 |
+
inputs=[camera_input],
|
| 2048 |
+
outputs=[target_dir_output, image_gallery, log_output]
|
| 2049 |
+
)
|
| 2050 |
+
|
| 2051 |
# 滑块改变时显示重新采样按钮(仅当已上传文件时)
|
| 2052 |
s_time_interval.change(
|
| 2053 |
fn=show_resample_button,
|
|
|
|
| 2169 |
)
|
| 2170 |
|
| 2171 |
# Startup banner: print the active quality-control, clustering and
# segmentation settings, then start the Gradio app.
banner_rule = "=" * 70

print(f"\n{banner_rule}")
print("🚀 MapAnything V2 - 3D重建与物体分割")
print(banner_rule)
print("📊 核心技术: 自适应DBSCAN聚类 + 多视图融合")
print(f"🔧 质量控制: 置信度≥{MIN_DETECTION_CONFIDENCE} | 面积≥{MIN_MASK_AREA}px")
print(
    f"🎯 聚类半径: 沙发{DBSCAN_EPS_CONFIG['sofa']}m"
    f" | 桌子{DBSCAN_EPS_CONFIG['table']}m"
    f" | 窗户{DBSCAN_EPS_CONFIG['window']}m"
    f" | 默认{DBSCAN_EPS_CONFIG['default']}m"
)
print("\n💡 分割配置 (CPU优化):")
print(f" - 检测模型: {GROUNDING_DINO_MODEL_ID} (CPU)")
print(f" - 分割模型: {SAM_MODEL_ID} (MobileSAM, 10MB, CPU)")
# No placeholders on this line, so a plain string literal is enough.
print(" - 运行设备: CPU (不占用GPU资源,适合分离部署)")
print(f"{banner_rule}\n")

# Queue up to 20 pending requests, then launch the app.
demo.queue(max_size=20).launch(show_error=True, share=True, ssr_mode=False)
|