osherr committed on
Commit
c6dd5dc
·
verified ·
1 Parent(s): 6029516

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +173 -36
src/streamlit_app.py CHANGED
@@ -29,7 +29,12 @@ STRIDE = 4
29
 
30
  # Keep LoRA normalization from your local code
31
  IMAGENET_MEAN = (0.430, 0.411, 0.296)
32
- IMAGENET_STD = (0.213, 0.156, 0.143)
 
 
 
 
 
33
 
34
  # ============================================================
35
  # HELPERS
@@ -58,6 +63,65 @@ def preview_rgb(rgb_raw):
58
  rgb = rgb / (np.percentile(rgb, 98) + 1e-6)
59
  return np.clip(rgb, 0, 1)
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # ============================================================
62
  # MODELS
63
  # ============================================================
@@ -138,9 +202,9 @@ def get_lora_params(model):
138
  params.extend(list(module.B.parameters()))
139
  return params
140
 
 
141
  # ============================================================
142
  # MODEL LOADING
143
- # Uses old app paths / loading logic
144
  # ============================================================
145
  @st.cache_resource
146
  def load_models(repo_id, dav_file, dino_file):
@@ -196,15 +260,14 @@ def load_models(repo_id, dav_file, dino_file):
196
 
197
  return dav_model, dino_model
198
 
 
199
  # ============================================================
200
  # DEPTH ANYTHING INFERENCE
201
- # Use exactly the old app style: raw DA output, no disparity2depth
202
  # ============================================================
203
  @st.cache_data(show_spinner=False)
204
  def run_dav_inference(_dav, rgb_raw, h_f, w_f):
205
  img_448 = cv2.resize(rgb_raw.transpose(1, 2, 0), (448, 448))
206
 
207
- # Use torch.tensor for HF stability
208
  dav_in = torch.tensor(img_448, device=DEVICE).permute(2, 0, 1).unsqueeze(0).float() / 255.0
209
 
210
  with torch.no_grad():
@@ -217,17 +280,17 @@ def run_dav_inference(_dav, rgb_raw, h_f, w_f):
217
  size=(h_f, w_f),
218
  mode="bilinear",
219
  align_corners=False
220
- ).squeeze(1) # [1,H,W]
221
 
222
  raw_depth_map = raw_depth[0].detach().float().cpu().numpy()
223
 
224
- # optional normalized version for more stable LoRA fitting / plotting
225
  valid = np.isfinite(raw_depth_map)
226
  raw_depth_01 = normalize_01(raw_depth_map, valid)
227
  raw_depth_01[~valid] = np.nan
228
 
229
  return raw_depth_map, raw_depth_01
230
 
 
231
  # ============================================================
232
  # MAIN LORA PIPELINE
233
  # ============================================================
@@ -253,7 +316,6 @@ def run_lora_pipeline(
253
  anchor_mask = anchor_mask_cpu.to(DEVICE)
254
  prior_gpu = prior_raw_t.to(DEVICE)
255
 
256
- # make fresh copy each run
257
  dino = copy.deepcopy(dino_base)
258
  dino = inject_lora(dino, r=lora_r, alpha=lora_alpha).to(DEVICE).train()
259
 
@@ -273,8 +335,8 @@ def run_lora_pipeline(
273
 
274
  Hp, Wp = H // PATCH_SIZE, W // PATCH_SIZE
275
  prior_p = F.interpolate(prior_gpu.view(1, 1, H, W), size=(Hp, Wp), mode="bilinear").flatten()
276
- rel_p = F.interpolate(rel_cpu.view(1, 1, H, W), size=(Hp, Wp), mode="bilinear").flatten()
277
- mask_p = F.interpolate(anchor_mask.float().view(1, 1, H, W), size=(Hp, Wp), mode="area").flatten() > 0.5
278
 
279
  loss_hist = []
280
  prog = st.progress(0, text="Running LoRA TTO...")
@@ -297,7 +359,6 @@ def run_lora_pipeline(
297
 
298
  prog.empty()
299
 
300
- # dense stride-4 inference
301
  dino.eval()
302
  mlp_head.eval()
303
 
@@ -323,11 +384,11 @@ def run_lora_pipeline(
323
 
324
  sb_local = mlp_head(t).t().reshape(2, hc // p, wc // p)
325
 
326
- sb_acc[:, dy//stride:dy//stride + (hc//p)*(p//stride):p//stride,
327
- dx//stride:dx//stride + (wc//p)*(p//stride):p//stride] += sb_local
328
 
329
- cnt_acc[:, dy//stride:dy//stride + (hc//p)*(p//stride):p//stride,
330
- dx//stride:dx//stride + (wc//p)*(p//stride):p//stride] += 1
331
 
332
  sb_dense = sb_acc / (cnt_acc + 1e-8)
333
  offset = (p - (p // 2)) // stride + 1
@@ -350,15 +411,54 @@ def run_lora_pipeline(
350
 
351
  return final_dsm, loss_hist, anchor_mask_cpu.cpu().numpy()
352
 
 
353
  # ============================================================
354
  # UI
355
  # ============================================================
356
  st.title("Prior2DSM | LoRA")
357
 
 
 
 
 
 
 
 
 
358
  with st.sidebar:
359
  st.header("📂 Data")
360
- rgb_file = st.file_uploader("RGB Image", type=["tif", "tiff"])
361
- prior_file = st.file_uploader("LiDAR Prior", type=["tif", "tiff"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
 
363
  st.divider()
364
  st.write("#### LoRA / TTO")
@@ -367,20 +467,29 @@ with st.sidebar:
367
  tto_steps = st.slider("TTO steps", 10, 300, 100, step=10)
368
  tto_lr = st.select_slider("TTO LR", options=[1e-4, 3e-4, 1e-3, 3e-3], value=1e-3)
369
 
370
- if rgb_file and prior_file:
 
 
 
 
 
 
 
 
371
  dav_m, dino_base = load_models(
372
  repo_id="osherr/Prior2DSM",
373
  dav_file="depth_anything_v2_vitl.pth",
374
  dino_file="dinov3_vitl16_pretrain_sat493m-eadcf0ff.pth"
375
  )
376
 
377
- with rasterio.open(BytesIO(rgb_file.read())) as src:
378
- rgb_raw = src.read([1, 2, 3])
379
- h_f, w_f = src.height, src.width
 
 
 
380
 
381
- with rasterio.open(BytesIO(prior_file.read())) as src:
382
- prior_raw = src.read(1).astype(np.float32)
383
- prior_meta = src.meta.copy()
384
 
385
  with st.spinner("Generating relative depth with Depth Anything V2..."):
386
  rel_depth_map, rel_depth_01 = run_dav_inference(dav_m, rgb_raw, h_f, w_f)
@@ -388,15 +497,45 @@ if rgb_file and prior_file:
388
  st.subheader("1. ROI Selection")
389
 
390
  viz_rgb = preview_rgb(rgb_raw)
391
-
392
  col_img, col_ctrl = st.columns([1.2, 0.8])
393
 
394
  with col_ctrl:
395
- x_center = st.slider("X center", 0, w_f - 1, w_f // 2)
396
- y_center = st.slider("Y center", 0, h_f - 1, h_f // 2)
397
- bbox_size = st.slider("BBox Size (px)", 50, min(400, min(h_f, w_f)), 200)
398
- use_normalized_rel = st.checkbox("Use normalized relative depth for LoRA", value=True)
399
- run_btn = st.button("🚀 Run LoRA Pipeline", type="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
  half_s = bbox_size // 2
402
  x1, x2 = max(0, x_center - half_s), min(w_f, x_center + half_s)
@@ -406,12 +545,8 @@ if rgb_file and prior_file:
406
  bbox_mask[y1:y2, x1:x2] = True
407
 
408
  with col_img:
409
- fig_roi, ax_roi = plt.subplots(figsize=(6, 6))
410
- ax_roi.imshow(viz_rgb)
411
- ax_roi.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor="red", lw=2))
412
- ax_roi.set_title("ROI Preview")
413
- ax_roi.axis("off")
414
- st.pyplot(fig_roi)
415
 
416
  if run_btn:
417
  rel_for_lora = rel_depth_01 if use_normalized_rel else rel_depth_map
@@ -512,4 +647,6 @@ if rgb_file and prior_file:
512
  "Download Georeferenced DSM",
513
  out_buf.getvalue(),
514
  file_name="lora_refined_dsm_georef.tif"
515
- )
 
 
 
29
 
30
  # Keep LoRA normalization from your local code
31
  IMAGENET_MEAN = (0.430, 0.411, 0.296)
32
+ IMAGENET_STD = (0.213, 0.156, 0.143)
33
+
34
+ # Example files inside the HF Space repo
35
+ EXAMPLE_RGB_FILENAME = "examples/example_rgb.tif"
36
+ EXAMPLE_PRIOR_FILENAME = "examples/example_prior.tif"
37
+
38
 
39
  # ============================================================
40
  # HELPERS
 
63
  rgb = rgb / (np.percentile(rgb, 98) + 1e-6)
64
  return np.clip(rgb, 0, 1)
65
 
66
+
67
+ def draw_roi_preview(viz_rgb, x1, y1, x2, y2):
68
+ preview = (np.clip(viz_rgb, 0, 1) * 255).astype(np.uint8).copy()
69
+ cv2.rectangle(preview, (x1, y1), (x2, y2), (255, 0, 0), 2)
70
+ return preview
71
+
72
+
73
+ @st.cache_data(show_spinner=False)
74
+ def load_tiff_from_hf(repo_id, filename, repo_type="space"):
75
+ return hf_hub_download(repo_id=repo_id, filename=filename, repo_type=repo_type)
76
+
77
+
78
+ def read_rgb_tiff(path_or_bytes):
79
+ if isinstance(path_or_bytes, (str, os.PathLike)):
80
+ with rasterio.open(path_or_bytes) as src:
81
+ rgb_raw = src.read([1, 2, 3])
82
+ h_f, w_f = src.height, src.width
83
+ meta = src.meta.copy()
84
+ else:
85
+ with rasterio.open(BytesIO(path_or_bytes)) as src:
86
+ rgb_raw = src.read([1, 2, 3])
87
+ h_f, w_f = src.height, src.width
88
+ meta = src.meta.copy()
89
+ return rgb_raw, h_f, w_f, meta
90
+
91
+
92
+ def read_prior_tiff(path_or_bytes):
93
+ if isinstance(path_or_bytes, (str, os.PathLike)):
94
+ with rasterio.open(path_or_bytes) as src:
95
+ prior_raw = src.read(1).astype(np.float32)
96
+ meta = src.meta.copy()
97
+ else:
98
+ with rasterio.open(BytesIO(path_or_bytes)) as src:
99
+ prior_raw = src.read(1).astype(np.float32)
100
+ meta = src.meta.copy()
101
+ return prior_raw, meta
102
+
103
+
104
+ def init_roi_state(h_f, w_f):
105
+ if "x_center" not in st.session_state:
106
+ st.session_state["x_center"] = w_f // 2
107
+ if "y_center" not in st.session_state:
108
+ st.session_state["y_center"] = h_f // 2
109
+ if "bbox_size" not in st.session_state:
110
+ st.session_state["bbox_size"] = min(200, min(h_f, w_f))
111
+ if "use_normalized_rel" not in st.session_state:
112
+ st.session_state["use_normalized_rel"] = True
113
+ if "loaded_shape" not in st.session_state:
114
+ st.session_state["loaded_shape"] = (h_f, w_f)
115
+
116
+ prev_shape = st.session_state.get("loaded_shape", None)
117
+ if prev_shape != (h_f, w_f):
118
+ st.session_state["x_center"] = w_f // 2
119
+ st.session_state["y_center"] = h_f // 2
120
+ st.session_state["bbox_size"] = min(200, min(h_f, w_f))
121
+ st.session_state["use_normalized_rel"] = True
122
+ st.session_state["loaded_shape"] = (h_f, w_f)
123
+
124
+
125
  # ============================================================
126
  # MODELS
127
  # ============================================================
 
202
  params.extend(list(module.B.parameters()))
203
  return params
204
 
205
+
206
  # ============================================================
207
  # MODEL LOADING
 
208
  # ============================================================
209
  @st.cache_resource
210
  def load_models(repo_id, dav_file, dino_file):
 
260
 
261
  return dav_model, dino_model
262
 
263
+
264
  # ============================================================
265
  # DEPTH ANYTHING INFERENCE
 
266
  # ============================================================
267
  @st.cache_data(show_spinner=False)
268
  def run_dav_inference(_dav, rgb_raw, h_f, w_f):
269
  img_448 = cv2.resize(rgb_raw.transpose(1, 2, 0), (448, 448))
270
 
 
271
  dav_in = torch.tensor(img_448, device=DEVICE).permute(2, 0, 1).unsqueeze(0).float() / 255.0
272
 
273
  with torch.no_grad():
 
280
  size=(h_f, w_f),
281
  mode="bilinear",
282
  align_corners=False
283
+ ).squeeze(1)
284
 
285
  raw_depth_map = raw_depth[0].detach().float().cpu().numpy()
286
 
 
287
  valid = np.isfinite(raw_depth_map)
288
  raw_depth_01 = normalize_01(raw_depth_map, valid)
289
  raw_depth_01[~valid] = np.nan
290
 
291
  return raw_depth_map, raw_depth_01
292
 
293
+
294
  # ============================================================
295
  # MAIN LORA PIPELINE
296
  # ============================================================
 
316
  anchor_mask = anchor_mask_cpu.to(DEVICE)
317
  prior_gpu = prior_raw_t.to(DEVICE)
318
 
 
319
  dino = copy.deepcopy(dino_base)
320
  dino = inject_lora(dino, r=lora_r, alpha=lora_alpha).to(DEVICE).train()
321
 
 
335
 
336
  Hp, Wp = H // PATCH_SIZE, W // PATCH_SIZE
337
  prior_p = F.interpolate(prior_gpu.view(1, 1, H, W), size=(Hp, Wp), mode="bilinear").flatten()
338
+ rel_p = F.interpolate(rel_cpu.view(1, 1, H, W), size=(Hp, Wp), mode="bilinear").flatten()
339
+ mask_p = F.interpolate(anchor_mask.float().view(1, 1, H, W), size=(Hp, Wp), mode="area").flatten() > 0.5
340
 
341
  loss_hist = []
342
  prog = st.progress(0, text="Running LoRA TTO...")
 
359
 
360
  prog.empty()
361
 
 
362
  dino.eval()
363
  mlp_head.eval()
364
 
 
384
 
385
  sb_local = mlp_head(t).t().reshape(2, hc // p, wc // p)
386
 
387
+ sb_acc[:, dy // stride:dy // stride + (hc // p) * (p // stride):p // stride,
388
+ dx // stride:dx // stride + (wc // p) * (p // stride):p // stride] += sb_local
389
 
390
+ cnt_acc[:, dy // stride:dy // stride + (hc // p) * (p // stride):p // stride,
391
+ dx // stride:dx // stride + (wc // p) * (p // stride):p // stride] += 1
392
 
393
  sb_dense = sb_acc / (cnt_acc + 1e-8)
394
  offset = (p - (p // 2)) // stride + 1
 
411
 
412
  return final_dsm, loss_hist, anchor_mask_cpu.cpu().numpy()
413
 
414
+
415
  # ============================================================
416
  # UI
417
  # ============================================================
418
  st.title("Prior2DSM | LoRA")
419
 
420
+ st.markdown(
421
+ f"""
422
+ **Example TIFFs**
423
+ - [Download example RGB TIFF](https://huggingface.co/spaces/osherr/Prior2DSM/resolve/main/{EXAMPLE_RGB_FILENAME})
424
+ - [Download example Prior TIFF](https://huggingface.co/spaces/osherr/Prior2DSM/resolve/main/{EXAMPLE_PRIOR_FILENAME})
425
+ """
426
+ )
427
+
428
  with st.sidebar:
429
  st.header("📂 Data")
430
+
431
+ data_mode = st.radio(
432
+ "Data source",
433
+ ["Upload TIFFs", "Use example TIFFs"],
434
+ index=0
435
+ )
436
+
437
+ rgb_file = None
438
+ prior_file = None
439
+ rgb_example_path = None
440
+ prior_example_path = None
441
+
442
+ if data_mode == "Upload TIFFs":
443
+ rgb_file = st.file_uploader("RGB Image", type=["tif", "tiff"])
444
+ prior_file = st.file_uploader("LiDAR Prior", type=["tif", "tiff"])
445
+ else:
446
+ st.caption("Load demo RGB/Prior TIFFs from the Hugging Face Space.")
447
+ if st.button("Load example TIFFs"):
448
+ st.session_state["use_examples"] = True
449
+
450
+ if st.session_state.get("use_examples", False):
451
+ rgb_example_path = load_tiff_from_hf(
452
+ repo_id="osherr/Prior2DSM",
453
+ filename=EXAMPLE_RGB_FILENAME,
454
+ repo_type="space"
455
+ )
456
+ prior_example_path = load_tiff_from_hf(
457
+ repo_id="osherr/Prior2DSM",
458
+ filename=EXAMPLE_PRIOR_FILENAME,
459
+ repo_type="space"
460
+ )
461
+ st.success("Example TIFFs loaded.")
462
 
463
  st.divider()
464
  st.write("#### LoRA / TTO")
 
467
  tto_steps = st.slider("TTO steps", 10, 300, 100, step=10)
468
  tto_lr = st.select_slider("TTO LR", options=[1e-4, 3e-4, 1e-3, 3e-3], value=1e-3)
469
 
470
+ has_uploaded = (rgb_file is not None and prior_file is not None)
471
+ has_examples = (
472
+ data_mode == "Use example TIFFs"
473
+ and st.session_state.get("use_examples", False)
474
+ and rgb_example_path is not None
475
+ and prior_example_path is not None
476
+ )
477
+
478
+ if has_uploaded or has_examples:
479
  dav_m, dino_base = load_models(
480
  repo_id="osherr/Prior2DSM",
481
  dav_file="depth_anything_v2_vitl.pth",
482
  dino_file="dinov3_vitl16_pretrain_sat493m-eadcf0ff.pth"
483
  )
484
 
485
+ if has_uploaded:
486
+ rgb_raw, h_f, w_f, _ = read_rgb_tiff(rgb_file.read())
487
+ prior_raw, prior_meta = read_prior_tiff(prior_file.read())
488
+ else:
489
+ rgb_raw, h_f, w_f, _ = read_rgb_tiff(rgb_example_path)
490
+ prior_raw, prior_meta = read_prior_tiff(prior_example_path)
491
 
492
+ init_roi_state(h_f, w_f)
 
 
493
 
494
  with st.spinner("Generating relative depth with Depth Anything V2..."):
495
  rel_depth_map, rel_depth_01 = run_dav_inference(dav_m, rgb_raw, h_f, w_f)
 
497
  st.subheader("1. ROI Selection")
498
 
499
  viz_rgb = preview_rgb(rgb_raw)
 
500
  col_img, col_ctrl = st.columns([1.2, 0.8])
501
 
502
  with col_ctrl:
503
+ with st.form("roi_form", clear_on_submit=False):
504
+ x_center_form = st.slider(
505
+ "X center",
506
+ 0, w_f - 1,
507
+ int(st.session_state["x_center"])
508
+ )
509
+ y_center_form = st.slider(
510
+ "Y center",
511
+ 0, h_f - 1,
512
+ int(st.session_state["y_center"])
513
+ )
514
+ bbox_size_form = st.slider(
515
+ "BBox Size (px)",
516
+ 50, min(400, min(h_f, w_f)),
517
+ int(st.session_state["bbox_size"])
518
+ )
519
+ use_normalized_rel_form = st.checkbox(
520
+ "Use normalized relative depth for LoRA",
521
+ value=bool(st.session_state["use_normalized_rel"])
522
+ )
523
+
524
+ c1, c2 = st.columns(2)
525
+ with c1:
526
+ update_roi = st.form_submit_button("Update ROI")
527
+ with c2:
528
+ run_btn = st.form_submit_button("🚀 Run LoRA Pipeline", type="primary")
529
+
530
+ if update_roi or run_btn:
531
+ st.session_state["x_center"] = x_center_form
532
+ st.session_state["y_center"] = y_center_form
533
+ st.session_state["bbox_size"] = bbox_size_form
534
+ st.session_state["use_normalized_rel"] = use_normalized_rel_form
535
+ x_center = int(st.session_state["x_center"])
536
+ y_center = int(st.session_state["y_center"])
537
+ bbox_size = int(st.session_state["bbox_size"])
538
+ use_normalized_rel = bool(st.session_state["use_normalized_rel"])
539
 
540
  half_s = bbox_size // 2
541
  x1, x2 = max(0, x_center - half_s), min(w_f, x_center + half_s)
 
545
  bbox_mask[y1:y2, x1:x2] = True
546
 
547
  with col_img:
548
+ roi_preview = draw_roi_preview(viz_rgb, x1, y1, x2, y2)
549
+ st.image(roi_preview, caption="ROI Preview", use_container_width=True)
 
 
 
 
550
 
551
  if run_btn:
552
  rel_for_lora = rel_depth_01 if use_normalized_rel else rel_depth_map
 
647
  "Download Georeferenced DSM",
648
  out_buf.getvalue(),
649
  file_name="lora_refined_dsm_georef.tif"
650
+ )
651
+ else:
652
+ st.info("Upload RGB and Prior TIFFs, or switch to example TIFFs in the sidebar.")