phiph committed on
Commit
7382c66
·
verified ·
1 Parent(s): 6dcfb17

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +20 -0
  2. .gitignore +6 -0
  3. DA-2-repo/LICENSE +201 -0
  4. DA-2-repo/README.md +103 -0
  5. DA-2-repo/app.py +165 -0
  6. DA-2-repo/assets/badges/icon2.png +0 -0
  7. DA-2-repo/assets/badges/teaser.jpg +3 -0
  8. DA-2-repo/assets/demos/a0.png +3 -0
  9. DA-2-repo/assets/demos/a1.png +3 -0
  10. DA-2-repo/assets/demos/a10.png +3 -0
  11. DA-2-repo/assets/demos/a11.png +3 -0
  12. DA-2-repo/assets/demos/a2.png +3 -0
  13. DA-2-repo/assets/demos/a3.png +3 -0
  14. DA-2-repo/assets/demos/a4.png +3 -0
  15. DA-2-repo/assets/demos/a5.png +3 -0
  16. DA-2-repo/assets/demos/a6.png +3 -0
  17. DA-2-repo/assets/demos/a7.png +3 -0
  18. DA-2-repo/assets/demos/a8.png +3 -0
  19. DA-2-repo/assets/demos/a9.png +3 -0
  20. DA-2-repo/assets/demos/b0.png +3 -0
  21. DA-2-repo/assets/demos/b1.png +3 -0
  22. DA-2-repo/assets/demos/b2.png +3 -0
  23. DA-2-repo/assets/demos/b3.png +3 -0
  24. DA-2-repo/assets/demos/b4.png +3 -0
  25. DA-2-repo/assets/demos/b5.png +3 -0
  26. DA-2-repo/assets/masks/b0.png +0 -0
  27. DA-2-repo/assets/masks/b1.png +0 -0
  28. DA-2-repo/assets/masks/b2.png +0 -0
  29. DA-2-repo/assets/masks/b3.png +0 -0
  30. DA-2-repo/assets/masks/b4.png +0 -0
  31. DA-2-repo/assets/masks/b5.png +0 -0
  32. DA-2-repo/configs/accelerate/0.yaml +16 -0
  33. DA-2-repo/configs/accelerate/1.yaml +16 -0
  34. DA-2-repo/configs/accelerate/2.yaml +16 -0
  35. DA-2-repo/configs/accelerate/3.yaml +16 -0
  36. DA-2-repo/configs/accelerate/4.yaml +16 -0
  37. DA-2-repo/configs/accelerate/5.yaml +16 -0
  38. DA-2-repo/configs/accelerate/6.yaml +16 -0
  39. DA-2-repo/configs/accelerate/7.yaml +16 -0
  40. DA-2-repo/configs/eval.json +76 -0
  41. DA-2-repo/configs/infer.json +39 -0
  42. DA-2-repo/eval.py +29 -0
  43. DA-2-repo/eval.sh +7 -0
  44. DA-2-repo/eval/__init__.py +0 -0
  45. DA-2-repo/eval/datasets/__init__.py +35 -0
  46. DA-2-repo/eval/datasets/base_depth_dataset.py +268 -0
  47. DA-2-repo/eval/datasets/matterport3d_dataset.py +25 -0
  48. DA-2-repo/eval/datasets/panosuncg_dataset.py +26 -0
  49. DA-2-repo/eval/datasets/splits/2d3ds.txt +0 -0
  50. DA-2-repo/eval/datasets/splits/matterport3d.txt +0 -0
.gitattributes CHANGED
@@ -33,3 +33,23 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ DA-2-repo/assets/badges/teaser.jpg filter=lfs diff=lfs merge=lfs -text
37
+ DA-2-repo/assets/demos/a0.png filter=lfs diff=lfs merge=lfs -text
38
+ DA-2-repo/assets/demos/a1.png filter=lfs diff=lfs merge=lfs -text
39
+ DA-2-repo/assets/demos/a10.png filter=lfs diff=lfs merge=lfs -text
40
+ DA-2-repo/assets/demos/a11.png filter=lfs diff=lfs merge=lfs -text
41
+ DA-2-repo/assets/demos/a2.png filter=lfs diff=lfs merge=lfs -text
42
+ DA-2-repo/assets/demos/a3.png filter=lfs diff=lfs merge=lfs -text
43
+ DA-2-repo/assets/demos/a4.png filter=lfs diff=lfs merge=lfs -text
44
+ DA-2-repo/assets/demos/a5.png filter=lfs diff=lfs merge=lfs -text
45
+ DA-2-repo/assets/demos/a6.png filter=lfs diff=lfs merge=lfs -text
46
+ DA-2-repo/assets/demos/a7.png filter=lfs diff=lfs merge=lfs -text
47
+ DA-2-repo/assets/demos/a8.png filter=lfs diff=lfs merge=lfs -text
48
+ DA-2-repo/assets/demos/a9.png filter=lfs diff=lfs merge=lfs -text
49
+ DA-2-repo/assets/demos/b0.png filter=lfs diff=lfs merge=lfs -text
50
+ DA-2-repo/assets/demos/b1.png filter=lfs diff=lfs merge=lfs -text
51
+ DA-2-repo/assets/demos/b2.png filter=lfs diff=lfs merge=lfs -text
52
+ DA-2-repo/assets/demos/b3.png filter=lfs diff=lfs merge=lfs -text
53
+ DA-2-repo/assets/demos/b4.png filter=lfs diff=lfs merge=lfs -text
54
+ DA-2-repo/assets/demos/b5.png filter=lfs diff=lfs merge=lfs -text
55
+ model.onnx.data filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ venv/
2
+ __pycache__/
3
+ *.pyc
4
+ .DS_Store
5
+ *.safetensors
6
+ .vscode/
DA-2-repo/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
DA-2-repo/README.md ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # <img src="assets/badges/icon2.png" alt="lotus" style="height:1.2em; vertical-align:bottom;"/>&nbsp;DA<sup>2</sup>: Depth Anything in Any Direction
2
+
3
+ [![Page](https://img.shields.io/badge/Project-Website-pink?logo=googlechrome&logoColor=white)](https://depth-any-in-any-dir.github.io/)
4
+ [![Paper](https://img.shields.io/badge/arXiv-Paper-b31b1b?logo=arxiv&logoColor=white)](http://arxiv.org/abs/2509.26618)
5
+ [![HuggingFace Demo](https://img.shields.io/badge/🤗%20HuggingFace-Demo%20-yellow)](https://huggingface.co/spaces/haodongli/DA-2)
6
+ [![Data](https://img.shields.io/badge/📂%20HuggingFace-Data-green)](https://huggingface.co/datasets/haodongli/DA-2)
7
+ [![Slides](https://img.shields.io/badge/Google-Slides-blue?logo=slideshare&logoColor=white)](https://docs.google.com/presentation/d/1QUonqLuYGEh0qcqY72pbTXsZimINlyN4rOogy7qX4GY/edit?usp=sharing)
8
+ [![BibTeX](https://img.shields.io/badge/BibTeX-grey?logo=googlescholar&logoColor=white)](https://depth-any-in-any-dir.github.io/bibtex.txt)
9
+
10
+ [Haodong Li](https://haodong2000.github.io/)<sup>123&sect;</sup>,
11
+ [Wangguangdong Zheng](https://wangguandongzheng.github.io/)<sup>1</sup>,
12
+ [Jing He](https://jingheya.github.io/)<sup>3</sup>,
13
+ [Yuhao Liu](https://yuhaoliu7456.github.io/)<sup>1</sup>,
14
+ [Xin Lin](https://linxin0.github.io/)<sup>2</sup>,
15
+ [Xin Yang](https://abnervictor.github.io/2023/06/12/Academic-Self-Intro.html)<sup>34</sup>,<br>
16
+ [Ying-Cong Chen](https://www.yingcong.me/)<sup>34&#9993;</sup>,
17
+ [Chunchao Guo]()<sup>1&#9993;</sup>
18
+
19
+ <span class="author-block"><sup>1</sup>Tencent Hunyuan</span>
20
+ <span class="author-block"><sup>2</sup>UC San Diego</span>
21
+ <span class="author-block"><sup>3</sup>HKUST(GZ)</span>
22
+ <span class="author-block"><sup>4</sup>HKUST</span><br>
23
+ <span class="author-block">
24
+ <sup>&sect;</sup>Work primarily done during an internship at Tencent Hunyuan.
25
+ <sup>&#9993;</sup>Corresponding author.
26
+ </span>
27
+
28
+ ![teaser](assets/badges/teaser.jpg)
29
+
30
+ <strong>DA<sup>2</sup> predicts dense, scale-invariant distance from a single 360&deg; panorama in an end-to-end manner, with remarkable geometric fidelity and strong zero-shot generalization.</strong>
31
+
32
+ ## 📢 News
33
+ - 2025-10-10 The curated panoramic data is released on [huggingface](https://huggingface.co/datasets/haodongli/DA-2)!
34
+ - 2025-10-10 The evaluation code and the [testing data](https://huggingface.co/datasets/haodongli/DA-2-Evaluation) are released!
35
+ - 2025-10-04 The 🤗Huggingface Gradio demo ([online](https://huggingface.co/spaces/haodongli/DA-2) and [local](https://github.com/EnVision-Research/DA-2?tab=readme-ov-file#-gradio-demo)) are released!
36
+ - 2025-10-04 The inference code and the [model](https://huggingface.co/haodongli/DA-2) are released!
37
+ - 2025-10-01 [Paper](https://arxiv.org/abs/2509.26618) released on arXiv!
38
+
39
+ ## 🛠️ Setup
40
+ > This installation was tested on: Ubuntu 20.04 LTS, Python 3.12, CUDA 12.2, NVIDIA GeForce RTX 3090.
41
+
42
+ 1. Clone the repository:
43
+ ```
44
+ git clone https://github.com/EnVision-Research/DA-2.git
45
+ cd DA-2
46
+ ```
47
+ 2. Install dependencies using conda:
48
+ ```
49
+ conda create -n da-2 python=3.12 -y
50
+ conda activate da-2
51
+ pip install -e src
52
+ ```
53
+ > For macOS users: Please remove `xformers==0.0.28.post2` (line 16) from `src/pyproject.toml` before `pip install -e src`, as [xFormers does not support macOS](https://github.com/facebookresearch/xformers/issues/775#issuecomment-1611284979).
54
+
55
+ ## 🤗 Gradio Demo
56
+ 1. Online demo: [Huggingface Space](https://huggingface.co/spaces/haodongli/DA-2)
57
+ 2. Local demo:
58
+ ```
59
+ python app.py
60
+ ```
61
+
62
+ ## 🕹️ Inference
63
+ > We've pre-uploaded the cases that appear in the [project page](https://depth-any-in-any-dir.github.io/). So you can proceed directly to step 3.
64
+
65
+ 1. Images are placed in a directory, e.g., `assets/demos`.
66
+ 2. (Optional) Masks (e.g., sky masks for outdoor images) in another directory, e.g., `assets/masks`. The filenames under both directories should be consistent.
67
+ 3. Run the inference command:
68
+ ```
69
+ sh infer.sh
70
+ ```
71
+ 4. The visualized distance and normal maps will be saved at `output/infer/vis_all.png`. The projected 3D point clouds will be saved at `output/infer/3dpc`.
72
+
73
+ ## 🚗 Evaluation
74
+ 1. Download the evaluation datasets from [huggingface](https://huggingface.co/datasets/haodongli/DA-2-Evaluation):
75
+ ```
76
+ cd [YOUR_DATA_DIR]
77
+ huggingface-cli login
78
+ hf download --repo-type dataset haodongli/DA-2-Evaluation --local-dir [YOUR_DATA_DIR]
79
+ ```
80
+ 2. Unzip the downloaded datasets:
81
+ ```
82
+ tar -zxvf [DATA_NAME].tar.gz
83
+ ```
84
+ 3. Set the `datasets_dir` (line 20) in `configs/eval.json` with `YOUR_DATA_DIR`.
85
+ 4. Run the evaluation command:
86
+ ```
87
+ sh eval.sh
88
+ ```
89
+ 5. The results will be saved at `output/eval`.
90
+
91
+ ## 🎓 Citation
92
+ If you find our work useful in your research, please consider citing our paper🌹:
93
+ ```bibtex
94
+ @article{li2025depth,
95
+ title={DA$^{2}$: Depth Anything in Any Direction},
96
+ author={Li, Haodong and Zheng, Wangguangdong and He, Jing and Liu, Yuhao and Lin, Xin and Yang, Xin and Chen, Ying-Cong and Guo, Chunchao},
97
+ journal={arXiv preprint arXiv:2509.26618},
98
+ year={2025}
99
+ }
100
+ ```
101
+
102
+ ## 🤝 Acknowledgement
103
+ This implementation is impossible without the awesome contributions of [MoGe](https://wangrc.site/MoGePage/), [UniK3D](https://lpiccinelli-eth.github.io/pub/unik3d/), [Lotus](https://lotus3d.github.io/), [Marigold](https://marigoldmonodepth.github.io/), [DINOv2](https://github.com/facebookresearch/dinov2), [Accelerate](https://github.com/huggingface/accelerate), [Gradio](https://github.com/gradio-app/gradio), [HuggingFace Hub](https://github.com/huggingface/huggingface_hub), and [PyTorch](https://pytorch.org/) to the open-source community.
DA-2-repo/app.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from accelerate import Accelerator
3
+ from accelerate.logging import get_logger
4
+ from accelerate.utils import (
5
+ InitProcessGroupKwargs,
6
+ ProjectConfiguration,
7
+ set_seed
8
+ )
9
+ import torch
10
+ from contextlib import nullcontext
11
+ import trimesh
12
+ import gradio as gr
13
+ from gradio_imageslider import ImageSlider
14
+ from da2.utils.base import load_config
15
+ from da2.utils.model import load_model
16
+ from da2.utils.io import (
17
+ read_cv2_image,
18
+ torch_transform,
19
+ tensorize
20
+ )
21
+ from da2.utils.vis import colorize_distance
22
+ from da2.utils.d2pc import distance2pointcloud
23
+ from datetime import (
24
+ timedelta,
25
+ datetime
26
+ )
27
+ import cv2
28
+ import numpy as np
29
+
30
+ last_glb_path = None
31
+
32
+ def prepare_to_run_demo():
33
+ config = load_config('configs/infer.json')
34
+ kwargs = InitProcessGroupKwargs(timeout=timedelta(seconds=config['accelerator']['timeout']))
35
+ output_dir = f'output/infer'
36
+ if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
37
+ accu_steps = config['accelerator']['accumulation_nsteps']
38
+ accelerator = Accelerator(
39
+ gradient_accumulation_steps=accu_steps,
40
+ mixed_precision=config['accelerator']['mixed_precision'],
41
+ log_with=config['accelerator']['report_to'],
42
+ project_config=ProjectConfiguration(project_dir=output_dir),
43
+ kwargs_handlers=[kwargs]
44
+ )
45
+ logger = get_logger(__name__, log_level='INFO')
46
+ config['env']['logger'] = logger
47
+ set_seed(config['env']['seed'])
48
+ return config, accelerator
49
+
50
+ def read_mask_demo(mask_path, shape):
51
+ if mask_path is None:
52
+ return np.ones(shape[1:]) > 0
53
+ mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
54
+ mask = mask > 0
55
+ return mask
56
+
57
+ def load_infer_data_demo(image, mask, model_dtype, device):
58
+ cv2_image = read_cv2_image(image)
59
+ image = torch_transform(cv2_image)
60
+ mask = read_mask_demo(mask, image.shape)
61
+ image = tensorize(image, model_dtype, device)
62
+ return image, cv2_image, mask
63
+
64
+ def ply2glb(ply_path, glb_path):
65
+ pcd = trimesh.load(ply_path)
66
+ points = np.asarray(pcd.vertices)
67
+ colors = np.asarray(pcd.visual.vertex_colors)
68
+ cloud = trimesh.points.PointCloud(vertices=points, colors=colors)
69
+ cloud.export(glb_path)
70
+ os.remove(ply_path)
71
+
72
+ def fn(image_path, mask_path):
73
+ global last_glb_path
74
+ config, accelerator = prepare_to_run_demo()
75
+ model = load_model(config, accelerator)
76
+ image, cv2_image, mask = load_infer_data_demo(image_path, mask_path,
77
+ model_dtype=config['spherevit']['dtype'], device=accelerator.device)
78
+ if torch.backends.mps.is_available():
79
+ autocast_ctx = nullcontext()
80
+ else:
81
+ autocast_ctx = torch.autocast(accelerator.device.type)
82
+ with autocast_ctx, torch.no_grad():
83
+ distance = model(image).cpu().numpy()[0]
84
+ if last_glb_path is not None:
85
+ os.remove(last_glb_path)
86
+ distance_vis = colorize_distance(distance, mask)
87
+ save_path = f'cache/tmp_{datetime.now().strftime("%Y%m%d_%H%M%S")}.glb'
88
+ last_glb_path = save_path
89
+ normal_image = distance2pointcloud(distance, cv2_image, mask, save_path=save_path.replace('.glb', '.ply'), return_normal=True, save_distance=False)
90
+ ply2glb(save_path.replace('.glb', '.ply'), save_path)
91
+ return save_path, [distance_vis, normal_image]
92
+
93
+ inputs = [
94
+ gr.Image(label="Input Image", type="filepath"),
95
+ gr.Image(label="Input Mask", type="filepath"),
96
+ ]
97
+ outputs = [
98
+ gr.Model3D(clear_color=[0.0, 0.0, 0.0, 0.0], label="3D Point Cloud"),
99
+ gr.ImageSlider(
100
+ label="Output Depth / Normal (transformed from the depth)",
101
+ type="pil",
102
+ slider_position=75,
103
+ )
104
+ ]
105
+
106
+ demo = gr.Interface(
107
+ fn=fn,
108
+ title="DA<sup>2</sup>: <u>D</u>epth <u>A</u>nything in <u>A</u>ny <u>D</u>irection",
109
+ description="""
110
+ <p align="center">
111
+ <a title="Project Page" href="https://depth-any-in-any-dir.github.io/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
112
+ <img src="https://img.shields.io/badge/Project-Website-pink?logo=googlechrome&logoColor=white">
113
+ </a>
114
+ <a title="arXiv" href="http://arxiv.org/abs/2509.26618" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
115
+ <img src="https://img.shields.io/badge/arXiv-Paper-b31b1b?logo=arxiv&logoColor=white">
116
+ </a>
117
+ <a title="Github" href="https://github.com/EnVision-Research/DA-2" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
118
+ <img src="https://img.shields.io/github/stars/EnVision-Research/DA-2?label=GitHub%20%E2%98%85&logo=github&color=C8C" alt="badge-github-stars">
119
+ </a>
120
+ <a title="Social" href="https://x.com/_akhaliq/status/1973283687652606411" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
121
+ <img src="https://www.obukhov.ai/img/badges/badge-social.svg" alt="social">
122
+ </a>
123
+ <a title="Social" href="https://x.com/haodongli00/status/1973287870317338747" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
124
+ <img src="https://www.obukhov.ai/img/badges/badge-social.svg" alt="social">
125
+ </a>
126
+ <br>
127
+ <strong>Please consider starring <span style="color: orange">&#9733;</span> our <a href="https://github.com/EnVision-Research/DA-2" target="_blank" rel="noopener noreferrer">GitHub Repo</a> if you find this demo useful!</strong>
128
+ </p>
129
+ <p><strong>Note: the "Input Mask" is optional, all pixels are assumed to be valid if mask is None.</strong></p>
130
+ """,
131
+ inputs=inputs,
132
+ outputs=outputs,
133
+ examples=[
134
+ [os.path.join(os.path.dirname(__file__), "assets/demos/a1.png"), None],
135
+ [os.path.join(os.path.dirname(__file__), "assets/demos/a2.png"), None],
136
+ [os.path.join(os.path.dirname(__file__), "assets/demos/a3.png"), None],
137
+ [os.path.join(os.path.dirname(__file__), "assets/demos/a4.png"), None],
138
+ [os.path.join(os.path.dirname(__file__), "assets/demos/b0.png"),
139
+ os.path.join(os.path.dirname(__file__), "assets/masks/b0.png")],
140
+ [os.path.join(os.path.dirname(__file__), "assets/demos/b1.png"),
141
+ os.path.join(os.path.dirname(__file__), "assets/masks/b1.png")],
142
+ [os.path.join(os.path.dirname(__file__), "assets/demos/a5.png"), None],
143
+ [os.path.join(os.path.dirname(__file__), "assets/demos/a6.png"), None],
144
+ [os.path.join(os.path.dirname(__file__), "assets/demos/a7.png"), None],
145
+ [os.path.join(os.path.dirname(__file__), "assets/demos/a8.png"), None],
146
+ [os.path.join(os.path.dirname(__file__), "assets/demos/b2.png"),
147
+ os.path.join(os.path.dirname(__file__), "assets/masks/b2.png")],
148
+ [os.path.join(os.path.dirname(__file__), "assets/demos/b3.png"),
149
+ os.path.join(os.path.dirname(__file__), "assets/masks/b3.png")],
150
+ [os.path.join(os.path.dirname(__file__), "assets/demos/a9.png"), None],
151
+ [os.path.join(os.path.dirname(__file__), "assets/demos/a10.png"), None],
152
+ [os.path.join(os.path.dirname(__file__), "assets/demos/a11.png"), None],
153
+ [os.path.join(os.path.dirname(__file__), "assets/demos/a0.png"), None],
154
+ [os.path.join(os.path.dirname(__file__), "assets/demos/b4.png"),
155
+ os.path.join(os.path.dirname(__file__), "assets/masks/b4.png")],
156
+ [os.path.join(os.path.dirname(__file__), "assets/demos/b5.png"),
157
+ os.path.join(os.path.dirname(__file__), "assets/masks/b5.png")],
158
+ ],
159
+ examples_per_page=20
160
+ )
161
+
162
+ demo.launch(
163
+ server_name="0.0.0.0",
164
+ server_port=6381,
165
+ )
DA-2-repo/assets/badges/icon2.png ADDED
DA-2-repo/assets/badges/teaser.jpg ADDED

Git LFS Details

  • SHA256: 5c6786218d0a17115e6ed1320434b2b47101290a7e244f2eed1ebe70e4822464
  • Pointer size: 132 Bytes
  • Size of remote file: 1.2 MB
DA-2-repo/assets/demos/a0.png ADDED

Git LFS Details

  • SHA256: eedc66f98cf0a949602f691c3eed51511ae520cf8f63674abe542741ba6090b8
  • Pointer size: 131 Bytes
  • Size of remote file: 744 kB
DA-2-repo/assets/demos/a1.png ADDED

Git LFS Details

  • SHA256: 906f336ab4c6561ee85b9cb883a6aa34cf11289fc86b6a4e4382baed56981aa7
  • Pointer size: 131 Bytes
  • Size of remote file: 822 kB
DA-2-repo/assets/demos/a10.png ADDED

Git LFS Details

  • SHA256: d6d058aef9322964f5d36de90ab91470e283acab248604bcd488a43c680a9e7d
  • Pointer size: 131 Bytes
  • Size of remote file: 882 kB
DA-2-repo/assets/demos/a11.png ADDED

Git LFS Details

  • SHA256: 45af8c71b8d44880503b5da1b5f67a0d5638860b9f9149cae7d16a3a3975d090
  • Pointer size: 131 Bytes
  • Size of remote file: 848 kB
DA-2-repo/assets/demos/a2.png ADDED

Git LFS Details

  • SHA256: 6fa931d70c6220cec0b56a9cdf651f12fa35436d937cd2cf481d10dddb2a114e
  • Pointer size: 131 Bytes
  • Size of remote file: 810 kB
DA-2-repo/assets/demos/a3.png ADDED

Git LFS Details

  • SHA256: a85573ac5d51a261d82b23475488e769bd9b3e392948e60e6dc73f0c7ace762b
  • Pointer size: 131 Bytes
  • Size of remote file: 854 kB
DA-2-repo/assets/demos/a4.png ADDED

Git LFS Details

  • SHA256: d0a544ec4b542c59f1fbfaf99f86eb60b4c0dbce7c8e4b1bac9e6e23e889c7ec
  • Pointer size: 131 Bytes
  • Size of remote file: 813 kB
DA-2-repo/assets/demos/a5.png ADDED

Git LFS Details

  • SHA256: 7e36ed78b74223eae24f8c85f1cdab00d1a3a5b494fec807240cb7d3427fad87
  • Pointer size: 131 Bytes
  • Size of remote file: 848 kB
DA-2-repo/assets/demos/a6.png ADDED

Git LFS Details

  • SHA256: e48031fcd3e5a84e4ea4513a23e2ec8150f8ec3fbdae1d4b2d51fc67ac588fe6
  • Pointer size: 131 Bytes
  • Size of remote file: 818 kB
DA-2-repo/assets/demos/a7.png ADDED

Git LFS Details

  • SHA256: 12b99fdddea8eefb6885114bd386fc4fad0484e13c85c88364a43396f9cef3f9
  • Pointer size: 131 Bytes
  • Size of remote file: 905 kB
DA-2-repo/assets/demos/a8.png ADDED

Git LFS Details

  • SHA256: 5b29df5b6294742acc43d8ce41073b335e98024459273b77d9b943fd3583ac35
  • Pointer size: 131 Bytes
  • Size of remote file: 784 kB
DA-2-repo/assets/demos/a9.png ADDED

Git LFS Details

  • SHA256: ba92bf3adf1d1b2a775d5b0f895a16876159fc1a43d98328c923fdc994d6e346
  • Pointer size: 131 Bytes
  • Size of remote file: 910 kB
DA-2-repo/assets/demos/b0.png ADDED

Git LFS Details

  • SHA256: 3b610ae826372778853553810ef0e07e4f91d8507549dc0f5f32eca038348a37
  • Pointer size: 131 Bytes
  • Size of remote file: 850 kB
DA-2-repo/assets/demos/b1.png ADDED

Git LFS Details

  • SHA256: 2df3207be859cf8524e9a00a76efb606e626ca4cc9dbd81178fe24de43a6b97b
  • Pointer size: 131 Bytes
  • Size of remote file: 798 kB
DA-2-repo/assets/demos/b2.png ADDED

Git LFS Details

  • SHA256: 790218133cd507f1f9ca65fcdff60f74325df39ebd0df1d5b6e6261a8dfd29a8
  • Pointer size: 131 Bytes
  • Size of remote file: 863 kB
DA-2-repo/assets/demos/b3.png ADDED

Git LFS Details

  • SHA256: 843b680077e114451285efc6536e811739cbbab07ade423459a5bc24e747455f
  • Pointer size: 131 Bytes
  • Size of remote file: 651 kB
DA-2-repo/assets/demos/b4.png ADDED

Git LFS Details

  • SHA256: 5615e49fa1bea5ee049a66bbe577d48dd63f441e86a4ae5b225136e7e2295187
  • Pointer size: 131 Bytes
  • Size of remote file: 804 kB
DA-2-repo/assets/demos/b5.png ADDED

Git LFS Details

  • SHA256: 7957ee9e54dd6b61b74014412ece3de7bbe999ae0c0be41c4d762d62d8352656
  • Pointer size: 131 Bytes
  • Size of remote file: 669 kB
DA-2-repo/assets/masks/b0.png ADDED
DA-2-repo/assets/masks/b1.png ADDED
DA-2-repo/assets/masks/b2.png ADDED
DA-2-repo/assets/masks/b3.png ADDED
DA-2-repo/assets/masks/b4.png ADDED
DA-2-repo/assets/masks/b5.png ADDED
DA-2-repo/configs/accelerate/0.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ debug: false
3
+ distributed_type: 'NO'
4
+ downcast_bf16: 'no'
5
+ gpu_ids: '0'
6
+ machine_rank: 0
7
+ main_training_function: main
8
+ mixed_precision: 'no'
9
+ num_machines: 1
10
+ num_processes: 1
11
+ rdzv_backend: static
12
+ same_network: true
13
+ tpu_env: []
14
+ tpu_use_cluster: false
15
+ tpu_use_sudo: false
16
+ use_cpu: false
DA-2-repo/configs/accelerate/1.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ debug: false
3
+ distributed_type: 'NO'
4
+ downcast_bf16: 'no'
5
+ gpu_ids: '1'
6
+ machine_rank: 0
7
+ main_training_function: main
8
+ mixed_precision: 'no'
9
+ num_machines: 1
10
+ num_processes: 1
11
+ rdzv_backend: static
12
+ same_network: true
13
+ tpu_env: []
14
+ tpu_use_cluster: false
15
+ tpu_use_sudo: false
16
+ use_cpu: false
DA-2-repo/configs/accelerate/2.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ debug: false
3
+ distributed_type: 'NO'
4
+ downcast_bf16: 'no'
5
+ gpu_ids: '2'
6
+ machine_rank: 0
7
+ main_training_function: main
8
+ mixed_precision: 'no'
9
+ num_machines: 1
10
+ num_processes: 1
11
+ rdzv_backend: static
12
+ same_network: true
13
+ tpu_env: []
14
+ tpu_use_cluster: false
15
+ tpu_use_sudo: false
16
+ use_cpu: false
DA-2-repo/configs/accelerate/3.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ debug: false
3
+ distributed_type: 'NO'
4
+ downcast_bf16: 'no'
5
+ gpu_ids: '3'
6
+ machine_rank: 0
7
+ main_training_function: main
8
+ mixed_precision: 'no'
9
+ num_machines: 1
10
+ num_processes: 1
11
+ rdzv_backend: static
12
+ same_network: true
13
+ tpu_env: []
14
+ tpu_use_cluster: false
15
+ tpu_use_sudo: false
16
+ use_cpu: false
DA-2-repo/configs/accelerate/4.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ debug: false
3
+ distributed_type: 'NO'
4
+ downcast_bf16: 'no'
5
+ gpu_ids: '4'
6
+ machine_rank: 0
7
+ main_training_function: main
8
+ mixed_precision: 'no'
9
+ num_machines: 1
10
+ num_processes: 1
11
+ rdzv_backend: static
12
+ same_network: true
13
+ tpu_env: []
14
+ tpu_use_cluster: false
15
+ tpu_use_sudo: false
16
+ use_cpu: false
DA-2-repo/configs/accelerate/5.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ debug: false
3
+ distributed_type: 'NO'
4
+ downcast_bf16: 'no'
5
+ gpu_ids: '5'
6
+ machine_rank: 0
7
+ main_training_function: main
8
+ mixed_precision: 'no'
9
+ num_machines: 1
10
+ num_processes: 1
11
+ rdzv_backend: static
12
+ same_network: true
13
+ tpu_env: []
14
+ tpu_use_cluster: false
15
+ tpu_use_sudo: false
16
+ use_cpu: false
DA-2-repo/configs/accelerate/6.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ debug: false
3
+ distributed_type: 'NO'
4
+ downcast_bf16: 'no'
5
+ gpu_ids: '6'
6
+ machine_rank: 0
7
+ main_training_function: main
8
+ mixed_precision: 'no'
9
+ num_machines: 1
10
+ num_processes: 1
11
+ rdzv_backend: static
12
+ same_network: true
13
+ tpu_env: []
14
+ tpu_use_cluster: false
15
+ tpu_use_sudo: false
16
+ use_cpu: false
DA-2-repo/configs/accelerate/7.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ debug: false
3
+ distributed_type: 'NO'
4
+ downcast_bf16: 'no'
5
+ gpu_ids: '7'
6
+ machine_rank: 0
7
+ main_training_function: main
8
+ mixed_precision: 'no'
9
+ num_machines: 1
10
+ num_processes: 1
11
+ rdzv_backend: static
12
+ same_network: true
13
+ tpu_env: []
14
+ tpu_use_cluster: false
15
+ tpu_use_sudo: false
16
+ use_cpu: false
DA-2-repo/configs/eval.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env": {
3
+ "seed": 42,
4
+ "verbose": true
5
+ },
6
+ "accelerator": {
7
+ "report_to": ["tensorboard"],
8
+ "mixed_precision": "fp16",
9
+ "accumulation_nsteps": 4,
10
+ "timeout": 36000
11
+ },
12
+ "inference": {
13
+ "images": "assets/demos",
14
+ "masks": "assets/masks",
15
+ "min_pixels": 580000,
16
+ "max_pixels": 620000
17
+ },
18
+ "evaluation": {
19
+ "alignment": "median",
20
+ "datasets_dir": "/home/haodong/data/DA-2/Evaluation",
21
+ "metric_names": [
22
+ "abs_relative_difference",
23
+ "squared_relative_difference",
24
+ "rmse_linear",
25
+ "rmse_log",
26
+ "log10",
27
+ "delta1_acc",
28
+ "delta2_acc",
29
+ "delta3_acc",
30
+ "i_rmse",
31
+ "silog_rmse"
32
+ ],
33
+ "metric_show": {
34
+ "abs_relative_difference": "AbsRel",
35
+ "delta1_acc": "δ_1"
36
+ },
37
+ "datasets": {
38
+ "2d3ds": {
39
+ "dir": "2D3DS/wo_xyz",
40
+ "filenames": "eval/datasets/splits/2d3ds.txt",
41
+ "alignment_max_res": 2048
42
+ },
43
+ "matterport3d": {
44
+ "dir": "Matterport3D",
45
+ "filenames": "eval/datasets/splits/matterport3d.txt",
46
+ "alignment_max_res": 2048
47
+ },
48
+ "panosuncg": {
49
+ "dir": "PanoSUNCG/rotated",
50
+ "filenames": "eval/datasets/splits/panosuncg.txt",
51
+ "alignment_max_res": 1024
52
+ }
53
+ }
54
+ },
55
+ "spherevit": {
56
+ "vit_w_esphere": {
57
+ "input_dims": [1024, 1024, 1024, 1024],
58
+ "hidden_dim": 512,
59
+ "num_heads": 8,
60
+ "expansion": 4,
61
+ "num_layers_head": [2, 2, 2],
62
+ "dropout": 0.0,
63
+ "layer_scale": 0.0001,
64
+ "out_dim": 64,
65
+ "kernel_size": 3,
66
+ "num_prompt_blocks": 1,
67
+ "use_norm": false
68
+ },
69
+ "sphere": {
70
+ "width": 1092,
71
+ "height": 546,
72
+ "hfov": 6.2832,
73
+ "vfov": 3.1416
74
+ }
75
+ }
76
+ }
DA-2-repo/configs/infer.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env": {
3
+ "seed": 42,
4
+ "verbose": true
5
+ },
6
+ "accelerator": {
7
+ "report_to": ["tensorboard"],
8
+ "mixed_precision": "fp16",
9
+ "accumulation_nsteps": 4,
10
+ "timeout": 36000
11
+ },
12
+ "inference": {
13
+ "images": "assets/demos",
14
+ "masks": "assets/masks",
15
+ "min_pixels": 580000,
16
+ "max_pixels": 620000
17
+ },
18
+ "spherevit": {
19
+ "vit_w_esphere": {
20
+ "input_dims": [1024, 1024, 1024, 1024],
21
+ "hidden_dim": 512,
22
+ "num_heads": 8,
23
+ "expansion": 4,
24
+ "num_layers_head": [2, 2, 2],
25
+ "dropout": 0.0,
26
+ "layer_scale": 0.0001,
27
+ "out_dim": 64,
28
+ "kernel_size": 3,
29
+ "num_prompt_blocks": 1,
30
+ "use_norm": false
31
+ },
32
+ "sphere": {
33
+ "width": 1092,
34
+ "height": 546,
35
+ "hfov": 6.2832,
36
+ "vfov": 3.1416
37
+ }
38
+ }
39
+ }
DA-2-repo/eval.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from contextlib import nullcontext
4
+ from tqdm import tqdm
5
+ from da2 import (
6
+ prepare_to_run,
7
+ load_model
8
+ )
9
+ from eval.utils import run_evaluation
10
+
11
+
12
def eval(model, config, accelerator, output_dir):
    """Evaluate *model* on every dataset listed under config['evaluation'].

    Runs only on the main process; inference happens under autocast (skipped
    on Apple MPS, which does not support it here) and with gradients disabled.
    Per-dataset metrics selected by config['evaluation']['metric_show'] are
    written to the configured logger.
    """
    model = model.eval()
    eval_cfg = config['evaluation']
    if accelerator.is_main_process:
        # MPS has no autocast support for this path — use a no-op context there.
        autocast_ctx = (
            nullcontext()
            if torch.backends.mps.is_available()
            else torch.autocast(accelerator.device.type)
        )
        with autocast_ctx, torch.no_grad():
            for dataset_name in eval_cfg['datasets']:
                metrics = run_evaluation(model, config, dataset_name, output_dir, accelerator.device)
                for metric_name, shown_name in eval_cfg['metric_show'].items():
                    config['env']['logger'].info(
                        f"\033[92mEVAL --> {dataset_name}: {shown_name} = {metrics[metric_name]}.\033[0m"
                    )
25
+
26
if __name__ == '__main__':
    # Entry point: build config + accelerator, load weights, then evaluate.
    config, accelerator, output_dir = prepare_to_run()
    model = load_model(config, accelerator)
    eval(model, config, accelerator, output_dir)
DA-2-repo/eval.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Launch single-GPU evaluation through HuggingFace Accelerate.
# CUDA selects which configs/accelerate/<id>.yaml (and thus which GPU) is used.
export CUDA=0
export CONFIG_PATH="configs/eval.json"
# Quote all expansions so paths containing spaces cannot split the command line.
accelerate launch \
    --config_file="configs/accelerate/${CUDA}.yaml" \
    --mixed_precision="fp16" \
    --main_process_port="12345" \
    eval.py --config_path="$CONFIG_PATH"
DA-2-repo/eval/__init__.py ADDED
File without changes
DA-2-repo/eval/datasets/__init__.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Authors: Bingxin Ke, Haodong Li
2
+ # Last modified: 2025-05-25
3
+ # Note: Add PanoSUNCGDataset, Matterport3DDataset, Stanford2D3DSDataset for 360° depth (or distance) evaluation.
4
+
5
+ import os
6
+
7
+ from .base_depth_dataset import BaseDepthDataset, get_pred_name, DatasetMode
8
+ from .stanford2d3ds_dataset import Stanford2D3DSDataset
9
+ from .matterport3d_dataset import Matterport3DDataset
10
+ from .panosuncg_dataset import PanoSUNCGDataset
11
+
12
# Registry mapping the dataset key used in eval configs (configs/eval.json,
# "evaluation.datasets") to its Dataset implementation.
dataset_name_class_dict = {
    "2d3ds": Stanford2D3DSDataset,
    "matterport3d": Matterport3DDataset,
    "panosuncg": PanoSUNCGDataset
}
17
+
18
+
19
def get_dataset(
    cfg_data_split, dataset_name, base_data_dir: str, mode: DatasetMode, **kwargs
) -> BaseDepthDataset:
    """Instantiate the evaluation dataset registered under *dataset_name*.

    Args:
        cfg_data_split: Per-dataset config mapping; must contain 'filenames'
            (path to the split list) and 'dir' (subdirectory under
            *base_data_dir*). All of its keys are also forwarded to the
            dataset constructor as keyword arguments.
        dataset_name: Key into ``dataset_name_class_dict``.
        base_data_dir: Root directory holding all evaluation datasets.
        mode: DatasetMode controlling which rasters the dataset returns.
        **kwargs: Extra keyword arguments forwarded to the dataset class.

    Returns:
        A constructed BaseDepthDataset subclass instance.

    Raises:
        NotImplementedError: If *dataset_name* is not registered.
    """
    if dataset_name not in dataset_name_class_dict:
        # Name the offending key so config typos are easy to diagnose.
        raise NotImplementedError(f"Unknown dataset: {dataset_name!r}")
    dataset_class = dataset_name_class_dict[dataset_name]
    return dataset_class(
        mode=mode,
        filename_ls_path=cfg_data_split['filenames'],
        dataset_dir=os.path.join(base_data_dir, cfg_data_split['dir']),
        disp_name=dataset_name,
        **cfg_data_split,
        **kwargs,
    )
DA-2-repo/eval/datasets/base_depth_dataset.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Author: Bingxin Ke
2
+ # Last modified: 2024-04-15
3
+
4
+ import io
5
+ import os
6
+ import random
7
+ import tarfile
8
+ from enum import Enum
9
+
10
+ import numpy as np
11
+ import cv2
12
+ import torch
13
+ from PIL import Image
14
+ from torch.utils.data import Dataset
15
+ from torchvision.transforms import InterpolationMode, Resize
16
+
17
+
18
class DatasetMode(Enum):
    # Controls what BaseDepthDataset.__getitem__ returns:
    RGB_ONLY = "rgb_only"  # RGB rasters only; depth loading is skipped
    EVAL = "evaluate"      # RGB + raw/filled depth + valid masks
    TRAIN = "train"        # as EVAL, plus the training preprocessing pipeline
+
23
+
24
def read_image_from_tar(tar_obj, img_rel_path):
    """Read one image from an open tarfile and return it as a PIL Image.

    Members are addressed with a "./" prefix, matching how the archives
    were created (see BaseDepthDataset._read_image).
    """
    member = tar_obj.extractfile("./" + img_rel_path)
    data = member.read()
    # BUG FIX: the original version opened the image but never returned it,
    # so every call produced None.
    return Image.open(io.BytesIO(data))
+
29
+
30
class BaseDepthDataset(Dataset):
    """Base class for depth-evaluation datasets.

    Handles split-file loading, RGB/depth reading (from a directory or a tar
    archive), valid-mask computation, and — in TRAIN mode — augmentation,
    normalization, and resizing. Subclasses override ``_read_depth_file`` to
    decode their dataset-specific depth encoding.
    """

    def __init__(
        self,
        mode: DatasetMode,
        filename_ls_path: str,
        dataset_dir: str,
        disp_name: str,
        min_depth,
        max_depth,
        has_filled_depth,
        name_mode,
        depth_transform=None,
        augmentation_args: dict = None,
        resize_to_hw=None,
        move_invalid_to_far_plane: bool = True,
        rgb_transform=lambda x: x / 255.0 * 2 - 1,  # [0, 255] -> [-1, 1]
        **kwargs,
    ) -> None:
        super().__init__()
        self.mode = mode
        # Dataset info
        self.filename_ls_path = filename_ls_path
        self.dataset_dir = dataset_dir
        self.disp_name = disp_name
        self.has_filled_depth = has_filled_depth
        self.name_mode: DepthFileNameMode = name_mode
        # Depth values outside (min_depth, max_depth) are masked as invalid.
        self.min_depth = min_depth
        self.max_depth = max_depth

        # Training arguments
        self.depth_transform = depth_transform
        self.augm_args = augmentation_args
        self.resize_to_hw = resize_to_hw
        self.rgb_transform = rgb_transform
        self.move_invalid_to_far_plane = move_invalid_to_far_plane

        # Load filenames: each split line lists the RGB path followed by the
        # depth path(s), e.g. [['rgb.png', 'depth.tif'], ...]
        with open(self.filename_ls_path, "r") as f:
            self.filenames = [s.split() for s in f.readlines()]

        # Tar dataset: dataset_dir may point at a tar archive instead of a
        # directory; the archive is opened lazily in _read_image.
        self.tar_obj = None
        self.is_tar = os.path.isfile(dataset_dir) and tarfile.is_tarfile(dataset_dir)

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, index):
        rasters, other = self._get_data_item(index)
        if DatasetMode.TRAIN == self.mode:
            rasters = self._training_preprocess(rasters)
        # Merge raster tensors with metadata into a single output dict.
        outputs = rasters
        outputs.update(other)
        return outputs

    def _get_data_item(self, index):
        """Load the RGB (and, unless RGB_ONLY, depth + valid masks) for one sample."""
        rgb_rel_path, depth_rel_path, filled_rel_path = self._get_data_path(index=index)

        rasters = {}

        # RGB data
        rasters.update(self._load_rgb_data(rgb_rel_path=rgb_rel_path))

        # Depth data
        if DatasetMode.RGB_ONLY != self.mode:
            depth_data = self._load_depth_data(
                depth_rel_path=depth_rel_path, filled_rel_path=filled_rel_path
            )
            rasters.update(depth_data)
            # Valid masks: pixels within the (min_depth, max_depth) range.
            rasters["valid_mask_raw"] = self._get_valid_mask(
                rasters["depth_raw_linear"]
            ).clone()
            rasters["valid_mask_filled"] = self._get_valid_mask(
                rasters["depth_filled_linear"]
            ).clone()

        other = {"index": index, "rgb_relative_path": rgb_rel_path}

        return rasters, other

    def _load_rgb_data(self, rgb_rel_path):
        """Return {'rgb_int': [3, H, W] int tensor} for the given relative path."""
        rgb = self._read_rgb_file(rgb_rel_path)
        return {
            "rgb_int": torch.from_numpy(rgb).int(),
        }

    def _load_depth_data(self, depth_rel_path, filled_rel_path):
        """Return raw (and filled, when available) depth as [1, H, W] float tensors.

        When the dataset has no filled depth, 'depth_filled_linear' is a copy
        of the raw depth so downstream code can rely on the key existing.
        """
        outputs = {}
        depth_raw = self._read_depth_file(depth_rel_path).squeeze()
        depth_raw_linear = torch.from_numpy(depth_raw).float().unsqueeze(0)  # [1, H, W]
        outputs["depth_raw_linear"] = depth_raw_linear.clone()

        if self.has_filled_depth:
            depth_filled = self._read_depth_file(filled_rel_path).squeeze()
            depth_filled_linear = torch.from_numpy(depth_filled).float().unsqueeze(0)
            outputs["depth_filled_linear"] = depth_filled_linear
        else:
            outputs["depth_filled_linear"] = depth_raw_linear.clone()

        return outputs

    def _get_data_path(self, index):
        """Split one filename-list line into (rgb, depth, filled) relative paths."""
        filename_line = self.filenames[index]

        rgb_rel_path = filename_line[0]

        depth_rel_path, filled_rel_path = None, None
        if DatasetMode.RGB_ONLY != self.mode:
            depth_rel_path = filename_line[1]
            if self.has_filled_depth:
                filled_rel_path = filename_line[2]
        return rgb_rel_path, depth_rel_path, filled_rel_path

    def _read_image(self, img_rel_path) -> np.ndarray:
        """Read an image, transparently supporting tar-archived datasets."""
        if self.is_tar:
            if self.tar_obj is None:
                # Open lazily so each DataLoader worker gets its own handle.
                self.tar_obj = tarfile.open(self.dataset_dir)
            image = self.tar_obj.extractfile("./" + img_rel_path)
            image = image.read()
            image = Image.open(io.BytesIO(image))  # [H, W, rgb]
        else:
            img_path = os.path.join(self.dataset_dir, img_rel_path)
            image = Image.open(img_path).convert('RGB')
        image = np.asarray(image)
        return image

    def _read_depth_cv2(self, img_rel_path) -> np.ndarray:
        """Read a depth map with OpenCV, preserving the original bit depth."""
        depth_path = os.path.join(self.dataset_dir, img_rel_path)
        depth_in = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
        # BUG FIX: guard on ndim first — single-channel depth maps load as
        # 2-D arrays, and indexing shape[2] on them raised IndexError.
        if depth_in.ndim == 3 and depth_in.shape[2] == 3:  # 3-channel image
            depth_in = depth_in[..., 0]  # PANO: all channels carry the same value
        depth_in = depth_in.astype(np.float32)
        return depth_in

    def _read_rgb_file(self, rel_path) -> np.ndarray:
        """Read an RGB image as a [3, H, W] int array (alpha channel dropped)."""
        rgb = self._read_image(rel_path)
        # Handle RGBA images by converting to RGB
        if rgb.shape[2] == 4:  # 4 channels (RGBA)
            rgb = rgb[:, :, :3]  # keep only the RGB channels
        rgb = np.transpose(rgb, (2, 0, 1)).astype(int)  # [rgb, H, W]
        return rgb

    def _read_depth_file(self, rel_path):
        """Decode a depth file. Subclasses override this for dataset-specific encodings."""
        depth_in = self._read_image(rel_path)
        # Base class performs no decoding.
        depth_decoded = depth_in

        return depth_decoded

    def _get_valid_mask(self, depth: torch.Tensor):
        """Boolean mask of pixels strictly inside (min_depth, max_depth)."""
        valid_mask = torch.logical_and(
            (depth > self.min_depth), (depth < self.max_depth)
        ).bool()
        return valid_mask

    def _training_preprocess(self, rasters):
        """Apply augmentation, depth normalization, far-plane fill, and resizing."""
        # Augmentation
        if self.augm_args is not None:
            rasters = self._augment_data(rasters)

        # Normalization
        rasters["depth_raw_norm"] = self.depth_transform(
            rasters["depth_raw_linear"], rasters["valid_mask_raw"]
        ).clone()
        rasters["depth_filled_norm"] = self.depth_transform(
            rasters["depth_filled_linear"], rasters["valid_mask_filled"]
        ).clone()

        # Set invalid pixels to the far plane so they carry a well-defined value.
        if self.move_invalid_to_far_plane:
            if self.depth_transform.far_plane_at_max:
                rasters["depth_filled_norm"][~rasters["valid_mask_filled"]] = (
                    self.depth_transform.norm_max
                )
            else:
                rasters["depth_filled_norm"][~rasters["valid_mask_filled"]] = (
                    self.depth_transform.norm_min
                )

        # Resize (nearest-exact keeps depth values and masks unblended).
        if self.resize_to_hw is not None:
            resize_transform = Resize(
                size=self.resize_to_hw, interpolation=InterpolationMode.NEAREST_EXACT
            )
            rasters = {k: resize_transform(v) for k, v in rasters.items()}

        return rasters

    def _augment_data(self, rasters_dict):
        """Random left-right flip applied consistently to every raster."""
        # NOTE(review): augm_args is accessed by attribute (e.g. an OmegaConf
        # node), although the signature annotates it as dict — confirm callers.
        lr_flip_p = self.augm_args.lr_flip_p
        if random.random() < lr_flip_p:
            rasters_dict = {k: v.flip(-1) for k, v in rasters_dict.items()}

        return rasters_dict

    def __del__(self):
        # Close a lazily opened tar handle, if any.
        if self.tar_obj is not None:
            self.tar_obj.close()
            self.tar_obj = None
+
245
+
246
# Prediction file naming modes
class DepthFileNameMode(Enum):
    id = 1  # id.png
    rgb_id = 2  # rgb_id.png
    i_d_rgb = 3  # i_d_1_rgb.png
    rgb_i_d = 4


def get_pred_name(rgb_basename, name_mode, suffix=".png"):
    """Derive the prediction filename for *rgb_basename* under *name_mode*.

    The derived name always ends with *suffix*, regardless of the RGB
    file's original extension. Raises NotImplementedError for an
    unrecognized naming mode.
    """
    if name_mode == DepthFileNameMode.id:
        stem = "pred_" + rgb_basename
    elif name_mode == DepthFileNameMode.rgb_id:
        stem = "pred_" + rgb_basename.split("_")[1]
    elif name_mode == DepthFileNameMode.rgb_i_d:
        stem = "pred_" + "_".join(rgb_basename.split("_")[1:])
    elif name_mode == DepthFileNameMode.i_d_rgb:
        stem = rgb_basename.replace("_rgb.", "_pred.")
    else:
        raise NotImplementedError
    # Swap whatever extension remains for the requested suffix.
    return os.path.splitext(stem)[0] + suffix
DA-2-repo/eval/datasets/matterport3d_dataset.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Author: Haodong Li
2
+ # Last modified: 2025-05-25
3
+
4
+ from .base_depth_dataset import BaseDepthDataset, DepthFileNameMode
5
+ import cv2
6
+ import os
7
+
8
class Matterport3DDataset(BaseDepthDataset):
    """Matterport3D 360° evaluation dataset.

    Depth is stored as 16-bit PNGs scaled by 2560 units per metre; pixels
    outside (1e-3, 5) metres are treated as invalid by the base class.
    """

    def __init__(
        self,
        **kwargs,
    ) -> None:
        super().__init__(
            # Matterport3D dataset parameters
            min_depth=1e-3,
            max_depth=5,
            has_filled_depth=False,
            name_mode=DepthFileNameMode.id,
            **kwargs,
        )

    def _read_depth_file(self, rel_path):
        """Decode a Matterport3D depth PNG into metres."""
        img_path = os.path.join(self.dataset_dir, rel_path)
        depth_in = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
        # cv2.imread returns None on a missing/unreadable file; fail loudly
        # instead of raising an opaque TypeError on the division below.
        if depth_in is None:
            raise FileNotFoundError(f"Cannot read depth file: {img_path}")
        depth_decoded = depth_in / 2560.0  # stored units -> metres
        return depth_decoded
DA-2-repo/eval/datasets/panosuncg_dataset.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Author: Haodong Li
2
+ # Last modified: 2025-05-25
3
+
4
+ from .base_depth_dataset import BaseDepthDataset, DepthFileNameMode
5
+ import cv2
6
+ import os
7
+
8
class PanoSUNCGDataset(BaseDepthDataset):
    """PanoSUNCG 360° evaluation dataset.

    Depth PNGs carry the same value in every channel; channel 0 is taken and
    divided by 20 to recover metres. Pixels outside (1e-3, 5) metres are
    treated as invalid by the base class.
    """

    def __init__(
        self,
        **kwargs,
    ) -> None:
        super().__init__(
            # PanoSUNCG dataset parameters
            min_depth=1e-3,
            max_depth=5,
            has_filled_depth=False,
            name_mode=DepthFileNameMode.id,
            **kwargs,
        )

    def _read_depth_file(self, rel_path):
        """Decode a PanoSUNCG depth PNG into metres."""
        img_path = os.path.join(self.dataset_dir, rel_path)
        depth_in = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
        # cv2.imread returns None on a missing/unreadable file; fail loudly
        # instead of raising an opaque TypeError on the indexing below.
        if depth_in is None:
            raise FileNotFoundError(f"Cannot read depth file: {img_path}")
        depth_in = depth_in[..., 0]  # all channels are identical; keep one
        depth_decoded = depth_in / 20.0  # stored units -> metres
        return depth_decoded
DA-2-repo/eval/datasets/splits/2d3ds.txt ADDED
The diff for this file is too large to render. See raw diff
 
DA-2-repo/eval/datasets/splits/matterport3d.txt ADDED
The diff for this file is too large to render. See raw diff