Upload folder using huggingface_hub
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the remaining files.
- .gitattributes +20 -0
- .gitignore +6 -0
- DA-2-repo/LICENSE +201 -0
- DA-2-repo/README.md +103 -0
- DA-2-repo/app.py +165 -0
- DA-2-repo/assets/badges/icon2.png +0 -0
- DA-2-repo/assets/badges/teaser.jpg +3 -0
- DA-2-repo/assets/demos/a0.png +3 -0
- DA-2-repo/assets/demos/a1.png +3 -0
- DA-2-repo/assets/demos/a10.png +3 -0
- DA-2-repo/assets/demos/a11.png +3 -0
- DA-2-repo/assets/demos/a2.png +3 -0
- DA-2-repo/assets/demos/a3.png +3 -0
- DA-2-repo/assets/demos/a4.png +3 -0
- DA-2-repo/assets/demos/a5.png +3 -0
- DA-2-repo/assets/demos/a6.png +3 -0
- DA-2-repo/assets/demos/a7.png +3 -0
- DA-2-repo/assets/demos/a8.png +3 -0
- DA-2-repo/assets/demos/a9.png +3 -0
- DA-2-repo/assets/demos/b0.png +3 -0
- DA-2-repo/assets/demos/b1.png +3 -0
- DA-2-repo/assets/demos/b2.png +3 -0
- DA-2-repo/assets/demos/b3.png +3 -0
- DA-2-repo/assets/demos/b4.png +3 -0
- DA-2-repo/assets/demos/b5.png +3 -0
- DA-2-repo/assets/masks/b0.png +0 -0
- DA-2-repo/assets/masks/b1.png +0 -0
- DA-2-repo/assets/masks/b2.png +0 -0
- DA-2-repo/assets/masks/b3.png +0 -0
- DA-2-repo/assets/masks/b4.png +0 -0
- DA-2-repo/assets/masks/b5.png +0 -0
- DA-2-repo/configs/accelerate/0.yaml +16 -0
- DA-2-repo/configs/accelerate/1.yaml +16 -0
- DA-2-repo/configs/accelerate/2.yaml +16 -0
- DA-2-repo/configs/accelerate/3.yaml +16 -0
- DA-2-repo/configs/accelerate/4.yaml +16 -0
- DA-2-repo/configs/accelerate/5.yaml +16 -0
- DA-2-repo/configs/accelerate/6.yaml +16 -0
- DA-2-repo/configs/accelerate/7.yaml +16 -0
- DA-2-repo/configs/eval.json +76 -0
- DA-2-repo/configs/infer.json +39 -0
- DA-2-repo/eval.py +29 -0
- DA-2-repo/eval.sh +7 -0
- DA-2-repo/eval/__init__.py +0 -0
- DA-2-repo/eval/datasets/__init__.py +35 -0
- DA-2-repo/eval/datasets/base_depth_dataset.py +268 -0
- DA-2-repo/eval/datasets/matterport3d_dataset.py +25 -0
- DA-2-repo/eval/datasets/panosuncg_dataset.py +26 -0
- DA-2-repo/eval/datasets/splits/2d3ds.txt +0 -0
- DA-2-repo/eval/datasets/splits/matterport3d.txt +0 -0
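
Note: a commit like this is typically produced with `huggingface_hub`'s `upload_folder` API, which uploads a local directory in a single commit and stores files matching the `.gitattributes` LFS patterns via Git LFS. A minimal sketch (the repo ID is a placeholder, not taken from this commit):

```python
from huggingface_hub import upload_folder

# Upload every file under the local folder in one commit; files matching
# the .gitattributes LFS patterns (see the diff below) are stored via Git LFS.
upload_folder(
    repo_id="your-username/your-repo",  # placeholder
    folder_path="DA-2-repo",
    commit_message="Upload folder using huggingface_hub",
)
```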
.gitattributes CHANGED
@@ -33,3 +33,23 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/badges/teaser.jpg filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/a0.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/a1.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/a10.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/a11.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/a2.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/a3.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/a4.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/a5.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/a6.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/a7.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/a8.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/a9.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/b0.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/b1.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/b2.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/b3.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/b4.png filter=lfs diff=lfs merge=lfs -text
+DA-2-repo/assets/demos/b5.png filter=lfs diff=lfs merge=lfs -text
+model.onnx.data filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,6 @@
+venv/
+__pycache__/
+*.pyc
+.DS_Store
+*.safetensors
+.vscode/
DA-2-repo/LICENSE ADDED
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
DA-2-repo/README.md ADDED
@@ -0,0 +1,103 @@
+# <img src="assets/badges/icon2.png" alt="lotus" style="height:1.2em; vertical-align:bottom;"/> DA<sup>2</sup>: Depth Anything in Any Direction
+
+[Project Page](https://depth-any-in-any-dir.github.io/)
+[arXiv Paper](http://arxiv.org/abs/2509.26618)
+[🤗 Online Demo](https://huggingface.co/spaces/haodongli/DA-2)
+[🤗 Dataset](https://huggingface.co/datasets/haodongli/DA-2)
+[Slides](https://docs.google.com/presentation/d/1QUonqLuYGEh0qcqY72pbTXsZimINlyN4rOogy7qX4GY/edit?usp=sharing)
+[BibTeX](https://depth-any-in-any-dir.github.io/bibtex.txt)
+
+[Haodong Li](https://haodong2000.github.io/)<sup>123§</sup>,
+[Wangguangdong Zheng](https://wangguandongzheng.github.io/)<sup>1</sup>,
+[Jing He](https://jingheya.github.io/)<sup>3</sup>,
+[Yuhao Liu](https://yuhaoliu7456.github.io/)<sup>1</sup>,
+[Xin Lin](https://linxin0.github.io/)<sup>2</sup>,
+[Xin Yang](https://abnervictor.github.io/2023/06/12/Academic-Self-Intro.html)<sup>34</sup>,<br>
+[Ying-Cong Chen](https://www.yingcong.me/)<sup>34✉</sup>,
+[Chunchao Guo]()<sup>1✉</sup>
+
+<span class="author-block"><sup>1</sup>Tencent Hunyuan</span>
+<span class="author-block"><sup>2</sup>UC San Diego</span>
+<span class="author-block"><sup>3</sup>HKUST(GZ)</span>
+<span class="author-block"><sup>4</sup>HKUST</span><br>
+<span class="author-block">
+<sup>§</sup>Work primarily done during an internship at Tencent Hunyuan.
+<sup>✉</sup>Corresponding author.
+</span>
+
+![teaser](assets/badges/teaser.jpg)
+
+<strong>DA<sup>2</sup> predicts dense, scale-invariant distance from a single 360° panorama in an end-to-end manner, with remarkable geometric fidelity and strong zero-shot generalization.</strong>
+
+## 📢 News
+- 2025-10-10 The curated panoramic data is released on [huggingface](https://huggingface.co/datasets/haodongli/DA-2)!
+- 2025-10-10 The evaluation code and the [testing data](https://huggingface.co/datasets/haodongli/DA-2-Evaluation) are released!
+- 2025-10-04 The 🤗Huggingface Gradio demos ([online](https://huggingface.co/spaces/haodongli/DA-2) and [local](https://github.com/EnVision-Research/DA-2?tab=readme-ov-file#-gradio-demo)) are released!
+- 2025-10-04 The inference code and the [model](https://huggingface.co/haodongli/DA-2) are released!
+- 2025-10-01 [Paper](https://arxiv.org/abs/2509.26618) released on arXiv!
+
+## 🛠️ Setup
+> This installation was tested on: Ubuntu 20.04 LTS, Python 3.12, CUDA 12.2, NVIDIA GeForce RTX 3090.
+
+1. Clone the repository:
+```
+git clone https://github.com/EnVision-Research/DA-2.git
+cd DA-2
+```
+2. Install dependencies using conda:
+```
+conda create -n da-2 python=3.12 -y
+conda activate da-2
+pip install -e src
+```
+> For macOS users: Please remove `xformers==0.0.28.post2` (line 16) from `src/pyproject.toml` before `pip install -e src`, as [xFormers does not support macOS](https://github.com/facebookresearch/xformers/issues/775#issuecomment-1611284979).
+
+## 🤗 Gradio Demo
+1. Online demo: [Huggingface Space](https://huggingface.co/spaces/haodongli/DA-2)
+2. Local demo:
+```
+python app.py
+```
+
+## 🕹️ Inference
+> We've pre-uploaded the cases that appear on the [project page](https://depth-any-in-any-dir.github.io/), so you can proceed directly to step 3.
+
+1. Place input images in a directory, e.g., `assets/demos`.
+2. (Optional) Place masks (e.g., sky masks for outdoor images) in another directory, e.g., `assets/masks`. Filenames in the two directories must match.
+3. Run the inference command:
+```
+sh infer.sh
+```
+4. The visualized distance and normal maps will be saved at `output/infer/vis_all.png`. The projected 3D point clouds will be saved at `output/infer/3dpc`.
+
+## 🚗 Evaluation
+1. Download the evaluation datasets from [huggingface](https://huggingface.co/datasets/haodongli/DA-2-Evaluation):
+```
+cd [YOUR_DATA_DIR]
+huggingface-cli login
+hf download --repo-type dataset haodongli/DA-2-Evaluation --local-dir [YOUR_DATA_DIR]
+```
+2. Unzip the downloaded datasets:
+```
+tar -zxvf [DATA_NAME].tar.gz
+```
+3. Set `datasets_dir` (line 20) in `configs/eval.json` to `YOUR_DATA_DIR`.
+4. Run the evaluation command:
+```
+sh eval.sh
+```
+5. The results will be saved at `output/eval`.
+
+## 🎓 Citation
+If you find our work useful in your research, please consider citing our paper🌹:
+```bibtex
+@article{li2025depth,
+    title={DA$^{2}$: Depth Anything in Any Direction},
+    author={Li, Haodong and Zheng, Wangguangdong and He, Jing and Liu, Yuhao and Lin, Xin and Yang, Xin and Chen, Ying-Cong and Guo, Chunchao},
+    journal={arXiv preprint arXiv:2509.26618},
+    year={2025}
+}
+```
+
+## 🤝 Acknowledgement
+This implementation would be impossible without the awesome contributions of [MoGe](https://wangrc.site/MoGePage/), [UniK3D](https://lpiccinelli-eth.github.io/pub/unik3d/), [Lotus](https://lotus3d.github.io/), [Marigold](https://marigoldmonodepth.github.io/), [DINOv2](https://github.com/facebookresearch/dinov2), [Accelerate](https://github.com/huggingface/accelerate), [Gradio](https://github.com/gradio-app/gradio), [HuggingFace Hub](https://github.com/huggingface/huggingface_hub), and [PyTorch](https://pytorch.org/) to the open-source community.
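
Note: the README's download step uses the `hf` CLI; for completeness, a hedged Python equivalent via `huggingface_hub.snapshot_download` (the local path is a placeholder):

```python
from huggingface_hub import snapshot_download

# Python counterpart of:
#   hf download --repo-type dataset haodongli/DA-2-Evaluation --local-dir [YOUR_DATA_DIR]
snapshot_download(
    repo_id="haodongli/DA-2-Evaluation",
    repo_type="dataset",
    local_dir="/path/to/YOUR_DATA_DIR",  # placeholder
)
```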
DA-2-repo/app.py ADDED
@@ -0,0 +1,165 @@
+import os
+from accelerate import Accelerator
+from accelerate.logging import get_logger
+from accelerate.utils import (
+    InitProcessGroupKwargs,
+    ProjectConfiguration,
+    set_seed
+)
+import torch
+from contextlib import nullcontext
+import trimesh
+import gradio as gr
+from gradio_imageslider import ImageSlider
+from da2.utils.base import load_config
+from da2.utils.model import load_model
+from da2.utils.io import (
+    read_cv2_image,
+    torch_transform,
+    tensorize
+)
+from da2.utils.vis import colorize_distance
+from da2.utils.d2pc import distance2pointcloud
+from datetime import (
+    timedelta,
+    datetime
+)
+import cv2
+import numpy as np
+
+last_glb_path = None
+
+def prepare_to_run_demo():
+    config = load_config('configs/infer.json')
+    kwargs = InitProcessGroupKwargs(timeout=timedelta(seconds=config['accelerator']['timeout']))
+    output_dir = f'output/infer'
+    if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
+    accu_steps = config['accelerator']['accumulation_nsteps']
+    accelerator = Accelerator(
+        gradient_accumulation_steps=accu_steps,
+        mixed_precision=config['accelerator']['mixed_precision'],
+        log_with=config['accelerator']['report_to'],
+        project_config=ProjectConfiguration(project_dir=output_dir),
+        kwargs_handlers=[kwargs]
+    )
+    logger = get_logger(__name__, log_level='INFO')
+    config['env']['logger'] = logger
+    set_seed(config['env']['seed'])
+    return config, accelerator
+
+def read_mask_demo(mask_path, shape):
+    if mask_path is None:
+        return np.ones(shape[1:]) > 0
+    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
+    mask = mask > 0
+    return mask
+
+def load_infer_data_demo(image, mask, model_dtype, device):
+    cv2_image = read_cv2_image(image)
+    image = torch_transform(cv2_image)
+    mask = read_mask_demo(mask, image.shape)
+    image = tensorize(image, model_dtype, device)
+    return image, cv2_image, mask
+
+def ply2glb(ply_path, glb_path):
+    pcd = trimesh.load(ply_path)
+    points = np.asarray(pcd.vertices)
+    colors = np.asarray(pcd.visual.vertex_colors)
+    cloud = trimesh.points.PointCloud(vertices=points, colors=colors)
+    cloud.export(glb_path)
+    os.remove(ply_path)
+
+def fn(image_path, mask_path):
+    global last_glb_path
+    config, accelerator = prepare_to_run_demo()
+    model = load_model(config, accelerator)
+    image, cv2_image, mask = load_infer_data_demo(image_path, mask_path,
+        model_dtype=config['spherevit']['dtype'], device=accelerator.device)
+    if torch.backends.mps.is_available():
+        autocast_ctx = nullcontext()
+    else:
+        autocast_ctx = torch.autocast(accelerator.device.type)
+    with autocast_ctx, torch.no_grad():
+        distance = model(image).cpu().numpy()[0]
+    if last_glb_path is not None:
+        os.remove(last_glb_path)
+    distance_vis = colorize_distance(distance, mask)
+    save_path = f'cache/tmp_{datetime.now().strftime("%Y%m%d_%H%M%S")}.glb'
+    last_glb_path = save_path
+    normal_image = distance2pointcloud(distance, cv2_image, mask, save_path=save_path.replace('.glb', '.ply'), return_normal=True, save_distance=False)
+    ply2glb(save_path.replace('.glb', '.ply'), save_path)
+    return save_path, [distance_vis, normal_image]
+
+inputs = [
+    gr.Image(label="Input Image", type="filepath"),
+    gr.Image(label="Input Mask", type="filepath"),
+]
+outputs = [
+    gr.Model3D(clear_color=[0.0, 0.0, 0.0, 0.0], label="3D Point Cloud"),
+    gr.ImageSlider(
+        label="Output Depth / Normal (transformed from the depth)",
+        type="pil",
+        slider_position=75,
+    )
+]
+
+demo = gr.Interface(
+    fn=fn,
+    title="DA<sup>2</sup>: <u>D</u>epth <u>A</u>nything in <u>A</u>ny <u>D</u>irection",
+    description="""
+    <p align="center">
+    <a title="Project Page" href="https://depth-any-in-any-dir.github.io/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+        <img src="https://img.shields.io/badge/Project-Website-pink?logo=googlechrome&logoColor=white">
+    </a>
+    <a title="arXiv" href="http://arxiv.org/abs/2509.26618" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+        <img src="https://img.shields.io/badge/arXiv-Paper-b31b1b?logo=arxiv&logoColor=white">
+    </a>
+    <a title="Github" href="https://github.com/EnVision-Research/DA-2" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+        <img src="https://img.shields.io/github/stars/EnVision-Research/DA-2?label=GitHub%20%E2%98%85&logo=github&color=C8C" alt="badge-github-stars">
+    </a>
+    <a title="Social" href="https://x.com/_akhaliq/status/1973283687652606411" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+        <img src="https://www.obukhov.ai/img/badges/badge-social.svg" alt="social">
+    </a>
+    <a title="Social" href="https://x.com/haodongli00/status/1973287870317338747" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+        <img src="https://www.obukhov.ai/img/badges/badge-social.svg" alt="social">
+    </a>
+    <br>
+    <strong>Please consider starring <span style="color: orange">★</span> our <a href="https://github.com/EnVision-Research/DA-2" target="_blank" rel="noopener noreferrer">GitHub Repo</a> if you find this demo useful!</strong>
+    </p>
+    <p><strong>Note: the "Input Mask" is optional, all pixels are assumed to be valid if mask is None.</strong></p>
+    """,
+    inputs=inputs,
+    outputs=outputs,
+    examples=[
+        [os.path.join(os.path.dirname(__file__), "assets/demos/a1.png"), None],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/a2.png"), None],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/a3.png"), None],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/a4.png"), None],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/b0.png"),
+         os.path.join(os.path.dirname(__file__), "assets/masks/b0.png")],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/b1.png"),
+         os.path.join(os.path.dirname(__file__), "assets/masks/b1.png")],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/a5.png"), None],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/a6.png"), None],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/a7.png"), None],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/a8.png"), None],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/b2.png"),
+         os.path.join(os.path.dirname(__file__), "assets/masks/b2.png")],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/b3.png"),
+         os.path.join(os.path.dirname(__file__), "assets/masks/b3.png")],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/a9.png"), None],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/a10.png"), None],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/a11.png"), None],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/a0.png"), None],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/b4.png"),
+         os.path.join(os.path.dirname(__file__), "assets/masks/b4.png")],
+        [os.path.join(os.path.dirname(__file__), "assets/demos/b5.png"),
+         os.path.join(os.path.dirname(__file__), "assets/masks/b5.png")],
+    ],
+    examples_per_page=20
+)
+
+demo.launch(
+    server_name="0.0.0.0",
+    server_port=6381,
+)
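
Note: the `ply2glb` helper above re-encodes the saved point cloud so that Gradio's `Model3D` component can display it. The same conversion in isolation (file names are placeholders):

```python
import numpy as np
import trimesh

# Load a colored point cloud from PLY and re-export it as GLB,
# mirroring the ply2glb helper in app.py.
pcd = trimesh.load("cloud.ply")
cloud = trimesh.points.PointCloud(
    vertices=np.asarray(pcd.vertices),
    colors=np.asarray(pcd.visual.vertex_colors),
)
cloud.export("cloud.glb")
```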
DA-2-repo/assets/badges/icon2.png ADDED
DA-2-repo/assets/badges/teaser.jpg ADDED (Git LFS)
DA-2-repo/assets/demos/a0.png ADDED (Git LFS)
DA-2-repo/assets/demos/a1.png ADDED (Git LFS)
DA-2-repo/assets/demos/a10.png ADDED (Git LFS)
DA-2-repo/assets/demos/a11.png ADDED (Git LFS)
DA-2-repo/assets/demos/a2.png ADDED (Git LFS)
DA-2-repo/assets/demos/a3.png ADDED (Git LFS)
DA-2-repo/assets/demos/a4.png ADDED (Git LFS)
DA-2-repo/assets/demos/a5.png ADDED (Git LFS)
DA-2-repo/assets/demos/a6.png ADDED (Git LFS)
DA-2-repo/assets/demos/a7.png ADDED (Git LFS)
DA-2-repo/assets/demos/a8.png ADDED (Git LFS)
DA-2-repo/assets/demos/a9.png ADDED (Git LFS)
DA-2-repo/assets/demos/b0.png ADDED (Git LFS)
DA-2-repo/assets/demos/b1.png ADDED (Git LFS)
DA-2-repo/assets/demos/b2.png ADDED (Git LFS)
DA-2-repo/assets/demos/b3.png ADDED (Git LFS)
DA-2-repo/assets/demos/b4.png ADDED (Git LFS)
DA-2-repo/assets/demos/b5.png ADDED (Git LFS)
DA-2-repo/assets/masks/b0.png ADDED
DA-2-repo/assets/masks/b1.png ADDED
DA-2-repo/assets/masks/b2.png ADDED
DA-2-repo/assets/masks/b3.png ADDED
DA-2-repo/assets/masks/b4.png ADDED
DA-2-repo/assets/masks/b5.png ADDED
DA-2-repo/configs/accelerate/0.yaml ADDED
@@ -0,0 +1,16 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: 'NO'
+downcast_bf16: 'no'
+gpu_ids: '0'
+machine_rank: 0
+main_training_function: main
+mixed_precision: 'no'
+num_machines: 1
+num_processes: 1
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false

DA-2-repo/configs/accelerate/1.yaml ADDED
@@ -0,0 +1,16 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: 'NO'
+downcast_bf16: 'no'
+gpu_ids: '1'
+machine_rank: 0
+main_training_function: main
+mixed_precision: 'no'
+num_machines: 1
+num_processes: 1
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false

DA-2-repo/configs/accelerate/2.yaml ADDED
@@ -0,0 +1,16 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: 'NO'
+downcast_bf16: 'no'
+gpu_ids: '2'
+machine_rank: 0
+main_training_function: main
+mixed_precision: 'no'
+num_machines: 1
+num_processes: 1
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false

DA-2-repo/configs/accelerate/3.yaml ADDED
@@ -0,0 +1,16 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: 'NO'
+downcast_bf16: 'no'
+gpu_ids: '3'
+machine_rank: 0
+main_training_function: main
+mixed_precision: 'no'
+num_machines: 1
+num_processes: 1
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false

DA-2-repo/configs/accelerate/4.yaml ADDED
@@ -0,0 +1,16 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: 'NO'
+downcast_bf16: 'no'
+gpu_ids: '4'
+machine_rank: 0
+main_training_function: main
+mixed_precision: 'no'
+num_machines: 1
+num_processes: 1
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false

DA-2-repo/configs/accelerate/5.yaml ADDED
@@ -0,0 +1,16 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: 'NO'
+downcast_bf16: 'no'
+gpu_ids: '5'
+machine_rank: 0
+main_training_function: main
+mixed_precision: 'no'
+num_machines: 1
+num_processes: 1
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false

DA-2-repo/configs/accelerate/6.yaml ADDED
@@ -0,0 +1,16 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: 'NO'
+downcast_bf16: 'no'
+gpu_ids: '6'
+machine_rank: 0
+main_training_function: main
+mixed_precision: 'no'
+num_machines: 1
+num_processes: 1
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false

DA-2-repo/configs/accelerate/7.yaml ADDED
@@ -0,0 +1,16 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: 'NO'
+downcast_bf16: 'no'
+gpu_ids: '7'
+machine_rank: 0
+main_training_function: main
+mixed_precision: 'no'
+num_machines: 1
+num_processes: 1
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false
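
Note: the eight accelerate configs are identical except for `gpu_ids`, giving one single-process config per GPU so `eval.sh` can pin a run to a device via `$CUDA`. Purely as an illustration (this script is not part of the repo), `1.yaml` through `7.yaml` could be regenerated from `0.yaml`:

```python
from pathlib import Path

# Derive 1.yaml..7.yaml from 0.yaml by swapping the pinned GPU id.
base = Path("configs/accelerate/0.yaml").read_text()
for gpu in range(1, 8):
    Path(f"configs/accelerate/{gpu}.yaml").write_text(
        base.replace("gpu_ids: '0'", f"gpu_ids: '{gpu}'")
    )
```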
DA-2-repo/configs/eval.json ADDED
@@ -0,0 +1,76 @@
+{
+    "env": {
+        "seed": 42,
+        "verbose": true
+    },
+    "accelerator": {
+        "report_to": ["tensorboard"],
+        "mixed_precision": "fp16",
+        "accumulation_nsteps": 4,
+        "timeout": 36000
+    },
+    "inference": {
+        "images": "assets/demos",
+        "masks": "assets/masks",
+        "min_pixels": 580000,
+        "max_pixels": 620000
+    },
+    "evaluation": {
+        "alignment": "median",
+        "datasets_dir": "/home/haodong/data/DA-2/Evaluation",
+        "metric_names": [
+            "abs_relative_difference",
+            "squared_relative_difference",
+            "rmse_linear",
+            "rmse_log",
+            "log10",
+            "delta1_acc",
+            "delta2_acc",
+            "delta3_acc",
+            "i_rmse",
+            "silog_rmse"
+        ],
+        "metric_show": {
+            "abs_relative_difference": "AbsRel",
+            "delta1_acc": "δ_1"
+        },
+        "datasets": {
+            "2d3ds": {
+                "dir": "2D3DS/wo_xyz",
+                "filenames": "eval/datasets/splits/2d3ds.txt",
+                "alignment_max_res": 2048
+            },
+            "matterport3d": {
+                "dir": "Matterport3D",
+                "filenames": "eval/datasets/splits/matterport3d.txt",
+                "alignment_max_res": 2048
+            },
+            "panosuncg": {
+                "dir": "PanoSUNCG/rotated",
+                "filenames": "eval/datasets/splits/panosuncg.txt",
+                "alignment_max_res": 1024
+            }
+        }
+    },
+    "spherevit": {
+        "vit_w_esphere": {
+            "input_dims": [1024, 1024, 1024, 1024],
+            "hidden_dim": 512,
+            "num_heads": 8,
+            "expansion": 4,
+            "num_layers_head": [2, 2, 2],
+            "dropout": 0.0,
+            "layer_scale": 0.0001,
+            "out_dim": 64,
+            "kernel_size": 3,
+            "num_prompt_blocks": 1,
+            "use_norm": false
+        },
+        "sphere": {
+            "width": 1092,
+            "height": 546,
+            "hfov": 6.2832,
+            "vfov": 3.1416
+        }
+    }
+}
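
Note: the `sphere` block encodes a full 360°×180° panorama; `hfov` and `vfov` are in radians, and the resolution keeps the standard 2:1 equirectangular aspect. A quick check:

```python
import math

# hfov = 2π (full horizontal turn), vfov = π (pole to pole),
# and 1092 x 546 is the canonical 2:1 equirectangular aspect.
assert abs(6.2832 - 2 * math.pi) < 1e-3
assert abs(3.1416 - math.pi) < 1e-4
assert 1092 / 546 == 2.0
```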
DA-2-repo/configs/infer.json ADDED
@@ -0,0 +1,39 @@
+{
+    "env": {
+        "seed": 42,
+        "verbose": true
+    },
+    "accelerator": {
+        "report_to": ["tensorboard"],
+        "mixed_precision": "fp16",
+        "accumulation_nsteps": 4,
+        "timeout": 36000
+    },
+    "inference": {
+        "images": "assets/demos",
+        "masks": "assets/masks",
+        "min_pixels": 580000,
+        "max_pixels": 620000
+    },
+    "spherevit": {
+        "vit_w_esphere": {
+            "input_dims": [1024, 1024, 1024, 1024],
+            "hidden_dim": 512,
+            "num_heads": 8,
+            "expansion": 4,
+            "num_layers_head": [2, 2, 2],
+            "dropout": 0.0,
+            "layer_scale": 0.0001,
+            "out_dim": 64,
+            "kernel_size": 3,
+            "num_prompt_blocks": 1,
+            "use_norm": false
+        },
+        "sphere": {
+            "width": 1092,
+            "height": 546,
+            "hfov": 6.2832,
+            "vfov": 3.1416
+        }
+    }
+}
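
Note: `app.py` reads this file through `da2.utils.base.load_config`, whose implementation is not part of this diff. Since the file is plain JSON, a simple stand-in for inspecting it:

```python
import json

# Plain-JSON stand-in for da2.utils.base.load_config (the real helper may do more).
with open("configs/infer.json") as f:
    config = json.load(f)

print(config["accelerator"]["mixed_precision"])  # fp16
print(config["spherevit"]["sphere"]["width"])    # 1092
```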
DA-2-repo/eval.py ADDED
@@ -0,0 +1,29 @@
+import os
+import torch
+from contextlib import nullcontext
+from tqdm import tqdm
+from da2 import (
+    prepare_to_run,
+    load_model
+)
+from eval.utils import run_evaluation
+
+
+def eval(model, config, accelerator, output_dir):
+    model = model.eval()
+    eval_datasets = config['evaluation']['datasets']
+    if accelerator.is_main_process:
+        if torch.backends.mps.is_available():
+            autocast_ctx = nullcontext()
+        else:
+            autocast_ctx = torch.autocast(accelerator.device.type)
+        with autocast_ctx, torch.no_grad():
+            for dataset_name in eval_datasets.keys():
+                metrics = run_evaluation(model, config, dataset_name, output_dir, accelerator.device)
+                for metric_name in config['evaluation']['metric_show']:
+                    config['env']['logger'].info(f"\033[92mEVAL --> {dataset_name}: {config['evaluation']['metric_show'][metric_name]} = {metrics[metric_name]}.\033[0m")
+
+if __name__ == '__main__':
+    config, accelerator, output_dir = prepare_to_run()
+    model = load_model(config, accelerator)
+    eval(model, config, accelerator, output_dir)
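
Note: both `eval.py` and `app.py` guard `torch.autocast` behind an MPS check, presumably because autocast support on Apple's MPS backend is limited. The pattern in isolation:

```python
import torch
from contextlib import nullcontext

def inference_context(device_type: str):
    # No-op context on Apple MPS (limited autocast support);
    # mixed-precision autocast elsewhere.
    if torch.backends.mps.is_available():
        return nullcontext()
    return torch.autocast(device_type)
```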
DA-2-repo/eval.sh ADDED
@@ -0,0 +1,7 @@
+export CUDA=0
+export CONFIG_PATH="configs/eval.json"
+accelerate launch \
+    --config_file=configs/accelerate/$CUDA.yaml \
+    --mixed_precision="fp16" \
+    --main_process_port="12345" \
+    eval.py --config_path=$CONFIG_PATH
DA-2-repo/eval/__init__.py ADDED (empty file)
DA-2-repo/eval/datasets/__init__.py ADDED
@@ -0,0 +1,35 @@
+# Authors: Bingxin Ke, Haodong Li
+# Last modified: 2025-05-25
+# Note: Add PanoSUNCGDataset, Matterport3DDataset, Stanford2D3DSDataset for 360° depth (or distance) evaluation.
+
+import os
+
+from .base_depth_dataset import BaseDepthDataset, get_pred_name, DatasetMode
+from .stanford2d3ds_dataset import Stanford2D3DSDataset
+from .matterport3d_dataset import Matterport3DDataset
+from .panosuncg_dataset import PanoSUNCGDataset
+
+dataset_name_class_dict = {
+    "2d3ds": Stanford2D3DSDataset,
+    "matterport3d": Matterport3DDataset,
+    "panosuncg": PanoSUNCGDataset
+}
+
+
+def get_dataset(
+    cfg_data_split, dataset_name, base_data_dir: str, mode: DatasetMode, **kwargs
+) -> BaseDepthDataset:
+    if dataset_name in dataset_name_class_dict.keys():
+        dataset_class = dataset_name_class_dict[dataset_name]
+        dataset = dataset_class(
+            mode=mode,
+            filename_ls_path=cfg_data_split['filenames'],
+            dataset_dir=os.path.join(base_data_dir, cfg_data_split['dir']),
+            disp_name=dataset_name,
+            **cfg_data_split,
+            **kwargs,
+        )
+    else:
+        raise NotImplementedError
+
+    return dataset
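
Note: a hypothetical call wiring `get_dataset` to the `matterport3d` entry from `configs/eval.json`; `base_data_dir` corresponds to `evaluation.datasets_dir`, and the path here is a placeholder:

```python
from eval.datasets import get_dataset, DatasetMode

cfg_data_split = {
    "dir": "Matterport3D",
    "filenames": "eval/datasets/splits/matterport3d.txt",
    "alignment_max_res": 2048,
}
dataset = get_dataset(
    cfg_data_split,
    dataset_name="matterport3d",
    base_data_dir="/path/to/DA-2/Evaluation",  # placeholder
    mode=DatasetMode.EVAL,
)
print(len(dataset))  # number of (rgb, depth) pairs in the split file
```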
DA-2-repo/eval/datasets/base_depth_dataset.py ADDED
@@ -0,0 +1,268 @@
+# Author: Bingxin Ke
+# Last modified: 2024-04-15
+
+import io
+import os
+import random
+import tarfile
+from enum import Enum
+
+import numpy as np
+import cv2
+import torch
+from PIL import Image
+from torch.utils.data import Dataset
+from torchvision.transforms import InterpolationMode, Resize
+
+
+class DatasetMode(Enum):
+    RGB_ONLY = "rgb_only"
+    EVAL = "evaluate"
+    TRAIN = "train"
+
+
+def read_image_from_tar(tar_obj, img_rel_path):
+    image = tar_obj.extractfile("./" + img_rel_path)
+    image = image.read()
+    image = Image.open(io.BytesIO(image))
+    return image  # decoded PIL image
+
+
+class BaseDepthDataset(Dataset):
+    def __init__(
+        self,
+        mode: DatasetMode,
+        filename_ls_path: str,
+        dataset_dir: str,
+        disp_name: str,
+        min_depth,
+        max_depth,
+        has_filled_depth,
+        name_mode,
+        depth_transform=None,
+        augmentation_args: dict = None,
+        resize_to_hw=None,
+        move_invalid_to_far_plane: bool = True,
+        rgb_transform=lambda x: x / 255.0 * 2 - 1,  # [0, 255] -> [-1, 1]
+        **kwargs,
+    ) -> None:
+        super().__init__()
+        self.mode = mode
+        # dataset info
+        self.filename_ls_path = filename_ls_path
+        self.dataset_dir = dataset_dir
+        self.disp_name = disp_name
+        self.has_filled_depth = has_filled_depth
+        self.name_mode: DepthFileNameMode = name_mode
+        self.min_depth = min_depth
+        self.max_depth = max_depth
+
+        # training arguments
+        self.depth_transform = depth_transform
+        self.augm_args = augmentation_args
+        self.resize_to_hw = resize_to_hw
+        self.rgb_transform = rgb_transform
+        self.move_invalid_to_far_plane = move_invalid_to_far_plane
+
+        # Load filenames
+        with open(self.filename_ls_path, "r") as f:
+            self.filenames = [
+                s.split() for s in f.readlines()
+            ]  # [['rgb.png', 'depth.tif'], [], ...]
+
+        # Tar dataset
+        self.tar_obj = None
+        self.is_tar = (
+            True
+            if os.path.isfile(dataset_dir) and tarfile.is_tarfile(dataset_dir)
+            else False
+        )
+
+    def __len__(self):
+        return len(self.filenames)
+
+    def __getitem__(self, index):
+        rasters, other = self._get_data_item(index)
+        if DatasetMode.TRAIN == self.mode:
+            rasters = self._training_preprocess(rasters)
+        # merge
+        outputs = rasters
+        outputs.update(other)
+        return outputs
+
+    def _get_data_item(self, index):
+        rgb_rel_path, depth_rel_path, filled_rel_path = self._get_data_path(index=index)
+
+        rasters = {}
+
+        # RGB data
+        rasters.update(self._load_rgb_data(rgb_rel_path=rgb_rel_path))
+
+        # Depth data
+        if DatasetMode.RGB_ONLY != self.mode:
+            # load data
+            depth_data = self._load_depth_data(
+                depth_rel_path=depth_rel_path, filled_rel_path=filled_rel_path
+            )
+            rasters.update(depth_data)
+            # valid mask
+            rasters["valid_mask_raw"] = self._get_valid_mask(
+                rasters["depth_raw_linear"]
+            ).clone()
+            rasters["valid_mask_filled"] = self._get_valid_mask(
+                rasters["depth_filled_linear"]
+            ).clone()
+
+        other = {"index": index, "rgb_relative_path": rgb_rel_path}
+
+        return rasters, other
+
+    def _load_rgb_data(self, rgb_rel_path):
+        # Read RGB data
+        rgb = self._read_rgb_file(rgb_rel_path)
+
+        outputs = {
+            "rgb_int": torch.from_numpy(rgb).int(),
+        }
+        return outputs
+
+    def _load_depth_data(self, depth_rel_path, filled_rel_path):
+        # Read depth data
+        outputs = {}
+        depth_raw = self._read_depth_file(depth_rel_path).squeeze()
+        depth_raw_linear = torch.from_numpy(depth_raw).float().unsqueeze(0)  # [1, H, W]
+        outputs["depth_raw_linear"] = depth_raw_linear.clone()
+
+        if self.has_filled_depth:
+            depth_filled = self._read_depth_file(filled_rel_path).squeeze()
+            depth_filled_linear = torch.from_numpy(depth_filled).float().unsqueeze(0)
+            outputs["depth_filled_linear"] = depth_filled_linear
+        else:
+            outputs["depth_filled_linear"] = depth_raw_linear.clone()
+
+        return outputs
+
+    def _get_data_path(self, index):
+        filename_line = self.filenames[index]
+
+        # Get data path
+        rgb_rel_path = filename_line[0]
+
+        depth_rel_path, filled_rel_path = None, None
+        if DatasetMode.RGB_ONLY != self.mode:
+            depth_rel_path = filename_line[1]
+            if self.has_filled_depth:
+                filled_rel_path = filename_line[2]
+        return rgb_rel_path, depth_rel_path, filled_rel_path
+
+    def _read_image(self, img_rel_path) -> np.ndarray:
+        if self.is_tar:
+            if self.tar_obj is None:
+                self.tar_obj = tarfile.open(self.dataset_dir)
+            image = self.tar_obj.extractfile("./" + img_rel_path)
+            image = image.read()
+            image = Image.open(io.BytesIO(image))  # [H, W, rgb]
+        else:
+            img_path = os.path.join(self.dataset_dir, img_rel_path)
+            image = Image.open(img_path).convert('RGB')
+        image = np.asarray(image)
+        return image
+
+    def _read_depth_cv2(self, img_rel_path) -> np.ndarray:
+        depth_path = os.path.join(self.dataset_dir, img_rel_path)
+        depth_in = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
+        if depth_in.ndim == 3 and depth_in.shape[2] == 3:  # If image has 3 channels
+            depth_in = depth_in[..., 0]  # PANO
+        depth_in = depth_in.astype(np.float32)
+        return depth_in
+
+    def _read_rgb_file(self, rel_path) -> np.ndarray:
+        rgb = self._read_image(rel_path)
+        # Handle RGBA images by converting to RGB
+        if rgb.shape[2] == 4:  # If image has 4 channels (RGBA)
+            rgb = rgb[:, :, :3]  # Take only the RGB channels
+        rgb = np.transpose(rgb, (2, 0, 1)).astype(int)  # [rgb, H, W]
+        return rgb
+
+    def _read_depth_file(self, rel_path):
+        depth_in = self._read_image(rel_path)
+        # Replace code below to decode depth according to dataset definition
+        depth_decoded = depth_in
+
+        return depth_decoded
+
+    def _get_valid_mask(self, depth: torch.Tensor):
+        valid_mask = torch.logical_and(
+            (depth > self.min_depth), (depth < self.max_depth)
+        ).bool()
+        return valid_mask
+
+    def _training_preprocess(self, rasters):
+        # Augmentation
+        if self.augm_args is not None:
+            rasters = self._augment_data(rasters)
+
+        # Normalization
+        rasters["depth_raw_norm"] = self.depth_transform(
+            rasters["depth_raw_linear"], rasters["valid_mask_raw"]
+        ).clone()
+        rasters["depth_filled_norm"] = self.depth_transform(
+            rasters["depth_filled_linear"], rasters["valid_mask_filled"]
+        ).clone()
+
+        # Set invalid pixel to far plane
+        if self.move_invalid_to_far_plane:
+            if self.depth_transform.far_plane_at_max:
+                rasters["depth_filled_norm"][~rasters["valid_mask_filled"]] = (
+                    self.depth_transform.norm_max
+                )
+            else:
+                rasters["depth_filled_norm"][~rasters["valid_mask_filled"]] = (
+                    self.depth_transform.norm_min
+                )
+
+        # Resize
+        if self.resize_to_hw is not None:
+            resize_transform = Resize(
+                size=self.resize_to_hw, interpolation=InterpolationMode.NEAREST_EXACT
+            )
+            rasters = {k: resize_transform(v) for k, v in rasters.items()}
+
+        return rasters
+
+    def _augment_data(self, rasters_dict):
+        # lr flipping
+        lr_flip_p = self.augm_args.lr_flip_p
+        if random.random() < lr_flip_p:
+            rasters_dict = {k: v.flip(-1) for k, v in rasters_dict.items()}
+
+        return rasters_dict
+
+    def __del__(self):
+        if self.tar_obj is not None:
+            self.tar_obj.close()
+            self.tar_obj = None
+
+
+# Prediction file naming modes
+class DepthFileNameMode(Enum):
+    id = 1  # id.png
+    rgb_id = 2  # rgb_id.png
+    i_d_rgb = 3  # i_d_1_rgb.png
+    rgb_i_d = 4
+
+
+def get_pred_name(rgb_basename, name_mode, suffix=".png"):
+    if DepthFileNameMode.rgb_id == name_mode:
+        pred_basename = "pred_" + rgb_basename.split("_")[1]
+    elif DepthFileNameMode.i_d_rgb == name_mode:
+        pred_basename = rgb_basename.replace("_rgb.", "_pred.")
+    elif DepthFileNameMode.id == name_mode:
+        pred_basename = "pred_" + rgb_basename
+    elif DepthFileNameMode.rgb_i_d == name_mode:
+        pred_basename = "pred_" + "_".join(rgb_basename.split("_")[1:])
+    else:
+        raise NotImplementedError
+    # change suffix
+    pred_basename = os.path.splitext(pred_basename)[0] + suffix
+
+    return pred_basename
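
Note: illustrative filenames (not taken from the datasets) showing how each `DepthFileNameMode` maps an RGB basename to its prediction name via `get_pred_name`:

```python
from eval.datasets.base_depth_dataset import DepthFileNameMode, get_pred_name

get_pred_name("0001.png", DepthFileNameMode.id)                 # 'pred_0001.png'
get_pred_name("rgb_0001.png", DepthFileNameMode.rgb_id)         # 'pred_0001.png'
get_pred_name("scene1_d_0_rgb.png", DepthFileNameMode.i_d_rgb)  # 'scene1_d_0_pred.png'
get_pred_name("rgb_scene1_0.png", DepthFileNameMode.rgb_i_d)    # 'pred_scene1_0.png'
```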
DA-2-repo/eval/datasets/matterport3d_dataset.py ADDED
@@ -0,0 +1,25 @@
+# Author: Haodong Li
+# Last modified: 2025-05-25
+
+from .base_depth_dataset import BaseDepthDataset, DepthFileNameMode
+import cv2
+import os
+
+class Matterport3DDataset(BaseDepthDataset):
+    def __init__(
+        self,
+        **kwargs,
+    ) -> None:
+        super().__init__(
+            min_depth=1e-3,
+            max_depth=5,
+            has_filled_depth=False,
+            name_mode=DepthFileNameMode.id,
+            **kwargs,
+        )
+
+    def _read_depth_file(self, rel_path):
+        img_path = os.path.join(self.dataset_dir, rel_path)
+        depth_in = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
+        depth_decoded = depth_in / 2560.0
+        return depth_decoded
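
Note: the `2560.0` divisor converts Matterport3D's 16-bit depth PNGs to meters; a worked example with an illustrative pixel value:

```python
raw_value = 12800             # example 16-bit PNG pixel value
depth_m = raw_value / 2560.0  # decode used in _read_depth_file above
assert depth_m == 5.0         # equals the dataset's max_depth cutoff
```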
DA-2-repo/eval/datasets/panosuncg_dataset.py ADDED
@@ -0,0 +1,26 @@
+# Author: Haodong Li
+# Last modified: 2025-05-25
+
+from .base_depth_dataset import BaseDepthDataset, DepthFileNameMode
+import cv2
+import os
+
+class PanoSUNCGDataset(BaseDepthDataset):
+    def __init__(
+        self,
+        **kwargs,
+    ) -> None:
+        super().__init__(
+            min_depth=1e-3,
+            max_depth=5,
+            has_filled_depth=False,
+            name_mode=DepthFileNameMode.id,
+            **kwargs,
+        )
+
+    def _read_depth_file(self, rel_path):
+        img_path = os.path.join(self.dataset_dir, rel_path)
+        depth_in = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
+        depth_in = depth_in[..., 0]
+        depth_decoded = depth_in / 20.0
+        return depth_decoded
DA-2-repo/eval/datasets/splits/2d3ds.txt ADDED
(The diff for this file is too large to render; see the raw diff.)

DA-2-repo/eval/datasets/splits/matterport3d.txt ADDED
(The diff for this file is too large to render; see the raw diff.)