Spaces:
Running
on
A10G
Running
on
A10G
Upload 29 files
Browse files- LICENSE +201 -0
- LICENSE_NVIDIA +99 -0
- LICENSE_WEIGHT +407 -0
- assets/sample_input/building.png +0 -0
- assets/sample_input/ceramic.png +0 -0
- assets/sample_input/fire.png +0 -0
- assets/sample_input/girl.png +0 -0
- assets/sample_input/hotdogs.png +0 -0
- assets/sample_input/hydrant.png +0 -0
- assets/sample_input/lamp.png +0 -0
- assets/sample_input/mailbox.png +0 -0
- assets/sample_input/owl.png +0 -0
- assets/sample_input/traffic.png +0 -0
- lrm/__init__.py +15 -0
- lrm/cam_utils.py +128 -0
- lrm/inferrer.py +260 -0
- lrm/models/__init__.py +15 -0
- lrm/models/encoders/__init__.py +15 -0
- lrm/models/encoders/dino_wrapper.py +59 -0
- lrm/models/generator.py +116 -0
- lrm/models/rendering/__init__.py +15 -0
- lrm/models/rendering/synthesizer.py +191 -0
- lrm/models/rendering/utils/__init__.py +9 -0
- lrm/models/rendering/utils/math_utils.py +118 -0
- lrm/models/rendering/utils/ray_marcher.py +65 -0
- lrm/models/rendering/utils/ray_sampler.py +81 -0
- lrm/models/rendering/utils/renderer.py +303 -0
- lrm/models/transformer.py +143 -0
- requirements.txt +7 -0
LICENSE
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Apache License
|
2 |
+
Version 2.0, January 2004
|
3 |
+
http://www.apache.org/licenses/
|
4 |
+
|
5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6 |
+
|
7 |
+
1. Definitions.
|
8 |
+
|
9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
11 |
+
|
12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13 |
+
the copyright owner that is granting the License.
|
14 |
+
|
15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
16 |
+
other entities that control, are controlled by, or are under common
|
17 |
+
control with that entity. For the purposes of this definition,
|
18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
19 |
+
direction or management of such entity, whether by contract or
|
20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22 |
+
|
23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24 |
+
exercising permissions granted by this License.
|
25 |
+
|
26 |
+
"Source" form shall mean the preferred form for making modifications,
|
27 |
+
including but not limited to software source code, documentation
|
28 |
+
source, and configuration files.
|
29 |
+
|
30 |
+
"Object" form shall mean any form resulting from mechanical
|
31 |
+
transformation or translation of a Source form, including but
|
32 |
+
not limited to compiled object code, generated documentation,
|
33 |
+
and conversions to other media types.
|
34 |
+
|
35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
36 |
+
Object form, made available under the License, as indicated by a
|
37 |
+
copyright notice that is included in or attached to the work
|
38 |
+
(an example is provided in the Appendix below).
|
39 |
+
|
40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41 |
+
form, that is based on (or derived from) the Work and for which the
|
42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
44 |
+
of this License, Derivative Works shall not include works that remain
|
45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46 |
+
the Work and Derivative Works thereof.
|
47 |
+
|
48 |
+
"Contribution" shall mean any work of authorship, including
|
49 |
+
the original version of the Work and any modifications or additions
|
50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
54 |
+
means any form of electronic, verbal, or written communication sent
|
55 |
+
to the Licensor or its representatives, including but not limited to
|
56 |
+
communication on electronic mailing lists, source code control systems,
|
57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
59 |
+
excluding communication that is conspicuously marked or otherwise
|
60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
61 |
+
|
62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
64 |
+
subsequently incorporated within the Work.
|
65 |
+
|
66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
71 |
+
Work and such Derivative Works in Source or Object form.
|
72 |
+
|
73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76 |
+
(except as stated in this section) patent license to make, have made,
|
77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78 |
+
where such license applies only to those patent claims licensable
|
79 |
+
by such Contributor that are necessarily infringed by their
|
80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
82 |
+
institute patent litigation against any entity (including a
|
83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84 |
+
or a Contribution incorporated within the Work constitutes direct
|
85 |
+
or contributory patent infringement, then any patent licenses
|
86 |
+
granted to You under this License for that Work shall terminate
|
87 |
+
as of the date such litigation is filed.
|
88 |
+
|
89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
90 |
+
Work or Derivative Works thereof in any medium, with or without
|
91 |
+
modifications, and in Source or Object form, provided that You
|
92 |
+
meet the following conditions:
|
93 |
+
|
94 |
+
(a) You must give any other recipients of the Work or
|
95 |
+
Derivative Works a copy of this License; and
|
96 |
+
|
97 |
+
(b) You must cause any modified files to carry prominent notices
|
98 |
+
stating that You changed the files; and
|
99 |
+
|
100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
101 |
+
that You distribute, all copyright, patent, trademark, and
|
102 |
+
attribution notices from the Source form of the Work,
|
103 |
+
excluding those notices that do not pertain to any part of
|
104 |
+
the Derivative Works; and
|
105 |
+
|
106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107 |
+
distribution, then any Derivative Works that You distribute must
|
108 |
+
include a readable copy of the attribution notices contained
|
109 |
+
within such NOTICE file, excluding those notices that do not
|
110 |
+
pertain to any part of the Derivative Works, in at least one
|
111 |
+
of the following places: within a NOTICE text file distributed
|
112 |
+
as part of the Derivative Works; within the Source form or
|
113 |
+
documentation, if provided along with the Derivative Works; or,
|
114 |
+
within a display generated by the Derivative Works, if and
|
115 |
+
wherever such third-party notices normally appear. The contents
|
116 |
+
of the NOTICE file are for informational purposes only and
|
117 |
+
do not modify the License. You may add Your own attribution
|
118 |
+
notices within Derivative Works that You distribute, alongside
|
119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
120 |
+
that such additional attribution notices cannot be construed
|
121 |
+
as modifying the License.
|
122 |
+
|
123 |
+
You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright [yyyy] [name of copyright owner]
|
190 |
+
|
191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192 |
+
you may not use this file except in compliance with the License.
|
193 |
+
You may obtain a copy of the License at
|
194 |
+
|
195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
196 |
+
|
197 |
+
Unless required by applicable law or agreed to in writing, software
|
198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200 |
+
See the License for the specific language governing permissions and
|
201 |
+
limitations under the License.
|
LICENSE_NVIDIA
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Copyright (c) 2021-2022, NVIDIA Corporation & affiliates. All rights
|
2 |
+
reserved.
|
3 |
+
|
4 |
+
|
5 |
+
NVIDIA Source Code License for EG3D
|
6 |
+
|
7 |
+
|
8 |
+
=======================================================================
|
9 |
+
|
10 |
+
1. Definitions
|
11 |
+
|
12 |
+
"Licensor" means any person or entity that distributes its Work.
|
13 |
+
|
14 |
+
"Software" means the original work of authorship made available under
|
15 |
+
this License.
|
16 |
+
|
17 |
+
"Work" means the Software and any additions to or derivative works of
|
18 |
+
the Software that are made available under this License.
|
19 |
+
|
20 |
+
The terms "reproduce," "reproduction," "derivative works," and
|
21 |
+
"distribution" have the meaning as provided under U.S. copyright law;
|
22 |
+
provided, however, that for the purposes of this License, derivative
|
23 |
+
works shall not include works that remain separable from, or merely
|
24 |
+
link (or bind by name) to the interfaces of, the Work.
|
25 |
+
|
26 |
+
Works, including the Software, are "made available" under this License
|
27 |
+
by including in or with the Work either (a) a copyright notice
|
28 |
+
referencing the applicability of this License to the Work, or (b) a
|
29 |
+
copy of this License.
|
30 |
+
|
31 |
+
2. License Grants
|
32 |
+
|
33 |
+
2.1 Copyright Grant. Subject to the terms and conditions of this
|
34 |
+
License, each Licensor grants to you a perpetual, worldwide,
|
35 |
+
non-exclusive, royalty-free, copyright license to reproduce,
|
36 |
+
prepare derivative works of, publicly display, publicly perform,
|
37 |
+
sublicense and distribute its Work and any resulting derivative
|
38 |
+
works in any form.
|
39 |
+
|
40 |
+
3. Limitations
|
41 |
+
|
42 |
+
3.1 Redistribution. You may reproduce or distribute the Work only
|
43 |
+
if (a) you do so under this License, (b) you include a complete
|
44 |
+
copy of this License with your distribution, and (c) you retain
|
45 |
+
without modification any copyright, patent, trademark, or
|
46 |
+
attribution notices that are present in the Work.
|
47 |
+
|
48 |
+
3.2 Derivative Works. You may specify that additional or different
|
49 |
+
terms apply to the use, reproduction, and distribution of your
|
50 |
+
derivative works of the Work ("Your Terms") only if (a) Your Terms
|
51 |
+
provide that the use limitation in Section 3.3 applies to your
|
52 |
+
derivative works, and (b) you identify the specific derivative
|
53 |
+
works that are subject to Your Terms. Notwithstanding Your Terms,
|
54 |
+
this License (including the redistribution requirements in Section
|
55 |
+
3.1) will continue to apply to the Work itself.
|
56 |
+
|
57 |
+
3.3 Use Limitation. The Work and any derivative works thereof only
|
58 |
+
may be used or intended for use non-commercially. The Work or
|
59 |
+
derivative works thereof may be used or intended for use by NVIDIA
|
60 |
+
or it’s affiliates commercially or non-commercially. As used
|
61 |
+
herein, "non-commercially" means for research or evaluation
|
62 |
+
purposes only and not for any direct or indirect monetary gain.
|
63 |
+
|
64 |
+
3.4 Patent Claims. If you bring or threaten to bring a patent claim
|
65 |
+
against any Licensor (including any claim, cross-claim or
|
66 |
+
counterclaim in a lawsuit) to enforce any patents that you allege
|
67 |
+
are infringed by any Work, then your rights under this License from
|
68 |
+
such Licensor (including the grants in Sections 2.1) will terminate
|
69 |
+
immediately.
|
70 |
+
|
71 |
+
3.5 Trademarks. This License does not grant any rights to use any
|
72 |
+
Licensor’s or its affiliates’ names, logos, or trademarks, except
|
73 |
+
as necessary to reproduce the notices described in this License.
|
74 |
+
|
75 |
+
3.6 Termination. If you violate any term of this License, then your
|
76 |
+
rights under this License (including the grants in Sections 2.1)
|
77 |
+
will terminate immediately.
|
78 |
+
|
79 |
+
4. Disclaimer of Warranty.
|
80 |
+
|
81 |
+
THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
82 |
+
KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
|
83 |
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
|
84 |
+
NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
|
85 |
+
THIS LICENSE.
|
86 |
+
|
87 |
+
5. Limitation of Liability.
|
88 |
+
|
89 |
+
EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
|
90 |
+
THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
|
91 |
+
SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
|
92 |
+
INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
|
93 |
+
OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
|
94 |
+
(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
|
95 |
+
LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
|
96 |
+
COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
|
97 |
+
THE POSSIBILITY OF SUCH DAMAGES.
|
98 |
+
|
99 |
+
=======================================================================
|
LICENSE_WEIGHT
ADDED
@@ -0,0 +1,407 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Attribution-NonCommercial 4.0 International
|
2 |
+
|
3 |
+
=======================================================================
|
4 |
+
|
5 |
+
Creative Commons Corporation ("Creative Commons") is not a law firm and
|
6 |
+
does not provide legal services or legal advice. Distribution of
|
7 |
+
Creative Commons public licenses does not create a lawyer-client or
|
8 |
+
other relationship. Creative Commons makes its licenses and related
|
9 |
+
information available on an "as-is" basis. Creative Commons gives no
|
10 |
+
warranties regarding its licenses, any material licensed under their
|
11 |
+
terms and conditions, or any related information. Creative Commons
|
12 |
+
disclaims all liability for damages resulting from their use to the
|
13 |
+
fullest extent possible.
|
14 |
+
|
15 |
+
Using Creative Commons Public Licenses
|
16 |
+
|
17 |
+
Creative Commons public licenses provide a standard set of terms and
|
18 |
+
conditions that creators and other rights holders may use to share
|
19 |
+
original works of authorship and other material subject to copyright
|
20 |
+
and certain other rights specified in the public license below. The
|
21 |
+
following considerations are for informational purposes only, are not
|
22 |
+
exhaustive, and do not form part of our licenses.
|
23 |
+
|
24 |
+
Considerations for licensors: Our public licenses are
|
25 |
+
intended for use by those authorized to give the public
|
26 |
+
permission to use material in ways otherwise restricted by
|
27 |
+
copyright and certain other rights. Our licenses are
|
28 |
+
irrevocable. Licensors should read and understand the terms
|
29 |
+
and conditions of the license they choose before applying it.
|
30 |
+
Licensors should also secure all rights necessary before
|
31 |
+
applying our licenses so that the public can reuse the
|
32 |
+
material as expected. Licensors should clearly mark any
|
33 |
+
material not subject to the license. This includes other CC-
|
34 |
+
licensed material, or material used under an exception or
|
35 |
+
limitation to copyright. More considerations for licensors:
|
36 |
+
wiki.creativecommons.org/Considerations_for_licensors
|
37 |
+
|
38 |
+
Considerations for the public: By using one of our public
|
39 |
+
licenses, a licensor grants the public permission to use the
|
40 |
+
licensed material under specified terms and conditions. If
|
41 |
+
the licensor's permission is not necessary for any reason--for
|
42 |
+
example, because of any applicable exception or limitation to
|
43 |
+
copyright--then that use is not regulated by the license. Our
|
44 |
+
licenses grant only permissions under copyright and certain
|
45 |
+
other rights that a licensor has authority to grant. Use of
|
46 |
+
the licensed material may still be restricted for other
|
47 |
+
reasons, including because others have copyright or other
|
48 |
+
rights in the material. A licensor may make special requests,
|
49 |
+
such as asking that all changes be marked or described.
|
50 |
+
Although not required by our licenses, you are encouraged to
|
51 |
+
respect those requests where reasonable. More considerations
|
52 |
+
for the public:
|
53 |
+
wiki.creativecommons.org/Considerations_for_licensees
|
54 |
+
|
55 |
+
=======================================================================
|
56 |
+
|
57 |
+
Creative Commons Attribution-NonCommercial 4.0 International Public
|
58 |
+
License
|
59 |
+
|
60 |
+
By exercising the Licensed Rights (defined below), You accept and agree
|
61 |
+
to be bound by the terms and conditions of this Creative Commons
|
62 |
+
Attribution-NonCommercial 4.0 International Public License ("Public
|
63 |
+
License"). To the extent this Public License may be interpreted as a
|
64 |
+
contract, You are granted the Licensed Rights in consideration of Your
|
65 |
+
acceptance of these terms and conditions, and the Licensor grants You
|
66 |
+
such rights in consideration of benefits the Licensor receives from
|
67 |
+
making the Licensed Material available under these terms and
|
68 |
+
conditions.
|
69 |
+
|
70 |
+
|
71 |
+
Section 1 -- Definitions.
|
72 |
+
|
73 |
+
a. Adapted Material means material subject to Copyright and Similar
|
74 |
+
Rights that is derived from or based upon the Licensed Material
|
75 |
+
and in which the Licensed Material is translated, altered,
|
76 |
+
arranged, transformed, or otherwise modified in a manner requiring
|
77 |
+
permission under the Copyright and Similar Rights held by the
|
78 |
+
Licensor. For purposes of this Public License, where the Licensed
|
79 |
+
Material is a musical work, performance, or sound recording,
|
80 |
+
Adapted Material is always produced where the Licensed Material is
|
81 |
+
synched in timed relation with a moving image.
|
82 |
+
|
83 |
+
b. Adapter's License means the license You apply to Your Copyright
|
84 |
+
and Similar Rights in Your contributions to Adapted Material in
|
85 |
+
accordance with the terms and conditions of this Public License.
|
86 |
+
|
87 |
+
c. Copyright and Similar Rights means copyright and/or similar rights
|
88 |
+
closely related to copyright including, without limitation,
|
89 |
+
performance, broadcast, sound recording, and Sui Generis Database
|
90 |
+
Rights, without regard to how the rights are labeled or
|
91 |
+
categorized. For purposes of this Public License, the rights
|
92 |
+
specified in Section 2(b)(1)-(2) are not Copyright and Similar
|
93 |
+
Rights.
|
94 |
+
d. Effective Technological Measures means those measures that, in the
|
95 |
+
absence of proper authority, may not be circumvented under laws
|
96 |
+
fulfilling obligations under Article 11 of the WIPO Copyright
|
97 |
+
Treaty adopted on December 20, 1996, and/or similar international
|
98 |
+
agreements.
|
99 |
+
|
100 |
+
e. Exceptions and Limitations means fair use, fair dealing, and/or
|
101 |
+
any other exception or limitation to Copyright and Similar Rights
|
102 |
+
that applies to Your use of the Licensed Material.
|
103 |
+
|
104 |
+
f. Licensed Material means the artistic or literary work, database,
|
105 |
+
or other material to which the Licensor applied this Public
|
106 |
+
License.
|
107 |
+
|
108 |
+
g. Licensed Rights means the rights granted to You subject to the
|
109 |
+
terms and conditions of this Public License, which are limited to
|
110 |
+
all Copyright and Similar Rights that apply to Your use of the
|
111 |
+
Licensed Material and that the Licensor has authority to license.
|
112 |
+
|
113 |
+
h. Licensor means the individual(s) or entity(ies) granting rights
|
114 |
+
under this Public License.
|
115 |
+
|
116 |
+
i. NonCommercial means not primarily intended for or directed towards
|
117 |
+
commercial advantage or monetary compensation. For purposes of
|
118 |
+
this Public License, the exchange of the Licensed Material for
|
119 |
+
other material subject to Copyright and Similar Rights by digital
|
120 |
+
file-sharing or similar means is NonCommercial provided there is
|
121 |
+
no payment of monetary compensation in connection with the
|
122 |
+
exchange.
|
123 |
+
|
124 |
+
j. Share means to provide material to the public by any means or
|
125 |
+
process that requires permission under the Licensed Rights, such
|
126 |
+
as reproduction, public display, public performance, distribution,
|
127 |
+
dissemination, communication, or importation, and to make material
|
128 |
+
available to the public including in ways that members of the
|
129 |
+
public may access the material from a place and at a time
|
130 |
+
individually chosen by them.
|
131 |
+
|
132 |
+
k. Sui Generis Database Rights means rights other than copyright
|
133 |
+
resulting from Directive 96/9/EC of the European Parliament and of
|
134 |
+
the Council of 11 March 1996 on the legal protection of databases,
|
135 |
+
as amended and/or succeeded, as well as other essentially
|
136 |
+
equivalent rights anywhere in the world.
|
137 |
+
|
138 |
+
l. You means the individual or entity exercising the Licensed Rights
|
139 |
+
under this Public License. Your has a corresponding meaning.
|
140 |
+
|
141 |
+
|
142 |
+
Section 2 -- Scope.
|
143 |
+
|
144 |
+
a. License grant.
|
145 |
+
|
146 |
+
1. Subject to the terms and conditions of this Public License,
|
147 |
+
the Licensor hereby grants You a worldwide, royalty-free,
|
148 |
+
non-sublicensable, non-exclusive, irrevocable license to
|
149 |
+
exercise the Licensed Rights in the Licensed Material to:
|
150 |
+
|
151 |
+
a. reproduce and Share the Licensed Material, in whole or
|
152 |
+
in part, for NonCommercial purposes only; and
|
153 |
+
|
154 |
+
b. produce, reproduce, and Share Adapted Material for
|
155 |
+
NonCommercial purposes only.
|
156 |
+
|
157 |
+
2. Exceptions and Limitations. For the avoidance of doubt, where
|
158 |
+
Exceptions and Limitations apply to Your use, this Public
|
159 |
+
License does not apply, and You do not need to comply with
|
160 |
+
its terms and conditions.
|
161 |
+
|
162 |
+
3. Term. The term of this Public License is specified in Section
|
163 |
+
6(a).
|
164 |
+
|
165 |
+
4. Media and formats; technical modifications allowed. The
|
166 |
+
Licensor authorizes You to exercise the Licensed Rights in
|
167 |
+
all media and formats whether now known or hereafter created,
|
168 |
+
and to make technical modifications necessary to do so. The
|
169 |
+
Licensor waives and/or agrees not to assert any right or
|
170 |
+
authority to forbid You from making technical modifications
|
171 |
+
necessary to exercise the Licensed Rights, including
|
172 |
+
technical modifications necessary to circumvent Effective
|
173 |
+
Technological Measures. For purposes of this Public License,
|
174 |
+
simply making modifications authorized by this Section 2(a)
|
175 |
+
(4) never produces Adapted Material.
|
176 |
+
|
177 |
+
5. Downstream recipients.
|
178 |
+
|
179 |
+
a. Offer from the Licensor -- Licensed Material. Every
|
180 |
+
recipient of the Licensed Material automatically
|
181 |
+
receives an offer from the Licensor to exercise the
|
182 |
+
Licensed Rights under the terms and conditions of this
|
183 |
+
Public License.
|
184 |
+
|
185 |
+
b. No downstream restrictions. You may not offer or impose
|
186 |
+
any additional or different terms or conditions on, or
|
187 |
+
apply any Effective Technological Measures to, the
|
188 |
+
Licensed Material if doing so restricts exercise of the
|
189 |
+
Licensed Rights by any recipient of the Licensed
|
190 |
+
Material.
|
191 |
+
|
192 |
+
6. No endorsement. Nothing in this Public License constitutes or
|
193 |
+
may be construed as permission to assert or imply that You
|
194 |
+
are, or that Your use of the Licensed Material is, connected
|
195 |
+
with, or sponsored, endorsed, or granted official status by,
|
196 |
+
the Licensor or others designated to receive attribution as
|
197 |
+
provided in Section 3(a)(1)(A)(i).
|
198 |
+
|
199 |
+
b. Other rights.
|
200 |
+
|
201 |
+
1. Moral rights, such as the right of integrity, are not
|
202 |
+
licensed under this Public License, nor are publicity,
|
203 |
+
privacy, and/or other similar personality rights; however, to
|
204 |
+
the extent possible, the Licensor waives and/or agrees not to
|
205 |
+
assert any such rights held by the Licensor to the limited
|
206 |
+
extent necessary to allow You to exercise the Licensed
|
207 |
+
Rights, but not otherwise.
|
208 |
+
|
209 |
+
2. Patent and trademark rights are not licensed under this
|
210 |
+
Public License.
|
211 |
+
|
212 |
+
3. To the extent possible, the Licensor waives any right to
|
213 |
+
collect royalties from You for the exercise of the Licensed
|
214 |
+
Rights, whether directly or through a collecting society
|
215 |
+
under any voluntary or waivable statutory or compulsory
|
216 |
+
licensing scheme. In all other cases the Licensor expressly
|
217 |
+
reserves any right to collect such royalties, including when
|
218 |
+
the Licensed Material is used other than for NonCommercial
|
219 |
+
purposes.
|
220 |
+
|
221 |
+
|
222 |
+
Section 3 -- License Conditions.
|
223 |
+
|
224 |
+
Your exercise of the Licensed Rights is expressly made subject to the
|
225 |
+
following conditions.
|
226 |
+
|
227 |
+
a. Attribution.
|
228 |
+
|
229 |
+
1. If You Share the Licensed Material (including in modified
|
230 |
+
form), You must:
|
231 |
+
|
232 |
+
a. retain the following if it is supplied by the Licensor
|
233 |
+
with the Licensed Material:
|
234 |
+
|
235 |
+
i. identification of the creator(s) of the Licensed
|
236 |
+
Material and any others designated to receive
|
237 |
+
attribution, in any reasonable manner requested by
|
238 |
+
the Licensor (including by pseudonym if
|
239 |
+
designated);
|
240 |
+
|
241 |
+
ii. a copyright notice;
|
242 |
+
|
243 |
+
iii. a notice that refers to this Public License;
|
244 |
+
|
245 |
+
iv. a notice that refers to the disclaimer of
|
246 |
+
warranties;
|
247 |
+
|
248 |
+
v. a URI or hyperlink to the Licensed Material to the
|
249 |
+
extent reasonably practicable;
|
250 |
+
|
251 |
+
b. indicate if You modified the Licensed Material and
|
252 |
+
retain an indication of any previous modifications; and
|
253 |
+
|
254 |
+
c. indicate the Licensed Material is licensed under this
|
255 |
+
Public License, and include the text of, or the URI or
|
256 |
+
hyperlink to, this Public License.
|
257 |
+
|
258 |
+
2. You may satisfy the conditions in Section 3(a)(1) in any
|
259 |
+
reasonable manner based on the medium, means, and context in
|
260 |
+
which You Share the Licensed Material. For example, it may be
|
261 |
+
reasonable to satisfy the conditions by providing a URI or
|
262 |
+
hyperlink to a resource that includes the required
|
263 |
+
information.
|
264 |
+
|
265 |
+
3. If requested by the Licensor, You must remove any of the
|
266 |
+
information required by Section 3(a)(1)(A) to the extent
|
267 |
+
reasonably practicable.
|
268 |
+
|
269 |
+
4. If You Share Adapted Material You produce, the Adapter's
|
270 |
+
License You apply must not prevent recipients of the Adapted
|
271 |
+
Material from complying with this Public License.
|
272 |
+
|
273 |
+
|
274 |
+
Section 4 -- Sui Generis Database Rights.
|
275 |
+
|
276 |
+
Where the Licensed Rights include Sui Generis Database Rights that
|
277 |
+
apply to Your use of the Licensed Material:
|
278 |
+
|
279 |
+
a. for the avoidance of doubt, Section 2(a)(1) grants You the right
|
280 |
+
to extract, reuse, reproduce, and Share all or a substantial
|
281 |
+
portion of the contents of the database for NonCommercial purposes
|
282 |
+
only;
|
283 |
+
|
284 |
+
b. if You include all or a substantial portion of the database
|
285 |
+
contents in a database in which You have Sui Generis Database
|
286 |
+
Rights, then the database in which You have Sui Generis Database
|
287 |
+
Rights (but not its individual contents) is Adapted Material; and
|
288 |
+
|
289 |
+
c. You must comply with the conditions in Section 3(a) if You Share
|
290 |
+
all or a substantial portion of the contents of the database.
|
291 |
+
|
292 |
+
For the avoidance of doubt, this Section 4 supplements and does not
|
293 |
+
replace Your obligations under this Public License where the Licensed
|
294 |
+
Rights include other Copyright and Similar Rights.
|
295 |
+
|
296 |
+
|
297 |
+
Section 5 -- Disclaimer of Warranties and Limitation of Liability.
|
298 |
+
|
299 |
+
a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
|
300 |
+
EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
|
301 |
+
AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
|
302 |
+
ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
|
303 |
+
IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
|
304 |
+
WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
305 |
+
PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
|
306 |
+
ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
|
307 |
+
KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
|
308 |
+
ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
|
309 |
+
|
310 |
+
b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
|
311 |
+
TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
|
312 |
+
NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
|
313 |
+
INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
|
314 |
+
COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
|
315 |
+
USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
|
316 |
+
ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
|
317 |
+
DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
|
318 |
+
IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
|
319 |
+
|
320 |
+
c. The disclaimer of warranties and limitation of liability provided
|
321 |
+
above shall be interpreted in a manner that, to the extent
|
322 |
+
possible, most closely approximates an absolute disclaimer and
|
323 |
+
waiver of all liability.
|
324 |
+
|
325 |
+
|
326 |
+
Section 6 -- Term and Termination.
|
327 |
+
|
328 |
+
a. This Public License applies for the term of the Copyright and
|
329 |
+
Similar Rights licensed here. However, if You fail to comply with
|
330 |
+
this Public License, then Your rights under this Public License
|
331 |
+
terminate automatically.
|
332 |
+
|
333 |
+
b. Where Your right to use the Licensed Material has terminated under
|
334 |
+
Section 6(a), it reinstates:
|
335 |
+
|
336 |
+
1. automatically as of the date the violation is cured, provided
|
337 |
+
it is cured within 30 days of Your discovery of the
|
338 |
+
violation; or
|
339 |
+
|
340 |
+
2. upon express reinstatement by the Licensor.
|
341 |
+
|
342 |
+
For the avoidance of doubt, this Section 6(b) does not affect any
|
343 |
+
right the Licensor may have to seek remedies for Your violations
|
344 |
+
of this Public License.
|
345 |
+
|
346 |
+
c. For the avoidance of doubt, the Licensor may also offer the
|
347 |
+
Licensed Material under separate terms or conditions or stop
|
348 |
+
distributing the Licensed Material at any time; however, doing so
|
349 |
+
will not terminate this Public License.
|
350 |
+
|
351 |
+
d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
|
352 |
+
License.
|
353 |
+
|
354 |
+
|
355 |
+
Section 7 -- Other Terms and Conditions.
|
356 |
+
|
357 |
+
a. The Licensor shall not be bound by any additional or different
|
358 |
+
terms or conditions communicated by You unless expressly agreed.
|
359 |
+
|
360 |
+
b. Any arrangements, understandings, or agreements regarding the
|
361 |
+
Licensed Material not stated herein are separate from and
|
362 |
+
independent of the terms and conditions of this Public License.
|
363 |
+
|
364 |
+
|
365 |
+
Section 8 -- Interpretation.
|
366 |
+
|
367 |
+
a. For the avoidance of doubt, this Public License does not, and
|
368 |
+
shall not be interpreted to, reduce, limit, restrict, or impose
|
369 |
+
conditions on any use of the Licensed Material that could lawfully
|
370 |
+
be made without permission under this Public License.
|
371 |
+
|
372 |
+
b. To the extent possible, if any provision of this Public License is
|
373 |
+
deemed unenforceable, it shall be automatically reformed to the
|
374 |
+
minimum extent necessary to make it enforceable. If the provision
|
375 |
+
cannot be reformed, it shall be severed from this Public License
|
376 |
+
without affecting the enforceability of the remaining terms and
|
377 |
+
conditions.
|
378 |
+
|
379 |
+
c. No term or condition of this Public License will be waived and no
|
380 |
+
failure to comply consented to unless expressly agreed to by the
|
381 |
+
Licensor.
|
382 |
+
|
383 |
+
d. Nothing in this Public License constitutes or may be interpreted
|
384 |
+
as a limitation upon, or waiver of, any privileges and immunities
|
385 |
+
that apply to the Licensor or You, including from the legal
|
386 |
+
processes of any jurisdiction or authority.
|
387 |
+
|
388 |
+
=======================================================================
|
389 |
+
|
390 |
+
Creative Commons is not a party to its public
|
391 |
+
licenses. Notwithstanding, Creative Commons may elect to apply one of
|
392 |
+
its public licenses to material it publishes and in those instances
|
393 |
+
will be considered the "Licensor." The text of the Creative Commons
|
394 |
+
public licenses is dedicated to the public domain under the CC0 Public
|
395 |
+
Domain Dedication. Except for the limited purpose of indicating that
|
396 |
+
material is shared under a Creative Commons public license or as
|
397 |
+
otherwise permitted by the Creative Commons policies published at
|
398 |
+
creativecommons.org/policies, Creative Commons does not authorize the
|
399 |
+
use of the trademark "Creative Commons" or any other trademark or logo
|
400 |
+
of Creative Commons without its prior written consent including,
|
401 |
+
without limitation, in connection with any unauthorized modifications
|
402 |
+
to any of its public licenses or any other arrangements,
|
403 |
+
understandings, or agreements concerning use of licensed material. For
|
404 |
+
the avoidance of doubt, this paragraph does not form part of the
|
405 |
+
public licenses.
|
406 |
+
|
407 |
+
Creative Commons may be contacted at creativecommons.org.
|
assets/sample_input/building.png
ADDED
assets/sample_input/ceramic.png
ADDED
assets/sample_input/fire.png
ADDED
assets/sample_input/girl.png
ADDED
assets/sample_input/hotdogs.png
ADDED
assets/sample_input/hydrant.png
ADDED
assets/sample_input/lamp.png
ADDED
assets/sample_input/mailbox.png
ADDED
assets/sample_input/owl.png
ADDED
assets/sample_input/traffic.png
ADDED
lrm/__init__.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2023, Zexin He
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
#
|
15 |
+
# Empty
|
lrm/cam_utils.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2023, Zexin He
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
|
16 |
+
import torch
|
17 |
+
|
18 |
+
"""
|
19 |
+
R: (N, 3, 3)
|
20 |
+
T: (N, 3)
|
21 |
+
E: (N, 4, 4)
|
22 |
+
vector: (N, 3)
|
23 |
+
"""
|
24 |
+
|
25 |
+
|
26 |
+
def compose_extrinsic_R_T(R: torch.Tensor, T: torch.Tensor):
    """
    Build the standard-form extrinsic matrix from rotation and translation.

    Batched I/O: R is (N, 3, 3), T is (N, 3); returns (N, 4, 4).
    """
    # Append T as a fourth column to obtain the (N, 3, 4) [R|T] matrix,
    # then delegate the homogeneous-row padding to compose_extrinsic_RT.
    rt_matrix = torch.cat((R, T[..., None]), dim=-1)
    return compose_extrinsic_RT(rt_matrix)
|
33 |
+
|
34 |
+
|
35 |
+
def compose_extrinsic_RT(RT: torch.Tensor):
    """
    Compose the standard 4x4 extrinsic matrix from a (N, 3, 4) RT matrix.

    Appends the homogeneous bottom row [0, 0, 0, 1] to each batch element.
    Batched I/O: RT is (N, 3, 4); returns (N, 4, 4).
    """
    # Allocate the bottom row on RT's device and dtype. The original always
    # allocated a CPU float32 tensor, which raises on CUDA inputs and on
    # non-float32 RT matrices.
    bottom = torch.tensor([[[0, 0, 0, 1]]], dtype=RT.dtype, device=RT.device)
    return torch.cat([RT, bottom.expand(RT.shape[0], -1, -1)], dim=1)
|
44 |
+
|
45 |
+
|
46 |
+
def decompose_extrinsic_R_T(E: torch.Tensor):
    """
    Split the standard extrinsic matrix into rotation and translation.

    Batched I/O: E is (N, 4, 4); returns R of shape (N, 3, 3) and
    T of shape (N, 3).
    """
    rt_matrix = decompose_extrinsic_RT(E)
    rotation, translation = rt_matrix[:, :, :3], rt_matrix[:, :, 3]
    return rotation, translation
|
53 |
+
|
54 |
+
|
55 |
+
def decompose_extrinsic_RT(E: torch.Tensor):
    """
    Drop the homogeneous bottom row of the standard extrinsic matrix.

    Batched I/O: E is (N, 4, 4); returns the (N, 3, 4) [R|T] part.
    """
    # The last row of a standard extrinsic is [0, 0, 0, 1], so only the
    # first three rows carry pose information.
    return E[:, 0:3]
|
61 |
+
|
62 |
+
|
63 |
+
def get_normalized_camera_intrinsics(intrinsics: torch.Tensor):
    """
    Normalize camera intrinsics by the image resolution.

    intrinsics: (N, 3, 2) packed as [[fx, fy], [cx, cy], [width, height]]
    Return batched fx, fy, cx, cy, each divided by width (x) or height (y).
    """
    width = intrinsics[:, 2, 0]
    height = intrinsics[:, 2, 1]
    fx = intrinsics[:, 0, 0] / width
    fy = intrinsics[:, 0, 1] / height
    cx = intrinsics[:, 1, 0] / width
    cy = intrinsics[:, 1, 1] / height
    return fx, fy, cx, cy
|
74 |
+
|
75 |
+
|
76 |
+
def build_camera_principle(RT: torch.Tensor, intrinsics: torch.Tensor):
    """
    Flatten extrinsics plus normalized principal intrinsics into one vector.

    RT: (N, 3, 4)
    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    Returns (N, 16): 12 extrinsic values followed by fx, fy, cx, cy.
    """
    fx, fy, cx, cy = get_normalized_camera_intrinsics(intrinsics)
    flat_extrinsics = RT.reshape(-1, 12)
    principal_point = torch.stack([fx, fy, cx, cy], dim=-1)
    return torch.cat([flat_extrinsics, principal_point], dim=-1)
|
86 |
+
|
87 |
+
|
88 |
+
def build_camera_standard(RT: torch.Tensor, intrinsics: torch.Tensor):
    """
    Flatten the full extrinsic and normalized intrinsic matrices into one vector.

    RT: (N, 3, 4)
    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    Returns (N, 25): 16 extrinsic values followed by 9 intrinsic values.
    """
    E = compose_extrinsic_RT(RT)
    fx, fy, cx, cy = get_normalized_camera_intrinsics(intrinsics)
    zero = torch.zeros_like(fx)
    one = torch.ones_like(fx)
    # Assemble the normalized 3x3 intrinsic matrix row by row. Deriving the
    # constant entries from zeros_like/ones_like keeps device AND dtype
    # consistent with fx (the original hard-coded a float32 row, which
    # breaks torch.stack for non-float32 intrinsics). Renamed `I` -> `K`:
    # `I` shadows the identity convention and is ambiguous with l/1.
    K = torch.stack([
        torch.stack([fx, zero, cx], dim=-1),
        torch.stack([zero, fy, cy], dim=-1),
        torch.stack([zero, zero, one], dim=-1),
    ], dim=1)
    return torch.cat([
        E.reshape(-1, 16),
        K.reshape(-1, 9),
    ], dim=-1)
|
104 |
+
|
105 |
+
|
106 |
+
def center_looking_at_camera_pose(camera_position: torch.Tensor, look_at: torch.Tensor = None, up_world: torch.Tensor = None):
    """
    Build look-at camera extrinsics for a batch of camera positions.

    camera_position: (M, 3)
    look_at: (3,), defaults to the origin
    up_world: (3,), defaults to +z
    return: (M, 3, 4) extrinsics whose columns are [x_axis, y_axis, z_axis, position]
    """
    device = camera_position.device
    dtype = camera_position.dtype
    # By default, look at the origin with world up along +z. Allocate the
    # defaults on camera_position's device/dtype so CUDA inputs work (the
    # original always created CPU tensors).
    if look_at is None:
        look_at = torch.tensor([0, 0, 0], dtype=dtype, device=device)
    if up_world is None:
        up_world = torch.tensor([0, 0, 1], dtype=dtype, device=device)
    look_at = look_at.unsqueeze(0).repeat(camera_position.shape[0], 1)
    up_world = up_world.unsqueeze(0).repeat(camera_position.shape[0], 1)

    # Camera z-axis points from the look-at target toward the camera.
    z_axis = camera_position - look_at
    z_axis = z_axis / z_axis.norm(dim=-1, keepdim=True)
    # Pass dim=-1 explicitly: torch.cross without `dim` (deprecated) uses the
    # first dimension of size 3, which silently crosses over the batch
    # dimension when M == 3.
    x_axis = torch.cross(up_world, z_axis, dim=-1)
    x_axis = x_axis / x_axis.norm(dim=-1, keepdim=True)
    y_axis = torch.cross(z_axis, x_axis, dim=-1)
    y_axis = y_axis / y_axis.norm(dim=-1, keepdim=True)
    extrinsics = torch.stack([x_axis, y_axis, z_axis, camera_position], dim=-1)
    return extrinsics
|
lrm/inferrer.py
ADDED
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2023, Zexin He
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
|
16 |
+
import torch
|
17 |
+
import math
|
18 |
+
import os
|
19 |
+
import imageio
|
20 |
+
import mcubes
|
21 |
+
import trimesh
|
22 |
+
import numpy as np
|
23 |
+
import argparse
|
24 |
+
from PIL import Image
|
25 |
+
|
26 |
+
from .models.generator import LRMGenerator
|
27 |
+
from .cam_utils import build_camera_principle, build_camera_standard, center_looking_at_camera_pose
|
28 |
+
|
29 |
+
|
30 |
+
class LRMInferrer:
    """
    Inference wrapper for the LRM single-image-to-3D pipeline.

    Loads a pretrained LRMGenerator checkpoint and exposes `infer`, which
    turns one source image into a surrounding-orbit video and/or a colored
    triangle mesh dumped to disk.
    """

    def __init__(self, model_name: str):
        # Prefer GPU when available; everything below is moved to this device.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        _checkpoint = self._load_checkpoint(model_name)
        _model_weights, _model_kwargs = _checkpoint['weights'], _checkpoint['kwargs']['model']
        self.model = self._build_model(_model_kwargs, _model_weights).eval()

        # Inference-time defaults shipped inside the checkpoint
        # (e.g. 'source_size', 'render_size').
        self.infer_kwargs = _checkpoint['kwargs']['infer']

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass

    def _load_checkpoint(self, model_name: str, cache_dir = './.cache'):
        """Load `{model_name}.pth` from `cache_dir`; raise if it is missing."""
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir, exist_ok=True)
        local_path = os.path.join(cache_dir, f'{model_name}.pth')
        if not os.path.exists(local_path):
            # TODO: on-the-fly download not supported yet, plz download manually
            # os.system(f'wget -O {local_path} https://zxhezexin.com/modelzoo/openlrm/{model_name}.pth')
            raise FileNotFoundError(f"Checkpoint {model_name} not found in {cache_dir}")
        checkpoint = torch.load(local_path, map_location=self.device)
        return checkpoint

    def _build_model(self, model_kwargs, model_weights):
        """Instantiate LRMGenerator on self.device and load checkpoint weights."""
        model = LRMGenerator(**model_kwargs).to(self.device)
        model.load_state_dict(model_weights)
        print(f"======== Loaded model from checkpoint ========")
        return model

    @staticmethod
    def _get_surrounding_views(M: int = 160, radius: float = 2.0, height: float = 0.8):
        """
        Build M look-at camera extrinsics evenly spaced on a ring around the object.

        M: number of surrounding views
        radius: camera distance to the center
        height: z-height of the camera ring (requires |height| < radius)
        return: (M, 3, 4)
        """
        assert M > 0
        assert radius > 0

        camera_positions = []
        # Project the radius onto the xy-plane so the total distance to the
        # origin stays `radius` despite the z-offset.
        projected_radius = math.sqrt(radius ** 2 - height ** 2)
        for i in range(M):
            theta = 2 * math.pi * i / M - math.pi / 2
            x = projected_radius * math.cos(theta)
            y = projected_radius * math.sin(theta)
            z = height
            camera_positions.append([x, y, z])
        camera_positions = torch.tensor(camera_positions, dtype=torch.float32)
        extrinsics = center_looking_at_camera_pose(camera_positions)

        return extrinsics

    @staticmethod
    def _default_intrinsics():
        """Return the default (3, 2) intrinsics: fx=fy=384, cx=cy=256, 512x512."""
        fx = fy = 384
        cx = cy = 256
        w = h = 512
        intrinsics = torch.tensor([
            [fx, fy],
            [cx, cy],
            [w, h],
        ], dtype=torch.float32)
        return intrinsics

    def _default_source_camera(self, batch_size: int = 1):
        """Return the canonical (flattened) source camera: (N, D_cam_raw)."""
        dist_to_center = 2
        # Canonical pose used at training time: camera on the -y axis,
        # looking at the origin.
        canonical_camera_extrinsics = torch.tensor([[
            [1, 0, 0, 0],
            [0, 0, -1, -dist_to_center],
            [0, 1, 0, 0],
        ]], dtype=torch.float32)
        canonical_camera_intrinsics = self._default_intrinsics().unsqueeze(0)
        source_camera = build_camera_principle(canonical_camera_extrinsics, canonical_camera_intrinsics)
        return source_camera.repeat(batch_size, 1)

    def _default_render_cameras(self, batch_size: int = 1):
        """Return the orbiting (flattened) render cameras: (N, M, D_cam_render)."""
        render_camera_extrinsics = self._get_surrounding_views()
        render_camera_intrinsics = self._default_intrinsics().unsqueeze(0).repeat(render_camera_extrinsics.shape[0], 1, 1)
        render_cameras = build_camera_standard(render_camera_extrinsics, render_camera_intrinsics)
        return render_cameras.unsqueeze(0).repeat(batch_size, 1, 1)

    @staticmethod
    def images_to_video(images, output_path, fps, verbose=False):
        """Write a (T, C, H, W) float tensor in [0, 1] to disk as an mpeg4 video."""
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        frames = []
        for i in range(images.shape[0]):
            frame = (images[i].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
            assert frame.shape[0] == images.shape[2] and frame.shape[1] == images.shape[3], \
                f"Frame shape mismatch: {frame.shape} vs {images.shape}"
            assert frame.min() >= 0 and frame.max() <= 255, \
                f"Frame value out of range: {frame.min()} ~ {frame.max()}"
            frames.append(frame)
        imageio.mimwrite(output_path, np.stack(frames), fps=fps, codec='mpeg4', quality=10)
        if verbose:
            print(f"Saved video to {output_path}")

    def infer_single(self, image: torch.Tensor, render_size: int, mesh_size: int, export_video: bool, export_mesh: bool):
        """
        Run the model on one preprocessed image.

        image: (1, C_img, H_img, W_img) in [0, 1], already on self.device
        Returns a dict with optional keys 'frames' (rendered view tensors)
        and 'mesh' (a vertex-colored trimesh.Trimesh).
        """
        mesh_thres = 1.0   # marching-cubes iso-level on the density grid
        chunk_size = 2     # render cameras per forward pass; bounds GPU memory
        batch_size = 1

        source_camera = self._default_source_camera(batch_size).to(self.device)
        render_cameras = self._default_render_cameras(batch_size).to(self.device)

        with torch.no_grad():
            planes = self.model.forward_planes(image, source_camera)
            results = {}

            if export_video:
                # forward synthesizer per mini-batch of render cameras
                frames = []
                for i in range(0, render_cameras.shape[1], chunk_size):
                    frames.append(
                        self.model.synthesizer(
                            planes,
                            render_cameras[:, i:i+chunk_size],
                            render_size,
                        )
                    )
                # merge per-chunk outputs along the view dimension
                frames = {
                    k: torch.cat([r[k] for r in frames], dim=1)
                    for k in frames[0].keys()
                }
                results.update({
                    'frames': frames,
                })

            if export_mesh:
                grid_out = self.model.synthesizer.forward_grid(
                    planes=planes,
                    grid_size=mesh_size,
                )

                vtx, faces = mcubes.marching_cubes(grid_out['sigma'].squeeze(0).squeeze(-1).cpu().numpy(), mesh_thres)
                # map voxel indices back to the [-1, 1] cube the model samples
                vtx = vtx / (mesh_size - 1) * 2 - 1

                vtx_tensor = torch.tensor(vtx, dtype=torch.float32, device=self.device).unsqueeze(0)
                vtx_colors = self.model.synthesizer.forward_points(planes, vtx_tensor)['rgb'].squeeze(0).cpu().numpy()  # (0, 1)
                vtx_colors = (vtx_colors * 255).astype(np.uint8)

                mesh = trimesh.Trimesh(vertices=vtx, faces=faces, vertex_colors=vtx_colors)

                results.update({
                    'mesh': mesh,
                })

            return results

    def infer(self, source_image: str, dump_path: str, source_size: int, render_size: int, mesh_size: int, export_video: bool, export_mesh: bool):
        """
        Load an image from disk, run inference, and dump outputs.

        source_size / render_size: pass a non-positive value to fall back to
        the checkpoint's own inference defaults.
        Writes `{uid}.mov` and/or `{uid}.ply` under dump_path.
        """
        source_image_size = source_size if source_size > 0 else self.infer_kwargs['source_size']

        image = torch.tensor(np.array(Image.open(source_image))).permute(2, 0, 1).unsqueeze(0) / 255.0
        # if RGBA, blend onto a white background using the alpha channel
        if image.shape[1] == 4:
            image = image[:, :3, ...] * image[:, 3:, ...] + (1 - image[:, 3:, ...])
        image = torch.nn.functional.interpolate(image, size=(source_image_size, source_image_size), mode='bicubic', align_corners=True)
        image = torch.clamp(image, 0, 1)
        results = self.infer_single(
            # BUGFIX: move to self.device instead of the former hard-coded
            # `.cuda()`, which crashed on CPU-only machines even though
            # __init__ deliberately falls back to CPU.
            image.to(self.device),
            render_size=render_size if render_size > 0 else self.infer_kwargs['render_size'],
            mesh_size=mesh_size,
            export_video=export_video,
            export_mesh=export_mesh,
        )

        image_name = os.path.basename(source_image)
        uid = image_name.split('.')[0]

        os.makedirs(dump_path, exist_ok=True)

        # dump video
        if 'frames' in results:
            renderings = results['frames']
            for k, v in renderings.items():
                if k == 'images_rgb':
                    self.images_to_video(
                        v[0],
                        os.path.join(dump_path, f'{uid}.mov'),
                        fps=40,
                    )
                else:
                    # torch.save(v[0], os.path.join(dump_path, f'{uid}_{k}.pth'))
                    pass

        # dump mesh
        if 'mesh' in results:
            mesh = results['mesh']
            # save ply format mesh
            mesh.export(os.path.join(dump_path, f'{uid}.ply'), 'ply')
|
231 |
+
|
232 |
+
|
233 |
+
if __name__ == '__main__':

    """
    Example usage:
    python -m lrm.inferrer --model_name lrm-base-obj-v1 --source_image ./assets/sample_input/owl.png --export_video --export_mesh
    """

    # Command-line entry point for single-image inference.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--model_name', type=str, default='lrm-base-obj-v1')
    arg_parser.add_argument('--source_image', type=str, default='./assets/sample_input/owl.png')
    arg_parser.add_argument('--dump_path', type=str, default='./dumps')
    arg_parser.add_argument('--source_size', type=int, default=-1)
    arg_parser.add_argument('--render_size', type=int, default=-1)
    arg_parser.add_argument('--mesh_size', type=int, default=384)
    arg_parser.add_argument('--export_video', action='store_true')
    arg_parser.add_argument('--export_mesh', action='store_true')
    cli_args = arg_parser.parse_args()

    # The context manager currently does no cleanup, but keeps the call
    # sites future-proof should __exit__ grow teardown logic.
    with LRMInferrer(model_name=cli_args.model_name) as inferrer:
        inferrer.infer(
            source_image=cli_args.source_image,
            dump_path=cli_args.dump_path,
            source_size=cli_args.source_size,
            render_size=cli_args.render_size,
            mesh_size=cli_args.mesh_size,
            export_video=cli_args.export_video,
            export_mesh=cli_args.export_mesh,
        )
|
lrm/models/__init__.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2023, Zexin He
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
#
|
15 |
+
# Empty
|
lrm/models/encoders/__init__.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2023, Zexin He
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
#
|
15 |
+
# Empty
|
lrm/models/encoders/dino_wrapper.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2023, Zexin He
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
|
16 |
+
import torch.nn as nn
|
17 |
+
from transformers import ViTImageProcessor, ViTModel
|
18 |
+
|
19 |
+
|
20 |
+
class DinoWrapper(nn.Module):
|
21 |
+
"""
|
22 |
+
Dino v1 wrapper using huggingface transformer implementation.
|
23 |
+
"""
|
24 |
+
def __init__(self, model_name: str, freeze: bool = True):
|
25 |
+
super().__init__()
|
26 |
+
self.model, self.processor = self._build_dino(model_name)
|
27 |
+
if freeze:
|
28 |
+
self._freeze()
|
29 |
+
|
30 |
+
def forward(self, image):
|
31 |
+
# image: [N, C, H, W], on cpu
|
32 |
+
# RGB image with [0,1] scale and properly sized
|
33 |
+
inputs = self.processor(images=image, return_tensors="pt", do_rescale=False, do_resize=False).to(self.model.device)
|
34 |
+
# This resampling of positional embedding uses bicubic interpolation
|
35 |
+
outputs = self.model(**inputs, interpolate_pos_encoding=True)
|
36 |
+
last_hidden_states = outputs.last_hidden_state
|
37 |
+
return last_hidden_states
|
38 |
+
|
39 |
+
def _freeze(self):
|
40 |
+
print(f"======== Freezing DinoWrapper ========")
|
41 |
+
self.model.eval()
|
42 |
+
for name, param in self.model.named_parameters():
|
43 |
+
param.requires_grad = False
|
44 |
+
|
45 |
+
@staticmethod
|
46 |
+
def _build_dino(model_name: str, proxy_error_retries: int = 3, proxy_error_cooldown: int = 5):
|
47 |
+
import requests
|
48 |
+
try:
|
49 |
+
model = ViTModel.from_pretrained(model_name, add_pooling_layer=False)
|
50 |
+
processor = ViTImageProcessor.from_pretrained(model_name)
|
51 |
+
return model, processor
|
52 |
+
except requests.exceptions.ProxyError as err:
|
53 |
+
if proxy_error_retries > 0:
|
54 |
+
print(f"Huggingface ProxyError: Retrying in {proxy_error_cooldown} seconds...")
|
55 |
+
import time
|
56 |
+
time.sleep(proxy_error_cooldown)
|
57 |
+
return DinoWrapper._build_dino(model_name, proxy_error_retries - 1, proxy_error_cooldown)
|
58 |
+
else:
|
59 |
+
raise err
|
lrm/models/generator.py
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2023, Zexin He
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
|
16 |
+
import torch.nn as nn
|
17 |
+
|
18 |
+
from .encoders.dino_wrapper import DinoWrapper
|
19 |
+
from .transformer import TriplaneTransformer
|
20 |
+
from .rendering.synthesizer import TriplaneSynthesizer
|
21 |
+
|
22 |
+
|
23 |
+
class CameraEmbedder(nn.Module):
|
24 |
+
"""
|
25 |
+
Embed camera features to a high-dimensional vector.
|
26 |
+
|
27 |
+
Reference:
|
28 |
+
DiT: https://github.com/facebookresearch/DiT/blob/main/models.py#L27
|
29 |
+
"""
|
30 |
+
def __init__(self, raw_dim: int, embed_dim: int):
|
31 |
+
super().__init__()
|
32 |
+
self.mlp = nn.Sequential(
|
33 |
+
nn.Linear(raw_dim, embed_dim),
|
34 |
+
nn.SiLU(),
|
35 |
+
nn.Linear(embed_dim, embed_dim),
|
36 |
+
)
|
37 |
+
|
38 |
+
def forward(self, x):
|
39 |
+
return self.mlp(x)
|
40 |
+
|
41 |
+
|
42 |
+
class LRMGenerator(nn.Module):
|
43 |
+
"""
|
44 |
+
Full model of the large reconstruction model.
|
45 |
+
"""
|
46 |
+
def __init__(self, camera_embed_dim: int, rendering_samples_per_ray: int,
|
47 |
+
transformer_dim: int, transformer_layers: int, transformer_heads: int,
|
48 |
+
triplane_low_res: int, triplane_high_res: int, triplane_dim: int,
|
49 |
+
encoder_freeze: bool = True, encoder_model_name: str = 'facebook/dino-vitb16', encoder_feat_dim: int = 768):
|
50 |
+
super().__init__()
|
51 |
+
|
52 |
+
# attributes
|
53 |
+
self.encoder_feat_dim = encoder_feat_dim
|
54 |
+
self.camera_embed_dim = camera_embed_dim
|
55 |
+
|
56 |
+
# modules
|
57 |
+
self.encoder = DinoWrapper(
|
58 |
+
model_name=encoder_model_name,
|
59 |
+
freeze=encoder_freeze,
|
60 |
+
)
|
61 |
+
self.camera_embedder = CameraEmbedder(
|
62 |
+
raw_dim=12+4, embed_dim=camera_embed_dim,
|
63 |
+
)
|
64 |
+
self.transformer = TriplaneTransformer(
|
65 |
+
inner_dim=transformer_dim, num_layers=transformer_layers, num_heads=transformer_heads,
|
66 |
+
image_feat_dim=encoder_feat_dim,
|
67 |
+
camera_embed_dim=camera_embed_dim,
|
68 |
+
triplane_low_res=triplane_low_res, triplane_high_res=triplane_high_res, triplane_dim=triplane_dim,
|
69 |
+
)
|
70 |
+
self.synthesizer = TriplaneSynthesizer(
|
71 |
+
triplane_dim=triplane_dim, samples_per_ray=rendering_samples_per_ray,
|
72 |
+
)
|
73 |
+
|
74 |
+
def forward_planes(self, image, camera):
|
75 |
+
# image: [N, C_img, H_img, W_img]
|
76 |
+
# camera: [N, D_cam_raw]
|
77 |
+
assert image.shape[0] == camera.shape[0], "Batch size mismatch for image and camera"
|
78 |
+
N = image.shape[0]
|
79 |
+
|
80 |
+
# encode image
|
81 |
+
image_feats = self.encoder(image)
|
82 |
+
assert image_feats.shape[-1] == self.encoder_feat_dim, \
|
83 |
+
f"Feature dimension mismatch: {image_feats.shape[-1]} vs {self.encoder_feat_dim}"
|
84 |
+
|
85 |
+
# embed camera
|
86 |
+
camera_embeddings = self.camera_embedder(camera)
|
87 |
+
assert camera_embeddings.shape[-1] == self.camera_embed_dim, \
|
88 |
+
f"Feature dimension mismatch: {camera_embeddings.shape[-1]} vs {self.camera_embed_dim}"
|
89 |
+
|
90 |
+
# transformer generating planes
|
91 |
+
planes = self.transformer(image_feats, camera_embeddings)
|
92 |
+
assert planes.shape[0] == N, "Batch size mismatch for planes"
|
93 |
+
assert planes.shape[1] == 3, "Planes should have 3 channels"
|
94 |
+
|
95 |
+
return planes
|
96 |
+
|
97 |
+
def forward(self, image, source_camera, render_cameras, render_size: int):
|
98 |
+
# image: [N, C_img, H_img, W_img]
|
99 |
+
# source_camera: [N, D_cam_raw]
|
100 |
+
# render_cameras: [N, M, D_cam_render]
|
101 |
+
# render_size: int
|
102 |
+
assert image.shape[0] == source_camera.shape[0], "Batch size mismatch for image and source_camera"
|
103 |
+
assert image.shape[0] == render_cameras.shape[0], "Batch size mismatch for image and render_cameras"
|
104 |
+
N, M = render_cameras.shape[:2]
|
105 |
+
|
106 |
+
planes = self.forward_planes(image, source_camera)
|
107 |
+
|
108 |
+
# render target views
|
109 |
+
render_results = self.synthesizer(planes, render_cameras, render_size)
|
110 |
+
assert render_results['images_rgb'].shape[0] == N, "Batch size mismatch for render_results"
|
111 |
+
assert render_results['images_rgb'].shape[1] == M, "Number of rendered views should be consistent with render_cameras"
|
112 |
+
|
113 |
+
return {
|
114 |
+
'planes': planes,
|
115 |
+
**render_results,
|
116 |
+
}
|
lrm/models/rendering/__init__.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2023, Zexin He
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
#
|
15 |
+
# Empty
|
lrm/models/rendering/synthesizer.py
ADDED
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ORIGINAL LICENSE
|
2 |
+
# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
3 |
+
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
|
4 |
+
#
|
5 |
+
# Modified by Zexin He
|
6 |
+
# The modifications are subject to the same license as the original.
|
7 |
+
|
8 |
+
|
9 |
+
import itertools
|
10 |
+
import torch
|
11 |
+
import torch.nn as nn
|
12 |
+
|
13 |
+
from .utils.renderer import ImportanceRenderer
|
14 |
+
from .utils.ray_sampler import RaySampler
|
15 |
+
|
16 |
+
|
17 |
+
class OSGDecoder(nn.Module):
|
18 |
+
"""
|
19 |
+
Triplane decoder that gives RGB and sigma values from sampled features.
|
20 |
+
Using ReLU here instead of Softplus in the original implementation.
|
21 |
+
|
22 |
+
Reference:
|
23 |
+
EG3D: https://github.com/NVlabs/eg3d/blob/main/eg3d/training/triplane.py#L112
|
24 |
+
"""
|
25 |
+
def __init__(self, n_features: int,
|
26 |
+
hidden_dim: int = 64, num_layers: int = 4, activation: nn.Module = nn.ReLU):
|
27 |
+
super().__init__()
|
28 |
+
self.net = nn.Sequential(
|
29 |
+
nn.Linear(3 * n_features, hidden_dim),
|
30 |
+
activation(),
|
31 |
+
*itertools.chain(*[[
|
32 |
+
nn.Linear(hidden_dim, hidden_dim),
|
33 |
+
activation(),
|
34 |
+
] for _ in range(num_layers - 2)]),
|
35 |
+
nn.Linear(hidden_dim, 1 + 3),
|
36 |
+
)
|
37 |
+
# init all bias to zero
|
38 |
+
for m in self.modules():
|
39 |
+
if isinstance(m, nn.Linear):
|
40 |
+
nn.init.zeros_(m.bias)
|
41 |
+
|
42 |
+
def forward(self, sampled_features, ray_directions):
|
43 |
+
# Aggregate features by mean
|
44 |
+
# sampled_features = sampled_features.mean(1)
|
45 |
+
# Aggregate features by concatenation
|
46 |
+
_N, n_planes, _M, _C = sampled_features.shape
|
47 |
+
sampled_features = sampled_features.permute(0, 2, 1, 3).reshape(_N, _M, n_planes*_C)
|
48 |
+
x = sampled_features
|
49 |
+
|
50 |
+
N, M, C = x.shape
|
51 |
+
x = x.contiguous().view(N*M, C)
|
52 |
+
|
53 |
+
x = self.net(x)
|
54 |
+
x = x.view(N, M, -1)
|
55 |
+
rgb = torch.sigmoid(x[..., 1:])*(1 + 2*0.001) - 0.001 # Uses sigmoid clamping from MipNeRF
|
56 |
+
sigma = x[..., 0:1]
|
57 |
+
|
58 |
+
return {'rgb': rgb, 'sigma': sigma}
|
59 |
+
|
60 |
+
|
61 |
+
class TriplaneSynthesizer(nn.Module):
|
62 |
+
"""
|
63 |
+
Synthesizer that renders a triplane volume with planes and a camera.
|
64 |
+
|
65 |
+
Reference:
|
66 |
+
EG3D: https://github.com/NVlabs/eg3d/blob/main/eg3d/training/triplane.py#L19
|
67 |
+
"""
|
68 |
+
|
69 |
+
DEFAULT_RENDERING_KWARGS = {
|
70 |
+
'ray_start': 'auto',
|
71 |
+
'ray_end': 'auto',
|
72 |
+
'box_warp': 2.,
|
73 |
+
'white_back': True,
|
74 |
+
'disparity_space_sampling': False,
|
75 |
+
'clamp_mode': 'softplus',
|
76 |
+
'sampler_bbox_min': -1.,
|
77 |
+
'sampler_bbox_max': 1.,
|
78 |
+
}
|
79 |
+
|
80 |
+
def __init__(self, triplane_dim: int, samples_per_ray: int):
|
81 |
+
super().__init__()
|
82 |
+
|
83 |
+
# attributes
|
84 |
+
self.triplane_dim = triplane_dim
|
85 |
+
self.rendering_kwargs = {
|
86 |
+
**self.DEFAULT_RENDERING_KWARGS,
|
87 |
+
'depth_resolution': samples_per_ray // 2,
|
88 |
+
'depth_resolution_importance': samples_per_ray // 2,
|
89 |
+
}
|
90 |
+
|
91 |
+
# renderings
|
92 |
+
self.renderer = ImportanceRenderer()
|
93 |
+
self.ray_sampler = RaySampler()
|
94 |
+
|
95 |
+
# modules
|
96 |
+
self.decoder = OSGDecoder(n_features=triplane_dim)
|
97 |
+
|
98 |
+
def forward(self, planes, cameras, render_size: int):
|
99 |
+
# planes: (N, 3, D', H', W')
|
100 |
+
# cameras: (N, M, D_cam)
|
101 |
+
# render_size: int
|
102 |
+
assert planes.shape[0] == cameras.shape[0], "Batch size mismatch for planes and cameras"
|
103 |
+
N, M = cameras.shape[:2]
|
104 |
+
|
105 |
+
cam2world_matrix = cameras[..., :16].view(N, M, 4, 4)
|
106 |
+
intrinsics = cameras[..., 16:25].view(N, M, 3, 3)
|
107 |
+
|
108 |
+
# Create a batch of rays for volume rendering
|
109 |
+
ray_origins, ray_directions = self.ray_sampler(
|
110 |
+
cam2world_matrix=cam2world_matrix.reshape(-1, 4, 4),
|
111 |
+
intrinsics=intrinsics.reshape(-1, 3, 3),
|
112 |
+
render_size=render_size,
|
113 |
+
)
|
114 |
+
assert N*M == ray_origins.shape[0], "Batch size mismatch for ray_origins"
|
115 |
+
assert ray_origins.dim() == 3, "ray_origins should be 3-dimensional"
|
116 |
+
|
117 |
+
# Perform volume rendering
|
118 |
+
rgb_samples, depth_samples, weights_samples = self.renderer(
|
119 |
+
planes.repeat_interleave(M, dim=0), self.decoder, ray_origins, ray_directions, self.rendering_kwargs,
|
120 |
+
)
|
121 |
+
|
122 |
+
# Reshape into 'raw' neural-rendered image
|
123 |
+
Himg = Wimg = render_size
|
124 |
+
rgb_images = rgb_samples.permute(0, 2, 1).reshape(N, M, rgb_samples.shape[-1], Himg, Wimg).contiguous()
|
125 |
+
depth_images = depth_samples.permute(0, 2, 1).reshape(N, M, 1, Himg, Wimg)
|
126 |
+
weight_images = weights_samples.permute(0, 2, 1).reshape(N, M, 1, Himg, Wimg)
|
127 |
+
|
128 |
+
return {
|
129 |
+
'images_rgb': rgb_images,
|
130 |
+
'images_depth': depth_images,
|
131 |
+
'images_weight': weight_images,
|
132 |
+
}
|
133 |
+
|
134 |
+
def forward_grid(self, planes, grid_size: int, aabb: torch.Tensor = None):
|
135 |
+
# planes: (N, 3, D', H', W')
|
136 |
+
# grid_size: int
|
137 |
+
# aabb: (N, 2, 3)
|
138 |
+
if aabb is None:
|
139 |
+
aabb = torch.tensor([
|
140 |
+
[self.rendering_kwargs['sampler_bbox_min']] * 3,
|
141 |
+
[self.rendering_kwargs['sampler_bbox_max']] * 3,
|
142 |
+
], device=planes.device, dtype=planes.dtype).unsqueeze(0).repeat(planes.shape[0], 1, 1)
|
143 |
+
assert planes.shape[0] == aabb.shape[0], "Batch size mismatch for planes and aabb"
|
144 |
+
N = planes.shape[0]
|
145 |
+
|
146 |
+
# create grid points for triplane query
|
147 |
+
grid_points = []
|
148 |
+
for i in range(N):
|
149 |
+
grid_points.append(torch.stack(torch.meshgrid(
|
150 |
+
torch.linspace(aabb[i, 0, 0], aabb[i, 1, 0], grid_size, device=planes.device),
|
151 |
+
torch.linspace(aabb[i, 0, 1], aabb[i, 1, 1], grid_size, device=planes.device),
|
152 |
+
torch.linspace(aabb[i, 0, 2], aabb[i, 1, 2], grid_size, device=planes.device),
|
153 |
+
indexing='ij',
|
154 |
+
), dim=-1).reshape(-1, 3))
|
155 |
+
cube_grid = torch.stack(grid_points, dim=0).to(planes.device)
|
156 |
+
|
157 |
+
features = self.forward_points(planes, cube_grid)
|
158 |
+
|
159 |
+
# reshape into grid
|
160 |
+
features = {
|
161 |
+
k: v.reshape(N, grid_size, grid_size, grid_size, -1)
|
162 |
+
for k, v in features.items()
|
163 |
+
}
|
164 |
+
return features
|
165 |
+
|
166 |
+
def forward_points(self, planes, points: torch.Tensor, chunk_size: int = 2**20):
|
167 |
+
# planes: (N, 3, D', H', W')
|
168 |
+
# points: (N, P, 3)
|
169 |
+
N, P = points.shape[:2]
|
170 |
+
|
171 |
+
# query triplane in chunks
|
172 |
+
outs = []
|
173 |
+
for i in range(0, points.shape[1], chunk_size):
|
174 |
+
chunk_points = points[:, i:i+chunk_size]
|
175 |
+
|
176 |
+
# query triplane
|
177 |
+
chunk_out = self.renderer.run_model_activated(
|
178 |
+
planes=planes,
|
179 |
+
decoder=self.decoder,
|
180 |
+
sample_coordinates=chunk_points,
|
181 |
+
sample_directions=torch.zeros_like(chunk_points),
|
182 |
+
options=self.rendering_kwargs,
|
183 |
+
)
|
184 |
+
outs.append(chunk_out)
|
185 |
+
|
186 |
+
# concatenate the outputs
|
187 |
+
point_features = {
|
188 |
+
k: torch.cat([out[k] for out in outs], dim=1)
|
189 |
+
for k in outs[0].keys()
|
190 |
+
}
|
191 |
+
return point_features
|
lrm/models/rendering/utils/__init__.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
2 |
+
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
|
3 |
+
#
|
4 |
+
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
|
5 |
+
# property and proprietary rights in and to this material, related
|
6 |
+
# documentation and any modifications thereto. Any use, reproduction,
|
7 |
+
# disclosure or distribution of this material and related documentation
|
8 |
+
# without an express license agreement from NVIDIA CORPORATION or
|
9 |
+
# its affiliates is strictly prohibited.
|
lrm/models/rendering/utils/math_utils.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Petr Kellnhofer
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
import torch
|
24 |
+
|
25 |
+
def transform_vectors(matrix: torch.Tensor, vectors4: torch.Tensor) -> torch.Tensor:
|
26 |
+
"""
|
27 |
+
Left-multiplies MxM @ NxM. Returns NxM.
|
28 |
+
"""
|
29 |
+
res = torch.matmul(vectors4, matrix.T)
|
30 |
+
return res
|
31 |
+
|
32 |
+
|
33 |
+
def normalize_vecs(vectors: torch.Tensor) -> torch.Tensor:
|
34 |
+
"""
|
35 |
+
Normalize vector lengths.
|
36 |
+
"""
|
37 |
+
return vectors / (torch.norm(vectors, dim=-1, keepdim=True))
|
38 |
+
|
39 |
+
def torch_dot(x: torch.Tensor, y: torch.Tensor):
|
40 |
+
"""
|
41 |
+
Dot product of two tensors.
|
42 |
+
"""
|
43 |
+
return (x * y).sum(-1)
|
44 |
+
|
45 |
+
|
46 |
+
def get_ray_limits_box(rays_o: torch.Tensor, rays_d: torch.Tensor, box_side_length):
|
47 |
+
"""
|
48 |
+
Author: Petr Kellnhofer
|
49 |
+
Intersects rays with the [-1, 1] NDC volume.
|
50 |
+
Returns min and max distance of entry.
|
51 |
+
Returns -1 for no intersection.
|
52 |
+
https://www.scratchapixel.com/lessons/3d-basic-rendering/minimal-ray-tracer-rendering-simple-shapes/ray-box-intersection
|
53 |
+
"""
|
54 |
+
o_shape = rays_o.shape
|
55 |
+
rays_o = rays_o.detach().reshape(-1, 3)
|
56 |
+
rays_d = rays_d.detach().reshape(-1, 3)
|
57 |
+
|
58 |
+
|
59 |
+
bb_min = [-1*(box_side_length/2), -1*(box_side_length/2), -1*(box_side_length/2)]
|
60 |
+
bb_max = [1*(box_side_length/2), 1*(box_side_length/2), 1*(box_side_length/2)]
|
61 |
+
bounds = torch.tensor([bb_min, bb_max], dtype=rays_o.dtype, device=rays_o.device)
|
62 |
+
is_valid = torch.ones(rays_o.shape[:-1], dtype=bool, device=rays_o.device)
|
63 |
+
|
64 |
+
# Precompute inverse for stability.
|
65 |
+
invdir = 1 / rays_d
|
66 |
+
sign = (invdir < 0).long()
|
67 |
+
|
68 |
+
# Intersect with YZ plane.
|
69 |
+
tmin = (bounds.index_select(0, sign[..., 0])[..., 0] - rays_o[..., 0]) * invdir[..., 0]
|
70 |
+
tmax = (bounds.index_select(0, 1 - sign[..., 0])[..., 0] - rays_o[..., 0]) * invdir[..., 0]
|
71 |
+
|
72 |
+
# Intersect with XZ plane.
|
73 |
+
tymin = (bounds.index_select(0, sign[..., 1])[..., 1] - rays_o[..., 1]) * invdir[..., 1]
|
74 |
+
tymax = (bounds.index_select(0, 1 - sign[..., 1])[..., 1] - rays_o[..., 1]) * invdir[..., 1]
|
75 |
+
|
76 |
+
# Resolve parallel rays.
|
77 |
+
is_valid[torch.logical_or(tmin > tymax, tymin > tmax)] = False
|
78 |
+
|
79 |
+
# Use the shortest intersection.
|
80 |
+
tmin = torch.max(tmin, tymin)
|
81 |
+
tmax = torch.min(tmax, tymax)
|
82 |
+
|
83 |
+
# Intersect with XY plane.
|
84 |
+
tzmin = (bounds.index_select(0, sign[..., 2])[..., 2] - rays_o[..., 2]) * invdir[..., 2]
|
85 |
+
tzmax = (bounds.index_select(0, 1 - sign[..., 2])[..., 2] - rays_o[..., 2]) * invdir[..., 2]
|
86 |
+
|
87 |
+
# Resolve parallel rays.
|
88 |
+
is_valid[torch.logical_or(tmin > tzmax, tzmin > tmax)] = False
|
89 |
+
|
90 |
+
# Use the shortest intersection.
|
91 |
+
tmin = torch.max(tmin, tzmin)
|
92 |
+
tmax = torch.min(tmax, tzmax)
|
93 |
+
|
94 |
+
# Mark invalid.
|
95 |
+
tmin[torch.logical_not(is_valid)] = -1
|
96 |
+
tmax[torch.logical_not(is_valid)] = -2
|
97 |
+
|
98 |
+
return tmin.reshape(*o_shape[:-1], 1), tmax.reshape(*o_shape[:-1], 1)
|
99 |
+
|
100 |
+
|
101 |
+
def linspace(start: torch.Tensor, stop: torch.Tensor, num: int):
|
102 |
+
"""
|
103 |
+
Creates a tensor of shape [num, *start.shape] whose values are evenly spaced from start to end, inclusive.
|
104 |
+
Replicates but the multi-dimensional bahaviour of numpy.linspace in PyTorch.
|
105 |
+
"""
|
106 |
+
# create a tensor of 'num' steps from 0 to 1
|
107 |
+
steps = torch.arange(num, dtype=torch.float32, device=start.device) / (num - 1)
|
108 |
+
|
109 |
+
# reshape the 'steps' tensor to [-1, *([1]*start.ndim)] to allow for broadcastings
|
110 |
+
# - using 'steps.reshape([-1, *([1]*start.ndim)])' would be nice here but torchscript
|
111 |
+
# "cannot statically infer the expected size of a list in this contex", hence the code below
|
112 |
+
for i in range(start.ndim):
|
113 |
+
steps = steps.unsqueeze(-1)
|
114 |
+
|
115 |
+
# the output starts at 'start' and increments until 'stop' in each dimension
|
116 |
+
out = start[None] + steps * (stop - start)[None]
|
117 |
+
|
118 |
+
return out
|
lrm/models/rendering/utils/ray_marcher.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
2 |
+
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
|
3 |
+
#
|
4 |
+
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
|
5 |
+
# property and proprietary rights in and to this material, related
|
6 |
+
# documentation and any modifications thereto. Any use, reproduction,
|
7 |
+
# disclosure or distribution of this material and related documentation
|
8 |
+
# without an express license agreement from NVIDIA CORPORATION or
|
9 |
+
# its affiliates is strictly prohibited.
|
10 |
+
#
|
11 |
+
# Modified by Zexin He
|
12 |
+
# The modifications are subject to the same license as the original.
|
13 |
+
|
14 |
+
|
15 |
+
"""
|
16 |
+
The ray marcher takes the raw output of the implicit representation and uses the volume rendering equation to produce composited colors and depths.
|
17 |
+
Based off of the implementation in MipNeRF (this one doesn't do any cone tracing though!)
|
18 |
+
"""
|
19 |
+
|
20 |
+
import torch
|
21 |
+
import torch.nn as nn
|
22 |
+
|
23 |
+
|
24 |
+
class MipRayMarcher2(nn.Module):
|
25 |
+
def __init__(self, activation_factory):
|
26 |
+
super().__init__()
|
27 |
+
self.activation_factory = activation_factory
|
28 |
+
|
29 |
+
def run_forward(self, colors, densities, depths, rendering_options):
|
30 |
+
deltas = depths[:, :, 1:] - depths[:, :, :-1]
|
31 |
+
colors_mid = (colors[:, :, :-1] + colors[:, :, 1:]) / 2
|
32 |
+
densities_mid = (densities[:, :, :-1] + densities[:, :, 1:]) / 2
|
33 |
+
depths_mid = (depths[:, :, :-1] + depths[:, :, 1:]) / 2
|
34 |
+
|
35 |
+
# using factory mode for better usability
|
36 |
+
densities_mid = self.activation_factory(rendering_options)(densities_mid)
|
37 |
+
|
38 |
+
density_delta = densities_mid * deltas
|
39 |
+
|
40 |
+
alpha = 1 - torch.exp(-density_delta)
|
41 |
+
|
42 |
+
alpha_shifted = torch.cat([torch.ones_like(alpha[:, :, :1]), 1-alpha + 1e-10], -2)
|
43 |
+
weights = alpha * torch.cumprod(alpha_shifted, -2)[:, :, :-1]
|
44 |
+
|
45 |
+
composite_rgb = torch.sum(weights * colors_mid, -2)
|
46 |
+
weight_total = weights.sum(2)
|
47 |
+
composite_depth = torch.sum(weights * depths_mid, -2) / weight_total
|
48 |
+
|
49 |
+
# clip the composite to min/max range of depths
|
50 |
+
composite_depth = torch.nan_to_num(composite_depth, float('inf'))
|
51 |
+
composite_depth = torch.clamp(composite_depth, torch.min(depths), torch.max(depths))
|
52 |
+
|
53 |
+
if rendering_options.get('white_back', False):
|
54 |
+
composite_rgb = composite_rgb + 1 - weight_total
|
55 |
+
|
56 |
+
# rendered value scale is 0-1, comment out original mipnerf scaling
|
57 |
+
# composite_rgb = composite_rgb * 2 - 1 # Scale to (-1, 1)
|
58 |
+
|
59 |
+
return composite_rgb, composite_depth, weights
|
60 |
+
|
61 |
+
|
62 |
+
def forward(self, colors, densities, depths, rendering_options):
|
63 |
+
composite_rgb, composite_depth, weights = self.run_forward(colors, densities, depths, rendering_options)
|
64 |
+
|
65 |
+
return composite_rgb, composite_depth, weights
|
lrm/models/rendering/utils/ray_sampler.py
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
2 |
+
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
|
3 |
+
#
|
4 |
+
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
|
5 |
+
# property and proprietary rights in and to this material, related
|
6 |
+
# documentation and any modifications thereto. Any use, reproduction,
|
7 |
+
# disclosure or distribution of this material and related documentation
|
8 |
+
# without an express license agreement from NVIDIA CORPORATION or
|
9 |
+
# its affiliates is strictly prohibited.
|
10 |
+
#
|
11 |
+
# Modified by Zexin He
|
12 |
+
# The modifications are subject to the same license as the original.
|
13 |
+
|
14 |
+
|
15 |
+
"""
|
16 |
+
The ray sampler is a module that takes in camera matrices and resolution and batches of rays.
|
17 |
+
Expects cam2world matrices that use the OpenCV camera coordinate system conventions.
|
18 |
+
"""
|
19 |
+
|
20 |
+
import torch
|
21 |
+
|
22 |
+
class RaySampler(torch.nn.Module):
|
23 |
+
def __init__(self):
|
24 |
+
super().__init__()
|
25 |
+
self.ray_origins_h, self.ray_directions, self.depths, self.image_coords, self.rendering_options = None, None, None, None, None
|
26 |
+
|
27 |
+
|
28 |
+
def forward(self, cam2world_matrix, intrinsics, render_size):
|
29 |
+
"""
|
30 |
+
Create batches of rays and return origins and directions.
|
31 |
+
|
32 |
+
cam2world_matrix: (N, 4, 4)
|
33 |
+
intrinsics: (N, 3, 3)
|
34 |
+
render_size: int
|
35 |
+
|
36 |
+
ray_origins: (N, M, 3)
|
37 |
+
ray_dirs: (N, M, 2)
|
38 |
+
"""
|
39 |
+
|
40 |
+
N, M = cam2world_matrix.shape[0], render_size**2
|
41 |
+
cam_locs_world = cam2world_matrix[:, :3, 3]
|
42 |
+
fx = intrinsics[:, 0, 0]
|
43 |
+
fy = intrinsics[:, 1, 1]
|
44 |
+
cx = intrinsics[:, 0, 2]
|
45 |
+
cy = intrinsics[:, 1, 2]
|
46 |
+
sk = intrinsics[:, 0, 1]
|
47 |
+
|
48 |
+
uv = torch.stack(torch.meshgrid(
|
49 |
+
torch.arange(render_size, dtype=torch.float32, device=cam2world_matrix.device),
|
50 |
+
torch.arange(render_size, dtype=torch.float32, device=cam2world_matrix.device),
|
51 |
+
indexing='ij',
|
52 |
+
))
|
53 |
+
uv = uv.flip(0).reshape(2, -1).transpose(1, 0)
|
54 |
+
uv = uv.unsqueeze(0).repeat(cam2world_matrix.shape[0], 1, 1)
|
55 |
+
|
56 |
+
x_cam = uv[:, :, 0].view(N, -1) * (1./render_size) + (0.5/render_size)
|
57 |
+
y_cam = uv[:, :, 1].view(N, -1) * (1./render_size) + (0.5/render_size)
|
58 |
+
z_cam = torch.ones((N, M), device=cam2world_matrix.device)
|
59 |
+
|
60 |
+
x_lift = (x_cam - cx.unsqueeze(-1) + cy.unsqueeze(-1)*sk.unsqueeze(-1)/fy.unsqueeze(-1) - sk.unsqueeze(-1)*y_cam/fy.unsqueeze(-1)) / fx.unsqueeze(-1) * z_cam
|
61 |
+
y_lift = (y_cam - cy.unsqueeze(-1)) / fy.unsqueeze(-1) * z_cam
|
62 |
+
|
63 |
+
cam_rel_points = torch.stack((x_lift, y_lift, z_cam, torch.ones_like(z_cam)), dim=-1)
|
64 |
+
|
65 |
+
_opencv2blender = torch.tensor([
|
66 |
+
[1, 0, 0, 0],
|
67 |
+
[0, -1, 0, 0],
|
68 |
+
[0, 0, -1, 0],
|
69 |
+
[0, 0, 0, 1],
|
70 |
+
], dtype=torch.float32, device=cam2world_matrix.device).unsqueeze(0).repeat(N, 1, 1)
|
71 |
+
|
72 |
+
cam2world_matrix = torch.bmm(cam2world_matrix, _opencv2blender)
|
73 |
+
|
74 |
+
world_rel_points = torch.bmm(cam2world_matrix, cam_rel_points.permute(0, 2, 1)).permute(0, 2, 1)[:, :, :3]
|
75 |
+
|
76 |
+
ray_dirs = world_rel_points - cam_locs_world[:, None, :]
|
77 |
+
ray_dirs = torch.nn.functional.normalize(ray_dirs, dim=2)
|
78 |
+
|
79 |
+
ray_origins = cam_locs_world.unsqueeze(1).repeat(1, ray_dirs.shape[1], 1)
|
80 |
+
|
81 |
+
return ray_origins, ray_dirs
|
lrm/models/rendering/utils/renderer.py
ADDED
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
2 |
+
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
|
3 |
+
#
|
4 |
+
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
|
5 |
+
# property and proprietary rights in and to this material, related
|
6 |
+
# documentation and any modifications thereto. Any use, reproduction,
|
7 |
+
# disclosure or distribution of this material and related documentation
|
8 |
+
# without an express license agreement from NVIDIA CORPORATION or
|
9 |
+
# its affiliates is strictly prohibited.
|
10 |
+
#
|
11 |
+
# Modified by Zexin He
|
12 |
+
# The modifications are subject to the same license as the original.
|
13 |
+
|
14 |
+
|
15 |
+
"""
|
16 |
+
The renderer is a module that takes in rays, decides where to sample along each
|
17 |
+
ray, and computes pixel colors using the volume rendering equation.
|
18 |
+
"""
|
19 |
+
|
20 |
+
import torch
|
21 |
+
import torch.nn as nn
|
22 |
+
import torch.nn.functional as F
|
23 |
+
|
24 |
+
from .ray_marcher import MipRayMarcher2
|
25 |
+
from . import math_utils
|
26 |
+
|
27 |
+
def generate_planes():
    """
    Return the axis matrices defining the three canonical tri-planes.

    Each 3x3 matrix lists the basis vectors ("axes") of one plane; downstream
    code works with an arbitrary number of planes in arbitrary orientation.

    Bugfix reference: https://github.com/NVlabs/eg3d/issues/67

    Returns:
        torch.Tensor of shape (3, 3, 3), dtype float32.
    """
    plane_axes = [
        [[1, 0, 0],
         [0, 1, 0],
         [0, 0, 1]],   # XY plane
        [[1, 0, 0],
         [0, 0, 1],
         [0, 1, 0]],   # XZ plane
        [[0, 0, 1],
         [0, 1, 0],
         [1, 0, 0]],   # ZY plane
    ]
    return torch.tensor(plane_axes, dtype=torch.float32)
|
44 |
+
|
45 |
+
def project_onto_planes(planes, coordinates):
    """
    Project a batch of 3D points onto a batch of 2D planes, returning
    2D plane coordinates.

    Args:
        planes: (n_planes, 3, 3) plane axis matrices.
        coordinates: (N, M, 3) world-space points.

    Returns:
        (N*n_planes, M, 2) projected plane coordinates.
    """
    n_batch, n_points, _ = coordinates.shape
    n_planes = planes.shape[0]

    # Repeat every point once per plane: (N*n_planes, M, 3).
    pts = coordinates.unsqueeze(1).expand(-1, n_planes, -1, -1).reshape(n_batch * n_planes, n_points, 3)

    # Invert the plane bases and tile them across the batch: (N*n_planes, 3, 3).
    bases_inv = torch.linalg.inv(planes).unsqueeze(0).expand(n_batch, -1, -1, -1).reshape(n_batch * n_planes, 3, 3)

    # Change of basis into plane space, then drop the out-of-plane axis.
    plane_coords = torch.bmm(pts, bases_inv)
    return plane_coords[..., :2]
|
60 |
+
|
61 |
+
def sample_from_planes(plane_axes, plane_features, coordinates, mode='bilinear', padding_mode='zeros', box_warp=None):
    """
    Bilinearly sample tri-plane features at the given 3D coordinates.

    Args:
        plane_axes: (n_planes, 3, 3) plane axis matrices from generate_planes().
        plane_features: (N, n_planes, C, H, W) per-plane feature maps.
        coordinates: (N, M, 3) points; assumed to lie roughly inside the
            cube of side length `box_warp` centered at the origin.
        mode: interpolation mode forwarded to grid_sample.
        padding_mode: must be 'zeros' (out-of-plane samples read as zero).
        box_warp: side length of the bounding cube mapped onto [-1, 1].

    Returns:
        (N, n_planes, M, C) sampled features.
    """
    assert padding_mode == 'zeros'
    N, n_planes, C, H, W = plane_features.shape
    M = coordinates.shape[1]

    # Fold the plane dimension into the batch for grid_sample.
    feats = plane_features.view(N * n_planes, C, H, W)

    # Rescale so the box [-box_warp/2, box_warp/2] maps to grid_sample's [-1, 1].
    scaled_coords = coordinates * (2 / box_warp)

    # Build a (N*n_planes, 1, M, 2) sampling grid in plane space.
    grid = project_onto_planes(plane_axes, scaled_coords).unsqueeze(1)

    sampled = torch.nn.functional.grid_sample(
        feats, grid.float(), mode=mode, padding_mode=padding_mode, align_corners=False)
    # (N*n_planes, C, 1, M) -> (N, n_planes, M, C)
    return sampled.permute(0, 3, 2, 1).reshape(N, n_planes, M, C)
|
72 |
+
|
73 |
+
def sample_from_3dgrid(grid, coordinates):
    """
    Trilinearly sample a feature volume at the given 3D coordinates.

    Args:
        grid: (1, channels, H, W, D) feature volume (a real batch dimension
            also works).
        coordinates: (batch_size, num_points_per_batch, 3) points in [-1, 1].

    Returns:
        (batch_size, num_points_per_batch, channels) sampled features.
    """
    batch_size, num_points, n_dims = coordinates.shape
    # grid_sample wants a 5D sampling grid; pack all points into one row.
    sample_grid = coordinates.reshape(batch_size, 1, 1, -1, n_dims)
    features = torch.nn.functional.grid_sample(
        grid.expand(batch_size, -1, -1, -1, -1),
        sample_grid,
        mode='bilinear', padding_mode='zeros', align_corners=False)
    N, C, H, W, D = features.shape
    # (N, C, 1, 1, M) -> (N, M, C)
    return features.permute(0, 4, 3, 2, 1).reshape(N, H * W * D, C)
|
87 |
+
|
88 |
+
class ImportanceRenderer(torch.nn.Module):
    """
    Two-pass (coarse + importance) volume renderer over tri-plane features.

    Modified original version to filter out-of-box samples as TensoRF does.

    Reference:
        TensoRF: https://github.com/apchenstu/TensoRF/blob/main/models/tensorBase.py#L277
    """
    def __init__(self):
        super().__init__()
        # Factory that maps rendering options to the density activation
        # (only softplus is supported — see _build_activation_factory).
        self.activation_factory = self._build_activation_factory()
        self.ray_marcher = MipRayMarcher2(self.activation_factory)
        # Constant (3, 3, 3) plane axes; moved onto the feature device lazily
        # in run_model (a buffer would also work, but this keeps state_dict clean).
        self.plane_axes = generate_planes()

    def _build_activation_factory(self):
        # Returns a callable that, given the rendering options, produces the
        # activation applied to raw densities before ray marching.
        def activation_factory(options: dict):
            if options['clamp_mode'] == 'softplus':
                return lambda x: F.softplus(x - 1) # activation bias of -1 makes things initialize better
            else:
                assert False, "Renderer only supports `clamp_mode`=`softplus`!"
        return activation_factory

    def _forward_pass(self, depths: torch.Tensor, ray_directions: torch.Tensor, ray_origins: torch.Tensor,
                      planes: torch.Tensor, decoder: nn.Module, rendering_options: dict):
        """
        Decode colors/densities for one set of depth samples along the rays.

        Additional filtering is applied to filter out-of-box samples.
        Modifications made by Zexin He.

        Args:
            depths: (batch, num_rays, samples_per_ray, 1) sample depths.
            ray_directions: (batch, num_rays, 3) unit ray directions.
            ray_origins: (batch, num_rays, 3) ray origins.
            planes: tri-plane features forwarded to run_model.
            decoder: module mapping sampled features to {'rgb', 'sigma'}.
            rendering_options: dict; must contain 'sampler_bbox_min' and
                'sampler_bbox_max' delimiting the valid sampling box.

        Returns:
            (colors, densities) of shapes (batch, num_rays, samples_per_ray, 3)
            and (batch, num_rays, samples_per_ray, 1). Out-of-box samples get
            zero color and a very negative (pre-activation) density.
        """

        # context related variables
        batch_size, num_rays, samples_per_ray, _ = depths.shape
        device = depths.device

        # define sample points with depths
        sample_directions = ray_directions.unsqueeze(-2).expand(-1, -1, samples_per_ray, -1).reshape(batch_size, -1, 3)
        sample_coordinates = (ray_origins.unsqueeze(-2) + depths * ray_directions.unsqueeze(-2)).reshape(batch_size, -1, 3)

        # filter out-of-box samples
        mask_inbox = \
            (rendering_options['sampler_bbox_min'] <= sample_coordinates) & \
            (sample_coordinates <= rendering_options['sampler_bbox_max'])
        mask_inbox = mask_inbox.all(-1)

        # forward model according to all samples
        _out = self.run_model(planes, decoder, sample_coordinates, sample_directions, rendering_options)

        # set out-of-box samples to zeros(rgb) & -inf(sigma)
        # SAFE_GUARD divides the (nan_to_num'd) -inf so the softplus activation
        # downstream stays numerically safe instead of producing NaNs.
        SAFE_GUARD = 3
        DATA_TYPE = _out['sigma'].dtype
        colors_pass = torch.zeros(batch_size, num_rays * samples_per_ray, 3, device=device, dtype=DATA_TYPE)
        densities_pass = torch.nan_to_num(torch.full((batch_size, num_rays * samples_per_ray, 1), -float('inf'), device=device, dtype=DATA_TYPE)) / SAFE_GUARD
        colors_pass[mask_inbox], densities_pass[mask_inbox] = _out['rgb'][mask_inbox], _out['sigma'][mask_inbox]

        # reshape back
        colors_pass = colors_pass.reshape(batch_size, num_rays, samples_per_ray, colors_pass.shape[-1])
        densities_pass = densities_pass.reshape(batch_size, num_rays, samples_per_ray, densities_pass.shape[-1])

        return colors_pass, densities_pass

    def forward(self, planes, decoder, ray_origins, ray_directions, rendering_options):
        """
        Render rays: coarse stratified pass, optional importance-sampled fine
        pass, then composite with the ray marcher.

        Returns:
            (rgb_final, depth_final, weight_sum) where weight_sum is the
            per-ray accumulated alpha (weights summed over samples).
        """
        # self.plane_axes = self.plane_axes.to(ray_origins.device)

        if rendering_options['ray_start'] == rendering_options['ray_end'] == 'auto':
            # Derive per-ray near/far from the ray/box intersection.
            ray_start, ray_end = math_utils.get_ray_limits_box(ray_origins, ray_directions, box_side_length=rendering_options['box_warp'])
            is_ray_valid = ray_end > ray_start
            # Rays that miss the box get degenerate limits borrowed from the
            # valid rays so sampling below still produces finite depths.
            if torch.any(is_ray_valid).item():
                ray_start[~is_ray_valid] = ray_start[is_ray_valid].min()
                ray_end[~is_ray_valid] = ray_start[is_ray_valid].max()
            depths_coarse = self.sample_stratified(ray_origins, ray_start, ray_end, rendering_options['depth_resolution'], rendering_options['disparity_space_sampling'])
        else:
            # Create stratified depth samples
            depths_coarse = self.sample_stratified(ray_origins, rendering_options['ray_start'], rendering_options['ray_end'], rendering_options['depth_resolution'], rendering_options['disparity_space_sampling'])

        # Coarse Pass
        colors_coarse, densities_coarse = self._forward_pass(
            depths=depths_coarse, ray_directions=ray_directions, ray_origins=ray_origins,
            planes=planes, decoder=decoder, rendering_options=rendering_options)

        # Fine Pass
        N_importance = rendering_options['depth_resolution_importance']
        if N_importance > 0:
            # Coarse weights drive the importance distribution for fine samples.
            _, _, weights = self.ray_marcher(colors_coarse, densities_coarse, depths_coarse, rendering_options)

            depths_fine = self.sample_importance(depths_coarse, weights, N_importance)

            colors_fine, densities_fine = self._forward_pass(
                depths=depths_fine, ray_directions=ray_directions, ray_origins=ray_origins,
                planes=planes, decoder=decoder, rendering_options=rendering_options)

            # Merge coarse and fine samples, sorted by depth per ray.
            all_depths, all_colors, all_densities = self.unify_samples(depths_coarse, colors_coarse, densities_coarse,
                                                                       depths_fine, colors_fine, densities_fine)

            # Aggregate
            rgb_final, depth_final, weights = self.ray_marcher(all_colors, all_densities, all_depths, rendering_options)
        else:
            rgb_final, depth_final, weights = self.ray_marcher(colors_coarse, densities_coarse, depths_coarse, rendering_options)

        return rgb_final, depth_final, weights.sum(2)

    def run_model(self, planes, decoder, sample_coordinates, sample_directions, options):
        """Sample tri-plane features at the coordinates and decode them.

        Returns the decoder's raw output dict (expects 'rgb' and 'sigma';
        sigma here is pre-activation).
        """
        plane_axes = self.plane_axes.to(planes.device)
        sampled_features = sample_from_planes(plane_axes, planes, sample_coordinates, padding_mode='zeros', box_warp=options['box_warp'])

        out = decoder(sampled_features, sample_directions)
        if options.get('density_noise', 0) > 0:
            # Regularization noise on raw density (training-time option).
            out['sigma'] += torch.randn_like(out['sigma']) * options['density_noise']
        return out

    def run_model_activated(self, planes, decoder, sample_coordinates, sample_directions, options):
        """Like run_model, but with the density activation already applied."""
        out = self.run_model(planes, decoder, sample_coordinates, sample_directions, options)
        out['sigma'] = self.activation_factory(options)(out['sigma'])
        return out

    def sort_samples(self, all_depths, all_colors, all_densities):
        """Sort per-ray samples by depth (ascending) along the sample axis."""
        _, indices = torch.sort(all_depths, dim=-2)
        all_depths = torch.gather(all_depths, -2, indices)
        all_colors = torch.gather(all_colors, -2, indices.expand(-1, -1, -1, all_colors.shape[-1]))
        all_densities = torch.gather(all_densities, -2, indices.expand(-1, -1, -1, 1))
        return all_depths, all_colors, all_densities

    def unify_samples(self, depths1, colors1, densities1, depths2, colors2, densities2):
        """Concatenate two sample sets along the sample axis and depth-sort them."""
        all_depths = torch.cat([depths1, depths2], dim = -2)
        all_colors = torch.cat([colors1, colors2], dim = -2)
        all_densities = torch.cat([densities1, densities2], dim = -2)

        _, indices = torch.sort(all_depths, dim=-2)
        all_depths = torch.gather(all_depths, -2, indices)
        all_colors = torch.gather(all_colors, -2, indices.expand(-1, -1, -1, all_colors.shape[-1]))
        all_densities = torch.gather(all_densities, -2, indices.expand(-1, -1, -1, 1))

        return all_depths, all_colors, all_densities

    def sample_stratified(self, ray_origins, ray_start, ray_end, depth_resolution, disparity_space_sampling=False):
        """
        Return depths of approximately uniformly spaced samples along rays.

        Shapes: output is (N, M, depth_resolution, 1). ray_start/ray_end may be
        scalars or per-ray tensors (the latter only in the non-disparity branch).
        Samples are jittered within each stratum via torch.rand_like.
        """
        N, M, _ = ray_origins.shape
        if disparity_space_sampling:
            depths_coarse = torch.linspace(0,
                                    1,
                                    depth_resolution,
                                    device=ray_origins.device).reshape(1, 1, depth_resolution, 1).repeat(N, M, 1, 1)
            depth_delta = 1/(depth_resolution - 1)
            depths_coarse += torch.rand_like(depths_coarse) * depth_delta
            # Uniform in disparity (1/depth), then mapped back to depth.
            depths_coarse = 1./(1./ray_start * (1. - depths_coarse) + 1./ray_end * depths_coarse)
        else:
            if type(ray_start) == torch.Tensor:
                # Per-ray limits: math_utils.linspace returns (steps, N, M, 1).
                depths_coarse = math_utils.linspace(ray_start, ray_end, depth_resolution).permute(1,2,0,3)
                depth_delta = (ray_end - ray_start) / (depth_resolution - 1)
                depths_coarse += torch.rand_like(depths_coarse) * depth_delta[..., None]
            else:
                depths_coarse = torch.linspace(ray_start, ray_end, depth_resolution, device=ray_origins.device).reshape(1, 1, depth_resolution, 1).repeat(N, M, 1, 1)
                depth_delta = (ray_end - ray_start)/(depth_resolution - 1)
                depths_coarse += torch.rand_like(depths_coarse) * depth_delta

        return depths_coarse

    def sample_importance(self, z_vals, weights, N_importance):
        """
        Return depths of importance sampled points along rays. See NeRF importance sampling for more.
        """
        with torch.no_grad():
            batch_size, num_rays, samples_per_ray, _ = z_vals.shape

            z_vals = z_vals.reshape(batch_size * num_rays, samples_per_ray)
            weights = weights.reshape(batch_size * num_rays, -1) # -1 to account for loss of 1 sample in MipRayMarcher

            # smooth weights
            # NOTE(review): .squeeze() drops ALL size-1 dims — if
            # batch_size*num_rays == 1 this would also drop the ray dim; verify
            # callers never hit that case.
            weights = torch.nn.functional.max_pool1d(weights.unsqueeze(1).float(), 2, 1, padding=1)
            weights = torch.nn.functional.avg_pool1d(weights, 2, 1).squeeze()
            weights = weights + 0.01

            # Sample at bin midpoints with the smoothed weights as the pdf.
            z_vals_mid = 0.5 * (z_vals[: ,:-1] + z_vals[: ,1:])
            importance_z_vals = self.sample_pdf(z_vals_mid, weights[:, 1:-1],
                                             N_importance).detach().reshape(batch_size, num_rays, N_importance, 1)
        return importance_z_vals

    def sample_pdf(self, bins, weights, N_importance, det=False, eps=1e-5):
        """
        Sample @N_importance samples from @bins with distribution defined by @weights.
        Inputs:
            bins: (N_rays, N_samples_+1) where N_samples_ is "the number of coarse samples per ray - 2"
            weights: (N_rays, N_samples_)
            N_importance: the number of samples to draw from the distribution
            det: deterministic or not
            eps: a small number to prevent division by zero
        Outputs:
            samples: the sampled samples
        """
        N_rays, N_samples_ = weights.shape
        weights = weights + eps # prevent division by zero (don't do inplace op!)
        pdf = weights / torch.sum(weights, -1, keepdim=True) # (N_rays, N_samples_)
        cdf = torch.cumsum(pdf, -1) # (N_rays, N_samples), cumulative distribution function
        cdf = torch.cat([torch.zeros_like(cdf[: ,:1]), cdf], -1)  # (N_rays, N_samples_+1)
        # padded to 0~1 inclusive

        if det:
            u = torch.linspace(0, 1, N_importance, device=bins.device)
            u = u.expand(N_rays, N_importance)
        else:
            u = torch.rand(N_rays, N_importance, device=bins.device)
        u = u.contiguous()

        # Invert the CDF: locate each uniform draw's bin, then interpolate.
        inds = torch.searchsorted(cdf, u, right=True)
        below = torch.clamp_min(inds-1, 0)
        above = torch.clamp_max(inds, N_samples_)

        inds_sampled = torch.stack([below, above], -1).view(N_rays, 2*N_importance)
        cdf_g = torch.gather(cdf, 1, inds_sampled).view(N_rays, N_importance, 2)
        bins_g = torch.gather(bins, 1, inds_sampled).view(N_rays, N_importance, 2)

        denom = cdf_g[...,1]-cdf_g[...,0]
        denom[denom<eps] = 1 # denom equals 0 means a bin has weight 0, in which case it will not be sampled
                             # anyway, therefore any value for it is fine (set to 1 here)

        samples = bins_g[...,0] + (u-cdf_g[...,0])/denom * (bins_g[...,1]-bins_g[...,0])
        return samples
|
lrm/models/transformer.py
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2023, Zexin He
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
|
16 |
+
import torch
|
17 |
+
import torch.nn as nn
|
18 |
+
|
19 |
+
|
20 |
+
class ModLN(nn.Module):
    """
    Adaptive layer normalization (adaLN) modulation.

    A conditioning vector is projected to a per-channel shift and scale,
    which then modulate the layer-normalized input.

    References:
        DiT: https://github.com/facebookresearch/DiT/blob/main/models.py#L101
    """
    def __init__(self, inner_dim: int, mod_dim: int, eps: float):
        super().__init__()
        self.norm = nn.LayerNorm(inner_dim, eps=eps)
        # Maps the condition to the concatenated (shift, scale) pair.
        self.mlp = nn.Sequential(
            nn.SiLU(),
            nn.Linear(mod_dim, inner_dim * 2),
        )

    @staticmethod
    def modulate(x, shift, scale):
        # x: [N, L, D]; shift, scale: [N, D] broadcast over the sequence axis.
        # addcmul computes shift + x * (1 + scale) in one fused op.
        return torch.addcmul(shift.unsqueeze(1), x, scale.unsqueeze(1) + 1)

    def forward(self, x, cond):
        # Project the condition and split it into shift/scale halves: each [N, D].
        modulation = self.mlp(cond)
        shift, scale = modulation.chunk(2, dim=-1)
        normed = self.norm(x)
        return self.modulate(normed, shift, scale)  # [N, L, D]
|
44 |
+
|
45 |
+
|
46 |
+
class ConditionModulationBlock(nn.Module):
    """
    Transformer block that takes in a cross-attention condition and another
    modulation vector applied to its sub-blocks.

    Structure (each with a residual connection and adaLN pre-norm):
    cross-attention on the condition sequence, self-attention, then an MLP.
    Attention uses torch.nn.MultiheadAttention.
    """
    def __init__(self, inner_dim: int, cond_dim: int, mod_dim: int, num_heads: int, eps: float,
                 attn_drop: float = 0., attn_bias: bool = False,
                 mlp_ratio: float = 4., mlp_drop: float = 0.):
        super().__init__()
        self.norm1 = ModLN(inner_dim, mod_dim, eps)
        # kdim/vdim let keys and values come from the condition stream.
        self.cross_attn = nn.MultiheadAttention(
            embed_dim=inner_dim, num_heads=num_heads, kdim=cond_dim, vdim=cond_dim,
            dropout=attn_drop, bias=attn_bias, batch_first=True)
        self.norm2 = ModLN(inner_dim, mod_dim, eps)
        self.self_attn = nn.MultiheadAttention(
            embed_dim=inner_dim, num_heads=num_heads,
            dropout=attn_drop, bias=attn_bias, batch_first=True)
        self.norm3 = ModLN(inner_dim, mod_dim, eps)
        hidden_dim = int(inner_dim * mlp_ratio)
        self.mlp = nn.Sequential(
            nn.Linear(inner_dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(mlp_drop),
            nn.Linear(hidden_dim, inner_dim),
            nn.Dropout(mlp_drop),
        )

    def forward(self, x, cond, mod):
        # x: [N, L, D]; cond: [N, L_cond, D_cond]; mod: [N, D_mod]
        # Cross-attend the tokens to the condition sequence (residual).
        attended, _ = self.cross_attn(self.norm1(x, mod), cond, cond)
        x = x + attended
        # Self-attention over the modulated tokens (residual).
        tokens = self.norm2(x, mod)
        x = x + self.self_attn(tokens, tokens, tokens)[0]
        # Position-wise feed-forward (residual).
        x = x + self.mlp(self.norm3(x, mod))
        return x
|
82 |
+
|
83 |
+
|
84 |
+
class TriplaneTransformer(nn.Module):
    """
    Transformer with condition and modulation that generates a triplane representation.

    Learnable positional tokens (one per low-res triplane cell across all three
    planes) are refined by a stack of ConditionModulationBlocks conditioned on
    image features and modulated by camera embeddings, then upsampled 2x per
    plane with a transposed convolution.

    Reference:
        Timm: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L486
    """
    def __init__(self, inner_dim: int, image_feat_dim: int, camera_embed_dim: int,
                 triplane_low_res: int, triplane_high_res: int, triplane_dim: int,
                 num_layers: int, num_heads: int,
                 eps: float = 1e-6):
        super().__init__()

        # attributes
        self.triplane_low_res = triplane_low_res
        self.triplane_high_res = triplane_high_res   # must equal 2*low_res given the stride-2 deconv (asserted in forward)
        self.triplane_dim = triplane_dim

        # modules
        # initialize pos_embed with 1/sqrt(dim) * N(0, 1)
        self.pos_embed = nn.Parameter(torch.randn(1, 3*triplane_low_res**2, inner_dim) * (1. / inner_dim) ** 0.5)
        self.layers = nn.ModuleList([
            ConditionModulationBlock(
                inner_dim=inner_dim, cond_dim=image_feat_dim, mod_dim=camera_embed_dim, num_heads=num_heads, eps=eps)
            for _ in range(num_layers)
        ])
        self.norm = nn.LayerNorm(inner_dim, eps=eps)
        # kernel=stride=2 doubles the spatial resolution of each plane.
        self.deconv = nn.ConvTranspose2d(inner_dim, triplane_dim, kernel_size=2, stride=2, padding=0)

    def forward(self, image_feats, camera_embeddings):
        """
        Args:
            image_feats: [N, L_cond, D_cond] cross-attention condition tokens.
            camera_embeddings: [N, D_mod] per-sample modulation vectors.

        Returns:
            [N, 3, triplane_dim, triplane_high_res, triplane_high_res] triplane.
        """
        # image_feats: [N, L_cond, D_cond]
        # camera_embeddings: [N, D_mod]

        assert image_feats.shape[0] == camera_embeddings.shape[0], \
            f"Mismatched batch size: {image_feats.shape[0]} vs {camera_embeddings.shape[0]}"

        N = image_feats.shape[0]
        H = W = self.triplane_low_res
        L = 3 * H * W

        # The learned positional embedding is the initial token sequence.
        x = self.pos_embed.repeat(N, 1, 1)  # [N, L, D]
        for layer in self.layers:
            x = layer(x, image_feats, camera_embeddings)
        x = self.norm(x)

        # separate each plane and apply deconv
        x = x.view(N, 3, H, W, -1)
        x = torch.einsum('nihwd->indhw', x)  # [3, N, D, H, W]
        x = x.contiguous().view(3*N, -1, H, W)  # [3*N, D, H, W]
        x = self.deconv(x)  # [3*N, D', H', W']
        x = x.view(3, N, *x.shape[-3:])  # [3, N, D', H', W']
        x = torch.einsum('indhw->nidhw', x)  # [N, 3, D', H', W']
        x = x.contiguous()

        # Sanity-check the configured output resolution/dim against the deconv output.
        assert self.triplane_high_res == x.shape[-2], \
            f"Output triplane resolution does not match with expected: {x.shape[-2]} vs {self.triplane_high_res}"
        assert self.triplane_dim == x.shape[-3], \
            f"Output triplane dimension does not match with expected: {x.shape[-3]} vs {self.triplane_dim}"

        return x
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch>=2.1.0
|
2 |
+
transformers
|
3 |
+
omegaconf
|
4 |
+
pillow
|
5 |
+
imageio[ffmpeg]
|
6 |
+
PyMCubes
|
7 |
+
trimesh
|