Spaces: Running on A10G

zxhezexin committed
Commit 87c0d1b
1 Parent(s): 1ac1801

Clean spaces
.gitattributes DELETED
@@ -1,35 +0,0 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
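These rules routed large binaries (checkpoints, archives, serialized tensors) through Git LFS. Deleting the file is harmless for Hub API workflows, since the Hub handles LFS transparently on upload. A hedged sketch, not part of this commit; the file name and target repo are illustrative only, taken from the commented-out code in app.py below:

from huggingface_hub import HfApi

# Hypothetical re-upload of a large checkpoint; the Hub stores *.pth via LFS
# automatically, so API uploads do not depend on a local .gitattributes entry.
HfApi().upload_file(
    path_or_fileobj="model.pth",
    path_in_repo="model.pth",
    repo_id="zxhezexin/OpenLRM",
)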
 
LICENSE DELETED
@@ -1,201 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
 
LICENSE_NVIDIA DELETED
@@ -1,99 +0,0 @@
- Copyright (c) 2021-2022, NVIDIA Corporation & affiliates. All rights
- reserved.
-
-
- NVIDIA Source Code License for EG3D
-
-
- =======================================================================
-
- 1. Definitions
-
- "Licensor" means any person or entity that distributes its Work.
-
- "Software" means the original work of authorship made available under
- this License.
-
- "Work" means the Software and any additions to or derivative works of
- the Software that are made available under this License.
-
- The terms "reproduce," "reproduction," "derivative works," and
- "distribution" have the meaning as provided under U.S. copyright law;
- provided, however, that for the purposes of this License, derivative
- works shall not include works that remain separable from, or merely
- link (or bind by name) to the interfaces of, the Work.
-
- Works, including the Software, are "made available" under this License
- by including in or with the Work either (a) a copyright notice
- referencing the applicability of this License to the Work, or (b) a
- copy of this License.
-
- 2. License Grants
-
- 2.1 Copyright Grant. Subject to the terms and conditions of this
- License, each Licensor grants to you a perpetual, worldwide,
- non-exclusive, royalty-free, copyright license to reproduce,
- prepare derivative works of, publicly display, publicly perform,
- sublicense and distribute its Work and any resulting derivative
- works in any form.
-
- 3. Limitations
-
- 3.1 Redistribution. You may reproduce or distribute the Work only
- if (a) you do so under this License, (b) you include a complete
- copy of this License with your distribution, and (c) you retain
- without modification any copyright, patent, trademark, or
- attribution notices that are present in the Work.
-
- 3.2 Derivative Works. You may specify that additional or different
- terms apply to the use, reproduction, and distribution of your
- derivative works of the Work ("Your Terms") only if (a) Your Terms
- provide that the use limitation in Section 3.3 applies to your
- derivative works, and (b) you identify the specific derivative
- works that are subject to Your Terms. Notwithstanding Your Terms,
- this License (including the redistribution requirements in Section
- 3.1) will continue to apply to the Work itself.
-
- 3.3 Use Limitation. The Work and any derivative works thereof only
- may be used or intended for use non-commercially. The Work or
- derivative works thereof may be used or intended for use by NVIDIA
- or it’s affiliates commercially or non-commercially. As used
- herein, "non-commercially" means for research or evaluation
- purposes only and not for any direct or indirect monetary gain.
-
- 3.4 Patent Claims. If you bring or threaten to bring a patent claim
- against any Licensor (including any claim, cross-claim or
- counterclaim in a lawsuit) to enforce any patents that you allege
- are infringed by any Work, then your rights under this License from
- such Licensor (including the grants in Sections 2.1) will terminate
- immediately.
-
- 3.5 Trademarks. This License does not grant any rights to use any
- Licensor’s or its affiliates’ names, logos, or trademarks, except
- as necessary to reproduce the notices described in this License.
-
- 3.6 Termination. If you violate any term of this License, then your
- rights under this License (including the grants in Sections 2.1)
- will terminate immediately.
-
- 4. Disclaimer of Warranty.
-
- THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
- NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
- THIS LICENSE.
-
- 5. Limitation of Liability.
-
- EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
- THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
- SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
- INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
- OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
- (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
- LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
- COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
- THE POSSIBILITY OF SUCH DAMAGES.
-
- =======================================================================
 
LICENSE_WEIGHT DELETED
@@ -1,407 +0,0 @@
- Attribution-NonCommercial 4.0 International
-
- =======================================================================
-
- Creative Commons Corporation ("Creative Commons") is not a law firm and
- does not provide legal services or legal advice. Distribution of
- Creative Commons public licenses does not create a lawyer-client or
- other relationship. Creative Commons makes its licenses and related
- information available on an "as-is" basis. Creative Commons gives no
- warranties regarding its licenses, any material licensed under their
- terms and conditions, or any related information. Creative Commons
- disclaims all liability for damages resulting from their use to the
- fullest extent possible.
-
- Using Creative Commons Public Licenses
-
- Creative Commons public licenses provide a standard set of terms and
- conditions that creators and other rights holders may use to share
- original works of authorship and other material subject to copyright
- and certain other rights specified in the public license below. The
- following considerations are for informational purposes only, are not
- exhaustive, and do not form part of our licenses.
-
- Considerations for licensors: Our public licenses are
- intended for use by those authorized to give the public
- permission to use material in ways otherwise restricted by
- copyright and certain other rights. Our licenses are
- irrevocable. Licensors should read and understand the terms
- and conditions of the license they choose before applying it.
- Licensors should also secure all rights necessary before
- applying our licenses so that the public can reuse the
- material as expected. Licensors should clearly mark any
- material not subject to the license. This includes other CC-
- licensed material, or material used under an exception or
- limitation to copyright. More considerations for licensors:
- wiki.creativecommons.org/Considerations_for_licensors
-
- Considerations for the public: By using one of our public
- licenses, a licensor grants the public permission to use the
- licensed material under specified terms and conditions. If
- the licensor's permission is not necessary for any reason--for
- example, because of any applicable exception or limitation to
- copyright--then that use is not regulated by the license. Our
- licenses grant only permissions under copyright and certain
- other rights that a licensor has authority to grant. Use of
- the licensed material may still be restricted for other
- reasons, including because others have copyright or other
- rights in the material. A licensor may make special requests,
- such as asking that all changes be marked or described.
- Although not required by our licenses, you are encouraged to
- respect those requests where reasonable. More considerations
- for the public:
- wiki.creativecommons.org/Considerations_for_licensees
-
- =======================================================================
-
- Creative Commons Attribution-NonCommercial 4.0 International Public
- License
-
- By exercising the Licensed Rights (defined below), You accept and agree
- to be bound by the terms and conditions of this Creative Commons
- Attribution-NonCommercial 4.0 International Public License ("Public
- License"). To the extent this Public License may be interpreted as a
- contract, You are granted the Licensed Rights in consideration of Your
- acceptance of these terms and conditions, and the Licensor grants You
- such rights in consideration of benefits the Licensor receives from
- making the Licensed Material available under these terms and
- conditions.
-
-
- Section 1 -- Definitions.
-
- a. Adapted Material means material subject to Copyright and Similar
- Rights that is derived from or based upon the Licensed Material
- and in which the Licensed Material is translated, altered,
- arranged, transformed, or otherwise modified in a manner requiring
- permission under the Copyright and Similar Rights held by the
- Licensor. For purposes of this Public License, where the Licensed
- Material is a musical work, performance, or sound recording,
- Adapted Material is always produced where the Licensed Material is
- synched in timed relation with a moving image.
-
- b. Adapter's License means the license You apply to Your Copyright
- and Similar Rights in Your contributions to Adapted Material in
- accordance with the terms and conditions of this Public License.
-
- c. Copyright and Similar Rights means copyright and/or similar rights
- closely related to copyright including, without limitation,
- performance, broadcast, sound recording, and Sui Generis Database
- Rights, without regard to how the rights are labeled or
- categorized. For purposes of this Public License, the rights
- specified in Section 2(b)(1)-(2) are not Copyright and Similar
- Rights.
- d. Effective Technological Measures means those measures that, in the
- absence of proper authority, may not be circumvented under laws
- fulfilling obligations under Article 11 of the WIPO Copyright
- Treaty adopted on December 20, 1996, and/or similar international
- agreements.
-
- e. Exceptions and Limitations means fair use, fair dealing, and/or
- any other exception or limitation to Copyright and Similar Rights
- that applies to Your use of the Licensed Material.
-
- f. Licensed Material means the artistic or literary work, database,
- or other material to which the Licensor applied this Public
- License.
-
- g. Licensed Rights means the rights granted to You subject to the
- terms and conditions of this Public License, which are limited to
- all Copyright and Similar Rights that apply to Your use of the
- Licensed Material and that the Licensor has authority to license.
-
- h. Licensor means the individual(s) or entity(ies) granting rights
- under this Public License.
-
- i. NonCommercial means not primarily intended for or directed towards
- commercial advantage or monetary compensation. For purposes of
- this Public License, the exchange of the Licensed Material for
- other material subject to Copyright and Similar Rights by digital
- file-sharing or similar means is NonCommercial provided there is
- no payment of monetary compensation in connection with the
- exchange.
-
- j. Share means to provide material to the public by any means or
- process that requires permission under the Licensed Rights, such
- as reproduction, public display, public performance, distribution,
- dissemination, communication, or importation, and to make material
- available to the public including in ways that members of the
- public may access the material from a place and at a time
- individually chosen by them.
-
- k. Sui Generis Database Rights means rights other than copyright
- resulting from Directive 96/9/EC of the European Parliament and of
- the Council of 11 March 1996 on the legal protection of databases,
- as amended and/or succeeded, as well as other essentially
- equivalent rights anywhere in the world.
-
- l. You means the individual or entity exercising the Licensed Rights
- under this Public License. Your has a corresponding meaning.
-
-
- Section 2 -- Scope.
-
- a. License grant.
-
- 1. Subject to the terms and conditions of this Public License,
- the Licensor hereby grants You a worldwide, royalty-free,
- non-sublicensable, non-exclusive, irrevocable license to
- exercise the Licensed Rights in the Licensed Material to:
-
- a. reproduce and Share the Licensed Material, in whole or
- in part, for NonCommercial purposes only; and
-
- b. produce, reproduce, and Share Adapted Material for
- NonCommercial purposes only.
-
- 2. Exceptions and Limitations. For the avoidance of doubt, where
- Exceptions and Limitations apply to Your use, this Public
- License does not apply, and You do not need to comply with
- its terms and conditions.
-
- 3. Term. The term of this Public License is specified in Section
- 6(a).
-
- 4. Media and formats; technical modifications allowed. The
- Licensor authorizes You to exercise the Licensed Rights in
- all media and formats whether now known or hereafter created,
- and to make technical modifications necessary to do so. The
- Licensor waives and/or agrees not to assert any right or
- authority to forbid You from making technical modifications
- necessary to exercise the Licensed Rights, including
- technical modifications necessary to circumvent Effective
- Technological Measures. For purposes of this Public License,
- simply making modifications authorized by this Section 2(a)
- (4) never produces Adapted Material.
-
- 5. Downstream recipients.
-
- a. Offer from the Licensor -- Licensed Material. Every
- recipient of the Licensed Material automatically
- receives an offer from the Licensor to exercise the
- Licensed Rights under the terms and conditions of this
- Public License.
-
- b. No downstream restrictions. You may not offer or impose
- any additional or different terms or conditions on, or
- apply any Effective Technological Measures to, the
- Licensed Material if doing so restricts exercise of the
- Licensed Rights by any recipient of the Licensed
- Material.
-
- 6. No endorsement. Nothing in this Public License constitutes or
- may be construed as permission to assert or imply that You
- are, or that Your use of the Licensed Material is, connected
- with, or sponsored, endorsed, or granted official status by,
- the Licensor or others designated to receive attribution as
- provided in Section 3(a)(1)(A)(i).
-
- b. Other rights.
-
- 1. Moral rights, such as the right of integrity, are not
- licensed under this Public License, nor are publicity,
- privacy, and/or other similar personality rights; however, to
- the extent possible, the Licensor waives and/or agrees not to
- assert any such rights held by the Licensor to the limited
- extent necessary to allow You to exercise the Licensed
- Rights, but not otherwise.
-
- 2. Patent and trademark rights are not licensed under this
- Public License.
-
- 3. To the extent possible, the Licensor waives any right to
- collect royalties from You for the exercise of the Licensed
- Rights, whether directly or through a collecting society
- under any voluntary or waivable statutory or compulsory
- licensing scheme. In all other cases the Licensor expressly
- reserves any right to collect such royalties, including when
- the Licensed Material is used other than for NonCommercial
- purposes.
-
-
- Section 3 -- License Conditions.
-
- Your exercise of the Licensed Rights is expressly made subject to the
- following conditions.
-
- a. Attribution.
-
- 1. If You Share the Licensed Material (including in modified
- form), You must:
-
- a. retain the following if it is supplied by the Licensor
- with the Licensed Material:
-
- i. identification of the creator(s) of the Licensed
- Material and any others designated to receive
- attribution, in any reasonable manner requested by
- the Licensor (including by pseudonym if
- designated);
-
- ii. a copyright notice;
-
- iii. a notice that refers to this Public License;
-
- iv. a notice that refers to the disclaimer of
- warranties;
-
- v. a URI or hyperlink to the Licensed Material to the
- extent reasonably practicable;
-
- b. indicate if You modified the Licensed Material and
- retain an indication of any previous modifications; and
-
- c. indicate the Licensed Material is licensed under this
- Public License, and include the text of, or the URI or
- hyperlink to, this Public License.
-
- 2. You may satisfy the conditions in Section 3(a)(1) in any
- reasonable manner based on the medium, means, and context in
- which You Share the Licensed Material. For example, it may be
- reasonable to satisfy the conditions by providing a URI or
- hyperlink to a resource that includes the required
- information.
-
- 3. If requested by the Licensor, You must remove any of the
- information required by Section 3(a)(1)(A) to the extent
- reasonably practicable.
-
- 4. If You Share Adapted Material You produce, the Adapter's
- License You apply must not prevent recipients of the Adapted
- Material from complying with this Public License.
-
-
- Section 4 -- Sui Generis Database Rights.
-
- Where the Licensed Rights include Sui Generis Database Rights that
- apply to Your use of the Licensed Material:
-
- a. for the avoidance of doubt, Section 2(a)(1) grants You the right
- to extract, reuse, reproduce, and Share all or a substantial
- portion of the contents of the database for NonCommercial purposes
- only;
-
- b. if You include all or a substantial portion of the database
- contents in a database in which You have Sui Generis Database
- Rights, then the database in which You have Sui Generis Database
- Rights (but not its individual contents) is Adapted Material; and
-
- c. You must comply with the conditions in Section 3(a) if You Share
- all or a substantial portion of the contents of the database.
-
- For the avoidance of doubt, this Section 4 supplements and does not
- replace Your obligations under this Public License where the Licensed
- Rights include other Copyright and Similar Rights.
-
-
- Section 5 -- Disclaimer of Warranties and Limitation of Liability.
-
- a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
- EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
- AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
- ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
- IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
- WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
- PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
- ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
- KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
- ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
-
- b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
- TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
- NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
- INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
- COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
- USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
- ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
- DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
- IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
-
- c. The disclaimer of warranties and limitation of liability provided
- above shall be interpreted in a manner that, to the extent
- possible, most closely approximates an absolute disclaimer and
- waiver of all liability.
-
-
- Section 6 -- Term and Termination.
-
- a. This Public License applies for the term of the Copyright and
- Similar Rights licensed here. However, if You fail to comply with
- this Public License, then Your rights under this Public License
- terminate automatically.
-
- b. Where Your right to use the Licensed Material has terminated under
- Section 6(a), it reinstates:
-
- 1. automatically as of the date the violation is cured, provided
- it is cured within 30 days of Your discovery of the
- violation; or
-
- 2. upon express reinstatement by the Licensor.
-
- For the avoidance of doubt, this Section 6(b) does not affect any
- right the Licensor may have to seek remedies for Your violations
- of this Public License.
-
- c. For the avoidance of doubt, the Licensor may also offer the
- Licensed Material under separate terms or conditions or stop
- distributing the Licensed Material at any time; however, doing so
- will not terminate this Public License.
-
- d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
- License.
-
-
- Section 7 -- Other Terms and Conditions.
-
- a. The Licensor shall not be bound by any additional or different
- terms or conditions communicated by You unless expressly agreed.
-
- b. Any arrangements, understandings, or agreements regarding the
- Licensed Material not stated herein are separate from and
- independent of the terms and conditions of this Public License.
-
-
- Section 8 -- Interpretation.
-
- a. For the avoidance of doubt, this Public License does not, and
- shall not be interpreted to, reduce, limit, restrict, or impose
- conditions on any use of the Licensed Material that could lawfully
- be made without permission under this Public License.
-
- b. To the extent possible, if any provision of this Public License is
- deemed unenforceable, it shall be automatically reformed to the
- minimum extent necessary to make it enforceable. If the provision
- cannot be reformed, it shall be severed from this Public License
- without affecting the enforceability of the remaining terms and
- conditions.
-
- c. No term or condition of this Public License will be waived and no
- failure to comply consented to unless expressly agreed to by the
- Licensor.
-
- d. Nothing in this Public License constitutes or may be interpreted
- as a limitation upon, or waiver of, any privileges and immunities
- that apply to the Licensor or You, including from the legal
- processes of any jurisdiction or authority.
-
- =======================================================================
-
- Creative Commons is not a party to its public
- licenses. Notwithstanding, Creative Commons may elect to apply one of
- its public licenses to material it publishes and in those instances
- will be considered the “Licensor.” The text of the Creative Commons
- public licenses is dedicated to the public domain under the CC0 Public
- Domain Dedication. Except for the limited purpose of indicating that
- material is shared under a Creative Commons public license or as
- otherwise permitted by the Creative Commons policies published at
- creativecommons.org/policies, Creative Commons does not authorize the
- use of the trademark "Creative Commons" or any other trademark or logo
- of Creative Commons without its prior written consent including,
- without limitation, in connection with any unauthorized modifications
- to any of its public licenses or any other arrangements,
- understandings, or agreements concerning use of licensed material. For
- the avoidance of doubt, this paragraph does not form part of the
- public licenses.
-
- Creative Commons may be contacted at creativecommons.org.
 
README.md DELETED
@@ -1,13 +0,0 @@
- ---
- title: OpenLRM
- emoji: ⚡
- colorFrom: indigo
- colorTo: gray
- sdk: gradio
- sdk_version: 4.11.0
- app_file: app.py
- pinned: false
- license: apache-2.0
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
app.py DELETED
@@ -1,167 +0,0 @@
- import gradio as gr
- import os
- import uuid
- import subprocess
- import shutil
- from huggingface_hub import hf_hub_download
-
- from lrm.inferrer import LRMInferrer
-
- # def prepare_checkpoint(model_name: str):
-
- #     REPO_ID = f"zxhezexin/OpenLRM"
- #     FILE_NAME = f"{model_name}.pth"
- #     CACHE_PATH = f".cache"
-
- #     print(f"Downloading ckpt ...")
-
- #     ckpt_path = hf_hub_download(repo_id=REPO_ID, filename=FILE_NAME, local_dir=CACHE_PATH)
- #     print(f"checkpoint path is {ckpt_path}")
-
- #     print(f"Downloaded ckpt into {CACHE_PATH}")
-
- def assert_input_image(input_image):
-     if input_image is None:
-         raise gr.Error("No image selected or uploaded!")
-
- def rembg_and_center_wrapper(source_image):
-     subprocess.run([f'python rembg_and_center.py {source_image}'], shell=True)
-     directory, filename = os.path.split(source_image)
-     # file_base, file_extension = os.path.splitext(filename)
-     file_base = os.path.basename(source_image).split('.')[0]
-     new_filename = f"{file_base}_rgba.png"
-     new_image_path = os.path.join(directory, new_filename)
-     return new_image_path
-
- def infer_wrapper(source_image, checkbox_rembg):
-
-     random_uuid = str(uuid.uuid4())
-     directory, extension = os.path.split(source_image)
-     file_extension = os.path.splitext(extension)[1]
-     new_file_path = os.path.join(directory, random_uuid + file_extension)
-     shutil.copy(source_image, new_file_path)
-     print(f"File renamed from {source_image} to {new_file_path}")
-     source_image = new_file_path
-
-     if checkbox_rembg:
-         source_image = rembg_and_center_wrapper(source_image)
-     return inferrer.infer(
-         source_image=source_image,
-         dump_path="./dumps",
-         source_size=-1,
-         render_size=-1,
-         mesh_size=384,
-         export_video=True,
-         export_mesh=False,
-     )
-
- def infer_wrapper_clean(source_image):
-     return infer_wrapper(source_image, checkbox_rembg=False)
-
- def demo_image_to_video(inferrer: LRMInferrer):
-
-     print(f"So far so good.")
-     print(inferrer)
-
-     _TITLE = '''OpenLRM: Open-Source Large Reconstruction Models'''
-
-     _DESCRIPTION = '''
- <div>
- <a style="display:inline-block" href='https://github.com/3DTopia/OpenLRM'><img src='https://img.shields.io/github/stars/3DTopia/OpenLRM?style=social'/></a>
- <a style="display:inline-block; margin-left: .5em" href="https://huggingface.co/zxhezexin"><img src='https://img.shields.io/badge/Model-Weights-blue'/></a>
- </div>
- OpenLRM is an open-source implementation of Large Reconstruction Models.
-
- <strong>Image-to-3D in 10 seconds!</strong>
-
- <strong>Disclaimer:</strong> This demo uses `openlrm-base-obj-1.0` model trained on Objaverse only, which consists of synthetic data. Its performance may decrease on in-the-wild images. We use 194x194 rendering resolution here for a quick demonstration.
- '''
-
-     with gr.Blocks(analytics_enabled=False) as iface:
-
-         # HEADERS
-         with gr.Row():
-             with gr.Column(scale=1):
-                 gr.Markdown('# ' + _TITLE)
-         gr.Markdown(_DESCRIPTION)
-
-         # DISPLAY
-         with gr.Row():
-
-             with gr.Column(variant='panel', scale=3):
-                 with gr.Tabs(elem_id="openlrm_input_image"):
-                     with gr.TabItem('Input Image'):
-                         with gr.Row():
-                             input_image = gr.Image(label="Input Image", image_mode="RGB", sources="upload", type="filepath", elem_id="content_image", width="auto")
-
-             with gr.Column(variant='panel', scale=2):
-                 with gr.Tabs(elem_id="openlrm_render_video"):
-                     with gr.TabItem('Rendered Video'):
-                         with gr.Row():
-                             output_video = gr.Video(label="Rendered Video", format="mp4", width="auto", autoplay=True)
-
-         # SETTING
-         with gr.Row():
-             with gr.Column(variant='panel', scale=1):
-                 with gr.Tabs(elem_id="openlrm_attrs"):
-                     with gr.TabItem('Settings'):
-                         with gr.Column(variant='panel'):
-                             gr.Markdown(
-                                 """
-                                 Please check the box when uploading RGBA images.
-
-                                 <strong>Best Practice</strong>:
-                                 RGB images with a white background.
-                                 Centered objects in reasonable sizes.
-                                 """
-                             )
-                             checkbox_rembg = gr.Checkbox(False,
-                                 label='Remove background and center the object (It takes some extra time)')
-                             submit = gr.Button('Generate', elem_id="openlrm_generate", variant='primary')
-
-         submit.click(
-             fn=assert_input_image,
-             inputs=[input_image],
-             queue=False
-         ).success(
-             fn=infer_wrapper,
-             inputs=[input_image, checkbox_rembg],
-             outputs=[output_video],
-         )
-
-         # EXAMPLES
-         with gr.Row():
-             examples = [
-                 ['assets/sample_input/owl.png'],
-                 ['assets/sample_input/building.png'],
-                 ['assets/sample_input/mailbox.png'],
-                 ['assets/sample_input/fire.png'],
-                 ['assets/sample_input/girl.png'],
-                 ['assets/sample_input/lamp.png'],
-                 ['assets/sample_input/hydrant.png'],
-                 ['assets/sample_input/hotdogs.png'],
-                 ['assets/sample_input/traffic.png'],
-                 ['assets/sample_input/ceramic.png'],
-                 ['assets/sample_input/cartoon.png'],
-             ]
-             gr.Examples(
-                 examples=examples,
-                 inputs=[input_image],
-                 outputs=[output_video],
-                 fn=infer_wrapper_clean,
-                 cache_examples=os.getenv('SYSTEM') == 'spaces',
-                 examples_per_page=20,
-             )
-
-     return iface
-
- if __name__ == "__main__":
-
-     model_name = "openlrm-base-obj-1.0"
-
-     # prepare_checkpoint(model_name)
-
-     with LRMInferrer(model_name) as inferrer:
-         iface = demo_image_to_video(inferrer)
-         iface.queue(max_size=10)
-         iface.launch()
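The deleted demo wires `infer_wrapper` into Gradio, but the underlying `LRMInferrer` API can also be driven headlessly. A minimal sketch, not part of the original Space, assuming the deleted `lrm` package is still importable and the checkpoint is reachable on the Hub; argument values are copied from `infer_wrapper` above:

# Runs the same inference path as infer_wrapper, minus the Gradio UI
# and the optional rembg preprocessing step.
from lrm.inferrer import LRMInferrer

if __name__ == "__main__":
    with LRMInferrer("openlrm-base-obj-1.0") as inferrer:
        inferrer.infer(
            source_image="assets/sample_input/owl.png",  # one of the sample inputs above
            dump_path="./dumps",   # rendered outputs are written under this directory
            source_size=-1,        # -1 falls back to the checkpoint's infer kwargs
            render_size=-1,
            mesh_size=384,
            export_video=True,     # renders the 80-view orbit defined in lrm/inferrer.py
            export_mesh=False,
        )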
 
assets/sample_input/building.png DELETED
Binary file (211 kB)
 
assets/sample_input/cartoon.png DELETED
Binary file (24 kB)
 
assets/sample_input/ceramic.png DELETED
Binary file (372 kB)
 
assets/sample_input/fire.png DELETED
Binary file (524 kB)
 
assets/sample_input/girl.png DELETED
Binary file (52.3 kB)
 
assets/sample_input/hotdogs.png DELETED
Binary file (73.2 kB)
 
assets/sample_input/hydrant.png DELETED
Binary file (73.5 kB)
 
assets/sample_input/lamp.png DELETED
Binary file (223 kB)
 
assets/sample_input/mailbox.png DELETED
Binary file (217 kB)
 
assets/sample_input/owl.png DELETED
Binary file (132 kB)
 
assets/sample_input/traffic.png DELETED
Binary file (355 kB)
 
lrm/__init__.py DELETED
@@ -1,15 +0,0 @@
- # Copyright (c) 2023, Zexin He
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- # Empty
 
lrm/cam_utils.py DELETED
@@ -1,128 +0,0 @@
- # Copyright (c) 2023, Zexin He
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
-
- import torch
-
- """
- R: (N, 3, 3)
- T: (N, 3)
- E: (N, 4, 4)
- vector: (N, 3)
- """
-
-
- def compose_extrinsic_R_T(R: torch.Tensor, T: torch.Tensor):
-     """
-     Compose the standard form extrinsic matrix from R and T.
-     Batched I/O.
-     """
-     RT = torch.cat((R, T.unsqueeze(-1)), dim=-1)
-     return compose_extrinsic_RT(RT)
-
-
- def compose_extrinsic_RT(RT: torch.Tensor):
-     """
-     Compose the standard form extrinsic matrix from RT.
-     Batched I/O.
-     """
-     return torch.cat([
-         RT,
-         torch.tensor([[[0, 0, 0, 1]]], dtype=torch.float32).repeat(RT.shape[0], 1, 1)
-     ], dim=1)
-
-
- def decompose_extrinsic_R_T(E: torch.Tensor):
-     """
-     Decompose the standard extrinsic matrix into R and T.
-     Batched I/O.
-     """
-     RT = decompose_extrinsic_RT(E)
-     return RT[:, :, :3], RT[:, :, 3]
-
-
- def decompose_extrinsic_RT(E: torch.Tensor):
-     """
-     Decompose the standard extrinsic matrix into RT.
-     Batched I/O.
-     """
-     return E[:, :3, :]
-
-
- def get_normalized_camera_intrinsics(intrinsics: torch.Tensor):
-     """
-     intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
-     Return batched fx, fy, cx, cy
-     """
-     fx, fy = intrinsics[:, 0, 0], intrinsics[:, 0, 1]
-     cx, cy = intrinsics[:, 1, 0], intrinsics[:, 1, 1]
-     width, height = intrinsics[:, 2, 0], intrinsics[:, 2, 1]
-     fx, fy = fx / width, fy / height
-     cx, cy = cx / width, cy / height
-     return fx, fy, cx, cy
-
-
- def build_camera_principle(RT: torch.Tensor, intrinsics: torch.Tensor):
-     """
-     RT: (N, 3, 4)
-     intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
-     """
-     fx, fy, cx, cy = get_normalized_camera_intrinsics(intrinsics)
-     return torch.cat([
-         RT.reshape(-1, 12),
-         fx.unsqueeze(-1), fy.unsqueeze(-1), cx.unsqueeze(-1), cy.unsqueeze(-1),
-     ], dim=-1)
-
-
- def build_camera_standard(RT: torch.Tensor, intrinsics: torch.Tensor):
-     """
-     RT: (N, 3, 4)
-     intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
-     """
-     E = compose_extrinsic_RT(RT)
-     fx, fy, cx, cy = get_normalized_camera_intrinsics(intrinsics)
-     I = torch.stack([
-         torch.stack([fx, torch.zeros_like(fx), cx], dim=-1),
-         torch.stack([torch.zeros_like(fy), fy, cy], dim=-1),
-         torch.tensor([[0, 0, 1]], dtype=torch.float32, device=RT.device).repeat(RT.shape[0], 1),
-     ], dim=1)
-     return torch.cat([
-         E.reshape(-1, 16),
-         I.reshape(-1, 9),
-     ], dim=-1)
-
-
- def center_looking_at_camera_pose(camera_position: torch.Tensor, look_at: torch.Tensor = None, up_world: torch.Tensor = None):
-     """
-     camera_position: (M, 3)
-     look_at: (3)
-     up_world: (3)
-     return: (M, 3, 4)
-     """
-     # by default, looking at the origin and world up is pos-z
-     if look_at is None:
-         look_at = torch.tensor([0, 0, 0], dtype=torch.float32)
-     if up_world is None:
-         up_world = torch.tensor([0, 0, 1], dtype=torch.float32)
-     look_at = look_at.unsqueeze(0).repeat(camera_position.shape[0], 1)
-     up_world = up_world.unsqueeze(0).repeat(camera_position.shape[0], 1)
-
-     z_axis = camera_position - look_at
-     z_axis = z_axis / z_axis.norm(dim=-1, keepdim=True)
-     x_axis = torch.cross(up_world, z_axis)
-     x_axis = x_axis / x_axis.norm(dim=-1, keepdim=True)
-     y_axis = torch.cross(z_axis, x_axis)
-     y_axis = y_axis / y_axis.norm(dim=-1, keepdim=True)
-     extrinsics = torch.stack([x_axis, y_axis, z_axis, camera_position], dim=-1)
-     return extrinsics
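For orientation: `build_camera_standard` flattens a 4x4 extrinsic matrix (16 values) and a normalized 3x3 intrinsic matrix (9 values) into a single 25-dim camera vector per view. A minimal usage sketch, not part of the original repo, assuming the deleted module is importable as `lrm.cam_utils`; the numeric values mirror `_default_intrinsics` and `_get_surrounding_views` in lrm/inferrer.py below:

import math
import torch
from lrm.cam_utils import center_looking_at_camera_pose, build_camera_standard

radius, height = 2.0, 0.8
r = math.sqrt(radius ** 2 - height ** 2)               # orbit radius projected onto z = height
position = torch.tensor([[r, 0.0, height]])            # (M=1, 3): one camera on the orbit
extrinsics = center_looking_at_camera_pose(position)   # (1, 3, 4), looking at the origin, +z up
intrinsics = torch.tensor([[[384.0, 384.0],            # [fx, fy]
                            [256.0, 256.0],            # [cx, cy]
                            [512.0, 512.0]]])          # [width, height]
camera = build_camera_standard(extrinsics, intrinsics) # (1, 25): flattened E (16) + K (9)
print(camera.shape)                                    # torch.Size([1, 25])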
 
lrm/inferrer.py DELETED
@@ -1,271 +0,0 @@
- # Copyright (c) 2023, Zexin He
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
-
- import torch
- import math
- import os
- import imageio
- import mcubes
- import trimesh
- import numpy as np
- import argparse
- from PIL import Image
-
- from .models.generator import LRMGenerator
- from .cam_utils import build_camera_principle, build_camera_standard, center_looking_at_camera_pose
-
-
- class LRMInferrer:
-     def __init__(self, model_name: str):
-         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-         _checkpoint = self._load_checkpoint(model_name)
-         _model_weights, _model_kwargs = _checkpoint['weights'], _checkpoint['kwargs']['model']
-         self.model = self._build_model(_model_kwargs, _model_weights).eval()
-
-         self.infer_kwargs = _checkpoint['kwargs']['infer']
-
-     def __enter__(self):
-         return self
-
-     def __exit__(self, exc_type, exc_val, exc_tb):
-         pass
-
-     def _load_checkpoint(self, model_name: str, cache_dir = './.cache'):
-         # download checkpoint if not exists
-         local_dir = os.path.join(cache_dir, model_name)
-         if not os.path.exists(local_dir):
-             os.makedirs(local_dir, exist_ok=True)
-         if not os.path.exists(os.path.join(local_dir, f'model.pth')):
-             # os.system(f'wget -O {os.path.join(cache_dir, f"{model_name}.pth")} https://zxhezexin.com/modelzoo/openlrm/{model_name}.pth')
-             # raise FileNotFoundError(f"Checkpoint {model_name} not found in {cache_dir}")
-             from huggingface_hub import hf_hub_download
-             repo_id = f'zxhezexin/{model_name}'
-             config_path = hf_hub_download(repo_id=repo_id, filename='config.json', local_dir=local_dir)
-             model_path = hf_hub_download(repo_id=repo_id, filename=f'model.pth', local_dir=local_dir)
-         else:
-             model_path = os.path.join(local_dir, f'model.pth')
-         checkpoint = torch.load(model_path, map_location=self.device)
-         return checkpoint
-
-     def _build_model(self, model_kwargs, model_weights):
-         model = LRMGenerator(**model_kwargs).to(self.device)
-         model.load_state_dict(model_weights)
-         print(f"======== Loaded model from checkpoint ========")
-         return model
-
-     @staticmethod
-     def _get_surrounding_views(M: int = 80, radius: float = 2.0, height: float = 0.8):
-         # M: number of surrounding views
-         # radius: camera dist to center
-         # height: height of the camera
-         # return: (M, 3, 4)
-         assert M > 0
-         assert radius > 0
-
-         camera_positions = []
-         projected_radius = math.sqrt(radius ** 2 - height ** 2)
-         for i in range(M):
-             theta = 2 * math.pi * i / M - math.pi / 2
-             x = projected_radius * math.cos(theta)
-             y = projected_radius * math.sin(theta)
-             z = height
-             camera_positions.append([x, y, z])
-         camera_positions = torch.tensor(camera_positions, dtype=torch.float32)
-         extrinsics = center_looking_at_camera_pose(camera_positions)
-
-         return extrinsics
-
-     @staticmethod
-     def _default_intrinsics():
-         # return: (3, 2)
-         fx = fy = 384
-         cx = cy = 256
-         w = h = 512
-         intrinsics = torch.tensor([
-             [fx, fy],
-             [cx, cy],
-             [w, h],
-         ], dtype=torch.float32)
-         return intrinsics
-
-     def _default_source_camera(self, batch_size: int = 1):
-         # return: (N, D_cam_raw)
-         dist_to_center = 2
-         canonical_camera_extrinsics = torch.tensor([[
-             [1, 0, 0, 0],
-             [0, 0, -1, -dist_to_center],
-             [0, 1, 0, 0],
-         ]], dtype=torch.float32)
-         canonical_camera_intrinsics = self._default_intrinsics().unsqueeze(0)
-         source_camera = build_camera_principle(canonical_camera_extrinsics, canonical_camera_intrinsics)
-         return source_camera.repeat(batch_size, 1)
-
-     def _default_render_cameras(self, batch_size: int = 1):
-         # return: (N, M, D_cam_render)
-         render_camera_extrinsics = self._get_surrounding_views()
-         render_camera_intrinsics = self._default_intrinsics().unsqueeze(0).repeat(render_camera_extrinsics.shape[0], 1, 1)
-         render_cameras = build_camera_standard(render_camera_extrinsics, render_camera_intrinsics)
-         return render_cameras.unsqueeze(0).repeat(batch_size, 1, 1)
-
-     @staticmethod
-     def images_to_video(images, output_path, fps, verbose=False):
-         # images: (T, C, H, W)
-         os.makedirs(os.path.dirname(output_path), exist_ok=True)
-         frames = []
-         for i in range(images.shape[0]):
-             frame = (images[i].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
-             assert frame.shape[0] == images.shape[2] and frame.shape[1] == images.shape[3], \
-                 f"Frame shape mismatch: {frame.shape} vs {images.shape}"
-             assert frame.min() >= 0 and frame.max() <= 255, \
-                 f"Frame value out of range: {frame.min()} ~ {frame.max()}"
-             frames.append(frame)
-         # imageio.mimwrite(output_path, np.stack(frames), fps=fps, codec='mpeg4', quality=10)
-         imageio.mimwrite(output_path, np.stack(frames), fps=fps, quality=10)
-         if verbose:
-             print(f"Saved video to {output_path}")
-
-     def infer_single(self, image: torch.Tensor, render_size: int, mesh_size: int, export_video: bool, export_mesh: bool):
-         # image: [1, C_img, H_img, W_img]
-         mesh_thres = 1.0
-         chunk_size = 2
-         batch_size = 1
-
-         source_camera = self._default_source_camera(batch_size).to(self.device)
-         render_cameras = self._default_render_cameras(batch_size).to(self.device)
-
-         with torch.no_grad():
-             planes = self.model.forward_planes(image, source_camera)
-             results = {}
-
-             if export_video:
-                 # forward synthesizer per mini-batch
-                 frames = []
-                 for i in range(0, render_cameras.shape[1], chunk_size):
-                     frames.append(
-                         self.model.synthesizer(
-                             planes,
-                             render_cameras[:, i:i+chunk_size],
-                             render_size,
-                         )
-                     )
-                 # merge frames
-                 frames = {
-                     k: torch.cat([r[k] for r in frames], dim=1)
-                     for k in frames[0].keys()
-                 }
-                 # update results
-                 results.update({
-                     'frames': frames,
-                 })
-
-             if export_mesh:
-                 grid_out = self.model.synthesizer.forward_grid(
-                     planes=planes,
-                     grid_size=mesh_size,
-                 )
-
-                 vtx, faces = mcubes.marching_cubes(grid_out['sigma'].squeeze(0).squeeze(-1).cpu().numpy(), mesh_thres)
-                 vtx = vtx / (mesh_size - 1) * 2 - 1
-
-                 vtx_tensor = torch.tensor(vtx, dtype=torch.float32, device=self.device).unsqueeze(0)
-                 vtx_colors = self.model.synthesizer.forward_points(planes, vtx_tensor)['rgb'].squeeze(0).cpu().numpy()  # (0, 1)
-                 vtx_colors = (vtx_colors * 255).astype(np.uint8)
-
-                 mesh = trimesh.Trimesh(vertices=vtx, faces=faces, vertex_colors=vtx_colors)
-
-                 results.update({
-                     'mesh': mesh,
-                 })
-
-         return results
-
-     def infer(self, source_image: str, dump_path: str, source_size: int, render_size: int, mesh_size: int, export_video: bool, export_mesh: bool):
-
-         source_image_size = source_size if source_size > 0 else self.infer_kwargs['source_size']
-
-         image = torch.tensor(np.array(Image.open(source_image))).permute(2, 0, 1).unsqueeze(0) / 255.0
-         # if RGBA, blend to RGB
-         # print(f"[DEBUG] check 1.")
-         if image.shape[1] == 4:
-             image = image[:, :3, ...] * image[:, 3:, ...] + (1 - image[:, 3:, ...])
-         print(f"[DEBUG] image.shape={image.shape} and image[0,0,0,0]={image[0,0,0,0]}")
-         # print(f"[DEBUG] check 2.")
-         image = torch.nn.functional.interpolate(image, size=(source_image_size, source_image_size), mode='bicubic', align_corners=True)
-         image = torch.clamp(image, 0, 1)
-         results = self.infer_single(
-             image.to(self.device),
-             render_size=render_size if render_size > 0 else self.infer_kwargs['render_size'],
-             mesh_size=mesh_size,
-             export_video=export_video,
-             export_mesh=export_mesh,
-         )
-
-         image_name = os.path.basename(source_image)
-         uid = image_name.split('.')[0]
-
-         os.makedirs(dump_path, exist_ok=True)
-
-         # dump video
-         if 'frames' in results:
-             renderings = results['frames']
-             for k, v in renderings.items():
-                 if k == 'images_rgb':
226
- self.images_to_video(
227
- v[0],
228
- os.path.join(dump_path, f'{uid}.mp4'),
229
- fps=20,
230
- )
231
- else:
232
- # torch.save(v[0], os.path.join(dump_path, f'{uid}_{k}.pth'))
233
- pass
234
-
235
- # dump mesh
236
- if 'mesh' in results:
237
- mesh = results['mesh']
238
- # save ply format mesh
239
- mesh.export(os.path.join(dump_path, f'{uid}.ply'), 'ply')
240
-
241
- return os.path.join(dump_path, f'{uid}.mp4')
242
-
243
-
244
- if __name__ == '__main__':
245
-
246
- """
247
- Example usage:
248
- python -m lrm.inferrer --model_name openlrm-base-obj-1.0 --source_image ./assets/sample_input/owl.png --export_video --export_mesh
249
- """
250
-
251
- parser = argparse.ArgumentParser()
252
- parser.add_argument('--model_name', type=str, default='openlrm-base-obj-1.0')
253
- parser.add_argument('--source_image', type=str, default='./assets/sample_input/owl.png')
254
- parser.add_argument('--dump_path', type=str, default='./dumps')
255
- parser.add_argument('--source_size', type=int, default=-1)
256
- parser.add_argument('--render_size', type=int, default=-1)
257
- parser.add_argument('--mesh_size', type=int, default=384)
258
- parser.add_argument('--export_video', action='store_true')
259
- parser.add_argument('--export_mesh', action='store_true')
260
- args = parser.parse_args()
261
-
262
- with LRMInferrer(model_name=args.model_name) as inferrer:
263
- inferrer.infer(
264
- source_image=args.source_image,
265
- dump_path=args.dump_path,
266
- source_size=args.source_size,
267
- render_size=args.render_size,
268
- mesh_size=args.mesh_size,
269
- export_video=args.export_video,
270
- export_mesh=args.export_mesh,
271
- )
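
For context, a minimal sketch of driving this inferrer programmatically instead of via the CLI above; it assumes only the LRMInferrer class and the defaults shown in this diff.

# Minimal programmatic sketch, mirroring the CLI example in the diff above.
from lrm.inferrer import LRMInferrer

with LRMInferrer(model_name='openlrm-base-obj-1.0') as inferrer:
    video_path = inferrer.infer(
        source_image='./assets/sample_input/owl.png',
        dump_path='./dumps',
        source_size=-1,    # -1 falls back to the checkpoint's source_size
        render_size=-1,    # -1 falls back to the checkpoint's render_size
        mesh_size=384,
        export_video=True,
        export_mesh=True,
    )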
lrm/models/__init__.py DELETED
@@ -1,15 +0,0 @@
- # Copyright (c) 2023, Zexin He
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- # Empty
lrm/models/encoders/__init__.py DELETED
@@ -1,15 +0,0 @@
- # Copyright (c) 2023, Zexin He
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- # Empty
lrm/models/encoders/dino_wrapper.py DELETED
@@ -1,59 +0,0 @@
- # Copyright (c) 2023, Zexin He
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
-
- import torch.nn as nn
- from transformers import ViTImageProcessor, ViTModel
-
-
- class DinoWrapper(nn.Module):
-     """
-     DINO v1 wrapper using the Hugging Face transformers implementation.
-     """
-     def __init__(self, model_name: str, freeze: bool = True):
-         super().__init__()
-         self.model, self.processor = self._build_dino(model_name)
-         if freeze:
-             self._freeze()
-
-     def forward(self, image):
-         # image: [N, C, H, W], on cpu
-         # RGB image with [0, 1] scale and properly sized
-         inputs = self.processor(images=image, return_tensors="pt", do_rescale=False, do_resize=False).to(self.model.device)
-         # this resampling of positional embeddings uses bicubic interpolation
-         outputs = self.model(**inputs, interpolate_pos_encoding=True)
-         last_hidden_states = outputs.last_hidden_state
-         return last_hidden_states
-
-     def _freeze(self):
-         print("======== Freezing DinoWrapper ========")
-         self.model.eval()
-         for param in self.model.parameters():
-             param.requires_grad = False
-
-     @staticmethod
-     def _build_dino(model_name: str, proxy_error_retries: int = 3, proxy_error_cooldown: int = 5):
-         import requests
-         try:
-             model = ViTModel.from_pretrained(model_name, add_pooling_layer=False)
-             processor = ViTImageProcessor.from_pretrained(model_name)
-             return model, processor
-         except requests.exceptions.ProxyError as err:
-             if proxy_error_retries > 0:
-                 print(f"Huggingface ProxyError: Retrying in {proxy_error_cooldown} seconds...")
-                 import time
-                 time.sleep(proxy_error_cooldown)
-                 return DinoWrapper._build_dino(model_name, proxy_error_retries - 1, proxy_error_cooldown)
-             else:
-                 raise err
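
A short sketch of how DinoWrapper is exercised, assuming the public facebook/dino-vitb16 checkpoint (the default used by LRMGenerator below). ViT-B/16 uses 16x16 patches with 768-dim features, so a 512x512 input yields 1 + (512/16)**2 = 1025 tokens.

import torch
from lrm.models.encoders.dino_wrapper import DinoWrapper

wrapper = DinoWrapper(model_name='facebook/dino-vitb16', freeze=True)
image = torch.rand(1, 3, 512, 512)  # RGB in [0, 1], as forward() expects
tokens = wrapper(image)             # shape [1, 1025, 768]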
lrm/models/generator.py DELETED
@@ -1,116 +0,0 @@
- # Copyright (c) 2023, Zexin He
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
-
- import torch.nn as nn
-
- from .encoders.dino_wrapper import DinoWrapper
- from .transformer import TriplaneTransformer
- from .rendering.synthesizer import TriplaneSynthesizer
-
-
- class CameraEmbedder(nn.Module):
-     """
-     Embed camera features to a high-dimensional vector.
-
-     Reference:
-     DiT: https://github.com/facebookresearch/DiT/blob/main/models.py#L27
-     """
-     def __init__(self, raw_dim: int, embed_dim: int):
-         super().__init__()
-         self.mlp = nn.Sequential(
-             nn.Linear(raw_dim, embed_dim),
-             nn.SiLU(),
-             nn.Linear(embed_dim, embed_dim),
-         )
-
-     def forward(self, x):
-         return self.mlp(x)
-
-
- class LRMGenerator(nn.Module):
-     """
-     Full model of the large reconstruction model.
-     """
-     def __init__(self, camera_embed_dim: int, rendering_samples_per_ray: int,
-                  transformer_dim: int, transformer_layers: int, transformer_heads: int,
-                  triplane_low_res: int, triplane_high_res: int, triplane_dim: int,
-                  encoder_freeze: bool = True, encoder_model_name: str = 'facebook/dino-vitb16', encoder_feat_dim: int = 768):
-         super().__init__()
-
-         # attributes
-         self.encoder_feat_dim = encoder_feat_dim
-         self.camera_embed_dim = camera_embed_dim
-
-         # modules
-         self.encoder = DinoWrapper(
-             model_name=encoder_model_name,
-             freeze=encoder_freeze,
-         )
-         self.camera_embedder = CameraEmbedder(
-             raw_dim=12+4, embed_dim=camera_embed_dim,
-         )
-         self.transformer = TriplaneTransformer(
-             inner_dim=transformer_dim, num_layers=transformer_layers, num_heads=transformer_heads,
-             image_feat_dim=encoder_feat_dim,
-             camera_embed_dim=camera_embed_dim,
-             triplane_low_res=triplane_low_res, triplane_high_res=triplane_high_res, triplane_dim=triplane_dim,
-         )
-         self.synthesizer = TriplaneSynthesizer(
-             triplane_dim=triplane_dim, samples_per_ray=rendering_samples_per_ray,
-         )
-
-     def forward_planes(self, image, camera):
-         # image: [N, C_img, H_img, W_img]
-         # camera: [N, D_cam_raw]
-         assert image.shape[0] == camera.shape[0], "Batch size mismatch for image and camera"
-         N = image.shape[0]
-
-         # encode image
-         image_feats = self.encoder(image)
-         assert image_feats.shape[-1] == self.encoder_feat_dim, \
-             f"Feature dimension mismatch: {image_feats.shape[-1]} vs {self.encoder_feat_dim}"
-
-         # embed camera
-         camera_embeddings = self.camera_embedder(camera)
-         assert camera_embeddings.shape[-1] == self.camera_embed_dim, \
-             f"Feature dimension mismatch: {camera_embeddings.shape[-1]} vs {self.camera_embed_dim}"
-
-         # transformer generating planes
-         planes = self.transformer(image_feats, camera_embeddings)
-         assert planes.shape[0] == N, "Batch size mismatch for planes"
-         assert planes.shape[1] == 3, "Planes should have 3 channels"
-
-         return planes
-
-     def forward(self, image, source_camera, render_cameras, render_size: int):
-         # image: [N, C_img, H_img, W_img]
-         # source_camera: [N, D_cam_raw]
-         # render_cameras: [N, M, D_cam_render]
-         # render_size: int
-         assert image.shape[0] == source_camera.shape[0], "Batch size mismatch for image and source_camera"
-         assert image.shape[0] == render_cameras.shape[0], "Batch size mismatch for image and render_cameras"
-         N, M = render_cameras.shape[:2]
-
-         planes = self.forward_planes(image, source_camera)
-
-         # render target views
-         render_results = self.synthesizer(planes, render_cameras, render_size)
-         assert render_results['images_rgb'].shape[0] == N, "Batch size mismatch for render_results"
-         assert render_results['images_rgb'].shape[1] == M, "Number of rendered views should be consistent with render_cameras"
-
-         return {
-             'planes': planes,
-             **render_results,
-         }
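
The camera dimensions are fixed by the code above: the source camera is the 12 flattened extrinsics entries plus 4 intrinsics entries (raw_dim=12+4), and each render camera is consumed by the synthesizer as a 25-vector (a flattened 4x4 pose plus a flattened 3x3 intrinsics matrix). A shape sketch with illustrative hyperparameters; these dims are assumptions, not the released configuration, which ships in each checkpoint's config.json.

import torch
from lrm.models.generator import LRMGenerator

model = LRMGenerator(
    camera_embed_dim=1024, rendering_samples_per_ray=128,
    transformer_dim=512, transformer_layers=12, transformer_heads=8,
    triplane_low_res=32, triplane_high_res=64, triplane_dim=40,
)
image = torch.rand(1, 3, 512, 512)     # [N, C_img, H_img, W_img]
source_camera = torch.rand(1, 16)      # [N, 12 + 4]
render_cameras = torch.rand(1, 2, 25)  # [N, M, 16 + 9]
out = model(image, source_camera, render_cameras, render_size=64)
# out['planes']: [1, 3, 40, 64, 64]; out['images_rgb']: [1, 2, 3, 64, 64]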
lrm/models/rendering/__init__.py DELETED
@@ -1,15 +0,0 @@
- # Copyright (c) 2023, Zexin He
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- # Empty
lrm/models/rendering/synthesizer.py DELETED
@@ -1,191 +0,0 @@
- # ORIGINAL LICENSE
- # SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: LicenseRef-NvidiaProprietary
- #
- # Modified by Zexin He
- # The modifications are subject to the same license as the original.
-
-
- import itertools
- import torch
- import torch.nn as nn
-
- from .utils.renderer import ImportanceRenderer
- from .utils.ray_sampler import RaySampler
-
-
- class OSGDecoder(nn.Module):
-     """
-     Triplane decoder that gives RGB and sigma values from sampled features.
-     Using ReLU here instead of Softplus in the original implementation.
-
-     Reference:
-     EG3D: https://github.com/NVlabs/eg3d/blob/main/eg3d/training/triplane.py#L112
-     """
-     def __init__(self, n_features: int,
-                  hidden_dim: int = 64, num_layers: int = 4, activation: nn.Module = nn.ReLU):
-         super().__init__()
-         self.net = nn.Sequential(
-             nn.Linear(3 * n_features, hidden_dim),
-             activation(),
-             *itertools.chain(*[[
-                 nn.Linear(hidden_dim, hidden_dim),
-                 activation(),
-             ] for _ in range(num_layers - 2)]),
-             nn.Linear(hidden_dim, 1 + 3),
-         )
-         # init all biases to zero
-         for m in self.modules():
-             if isinstance(m, nn.Linear):
-                 nn.init.zeros_(m.bias)
-
-     def forward(self, sampled_features, ray_directions):
-         # aggregate features by concatenation across planes (instead of the original mean)
-         _N, n_planes, _M, _C = sampled_features.shape
-         sampled_features = sampled_features.permute(0, 2, 1, 3).reshape(_N, _M, n_planes*_C)
-         x = sampled_features
-
-         N, M, C = x.shape
-         x = x.contiguous().view(N*M, C)
-
-         x = self.net(x)
-         x = x.view(N, M, -1)
-         rgb = torch.sigmoid(x[..., 1:])*(1 + 2*0.001) - 0.001  # uses sigmoid clamping from MipNeRF
-         sigma = x[..., 0:1]
-
-         return {'rgb': rgb, 'sigma': sigma}
-
-
- class TriplaneSynthesizer(nn.Module):
-     """
-     Synthesizer that renders a triplane volume with planes and a camera.
-
-     Reference:
-     EG3D: https://github.com/NVlabs/eg3d/blob/main/eg3d/training/triplane.py#L19
-     """
-
-     DEFAULT_RENDERING_KWARGS = {
-         'ray_start': 'auto',
-         'ray_end': 'auto',
-         'box_warp': 2.,
-         'white_back': True,
-         'disparity_space_sampling': False,
-         'clamp_mode': 'softplus',
-         'sampler_bbox_min': -1.,
-         'sampler_bbox_max': 1.,
-     }
-
-     def __init__(self, triplane_dim: int, samples_per_ray: int):
-         super().__init__()
-
-         # attributes
-         self.triplane_dim = triplane_dim
-         self.rendering_kwargs = {
-             **self.DEFAULT_RENDERING_KWARGS,
-             'depth_resolution': samples_per_ray // 2,
-             'depth_resolution_importance': samples_per_ray // 2,
-         }
-
-         # renderings
-         self.renderer = ImportanceRenderer()
-         self.ray_sampler = RaySampler()
-
-         # modules
-         self.decoder = OSGDecoder(n_features=triplane_dim)
-
-     def forward(self, planes, cameras, render_size: int):
-         # planes: (N, 3, D', H', W')
-         # cameras: (N, M, D_cam)
-         # render_size: int
-         assert planes.shape[0] == cameras.shape[0], "Batch size mismatch for planes and cameras"
-         N, M = cameras.shape[:2]
-
-         cam2world_matrix = cameras[..., :16].view(N, M, 4, 4)
-         intrinsics = cameras[..., 16:25].view(N, M, 3, 3)
-
-         # Create a batch of rays for volume rendering
-         ray_origins, ray_directions = self.ray_sampler(
-             cam2world_matrix=cam2world_matrix.reshape(-1, 4, 4),
-             intrinsics=intrinsics.reshape(-1, 3, 3),
-             render_size=render_size,
-         )
-         assert N*M == ray_origins.shape[0], "Batch size mismatch for ray_origins"
-         assert ray_origins.dim() == 3, "ray_origins should be 3-dimensional"
-
-         # Perform volume rendering
-         rgb_samples, depth_samples, weights_samples = self.renderer(
-             planes.repeat_interleave(M, dim=0), self.decoder, ray_origins, ray_directions, self.rendering_kwargs,
-         )
-
-         # Reshape into 'raw' neural-rendered image
-         Himg = Wimg = render_size
-         rgb_images = rgb_samples.permute(0, 2, 1).reshape(N, M, rgb_samples.shape[-1], Himg, Wimg).contiguous()
-         depth_images = depth_samples.permute(0, 2, 1).reshape(N, M, 1, Himg, Wimg)
-         weight_images = weights_samples.permute(0, 2, 1).reshape(N, M, 1, Himg, Wimg)
-
-         return {
-             'images_rgb': rgb_images,
-             'images_depth': depth_images,
-             'images_weight': weight_images,
-         }
-
-     def forward_grid(self, planes, grid_size: int, aabb: torch.Tensor = None):
-         # planes: (N, 3, D', H', W')
-         # grid_size: int
-         # aabb: (N, 2, 3)
-         if aabb is None:
-             aabb = torch.tensor([
-                 [self.rendering_kwargs['sampler_bbox_min']] * 3,
-                 [self.rendering_kwargs['sampler_bbox_max']] * 3,
-             ], device=planes.device, dtype=planes.dtype).unsqueeze(0).repeat(planes.shape[0], 1, 1)
-         assert planes.shape[0] == aabb.shape[0], "Batch size mismatch for planes and aabb"
-         N = planes.shape[0]
-
-         # create grid points for triplane query
-         grid_points = []
-         for i in range(N):
-             grid_points.append(torch.stack(torch.meshgrid(
-                 torch.linspace(aabb[i, 0, 0], aabb[i, 1, 0], grid_size, device=planes.device),
-                 torch.linspace(aabb[i, 0, 1], aabb[i, 1, 1], grid_size, device=planes.device),
-                 torch.linspace(aabb[i, 0, 2], aabb[i, 1, 2], grid_size, device=planes.device),
-                 indexing='ij',
-             ), dim=-1).reshape(-1, 3))
-         cube_grid = torch.stack(grid_points, dim=0).to(planes.device)
-
-         features = self.forward_points(planes, cube_grid)
-
-         # reshape into grid
-         features = {
-             k: v.reshape(N, grid_size, grid_size, grid_size, -1)
-             for k, v in features.items()
-         }
-         return features
-
-     def forward_points(self, planes, points: torch.Tensor, chunk_size: int = 2**20):
-         # planes: (N, 3, D', H', W')
-         # points: (N, P, 3)
-         N, P = points.shape[:2]
-
-         # query triplane in chunks
-         outs = []
-         for i in range(0, points.shape[1], chunk_size):
-             chunk_points = points[:, i:i+chunk_size]
-
-             # query triplane
-             chunk_out = self.renderer.run_model_activated(
-                 planes=planes,
-                 decoder=self.decoder,
-                 sample_coordinates=chunk_points,
-                 sample_directions=torch.zeros_like(chunk_points),
-                 options=self.rendering_kwargs,
-             )
-             outs.append(chunk_out)
-
-         # concatenate the outputs
-         point_features = {
-             k: torch.cat([out[k] for out in outs], dim=1)
-             for k in outs[0].keys()
-         }
-         return point_features
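
The decoder's RGB head above uses the MipNeRF-style widened sigmoid, which keeps outputs in a slightly padded unit range so that exact 0 and 1 targets stay reachable without saturating gradients. A minimal sketch of just that clamp:

import torch

eps = 0.001
logits = torch.linspace(-8, 8, steps=5)
rgb = torch.sigmoid(logits) * (1 + 2 * eps) - eps  # values lie in (-0.001, 1.001)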
lrm/models/rendering/utils/__init__.py DELETED
@@ -1,9 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: LicenseRef-NvidiaProprietary
- #
- # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
- # property and proprietary rights in and to this material, related
- # documentation and any modifications thereto. Any use, reproduction,
- # disclosure or distribution of this material and related documentation
- # without an express license agreement from NVIDIA CORPORATION or
- # its affiliates is strictly prohibited.
lrm/models/rendering/utils/math_utils.py DELETED
@@ -1,118 +0,0 @@
- # MIT License
-
- # Copyright (c) 2022 Petr Kellnhofer
-
- # Permission is hereby granted, free of charge, to any person obtaining a copy
- # of this software and associated documentation files (the "Software"), to deal
- # in the Software without restriction, including without limitation the rights
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- # copies of the Software, and to permit persons to whom the Software is
- # furnished to do so, subject to the following conditions:

- # The above copyright notice and this permission notice shall be included in all
- # copies or substantial portions of the Software.

- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- # SOFTWARE.
-
- import torch
-
- def transform_vectors(matrix: torch.Tensor, vectors4: torch.Tensor) -> torch.Tensor:
-     """
-     Left-multiplies MxM @ NxM. Returns NxM.
-     """
-     res = torch.matmul(vectors4, matrix.T)
-     return res
-
-
- def normalize_vecs(vectors: torch.Tensor) -> torch.Tensor:
-     """
-     Normalize vector lengths.
-     """
-     return vectors / (torch.norm(vectors, dim=-1, keepdim=True))
-
- def torch_dot(x: torch.Tensor, y: torch.Tensor):
-     """
-     Dot product of two tensors along the last dimension.
-     """
-     return (x * y).sum(-1)
-
-
- def get_ray_limits_box(rays_o: torch.Tensor, rays_d: torch.Tensor, box_side_length):
-     """
-     Author: Petr Kellnhofer
-     Intersects rays with the [-1, 1] NDC volume.
-     Returns min and max distance of entry.
-     Returns -1 for no intersection.
-     https://www.scratchapixel.com/lessons/3d-basic-rendering/minimal-ray-tracer-rendering-simple-shapes/ray-box-intersection
-     """
-     o_shape = rays_o.shape
-     rays_o = rays_o.detach().reshape(-1, 3)
-     rays_d = rays_d.detach().reshape(-1, 3)
-
-     bb_min = [-1*(box_side_length/2), -1*(box_side_length/2), -1*(box_side_length/2)]
-     bb_max = [1*(box_side_length/2), 1*(box_side_length/2), 1*(box_side_length/2)]
-     bounds = torch.tensor([bb_min, bb_max], dtype=rays_o.dtype, device=rays_o.device)
-     is_valid = torch.ones(rays_o.shape[:-1], dtype=bool, device=rays_o.device)
-
-     # Precompute inverse for stability.
-     invdir = 1 / rays_d
-     sign = (invdir < 0).long()
-
-     # Intersect with YZ plane.
-     tmin = (bounds.index_select(0, sign[..., 0])[..., 0] - rays_o[..., 0]) * invdir[..., 0]
-     tmax = (bounds.index_select(0, 1 - sign[..., 0])[..., 0] - rays_o[..., 0]) * invdir[..., 0]
-
-     # Intersect with XZ plane.
-     tymin = (bounds.index_select(0, sign[..., 1])[..., 1] - rays_o[..., 1]) * invdir[..., 1]
-     tymax = (bounds.index_select(0, 1 - sign[..., 1])[..., 1] - rays_o[..., 1]) * invdir[..., 1]
-
-     # Resolve parallel rays.
-     is_valid[torch.logical_or(tmin > tymax, tymin > tmax)] = False
-
-     # Use the shortest intersection.
-     tmin = torch.max(tmin, tymin)
-     tmax = torch.min(tmax, tymax)
-
-     # Intersect with XY plane.
-     tzmin = (bounds.index_select(0, sign[..., 2])[..., 2] - rays_o[..., 2]) * invdir[..., 2]
-     tzmax = (bounds.index_select(0, 1 - sign[..., 2])[..., 2] - rays_o[..., 2]) * invdir[..., 2]
-
-     # Resolve parallel rays.
-     is_valid[torch.logical_or(tmin > tzmax, tzmin > tmax)] = False
-
-     # Use the shortest intersection.
-     tmin = torch.max(tmin, tzmin)
-     tmax = torch.min(tmax, tzmax)
-
-     # Mark invalid.
-     tmin[torch.logical_not(is_valid)] = -1
-     tmax[torch.logical_not(is_valid)] = -2
-
-     return tmin.reshape(*o_shape[:-1], 1), tmax.reshape(*o_shape[:-1], 1)
-
-
- def linspace(start: torch.Tensor, stop: torch.Tensor, num: int):
-     """
-     Creates a tensor of shape [num, *start.shape] whose values are evenly spaced from start to stop, inclusive.
-     Replicates the multi-dimensional behaviour of numpy.linspace in PyTorch.
-     """
-     # create a tensor of 'num' steps from 0 to 1
-     steps = torch.arange(num, dtype=torch.float32, device=start.device) / (num - 1)
-
-     # reshape the 'steps' tensor to [-1, *([1]*start.ndim)] to allow for broadcasting
-     # - using 'steps.reshape([-1, *([1]*start.ndim)])' would be nice here, but torchscript
-     #   "cannot statically infer the expected size of a list in this context", hence the loop below
-     for _ in range(start.ndim):
-         steps = steps.unsqueeze(-1)
-
-     # the output starts at 'start' and increments until 'stop' in each dimension
-     out = start[None] + steps * (stop - start)[None]
-
-     return out
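
A small sketch of the batched linspace defined above, which sample_stratified relies on when ray_start/ray_end are per-ray tensors:

import torch
from lrm.models.rendering.utils.math_utils import linspace

start = torch.zeros(2, 3)
stop = torch.tensor([[1., 2., 3.], [4., 5., 6.]])
vals = linspace(start, stop, num=5)  # shape [5, 2, 3]; vals[0] == start, vals[-1] == stop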
lrm/models/rendering/utils/ray_marcher.py DELETED
@@ -1,65 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: LicenseRef-NvidiaProprietary
- #
- # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
- # property and proprietary rights in and to this material, related
- # documentation and any modifications thereto. Any use, reproduction,
- # disclosure or distribution of this material and related documentation
- # without an express license agreement from NVIDIA CORPORATION or
- # its affiliates is strictly prohibited.
- #
- # Modified by Zexin He
- # The modifications are subject to the same license as the original.
-
-
- """
- The ray marcher takes the raw output of the implicit representation and uses the volume rendering equation to produce composited colors and depths.
- Based on the implementation in MipNeRF (this one doesn't do any cone tracing, though!)
- """
-
- import torch
- import torch.nn as nn
-
-
- class MipRayMarcher2(nn.Module):
-     def __init__(self, activation_factory):
-         super().__init__()
-         self.activation_factory = activation_factory
-
-     def run_forward(self, colors, densities, depths, rendering_options):
-         deltas = depths[:, :, 1:] - depths[:, :, :-1]
-         colors_mid = (colors[:, :, :-1] + colors[:, :, 1:]) / 2
-         densities_mid = (densities[:, :, :-1] + densities[:, :, 1:]) / 2
-         depths_mid = (depths[:, :, :-1] + depths[:, :, 1:]) / 2
-
-         # using factory mode for better usability
-         densities_mid = self.activation_factory(rendering_options)(densities_mid)
-
-         density_delta = densities_mid * deltas
-
-         alpha = 1 - torch.exp(-density_delta)
-
-         alpha_shifted = torch.cat([torch.ones_like(alpha[:, :, :1]), 1-alpha + 1e-10], -2)
-         weights = alpha * torch.cumprod(alpha_shifted, -2)[:, :, :-1]
-
-         composite_rgb = torch.sum(weights * colors_mid, -2)
-         weight_total = weights.sum(2)
-         composite_depth = torch.sum(weights * depths_mid, -2) / weight_total
-
-         # clip the composite to min/max range of depths
-         composite_depth = torch.nan_to_num(composite_depth, float('inf'))
-         composite_depth = torch.clamp(composite_depth, torch.min(depths), torch.max(depths))
-
-         if rendering_options.get('white_back', False):
-             composite_rgb = composite_rgb + 1 - weight_total
-
-         # rendered values stay in the 0-1 scale; the original MipNeRF rescaling to (-1, 1) is intentionally omitted
-
-         return composite_rgb, composite_depth, weights
-
-
-     def forward(self, colors, densities, depths, rendering_options):
-         composite_rgb, composite_depth, weights = self.run_forward(colors, densities, depths, rendering_options)
-
-         return composite_rgb, composite_depth, weights
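
The marcher implements the standard discrete volume rendering equation: per-interval opacity alpha_i = 1 - exp(-sigma_i * delta_i) and weights w_i = alpha_i * prod_{j<i}(1 - alpha_j). A one-ray toy sketch of that accumulation:

import torch

sigma = torch.tensor([0.0, 2.0, 5.0])   # midpoint densities along one ray
delta = torch.tensor([0.1, 0.1, 0.1])   # spacing between depth samples
alpha = 1 - torch.exp(-sigma * delta)
trans = torch.cumprod(torch.cat([torch.ones(1), 1 - alpha + 1e-10]), dim=0)[:-1]
weights = alpha * trans                 # per-sample contribution; weights.sum() <= 1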
lrm/models/rendering/utils/ray_sampler.py DELETED
@@ -1,81 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: LicenseRef-NvidiaProprietary
- #
- # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
- # property and proprietary rights in and to this material, related
- # documentation and any modifications thereto. Any use, reproduction,
- # disclosure or distribution of this material and related documentation
- # without an express license agreement from NVIDIA CORPORATION or
- # its affiliates is strictly prohibited.
- #
- # Modified by Zexin He
- # The modifications are subject to the same license as the original.
-
-
- """
- The ray sampler is a module that takes in camera matrices and a render resolution and produces batches of rays.
- Expects cam2world matrices that use the OpenCV camera coordinate system conventions.
- """
-
- import torch
-
- class RaySampler(torch.nn.Module):
-     def __init__(self):
-         super().__init__()
-         self.ray_origins_h, self.ray_directions, self.depths, self.image_coords, self.rendering_options = None, None, None, None, None
-
-     def forward(self, cam2world_matrix, intrinsics, render_size):
-         """
-         Create batches of rays and return origins and directions.
-
-         cam2world_matrix: (N, 4, 4)
-         intrinsics: (N, 3, 3)
-         render_size: int
-
-         ray_origins: (N, M, 3)
-         ray_dirs: (N, M, 3)
-         """
-
-         N, M = cam2world_matrix.shape[0], render_size**2
-         cam_locs_world = cam2world_matrix[:, :3, 3]
-         fx = intrinsics[:, 0, 0]
-         fy = intrinsics[:, 1, 1]
-         cx = intrinsics[:, 0, 2]
-         cy = intrinsics[:, 1, 2]
-         sk = intrinsics[:, 0, 1]
-
-         uv = torch.stack(torch.meshgrid(
-             torch.arange(render_size, dtype=torch.float32, device=cam2world_matrix.device),
-             torch.arange(render_size, dtype=torch.float32, device=cam2world_matrix.device),
-             indexing='ij',
-         ))
-         uv = uv.flip(0).reshape(2, -1).transpose(1, 0)
-         uv = uv.unsqueeze(0).repeat(cam2world_matrix.shape[0], 1, 1)
-
-         x_cam = uv[:, :, 0].view(N, -1) * (1./render_size) + (0.5/render_size)
-         y_cam = uv[:, :, 1].view(N, -1) * (1./render_size) + (0.5/render_size)
-         z_cam = torch.ones((N, M), device=cam2world_matrix.device)
-
-         x_lift = (x_cam - cx.unsqueeze(-1) + cy.unsqueeze(-1)*sk.unsqueeze(-1)/fy.unsqueeze(-1) - sk.unsqueeze(-1)*y_cam/fy.unsqueeze(-1)) / fx.unsqueeze(-1) * z_cam
-         y_lift = (y_cam - cy.unsqueeze(-1)) / fy.unsqueeze(-1) * z_cam
-
-         cam_rel_points = torch.stack((x_lift, y_lift, z_cam, torch.ones_like(z_cam)), dim=-1)
-
-         _opencv2blender = torch.tensor([
-             [1, 0, 0, 0],
-             [0, -1, 0, 0],
-             [0, 0, -1, 0],
-             [0, 0, 0, 1],
-         ], dtype=torch.float32, device=cam2world_matrix.device).unsqueeze(0).repeat(N, 1, 1)
-
-         cam2world_matrix = torch.bmm(cam2world_matrix, _opencv2blender)
-
-         world_rel_points = torch.bmm(cam2world_matrix, cam_rel_points.permute(0, 2, 1)).permute(0, 2, 1)[:, :, :3]
-
-         ray_dirs = world_rel_points - cam_locs_world[:, None, :]
-         ray_dirs = torch.nn.functional.normalize(ray_dirs, dim=2)
-
-         ray_origins = cam_locs_world.unsqueeze(1).repeat(1, ray_dirs.shape[1], 1)
-
-         return ray_origins, ray_dirs
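
A quick sketch of the sampler's contract, assuming normalized intrinsics (focal lengths and principal point divided by the image size, which matches how uv is scaled by 1/render_size above):

import torch
from lrm.models.rendering.utils.ray_sampler import RaySampler

sampler = RaySampler()
cam2world = torch.eye(4).unsqueeze(0)              # identity pose, OpenCV convention
intrinsics = torch.tensor([[[0.75, 0.00, 0.50],
                            [0.00, 0.75, 0.50],
                            [0.00, 0.00, 1.00]]])  # normalized fx, fy, cx, cy
origins, dirs = sampler(cam2world, intrinsics, render_size=64)
# origins: [1, 4096, 3] (all at the camera center); dirs: [1, 4096, 3] unit vectors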
lrm/models/rendering/utils/renderer.py DELETED
@@ -1,303 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: LicenseRef-NvidiaProprietary
- #
- # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
- # property and proprietary rights in and to this material, related
- # documentation and any modifications thereto. Any use, reproduction,
- # disclosure or distribution of this material and related documentation
- # without an express license agreement from NVIDIA CORPORATION or
- # its affiliates is strictly prohibited.
- #
- # Modified by Zexin He
- # The modifications are subject to the same license as the original.
-
-
- """
- The renderer is a module that takes in rays, decides where to sample along each
- ray, and computes pixel colors using the volume rendering equation.
- """
-
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
-
- from .ray_marcher import MipRayMarcher2
- from . import math_utils
-
- def generate_planes():
-     """
-     Defines planes by the three vectors that form the "axes" of the
-     plane. Should work with arbitrary number of planes and planes of
-     arbitrary orientation.
-
-     Bugfix reference: https://github.com/NVlabs/eg3d/issues/67
-     """
-     return torch.tensor([[[1, 0, 0],
-                           [0, 1, 0],
-                           [0, 0, 1]],
-                          [[1, 0, 0],
-                           [0, 0, 1],
-                           [0, 1, 0]],
-                          [[0, 0, 1],
-                           [0, 1, 0],
-                           [1, 0, 0]]], dtype=torch.float32)
-
- def project_onto_planes(planes, coordinates):
-     """
-     Projects 3D points onto a batch of 2D planes, returning 2D plane coordinates.
-
-     Takes plane axes of shape (n_planes, 3, 3) and coordinates of shape (N, M, 3).
-     Returns projections of shape (N*n_planes, M, 2).
-     """
-     N, M, C = coordinates.shape
-     n_planes, _, _ = planes.shape
-     coordinates = coordinates.unsqueeze(1).expand(-1, n_planes, -1, -1).reshape(N*n_planes, M, 3)
-     inv_planes = torch.linalg.inv(planes).unsqueeze(0).expand(N, -1, -1, -1).reshape(N*n_planes, 3, 3)
-     projections = torch.bmm(coordinates, inv_planes)
-     return projections[..., :2]
-
- def sample_from_planes(plane_axes, plane_features, coordinates, mode='bilinear', padding_mode='zeros', box_warp=None):
-     assert padding_mode == 'zeros'
-     N, n_planes, C, H, W = plane_features.shape
-     _, M, _ = coordinates.shape
-     plane_features = plane_features.view(N*n_planes, C, H, W)
-
-     coordinates = (2/box_warp) * coordinates  # map box bounds to [-1, 1] for grid_sample
-
-     projected_coordinates = project_onto_planes(plane_axes, coordinates).unsqueeze(1)
-     output_features = torch.nn.functional.grid_sample(plane_features, projected_coordinates.float(), mode=mode, padding_mode=padding_mode, align_corners=False).permute(0, 3, 2, 1).reshape(N, n_planes, M, C)
-     return output_features
-
- def sample_from_3dgrid(grid, coordinates):
-     """
-     Expects coordinates in shape (batch_size, num_points_per_batch, 3)
-     Expects grid in shape (1, channels, H, W, D)
-     (Also works if grid has batch size)
-     Returns sampled features of shape (batch_size, num_points_per_batch, feature_channels)
-     """
-     batch_size, n_coords, n_dims = coordinates.shape
-     sampled_features = torch.nn.functional.grid_sample(grid.expand(batch_size, -1, -1, -1, -1),
-                                                        coordinates.reshape(batch_size, 1, 1, -1, n_dims),
-                                                        mode='bilinear', padding_mode='zeros', align_corners=False)
-     N, C, H, W, D = sampled_features.shape
-     sampled_features = sampled_features.permute(0, 4, 3, 2, 1).reshape(N, H*W*D, C)
-     return sampled_features
-
- class ImportanceRenderer(torch.nn.Module):
-     """
-     Modified from the original version to filter out-of-box samples as TensoRF does.
-
-     Reference:
-     TensoRF: https://github.com/apchenstu/TensoRF/blob/main/models/tensorBase.py#L277
-     """
-     def __init__(self):
-         super().__init__()
-         self.activation_factory = self._build_activation_factory()
-         self.ray_marcher = MipRayMarcher2(self.activation_factory)
-         self.plane_axes = generate_planes()
-
-     def _build_activation_factory(self):
-         def activation_factory(options: dict):
-             if options['clamp_mode'] == 'softplus':
-                 return lambda x: F.softplus(x - 1)  # activation bias of -1 makes things initialize better
-             else:
-                 assert False, "Renderer only supports `clamp_mode`=`softplus`!"
-         return activation_factory
-
-     def _forward_pass(self, depths: torch.Tensor, ray_directions: torch.Tensor, ray_origins: torch.Tensor,
-                       planes: torch.Tensor, decoder: nn.Module, rendering_options: dict):
-         """
-         Additional filtering is applied to filter out-of-box samples.
-         Modifications made by Zexin He.
-         """
-
-         # context related variables
-         batch_size, num_rays, samples_per_ray, _ = depths.shape
-         device = depths.device
-
-         # define sample points with depths
-         sample_directions = ray_directions.unsqueeze(-2).expand(-1, -1, samples_per_ray, -1).reshape(batch_size, -1, 3)
-         sample_coordinates = (ray_origins.unsqueeze(-2) + depths * ray_directions.unsqueeze(-2)).reshape(batch_size, -1, 3)
-
-         # filter out-of-box samples
-         mask_inbox = \
-             (rendering_options['sampler_bbox_min'] <= sample_coordinates) & \
-             (sample_coordinates <= rendering_options['sampler_bbox_max'])
-         mask_inbox = mask_inbox.all(-1)
-
-         # forward model according to all samples
-         _out = self.run_model(planes, decoder, sample_coordinates, sample_directions, rendering_options)
-
-         # set out-of-box samples to zeros(rgb) & -inf(sigma)
-         SAFE_GUARD = 3
-         DATA_TYPE = _out['sigma'].dtype
-         colors_pass = torch.zeros(batch_size, num_rays * samples_per_ray, 3, device=device, dtype=DATA_TYPE)
-         densities_pass = torch.nan_to_num(torch.full((batch_size, num_rays * samples_per_ray, 1), -float('inf'), device=device, dtype=DATA_TYPE)) / SAFE_GUARD
-         colors_pass[mask_inbox], densities_pass[mask_inbox] = _out['rgb'][mask_inbox], _out['sigma'][mask_inbox]
-
-         # reshape back
-         colors_pass = colors_pass.reshape(batch_size, num_rays, samples_per_ray, colors_pass.shape[-1])
-         densities_pass = densities_pass.reshape(batch_size, num_rays, samples_per_ray, densities_pass.shape[-1])
-
-         return colors_pass, densities_pass
-
-     def forward(self, planes, decoder, ray_origins, ray_directions, rendering_options):
-         if rendering_options['ray_start'] == rendering_options['ray_end'] == 'auto':
-             ray_start, ray_end = math_utils.get_ray_limits_box(ray_origins, ray_directions, box_side_length=rendering_options['box_warp'])
-             is_ray_valid = ray_end > ray_start
-             if torch.any(is_ray_valid).item():
-                 ray_start[~is_ray_valid] = ray_start[is_ray_valid].min()
-                 ray_end[~is_ray_valid] = ray_start[is_ray_valid].max()
-             depths_coarse = self.sample_stratified(ray_origins, ray_start, ray_end, rendering_options['depth_resolution'], rendering_options['disparity_space_sampling'])
-         else:
-             # Create stratified depth samples
-             depths_coarse = self.sample_stratified(ray_origins, rendering_options['ray_start'], rendering_options['ray_end'], rendering_options['depth_resolution'], rendering_options['disparity_space_sampling'])
-
-         # Coarse Pass
-         colors_coarse, densities_coarse = self._forward_pass(
-             depths=depths_coarse, ray_directions=ray_directions, ray_origins=ray_origins,
-             planes=planes, decoder=decoder, rendering_options=rendering_options)
-
-         # Fine Pass
-         N_importance = rendering_options['depth_resolution_importance']
-         if N_importance > 0:
-             _, _, weights = self.ray_marcher(colors_coarse, densities_coarse, depths_coarse, rendering_options)
-
-             depths_fine = self.sample_importance(depths_coarse, weights, N_importance)
-
-             colors_fine, densities_fine = self._forward_pass(
-                 depths=depths_fine, ray_directions=ray_directions, ray_origins=ray_origins,
-                 planes=planes, decoder=decoder, rendering_options=rendering_options)
-
-             all_depths, all_colors, all_densities = self.unify_samples(depths_coarse, colors_coarse, densities_coarse,
-                                                                        depths_fine, colors_fine, densities_fine)
-
-             # Aggregate
-             rgb_final, depth_final, weights = self.ray_marcher(all_colors, all_densities, all_depths, rendering_options)
-         else:
-             rgb_final, depth_final, weights = self.ray_marcher(colors_coarse, densities_coarse, depths_coarse, rendering_options)
-
-         return rgb_final, depth_final, weights.sum(2)
-
-     def run_model(self, planes, decoder, sample_coordinates, sample_directions, options):
-         plane_axes = self.plane_axes.to(planes.device)
-         sampled_features = sample_from_planes(plane_axes, planes, sample_coordinates, padding_mode='zeros', box_warp=options['box_warp'])
-
-         out = decoder(sampled_features, sample_directions)
-         if options.get('density_noise', 0) > 0:
-             out['sigma'] += torch.randn_like(out['sigma']) * options['density_noise']
-         return out
-
-     def run_model_activated(self, planes, decoder, sample_coordinates, sample_directions, options):
-         out = self.run_model(planes, decoder, sample_coordinates, sample_directions, options)
-         out['sigma'] = self.activation_factory(options)(out['sigma'])
-         return out
-
-     def sort_samples(self, all_depths, all_colors, all_densities):
-         _, indices = torch.sort(all_depths, dim=-2)
-         all_depths = torch.gather(all_depths, -2, indices)
-         all_colors = torch.gather(all_colors, -2, indices.expand(-1, -1, -1, all_colors.shape[-1]))
-         all_densities = torch.gather(all_densities, -2, indices.expand(-1, -1, -1, 1))
-         return all_depths, all_colors, all_densities
-
-     def unify_samples(self, depths1, colors1, densities1, depths2, colors2, densities2):
-         all_depths = torch.cat([depths1, depths2], dim=-2)
-         all_colors = torch.cat([colors1, colors2], dim=-2)
-         all_densities = torch.cat([densities1, densities2], dim=-2)
-
-         _, indices = torch.sort(all_depths, dim=-2)
-         all_depths = torch.gather(all_depths, -2, indices)
-         all_colors = torch.gather(all_colors, -2, indices.expand(-1, -1, -1, all_colors.shape[-1]))
-         all_densities = torch.gather(all_densities, -2, indices.expand(-1, -1, -1, 1))
-
-         return all_depths, all_colors, all_densities
-
-     def sample_stratified(self, ray_origins, ray_start, ray_end, depth_resolution, disparity_space_sampling=False):
-         """
-         Return depths of approximately uniformly spaced samples along rays.
-         """
-         N, M, _ = ray_origins.shape
-         if disparity_space_sampling:
-             depths_coarse = torch.linspace(0,
-                                            1,
-                                            depth_resolution,
-                                            device=ray_origins.device).reshape(1, 1, depth_resolution, 1).repeat(N, M, 1, 1)
-             depth_delta = 1/(depth_resolution - 1)
-             depths_coarse += torch.rand_like(depths_coarse) * depth_delta
-             depths_coarse = 1./(1./ray_start * (1. - depths_coarse) + 1./ray_end * depths_coarse)
-         else:
-             if type(ray_start) == torch.Tensor:
-                 depths_coarse = math_utils.linspace(ray_start, ray_end, depth_resolution).permute(1,2,0,3)
-                 depth_delta = (ray_end - ray_start) / (depth_resolution - 1)
-                 depths_coarse += torch.rand_like(depths_coarse) * depth_delta[..., None]
-             else:
-                 depths_coarse = torch.linspace(ray_start, ray_end, depth_resolution, device=ray_origins.device).reshape(1, 1, depth_resolution, 1).repeat(N, M, 1, 1)
-                 depth_delta = (ray_end - ray_start)/(depth_resolution - 1)
-                 depths_coarse += torch.rand_like(depths_coarse) * depth_delta
-
-         return depths_coarse
-
-     def sample_importance(self, z_vals, weights, N_importance):
-         """
-         Return depths of importance sampled points along rays. See NeRF importance sampling for more.
-         """
-         with torch.no_grad():
-             batch_size, num_rays, samples_per_ray, _ = z_vals.shape
-
-             z_vals = z_vals.reshape(batch_size * num_rays, samples_per_ray)
-             weights = weights.reshape(batch_size * num_rays, -1)  # -1 to account for loss of 1 sample in MipRayMarcher
-
-             # smooth weights
-             weights = torch.nn.functional.max_pool1d(weights.unsqueeze(1).float(), 2, 1, padding=1)
-             weights = torch.nn.functional.avg_pool1d(weights, 2, 1).squeeze()
-             weights = weights + 0.01
-
-             z_vals_mid = 0.5 * (z_vals[:, :-1] + z_vals[:, 1:])
-             importance_z_vals = self.sample_pdf(z_vals_mid, weights[:, 1:-1],
-                                                 N_importance).detach().reshape(batch_size, num_rays, N_importance, 1)
-         return importance_z_vals
-
-     def sample_pdf(self, bins, weights, N_importance, det=False, eps=1e-5):
-         """
-         Sample @N_importance samples from @bins with distribution defined by @weights.
-         Inputs:
-             bins: (N_rays, N_samples_+1) where N_samples_ is "the number of coarse samples per ray - 2"
-             weights: (N_rays, N_samples_)
-             N_importance: the number of samples to draw from the distribution
-             det: deterministic or not
-             eps: a small number to prevent division by zero
-         Outputs:
-             samples: the sampled samples
-         """
-         N_rays, N_samples_ = weights.shape
-         weights = weights + eps  # prevent division by zero (don't do inplace op!)
-         pdf = weights / torch.sum(weights, -1, keepdim=True)  # (N_rays, N_samples_)
-         cdf = torch.cumsum(pdf, -1)  # (N_rays, N_samples_), cumulative distribution function
-         cdf = torch.cat([torch.zeros_like(cdf[:, :1]), cdf], -1)  # (N_rays, N_samples_+1), padded to 0~1 inclusive
-
-         if det:
-             u = torch.linspace(0, 1, N_importance, device=bins.device)
-             u = u.expand(N_rays, N_importance)
-         else:
-             u = torch.rand(N_rays, N_importance, device=bins.device)
-         u = u.contiguous()
-
-         inds = torch.searchsorted(cdf, u, right=True)
-         below = torch.clamp_min(inds-1, 0)
-         above = torch.clamp_max(inds, N_samples_)
-
-         inds_sampled = torch.stack([below, above], -1).view(N_rays, 2*N_importance)
-         cdf_g = torch.gather(cdf, 1, inds_sampled).view(N_rays, N_importance, 2)
-         bins_g = torch.gather(bins, 1, inds_sampled).view(N_rays, N_importance, 2)
-
-         denom = cdf_g[..., 1] - cdf_g[..., 0]
-         denom[denom < eps] = 1  # denom == 0 means the bin has weight 0 and will not be sampled anyway, so any value is fine
-
-         samples = bins_g[..., 0] + (u - cdf_g[..., 0]) / denom * (bins_g[..., 1] - bins_g[..., 0])
-         return samples
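
sample_pdf is inverse-transform sampling: it builds a CDF over the coarse bins and pulls uniform samples through it, so depth regions with large weights receive proportionally more fine samples. A toy sketch:

import torch
from lrm.models.rendering.utils.renderer import ImportanceRenderer

renderer = ImportanceRenderer()
bins = torch.linspace(0, 1, 6).unsqueeze(0)      # (1, 6): five bins on [0, 1]
weights = torch.tensor([[0., 0., 10., 0., 0.]])  # all mass in the middle bin
samples = renderer.sample_pdf(bins, weights, N_importance=8, det=True)
# most samples land inside the heavy [0.4, 0.6] bin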
lrm/models/transformer.py DELETED
@@ -1,143 +0,0 @@
- # Copyright (c) 2023, Zexin He
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
-
- import torch
- import torch.nn as nn
-
-
- class ModLN(nn.Module):
-     """
-     Modulation with adaLN.
-
-     References:
-     DiT: https://github.com/facebookresearch/DiT/blob/main/models.py#L101
-     """
-     def __init__(self, inner_dim: int, mod_dim: int, eps: float):
-         super().__init__()
-         self.norm = nn.LayerNorm(inner_dim, eps=eps)
-         self.mlp = nn.Sequential(
-             nn.SiLU(),
-             nn.Linear(mod_dim, inner_dim * 2),
-         )
-
-     @staticmethod
-     def modulate(x, shift, scale):
-         # x: [N, L, D]
-         # shift, scale: [N, D]
-         return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1)
-
-     def forward(self, x, cond):
-         shift, scale = self.mlp(cond).chunk(2, dim=-1)  # [N, D]
-         return self.modulate(self.norm(x), shift, scale)  # [N, L, D]
-
-
- class ConditionModulationBlock(nn.Module):
-     """
-     Transformer block that takes in a cross-attention condition and another modulation vector applied to sub-blocks.
-     """
-     # uses attention from torch.nn.MultiheadAttention
-     # the block contains a cross-attention layer, a self-attention layer, and an MLP
-     def __init__(self, inner_dim: int, cond_dim: int, mod_dim: int, num_heads: int, eps: float,
-                  attn_drop: float = 0., attn_bias: bool = False,
-                  mlp_ratio: float = 4., mlp_drop: float = 0.):
-         super().__init__()
-         self.norm1 = ModLN(inner_dim, mod_dim, eps)
-         self.cross_attn = nn.MultiheadAttention(
-             embed_dim=inner_dim, num_heads=num_heads, kdim=cond_dim, vdim=cond_dim,
-             dropout=attn_drop, bias=attn_bias, batch_first=True)
-         self.norm2 = ModLN(inner_dim, mod_dim, eps)
-         self.self_attn = nn.MultiheadAttention(
-             embed_dim=inner_dim, num_heads=num_heads,
-             dropout=attn_drop, bias=attn_bias, batch_first=True)
-         self.norm3 = ModLN(inner_dim, mod_dim, eps)
-         self.mlp = nn.Sequential(
-             nn.Linear(inner_dim, int(inner_dim * mlp_ratio)),
-             nn.GELU(),
-             nn.Dropout(mlp_drop),
-             nn.Linear(int(inner_dim * mlp_ratio), inner_dim),
-             nn.Dropout(mlp_drop),
-         )
-
-     def forward(self, x, cond, mod):
-         # x: [N, L, D]
-         # cond: [N, L_cond, D_cond]
-         # mod: [N, D_mod]
-         x = x + self.cross_attn(self.norm1(x, mod), cond, cond, need_weights=False)[0]
-         before_sa = self.norm2(x, mod)
-         x = x + self.self_attn(before_sa, before_sa, before_sa, need_weights=False)[0]
-         x = x + self.mlp(self.norm3(x, mod))
-         return x
-
-
- class TriplaneTransformer(nn.Module):
-     """
-     Transformer with condition and modulation that generates a triplane representation.
-
-     Reference:
-     Timm: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L486
-     """
-     def __init__(self, inner_dim: int, image_feat_dim: int, camera_embed_dim: int,
-                  triplane_low_res: int, triplane_high_res: int, triplane_dim: int,
-                  num_layers: int, num_heads: int,
-                  eps: float = 1e-6):
-         super().__init__()
-
-         # attributes
-         self.triplane_low_res = triplane_low_res
-         self.triplane_high_res = triplane_high_res
-         self.triplane_dim = triplane_dim
-
-         # modules
-         # initialize pos_embed with 1/sqrt(dim) * N(0, 1)
-         self.pos_embed = nn.Parameter(torch.randn(1, 3*triplane_low_res**2, inner_dim) * (1. / inner_dim) ** 0.5)
-         self.layers = nn.ModuleList([
-             ConditionModulationBlock(
-                 inner_dim=inner_dim, cond_dim=image_feat_dim, mod_dim=camera_embed_dim, num_heads=num_heads, eps=eps)
-             for _ in range(num_layers)
-         ])
-         self.norm = nn.LayerNorm(inner_dim, eps=eps)
-         self.deconv = nn.ConvTranspose2d(inner_dim, triplane_dim, kernel_size=2, stride=2, padding=0)
-
-     def forward(self, image_feats, camera_embeddings):
-         # image_feats: [N, L_cond, D_cond]
-         # camera_embeddings: [N, D_mod]
-
-         assert image_feats.shape[0] == camera_embeddings.shape[0], \
-             f"Mismatched batch size: {image_feats.shape[0]} vs {camera_embeddings.shape[0]}"
-
-         N = image_feats.shape[0]
-         H = W = self.triplane_low_res
-         L = 3 * H * W  # total number of triplane tokens
-
-         x = self.pos_embed.repeat(N, 1, 1)  # [N, L, D]
-         for layer in self.layers:
-             x = layer(x, image_feats, camera_embeddings)
-         x = self.norm(x)
-
-         # separate each plane and apply deconv
-         x = x.view(N, 3, H, W, -1)
-         x = torch.einsum('nihwd->indhw', x)  # [3, N, D, H, W]
-         x = x.contiguous().view(3*N, -1, H, W)  # [3*N, D, H, W]
-         x = self.deconv(x)  # [3*N, D', H', W']
-         x = x.view(3, N, *x.shape[-3:])  # [3, N, D', H', W']
-         x = torch.einsum('indhw->nidhw', x)  # [N, 3, D', H', W']
-         x = x.contiguous()
-
-         assert self.triplane_high_res == x.shape[-2], \
-             f"Output triplane resolution does not match expectation: {x.shape[-2]} vs {self.triplane_high_res}"
-         assert self.triplane_dim == x.shape[-3], \
-             f"Output triplane dimension does not match expectation: {x.shape[-3]} vs {self.triplane_dim}"
-
-         return x
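
A minimal sketch of the adaLN modulation contract above: the conditioning vector is mapped to a per-channel shift and scale that re-modulate the normalized tokens.

import torch
from lrm.models.transformer import ModLN

mod = ModLN(inner_dim=8, mod_dim=4, eps=1e-6)
x = torch.randn(2, 5, 8)  # [N, L, D] tokens
cond = torch.randn(2, 4)  # [N, D_mod] modulation vector
y = mod(x, cond)          # [2, 5, 8]: layer-normed, then x * (1 + scale) + shift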
rembg_and_center.py DELETED
@@ -1,81 +0,0 @@
- # Modified from https://huggingface.co/spaces/jiawei011/dreamgaussian/edit/main/process.py
-
- import os
- import glob
- import argparse
-
- import cv2
- import numpy as np
- import rembg
-
-
- if __name__ == '__main__':
-
-     parser = argparse.ArgumentParser()
-     parser.add_argument('path', type=str, help="path to image (png, jpeg, etc.)")
-     parser.add_argument('--model', default='u2net', type=str, help="rembg model, see https://github.com/danielgatis/rembg#models")
-     parser.add_argument('--size', default=512, type=int, help="output resolution")
-     parser.add_argument('--border_ratio', default=0.2, type=float, help="output border ratio")
-     parser.add_argument('--recenter', type=bool, default=True, help="recenter, potentially not helpful for multiview zero123")
-     opt = parser.parse_args()
-
-     session = rembg.new_session(model_name=opt.model)
-
-     if os.path.isdir(opt.path):
-         print(f'[INFO] processing directory {opt.path}...')
-         files = glob.glob(f'{opt.path}/*')
-         out_dir = opt.path
-     else:  # single file
-         files = [opt.path]
-         out_dir = os.path.dirname(opt.path)
-
-     for file in files:
-
-         out_base = os.path.basename(file).split('.')[0]
-         out_rgba = os.path.join(out_dir, out_base + '_rgba.png')
-
-         # load image
-         print(f'[INFO] loading image {file}...')
-         image = cv2.imread(file, cv2.IMREAD_UNCHANGED)
-         _h, _w = image.shape[:2]
-         scale = opt.size / max(_h, _w)
-         _h, _w = int(_h * scale), int(_w * scale)
-         image = cv2.resize(image, (_w, _h), interpolation=cv2.INTER_AREA)
-
-         # carve background
-         print('[INFO] background removal...')
-         carved_image = rembg.remove(image, session=session)  # [H, W, 4]
-         mask = carved_image[..., -1] > 0
-
-         # recenter
-         if opt.recenter:
-             print('[INFO] recenter...')
-             final_rgba = np.zeros((opt.size, opt.size, 4), dtype=np.uint8)
-
-             coords = np.nonzero(mask)
-             x_min, x_max = coords[0].min(), coords[0].max()
-             y_min, y_max = coords[1].min(), coords[1].max()
-             h = x_max - x_min
-             w = y_max - y_min
-             desired_size = int(opt.size * (1 - opt.border_ratio))
-             scale = desired_size / max(h, w)
-             h2 = int(h * scale)
-             w2 = int(w * scale)
-             x2_min = (opt.size - h2) // 2
-             x2_max = x2_min + h2
-             y2_min = (opt.size - w2) // 2
-             y2_max = y2_min + w2
-             final_rgba[x2_min:x2_max, y2_min:y2_max] = cv2.resize(carved_image[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
-
-         else:
-             final_rgba = carved_image
-
-         # write image
-         cv2.imwrite(out_rgba, final_rgba)
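
A typical invocation of this preprocessing script, using the sample asset referenced elsewhere in this diff; the output name follows the out_rgba pattern above:

python rembg_and_center.py ./assets/sample_input/owl.png --size 512 --border_ratio 0.2
# writes ./assets/sample_input/owl_rgba.png: background removed, recentered, 512x512 RGBA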
requirements.txt DELETED
@@ -1,11 +0,0 @@
- torch>=2.1.0
- transformers
- opencv-python
- matplotlib
- torchvision==0.16.0
- rembg
- omegaconf
- pillow
- imageio[ffmpeg]
- PyMCubes
- trimesh
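
Restoring this environment is a single command; note that torchvision==0.16.0 is the build matched to torch 2.1.0, so the torch>=2.1.0 range effectively resolves to 2.1.0 when both pins are installed together.

pip install -r requirements.txt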