jadechoghari committed on Jul 31, 2024

Commit

19fb693

1 Parent(s): 2a534b4

update and include model files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

CODE_OF_CONDUCT.md +80 -0
CONTRIBUTING.md +31 -0
LICENSE +399 -0
README.md +58 -168
__init__.py +1 -0
assets/40_prompt_images/A 3D scan of AK47, weapon.jpeg +0 -0
assets/40_prompt_images/A DSLR photo of Sydney Opera House.jpg +0 -0
assets/40_prompt_images/A bald eagle carved out of wood.jpg +0 -0
assets/40_prompt_images/A bulldog wearing a black pirate hat.jpeg +0 -0
assets/40_prompt_images/A crab, low poly.jpg +0 -0
assets/40_prompt_images/A photo of a horse walking.jpeg +0 -0
assets/40_prompt_images/A pig wearing a backpack.jpeg +0 -0
assets/40_prompt_images/A product photo of a toy tank.jpg +0 -0
assets/40_prompt_images/A see no evil monkey on a kick drum.jpg +0 -0
assets/40_prompt_images/A statue of angel, blender.jpg +0 -0
assets/40_prompt_images/Corgi riding a rocket.jpeg +0 -0
assets/40_prompt_images/Daenerys Targaryen from game of throne.jpg +0 -0
assets/40_prompt_images/Darth Vader helmet,g highly detailed.jpg +0 -0
assets/40_prompt_images/Dragon armor.jpeg +0 -0
assets/40_prompt_images/Fisherman House, cute, cartoon, blender, stylized.jpg +0 -0
assets/40_prompt_images/Flying Dragon, highly detailed, breathing fire.jpeg +0 -0
assets/40_prompt_images/Handpainted watercolor windmill, hand-painted.jpg +0 -0
assets/40_prompt_images/Katana.jpeg +0 -0
assets/40_prompt_images/Little italian town, hand-painted style.jpg +0 -0
assets/40_prompt_images/Mr Bean Cartoon doing a T Pose.jpg +0 -0
assets/40_prompt_images/Pedestal Fan (White).jpeg +0 -0
assets/40_prompt_images/Pikachu with hat.jpg +0 -0
assets/40_prompt_images/Samurai koala bear.jpg +0 -0
assets/40_prompt_images/TRUMP figure.jpg +0 -0
assets/40_prompt_images/Viking axe, fantasy, weapon, blender, 8k, HD.jpg +0 -0
assets/40_prompt_images/a DSLR photo of a frog wearing a sweater.jpg +0 -0
assets/40_prompt_images/a DSLR photo of a ghost eating a hamburger.jpg +0 -0
assets/40_prompt_images/a DSLR photo of a peacock on a surfboard.jpeg +0 -0
assets/40_prompt_images/a DSLR photo of a squirrel playing guitar.jpg +0 -0
assets/40_prompt_images/a DSLR photo of an eggshell broken in two with an adorable chick standing next to it.jpeg +0 -0
assets/40_prompt_images/an astronaut riding a horse.jpeg +0 -0
assets/40_prompt_images/animal skull pile.jpg +0 -0
assets/40_prompt_images/army Jacket, 3D scan.jpg +0 -0
assets/40_prompt_images/baby yoda in the style of Mormookiee.jpg +0 -0
assets/40_prompt_images/beautiful, intricate butterfly.jpg +0 -0
assets/40_prompt_images/girl riding wolf, cute, cartoon, blender.jpg +0 -0
assets/40_prompt_images/mecha vampire girl chibi.jpg +0 -0
assets/40_prompt_images/military Mech, future, scifi.jpg +0 -0
assets/40_prompt_images/motorcycle, scifi, blender.jpeg +0 -0
assets/40_prompt_images/saber from fate stay night, 3D, girl, anime.jpeg +0 -0
install.sh +25 -0
lrm/__init__.py +5 -0
lrm/cam_utils.py +138 -0
lrm/inferrer.py +232 -0
lrm/models/__init__.py +5 -0

CODE_OF_CONDUCT.md ADDED Viewed

	@@ -0,0 +1,80 @@

+# Code of Conduct
+## Our Pledge
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to make participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, sex characteristics, gender identity and expression,
+level of experience, education, socio-economic status, nationality, personal
+appearance, race, religion, or sexual identity and orientation.
+## Our Standards
+Examples of behavior that contributes to creating a positive environment
+include:
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+Examples of unacceptable behavior by participants include:
+* The use of sexualized language or imagery and unwelcome sexual attention or
+  advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+  address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+## Our Responsibilities
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+## Scope
+This Code of Conduct applies within all project spaces, and it also applies when
+an individual is representing the project or its community in public spaces.
+Examples of representing a project or community include using an official
+project e-mail address, posting via an official social media account, or acting
+as an appointed representative at an online or offline event. Representation of
+a project may be further defined and clarified by project maintainers.
+This Code of Conduct also applies outside the project spaces when there is a
+reasonable belief that an individual's behavior may have a negative impact on
+the project or its community.
+## Enforcement
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at <opensource-conduct@fb.com>. All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+## Attribution
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+[homepage]: https://www.contributor-covenant.org
+For answers to common questions about this code of conduct, see
+https://www.contributor-covenant.org/faq

CONTRIBUTING.md ADDED Viewed

	@@ -0,0 +1,31 @@

+# Contributing to PoseDiffusion
+We want to make contributing to this project as easy and transparent as
+possible.
+## Pull Requests
+We actively welcome your pull requests.
+1. Fork the repo and create your branch from `main`.
+2. If you've added code that should be tested, add tests.
+3. If you've changed APIs, update the documentation.
+4. Ensure the test suite passes.
+5. Make sure your code lints.
+6. If you haven't already, complete the Contributor License Agreement ("CLA").
+## Contributor License Agreement ("CLA")
+In order to accept your pull request, we need you to submit a CLA. You only need
+to do this once to work on any of Facebook's open source projects.
+Complete your CLA here: <https://code.facebook.com/cla>
+## Issues
+We use GitHub issues to track public bugs. Please ensure your description is
+clear and has sufficient instructions to be able to reproduce the issue.
+Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
+disclosure of security bugs. In those cases, please go through the process
+outlined on that page and do not file a public issue.
+## License
+By contributing to PoseDiffusion, you agree that your contributions will be licensed
+under the LICENSE file in the root directory of this source tree.

LICENSE ADDED Viewed

	@@ -0,0 +1,399 @@

+Attribution-NonCommercial 4.0 International
+=======================================================================
+Creative Commons Corporation ("Creative Commons") is not a law firm and
+does not provide legal services or legal advice. Distribution of
+Creative Commons public licenses does not create a lawyer-client or
+other relationship. Creative Commons makes its licenses and related
+information available on an "as-is" basis. Creative Commons gives no
+warranties regarding its licenses, any material licensed under their
+terms and conditions, or any related information. Creative Commons
+disclaims all liability for damages resulting from their use to the
+fullest extent possible.
+Using Creative Commons Public Licenses
+Creative Commons public licenses provide a standard set of terms and
+conditions that creators and other rights holders may use to share
+original works of authorship and other material subject to copyright
+and certain other rights specified in the public license below. The
+following considerations are for informational purposes only, are not
+exhaustive, and do not form part of our licenses.
+     Considerations for licensors: Our public licenses are
+     intended for use by those authorized to give the public
+     permission to use material in ways otherwise restricted by
+     copyright and certain other rights. Our licenses are
+     irrevocable. Licensors should read and understand the terms
+     and conditions of the license they choose before applying it.
+     Licensors should also secure all rights necessary before
+     applying our licenses so that the public can reuse the
+     material as expected. Licensors should clearly mark any
+     material not subject to the license. This includes other CC-
+     licensed material, or material used under an exception or
+     limitation to copyright. More considerations for licensors:
+	wiki.creativecommons.org/Considerations_for_licensors
+     Considerations for the public: By using one of our public
+     licenses, a licensor grants the public permission to use the
+     licensed material under specified terms and conditions. If
+     the licensor's permission is not necessary for any reason--for
+     example, because of any applicable exception or limitation to
+     copyright--then that use is not regulated by the license. Our
+     licenses grant only permissions under copyright and certain
+     other rights that a licensor has authority to grant. Use of
+     the licensed material may still be restricted for other
+     reasons, including because others have copyright or other
+     rights in the material. A licensor may make special requests,
+     such as asking that all changes be marked or described.
+     Although not required by our licenses, you are encouraged to
+     respect those requests where reasonable. More considerations
+     for the public:
+	wiki.creativecommons.org/Considerations_for_licensees
+=======================================================================
+Creative Commons Attribution-NonCommercial 4.0 International Public
+License
+By exercising the Licensed Rights (defined below), You accept and agree
+to be bound by the terms and conditions of this Creative Commons
+Attribution-NonCommercial 4.0 International Public License ("Public
+License"). To the extent this Public License may be interpreted as a
+contract, You are granted the Licensed Rights in consideration of Your
+acceptance of these terms and conditions, and the Licensor grants You
+such rights in consideration of benefits the Licensor receives from
+making the Licensed Material available under these terms and
+conditions.
+Section 1 -- Definitions.
+  a. Adapted Material means material subject to Copyright and Similar
+     Rights that is derived from or based upon the Licensed Material
+     and in which the Licensed Material is translated, altered,
+     arranged, transformed, or otherwise modified in a manner requiring
+     permission under the Copyright and Similar Rights held by the
+     Licensor. For purposes of this Public License, where the Licensed
+     Material is a musical work, performance, or sound recording,
+     Adapted Material is always produced where the Licensed Material is
+     synched in timed relation with a moving image.
+  b. Adapter's License means the license You apply to Your Copyright
+     and Similar Rights in Your contributions to Adapted Material in
+     accordance with the terms and conditions of this Public License.
+  c. Copyright and Similar Rights means copyright and/or similar rights
+     closely related to copyright including, without limitation,
+     performance, broadcast, sound recording, and Sui Generis Database
+     Rights, without regard to how the rights are labeled or
+     categorized. For purposes of this Public License, the rights
+     specified in Section 2(b)(1)-(2) are not Copyright and Similar
+     Rights.
+  d. Effective Technological Measures means those measures that, in the
+     absence of proper authority, may not be circumvented under laws
+     fulfilling obligations under Article 11 of the WIPO Copyright
+     Treaty adopted on December 20, 1996, and/or similar international
+     agreements.
+  e. Exceptions and Limitations means fair use, fair dealing, and/or
+     any other exception or limitation to Copyright and Similar Rights
+     that applies to Your use of the Licensed Material.
+  f. Licensed Material means the artistic or literary work, database,
+     or other material to which the Licensor applied this Public
+     License.
+  g. Licensed Rights means the rights granted to You subject to the
+     terms and conditions of this Public License, which are limited to
+     all Copyright and Similar Rights that apply to Your use of the
+     Licensed Material and that the Licensor has authority to license.
+  h. Licensor means the individual(s) or entity(ies) granting rights
+     under this Public License.
+  i. NonCommercial means not primarily intended for or directed towards
+     commercial advantage or monetary compensation. For purposes of
+     this Public License, the exchange of the Licensed Material for
+     other material subject to Copyright and Similar Rights by digital
+     file-sharing or similar means is NonCommercial provided there is
+     no payment of monetary compensation in connection with the
+     exchange.
+  j. Share means to provide material to the public by any means or
+     process that requires permission under the Licensed Rights, such
+     as reproduction, public display, public performance, distribution,
+     dissemination, communication, or importation, and to make material
+     available to the public including in ways that members of the
+     public may access the material from a place and at a time
+     individually chosen by them.
+  k. Sui Generis Database Rights means rights other than copyright
+     resulting from Directive 96/9/EC of the European Parliament and of
+     the Council of 11 March 1996 on the legal protection of databases,
+     as amended and/or succeeded, as well as other essentially
+     equivalent rights anywhere in the world.
+  l. You means the individual or entity exercising the Licensed Rights
+     under this Public License. Your has a corresponding meaning.
+Section 2 -- Scope.
+  a. License grant.
+       1. Subject to the terms and conditions of this Public License,
+          the Licensor hereby grants You a worldwide, royalty-free,
+          non-sublicensable, non-exclusive, irrevocable license to
+          exercise the Licensed Rights in the Licensed Material to:
+            a. reproduce and Share the Licensed Material, in whole or
+               in part, for NonCommercial purposes only; and
+            b. produce, reproduce, and Share Adapted Material for
+               NonCommercial purposes only.
+       2. Exceptions and Limitations. For the avoidance of doubt, where
+          Exceptions and Limitations apply to Your use, this Public
+          License does not apply, and You do not need to comply with
+          its terms and conditions.
+       3. Term. The term of this Public License is specified in Section
+          6(a).
+       4. Media and formats; technical modifications allowed. The
+          Licensor authorizes You to exercise the Licensed Rights in
+          all media and formats whether now known or hereafter created,
+          and to make technical modifications necessary to do so. The
+          Licensor waives and/or agrees not to assert any right or
+          authority to forbid You from making technical modifications
+          necessary to exercise the Licensed Rights, including
+          technical modifications necessary to circumvent Effective
+          Technological Measures. For purposes of this Public License,
+          simply making modifications authorized by this Section 2(a)
+          (4) never produces Adapted Material.
+       5. Downstream recipients.
+            a. Offer from the Licensor -- Licensed Material. Every
+               recipient of the Licensed Material automatically
+               receives an offer from the Licensor to exercise the
+               Licensed Rights under the terms and conditions of this
+               Public License.
+            b. No downstream restrictions. You may not offer or impose
+               any additional or different terms or conditions on, or
+               apply any Effective Technological Measures to, the
+               Licensed Material if doing so restricts exercise of the
+               Licensed Rights by any recipient of the Licensed
+               Material.
+       6. No endorsement. Nothing in this Public License constitutes or
+          may be construed as permission to assert or imply that You
+          are, or that Your use of the Licensed Material is, connected
+          with, or sponsored, endorsed, or granted official status by,
+          the Licensor or others designated to receive attribution as
+          provided in Section 3(a)(1)(A)(i).
+  b. Other rights.
+       1. Moral rights, such as the right of integrity, are not
+          licensed under this Public License, nor are publicity,
+          privacy, and/or other similar personality rights; however, to
+          the extent possible, the Licensor waives and/or agrees not to
+          assert any such rights held by the Licensor to the limited
+          extent necessary to allow You to exercise the Licensed
+          Rights, but not otherwise.
+       2. Patent and trademark rights are not licensed under this
+          Public License.
+       3. To the extent possible, the Licensor waives any right to
+          collect royalties from You for the exercise of the Licensed
+          Rights, whether directly or through a collecting society
+          under any voluntary or waivable statutory or compulsory
+          licensing scheme. In all other cases the Licensor expressly
+          reserves any right to collect such royalties, including when
+          the Licensed Material is used other than for NonCommercial
+          purposes.
+Section 3 -- License Conditions.
+Your exercise of the Licensed Rights is expressly made subject to the
+following conditions.
+  a. Attribution.
+       1. If You Share the Licensed Material (including in modified
+          form), You must:
+            a. retain the following if it is supplied by the Licensor
+               with the Licensed Material:
+                 i. identification of the creator(s) of the Licensed
+                    Material and any others designated to receive
+                    attribution, in any reasonable manner requested by
+                    the Licensor (including by pseudonym if
+                    designated);
+                ii. a copyright notice;
+               iii. a notice that refers to this Public License;
+                iv. a notice that refers to the disclaimer of
+                    warranties;
+                 v. a URI or hyperlink to the Licensed Material to the
+                    extent reasonably practicable;
+            b. indicate if You modified the Licensed Material and
+               retain an indication of any previous modifications; and
+            c. indicate the Licensed Material is licensed under this
+               Public License, and include the text of, or the URI or
+               hyperlink to, this Public License.
+       2. You may satisfy the conditions in Section 3(a)(1) in any
+          reasonable manner based on the medium, means, and context in
+          which You Share the Licensed Material. For example, it may be
+          reasonable to satisfy the conditions by providing a URI or
+          hyperlink to a resource that includes the required
+          information.
+       3. If requested by the Licensor, You must remove any of the
+          information required by Section 3(a)(1)(A) to the extent
+          reasonably practicable.
+       4. If You Share Adapted Material You produce, the Adapter's
+          License You apply must not prevent recipients of the Adapted
+          Material from complying with this Public License.
+Section 4 -- Sui Generis Database Rights.
+Where the Licensed Rights include Sui Generis Database Rights that
+apply to Your use of the Licensed Material:
+  a. for the avoidance of doubt, Section 2(a)(1) grants You the right
+     to extract, reuse, reproduce, and Share all or a substantial
+     portion of the contents of the database for NonCommercial purposes
+     only;
+  b. if You include all or a substantial portion of the database
+     contents in a database in which You have Sui Generis Database
+     Rights, then the database in which You have Sui Generis Database
+     Rights (but not its individual contents) is Adapted Material; and
+  c. You must comply with the conditions in Section 3(a) if You Share
+     all or a substantial portion of the contents of the database.
+For the avoidance of doubt, this Section 4 supplements and does not
+replace Your obligations under this Public License where the Licensed
+Rights include other Copyright and Similar Rights.
+Section 5 -- Disclaimer of Warranties and Limitation of Liability.
+  a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
+     EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
+     AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
+     ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
+     IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
+     WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
+     PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
+     ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
+     KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
+     ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
+  b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
+     TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
+     NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
+     INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
+     COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
+     USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
+     ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
+     DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
+     IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
+  c. The disclaimer of warranties and limitation of liability provided
+     above shall be interpreted in a manner that, to the extent
+     possible, most closely approximates an absolute disclaimer and
+     waiver of all liability.
+Section 6 -- Term and Termination.
+  a. This Public License applies for the term of the Copyright and
+     Similar Rights licensed here. However, if You fail to comply with
+     this Public License, then Your rights under this Public License
+     terminate automatically.
+  b. Where Your right to use the Licensed Material has terminated under
+     Section 6(a), it reinstates:
+       1. automatically as of the date the violation is cured, provided
+          it is cured within 30 days of Your discovery of the
+          violation; or
+       2. upon express reinstatement by the Licensor.
+     For the avoidance of doubt, this Section 6(b) does not affect any
+     right the Licensor may have to seek remedies for Your violations
+     of this Public License.
+  c. For the avoidance of doubt, the Licensor may also offer the
+     Licensed Material under separate terms or conditions or stop
+     distributing the Licensed Material at any time; however, doing so
+     will not terminate this Public License.
+  d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
+     License.
+Section 7 -- Other Terms and Conditions.
+  a. The Licensor shall not be bound by any additional or different
+     terms or conditions communicated by You unless expressly agreed.
+  b. Any arrangements, understandings, or agreements regarding the
+     Licensed Material not stated herein are separate from and
+     independent of the terms and conditions of this Public License.
+Section 8 -- Interpretation.
+  a. For the avoidance of doubt, this Public License does not, and
+     shall not be interpreted to, reduce, limit, restrict, or impose
+     conditions on any use of the Licensed Material that could lawfully
+     be made without permission under this Public License.
+  b. To the extent possible, if any provision of this Public License is
+     deemed unenforceable, it shall be automatically reformed to the
+     minimum extent necessary to make it enforceable. If the provision
+     cannot be reformed, it shall be severed from this Public License
+     without affecting the enforceability of the remaining terms and
+     conditions.
+  c. No term or condition of this Public License will be waived and no
+     failure to comply consented to unless expressly agreed to by the
+     Licensor.
+  d. Nothing in this Public License constitutes or may be interpreted
+     as a limitation upon, or waiver of, any privileges and immunities
+     that apply to the Licensor or You, including from the legal
+     processes of any jurisdiction or authority.
+=======================================================================
+Creative Commons is not a party to its public
+licenses. Notwithstanding, Creative Commons may elect to apply one of
+its public licenses to material it publishes and in those instances
+will be considered the “Licensor.” The text of the Creative Commons
+public licenses is dedicated to the public domain under the CC0 Public
+Domain Dedication. Except for the limited purpose of indicating that
+material is shared under a Creative Commons public license or as
+otherwise permitted by the Creative Commons policies published at
+creativecommons.org/policies, Creative Commons does not authorize the
+use of the trademark "Creative Commons" or any other trademark or logo
+of Creative Commons without its prior written consent including,
+without limitation, in connection with any unauthorized modifications
+to any of its public licenses or any other arrangements,
+understandings, or agreements concerning use of licensed material. For
+the avoidance of doubt, this paragraph does not form part of the
+public licenses.
+Creative Commons may be contacted at creativecommons.org.

README.md CHANGED Viewed

@@ -1,199 +1,89 @@
----
-library_name: transformers
-tags: []
----
-# Model Card for Model ID
-<!-- Provide a quick summary of what the model is/does. -->
-## Model Details
-### Model Description
-<!-- Provide a longer summary of what this model is. -->
-This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated.
-- **Developed by:** [More Information Needed]
-- **Funded by [optional]:** [More Information Needed]
-- **Shared by [optional]:** [More Information Needed]
-- **Model type:** [More Information Needed]
-- **Language(s) (NLP):** [More Information Needed]
-- **License:** [More Information Needed]
-- **Finetuned from model [optional]:** [More Information Needed]
-### Model Sources [optional]
-<!-- Provide the basic links for the model. -->
-- **Repository:** [More Information Needed]
-- **Paper [optional]:** [More Information Needed]
-- **Demo [optional]:** [More Information Needed]
-## Uses
-<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
-### Direct Use
-<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
-[More Information Needed]
-### Downstream Use [optional]
-<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
-[More Information Needed]
-### Out-of-Scope Use
-<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
-[More Information Needed]
-## Bias, Risks, and Limitations
-<!-- This section is meant to convey both technical and sociotechnical limitations. -->
-[More Information Needed]
-### Recommendations
-<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
-Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
-## How to Get Started with the Model
-Use the code below to get started with the model.
-[More Information Needed]
-## Training Details
-### Training Data
-<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
-[More Information Needed]
-### Training Procedure
-<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
-#### Preprocessing [optional]
-[More Information Needed]
-#### Training Hyperparameters
-- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
-#### Speeds, Sizes, Times [optional]
-<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
-[More Information Needed]
-## Evaluation
-<!-- This section describes the evaluation protocols and provides the results. -->
-### Testing Data, Factors & Metrics
-#### Testing Data
-<!-- This should link to a Dataset Card if possible. -->
-[More Information Needed]
-#### Factors
-<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
-[More Information Needed]
-#### Metrics
-<!-- These are the evaluation metrics being used, ideally with a description of why. -->
-[More Information Needed]
-### Results
-[More Information Needed]
-#### Summary
-## Model Examination [optional]
-<!-- Relevant interpretability work for the model goes here -->
-[More Information Needed]
-## Environmental Impact
-<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
-Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
-- **Hardware Type:** [More Information Needed]
-- **Hours used:** [More Information Needed]
-- **Cloud Provider:** [More Information Needed]
-- **Compute Region:** [More Information Needed]
-- **Carbon Emitted:** [More Information Needed]
-## Technical Specifications [optional]
-### Model Architecture and Objective
-[More Information Needed]
-### Compute Infrastructure
-[More Information Needed]
-#### Hardware
-[More Information Needed]
-#### Software
-[More Information Needed]
-## Citation [optional]
-<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
-**BibTeX:**
-[More Information Needed]
-**APA:**
-[More Information Needed]
-## Glossary [optional]
-<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
-[More Information Needed]
-## More Information [optional]
-[More Information Needed]
-## Model Card Authors [optional]
-[More Information Needed]
-## Model Card Contact
-[More Information Needed]

+# [ECCV 2024] VFusion3D: Learning Scalable 3D Generative Models from Video Diffusion Models
+[Project page](https://junlinhan.github.io/projects/vfusion3d.html), [Paper link](https://arxiv.org/abs/2403.12034)
+VFusion3D is a large, feed-forward 3D generative model trained with a small amount of 3D data and a large volume of synthetic multi-view data. It is the first work exploring scalable 3D generative/reconstruction models as a step towards a 3D foundation.
+[VFusion3D: Learning Scalable 3D Generative Models from Video Diffusion Models](https://junlinhan.github.io/projects/vfusion3d.html)<br>
+[Junlin Han](https://junlinhan.github.io/), [Filippos Kokkinos](https://www.fkokkinos.com/), [Philip Torr](https://www.robots.ox.ac.uk/~phst/)<br>
+GenAI, Meta and TVG, University of Oxford<br>
+European Conference on Computer Vision (ECCV), 2024
+## News
+- [25.07.2024] Release weights and inference code for VFusion3D.
+## Results and Comparisons
+### 3D Generation Results
+<img src='images/gif1.gif' width=950>
+<img src='images/gif2.gif' width=950>
+### User Study Results
+<img src='images/user.png' width=950>
+## Setup
+### Installation
+```
+git clone https://github.com/facebookresearch/vfusion3d
+cd vfusion3d
+```
+### Environment
+We provide a simple installation script that, by default, sets up a conda environment with Python 3.8.19, PyTorch 2.3, and CUDA 12.1. Similar package versions should also work.
+```
+source install.sh
+```
+## Quick Start
+### Pretrained Models
+- Model weights are available on [Google Drive](https://drive.google.com/file/d/1b-KKSh9VquJdzmXzZBE4nKbXnbeua42X/view?usp=sharing). Please download them and put them inside `./checkpoints/`.
+### Prepare Images
+- We put some sample inputs under `assets/40_prompt_images`, which are the 40 MVDream prompt images used in the paper. The corresponding results are also provided under `results/40_prompt_images_provided`.
+### Inference
+- Run the inference script to get 3D assets.
+- You may specify which form of output to generate by setting the flags `--export_video` and `--export_mesh`.
+- Change `--source_path` and `--dump_path` if you want to run it on other image folders.
+    ```
+    # Example usages
+    # Render a video
+    python -m lrm.inferrer --export_video --resume ./checkpoints/vfusion3dckpt
+    # Export mesh
+    python -m lrm.inferrer --export_mesh --resume ./checkpoints/vfusion3dckpt
+    ```
+## Acknowledgement
+- This inference code of VFusion3D heavily borrows from [OpenLRM](https://github.com/3DTopia/OpenLRM).
+## Citation
+If you find this work useful, please cite us:
+```
+@article{han2024vfusion3d,
+  title={VFusion3D: Learning Scalable 3D Generative Models from Video Diffusion Models},
+  author={Junlin Han and Filippos Kokkinos and Philip Torr},
+  journal={European Conference on Computer Vision (ECCV)},
+  year={2024}
+}
+```
+## License
+- The majority of VFusion3D is licensed under CC-BY-NC; however, portions of the project are available under separate license terms: OpenLRM as a whole is licensed under the Apache License, Version 2.0, while certain components are covered by NVIDIA's proprietary license.
+- The model weights of VFusion3D are also licensed under CC-BY-NC.

__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .modeling import LRMGenerator, LRMGeneratorConfig

assets/40_prompt_images/A 3D scan of AK47, weapon.jpeg ADDED Viewed

assets/40_prompt_images/A DSLR photo of Sydney Opera House.jpg ADDED Viewed

assets/40_prompt_images/A bald eagle carved out of wood.jpg ADDED Viewed

assets/40_prompt_images/A bulldog wearing a black pirate hat.jpeg ADDED Viewed

assets/40_prompt_images/A crab, low poly.jpg ADDED Viewed

assets/40_prompt_images/A photo of a horse walking.jpeg ADDED Viewed

assets/40_prompt_images/A pig wearing a backpack.jpeg ADDED Viewed

assets/40_prompt_images/A product photo of a toy tank.jpg ADDED Viewed

assets/40_prompt_images/A see no evil monkey on a kick drum.jpg ADDED Viewed

assets/40_prompt_images/A statue of angel, blender.jpg ADDED Viewed

assets/40_prompt_images/Corgi riding a rocket.jpeg ADDED Viewed

assets/40_prompt_images/Daenerys Targaryen from game of throne.jpg ADDED Viewed

assets/40_prompt_images/Darth Vader helmet,g highly detailed.jpg ADDED Viewed

assets/40_prompt_images/Dragon armor.jpeg ADDED Viewed

assets/40_prompt_images/Fisherman House, cute, cartoon, blender, stylized.jpg ADDED Viewed

assets/40_prompt_images/Flying Dragon, highly detailed, breathing fire.jpeg ADDED Viewed

assets/40_prompt_images/Handpainted watercolor windmill, hand-painted.jpg ADDED Viewed

assets/40_prompt_images/Katana.jpeg ADDED Viewed

assets/40_prompt_images/Little italian town, hand-painted style.jpg ADDED Viewed

assets/40_prompt_images/Mr Bean Cartoon doing a T Pose.jpg ADDED Viewed

assets/40_prompt_images/Pedestal Fan (White).jpeg ADDED Viewed

assets/40_prompt_images/Pikachu with hat.jpg ADDED Viewed

assets/40_prompt_images/Samurai koala bear.jpg ADDED Viewed

assets/40_prompt_images/TRUMP figure.jpg ADDED Viewed

assets/40_prompt_images/Viking axe, fantasy, weapon, blender, 8k, HD.jpg ADDED Viewed

assets/40_prompt_images/a DSLR photo of a frog wearing a sweater.jpg ADDED Viewed

assets/40_prompt_images/a DSLR photo of a ghost eating a hamburger.jpg ADDED Viewed

assets/40_prompt_images/a DSLR photo of a peacock on a surfboard.jpeg ADDED Viewed

assets/40_prompt_images/a DSLR photo of a squirrel playing guitar.jpg ADDED Viewed

assets/40_prompt_images/a DSLR photo of an eggshell broken in two with an adorable chick standing next to it.jpeg ADDED Viewed

assets/40_prompt_images/an astronaut riding a horse.jpeg ADDED Viewed

assets/40_prompt_images/animal skull pile.jpg ADDED Viewed

assets/40_prompt_images/army Jacket, 3D scan.jpg ADDED Viewed

assets/40_prompt_images/baby yoda in the style of Mormookiee.jpg ADDED Viewed

assets/40_prompt_images/beautiful, intricate butterfly.jpg ADDED Viewed

assets/40_prompt_images/girl riding wolf, cute, cartoon, blender.jpg ADDED Viewed

assets/40_prompt_images/mecha vampire girl chibi.jpg ADDED Viewed

assets/40_prompt_images/military Mech, future, scifi.jpg ADDED Viewed

assets/40_prompt_images/motorcycle, scifi, blender.jpeg ADDED Viewed

assets/40_prompt_images/saber from fate stay night, 3D, girl, anime.jpeg ADDED Viewed

install.sh ADDED Viewed

	@@ -0,0 +1,25 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+# This Script Assumes Python 3.8.19, CUDA 12.1. Similar package versions might still work but they are not tested.
+conda deactivate
+# Set environment variables
+export ENV_NAME=vfusion3d
+export PYTHON_VERSION=3.8.19
+export CUDA_VERSION=12.1
+# Create a new conda environment and activate it
+conda create -n $ENV_NAME python=$PYTHON_VERSION
+conda activate $ENV_NAME
+conda install pytorch=2.3.0 torchvision==0.18.0 pytorch-cuda=$CUDA_VERSION -c pytorch -c nvidia
+pip install transformers
+pip install imageio[ffmpeg]
+pip install PyMCubes
+pip install trimesh
+pip install rembg[gpu,cli]
+pip install kiui

lrm/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.

lrm/cam_utils.py ADDED Viewed

	@@ -0,0 +1,138 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+import torch
+import numpy as np
+import math
+"""
+R: (N, 3, 3)
+T: (N, 3)
+E: (N, 4, 4)
+vector: (N, 3)
+"""
+def compose_extrinsic_R_T(R: torch.Tensor, T: torch.Tensor):
+    """
+    Compose the standard form extrinsic matrix from R and T.
+    Batched I/O.
+    """
+    RT = torch.cat((R, T.unsqueeze(-1)), dim=-1)
+    return compose_extrinsic_RT(RT)
+def compose_extrinsic_RT(RT: torch.Tensor):
+    """
+    Compose the standard form extrinsic matrix from RT.
+    Batched I/O.
+    """
+    return torch.cat([
+        RT,
+        torch.tensor([[[0, 0, 0, 1]]], dtype=torch.float32).repeat(RT.shape[0], 1, 1).to(RT.device)
+        ], dim=1)
+def decompose_extrinsic_R_T(E: torch.Tensor):
+    """
+    Decompose the standard extrinsic matrix into R and T.
+    Batched I/O.
+    """
+    RT = decompose_extrinsic_RT(E)
+    return RT[:, :, :3], RT[:, :, 3]
+def decompose_extrinsic_RT(E: torch.Tensor):
+    """
+    Decompose the standard extrinsic matrix into RT.
+    Batched I/O.
+    """
+    return E[:, :3, :]
+def get_normalized_camera_intrinsics(intrinsics: torch.Tensor):
+    """
+    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
+    Return batched fx, fy, cx, cy
+    """
+    fx, fy = intrinsics[:, 0, 0], intrinsics[:, 0, 1]
+    cx, cy = intrinsics[:, 1, 0], intrinsics[:, 1, 1]
+    width, height = intrinsics[:, 2, 0], intrinsics[:, 2, 1]
+    fx, fy = fx / width, fy / height
+    cx, cy = cx / width, cy / height
+    return fx, fy, cx, cy
+def build_camera_principle(RT: torch.Tensor, intrinsics: torch.Tensor):
+    """
+    RT: (N, 3, 4)
+    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
+    """
+    fx, fy, cx, cy = get_normalized_camera_intrinsics(intrinsics)
+    return torch.cat([
+        RT.reshape(-1, 12),
+        fx.unsqueeze(-1), fy.unsqueeze(-1), cx.unsqueeze(-1), cy.unsqueeze(-1),
+    ], dim=-1)
+def build_camera_standard(RT: torch.Tensor, intrinsics: torch.Tensor):
+    """
+    RT: (N, 3, 4)
+    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
+    """
+    E = compose_extrinsic_RT(RT)
+    fx, fy, cx, cy = get_normalized_camera_intrinsics(intrinsics)
+    I = torch.stack([
+        torch.stack([fx, torch.zeros_like(fx), cx], dim=-1),
+        torch.stack([torch.zeros_like(fy), fy, cy], dim=-1),
+        torch.tensor([[0, 0, 1]], dtype=torch.float32, device=RT.device).repeat(RT.shape[0], 1),
+    ], dim=1)
+    return torch.cat([
+        E.reshape(-1, 16),
+        I.reshape(-1, 9),
+    ], dim=-1)
+def center_looking_at_camera_pose(camera_position: torch.Tensor, look_at: torch.Tensor = None, up_world: torch.Tensor = None):
+    """
+    camera_position: (M, 3)
+    look_at: (3)
+    up_world: (3)
+    return: (M, 3, 4)
+    """
+    # by default, looking at the origin and world up is pos-z
+    if look_at is None:
+        look_at = torch.tensor([0, 0, 0], dtype=torch.float32)
+    if up_world is None:
+        up_world = torch.tensor([0, 0, 1], dtype=torch.float32)
+    look_at = look_at.unsqueeze(0).repeat(camera_position.shape[0], 1)
+    up_world = up_world.unsqueeze(0).repeat(camera_position.shape[0], 1)
+    z_axis = camera_position - look_at
+    z_axis = z_axis / z_axis.norm(dim=-1, keepdim=True)
+    x_axis = torch.cross(up_world, z_axis)
+    x_axis = x_axis / x_axis.norm(dim=-1, keepdim=True)
+    y_axis = torch.cross(z_axis, x_axis)
+    y_axis = y_axis / y_axis.norm(dim=-1, keepdim=True)
+    extrinsics = torch.stack([x_axis, y_axis, z_axis, camera_position], dim=-1)
+    return extrinsics
+def get_surrounding_views(M, radius, elevation):
+#   convert spherical coordinates (radius, azimuth, elevation) to Cartesian coordinates (x, y, z).
+    camera_positions = []
+    rand_theta= np.random.uniform(0, np.pi/180)
+    elevation = math.radians(elevation)
+    for i in range(M):
+        theta = 2 * math.pi * i / M  + rand_theta
+        x = radius * math.cos(theta) * math.cos(elevation)
+        y = radius * math.sin(theta) * math.cos(elevation)
+        z =  radius * math.sin(elevation)
+        camera_positions.append([x, y, z])
+    camera_positions = torch.tensor(camera_positions, dtype=torch.float32)
+    extrinsics = center_looking_at_camera_pose(camera_positions)
+    return extrinsics

lrm/inferrer.py ADDED Viewed

	@@ -0,0 +1,232 @@

+import torch
+import math
+import os
+import imageio
+import mcubes
+import trimesh
+import numpy as np
+import argparse
+from torchvision.utils import save_image
+from PIL import Image
+import glob
+from .models.generator import LRMGenerator  # Make sure this import is correct
+from .cam_utils import build_camera_principle, build_camera_standard, center_looking_at_camera_pose  # Make sure this import is correct
+from functools import partial
+from rembg import remove, new_session
+from kiui.op import recenter
+import kiui
+class LRMInferrer:
+    def __init__(self, model_name: str, resume: str):
+        print("Initializing LRMInferrer")
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        _model_kwargs = {'camera_embed_dim': 1024, 'rendering_samples_per_ray': 128, 'transformer_dim': 1024, 'transformer_layers': 16, 'transformer_heads': 16, 'triplane_low_res': 32, 'triplane_high_res': 64, 'triplane_dim': 80, 'encoder_freeze': False}
+        self.model = self._build_model(_model_kwargs).eval().to(self.device)
+        checkpoint = torch.load(resume, map_location='cpu')
+        state_dict = checkpoint['model_state_dict']
+        self.model.load_state_dict(state_dict)
+        del checkpoint, state_dict
+        torch.cuda.empty_cache()
+    def __enter__(self):
+        print("Entering context")
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        print("Exiting context")
+        if exc_type:
+            print(f"Exception type: {exc_type}")
+            print(f"Exception value: {exc_val}")
+            print(f"Traceback: {exc_tb}")
+    def _build_model(self, model_kwargs):
+        print("Building model")
+        model = LRMGenerator(**model_kwargs).to(self.device)
+        print("Loaded model from checkpoint")
+        return model
+    @staticmethod
+    def get_surrounding_views(M, radius, elevation):
+        camera_positions = []
+        rand_theta = np.random.uniform(0, np.pi/180)
+        elevation = math.radians(elevation)
+        for i in range(M):
+            theta = 2 * math.pi * i / M + rand_theta
+            x = radius * math.cos(theta) * math.cos(elevation)
+            y = radius * math.sin(theta) * math.cos(elevation)
+            z = radius * math.sin(elevation)
+            camera_positions.append([x, y, z])
+        camera_positions = torch.tensor(camera_positions, dtype=torch.float32)
+        extrinsics = center_looking_at_camera_pose(camera_positions)
+        return extrinsics
+    @staticmethod
+    def _default_intrinsics():
+        fx = fy = 384
+        cx = cy = 256
+        w = h = 512
+        intrinsics = torch.tensor([
+            [fx, fy],
+            [cx, cy],
+            [w, h],
+        ], dtype=torch.float32)
+        return intrinsics
+    def _default_source_camera(self, batch_size: int = 1):
+        dist_to_center = 1.5
+        canonical_camera_extrinsics = torch.tensor([[
+            [0, 0, 1, 1],
+            [1, 0, 0, 0],
+            [0, 1, 0, 0],
+        ]], dtype=torch.float32)
+        canonical_camera_intrinsics = self._default_intrinsics().unsqueeze(0)
+        source_camera = build_camera_principle(canonical_camera_extrinsics, canonical_camera_intrinsics)
+        return source_camera.repeat(batch_size, 1)
+    def _default_render_cameras(self, batch_size: int = 1):
+        render_camera_extrinsics = self.get_surrounding_views(160, 1.5, 0)
+        render_camera_intrinsics = self._default_intrinsics().unsqueeze(0).repeat(render_camera_extrinsics.shape[0], 1, 1)
+        render_cameras = build_camera_standard(render_camera_extrinsics, render_camera_intrinsics)
+        return render_cameras.unsqueeze(0).repeat(batch_size, 1, 1)
+    @staticmethod
+    def images_to_video(images, output_path, fps, verbose=False):
+        os.makedirs(os.path.dirname(output_path), exist_ok=True)
+        frames = []
+        for i in range(images.shape[0]):
+            frame = (images[i].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
+            assert frame.shape[0] == images.shape[2] and frame.shape[1] == images.shape[3], \
+                f"Frame shape mismatch: {frame.shape} vs {images.shape}"
+            assert frame.min() >= 0 and frame.max() <= 255, \
+                f"Frame value out of range: {frame.min()} ~ {frame.max()}"
+            frames.append(frame)
+        imageio.mimwrite(output_path, np.stack(frames), fps=fps)
+        if verbose:
+            print(f"Saved video to {output_path}")
+    def infer_single(self, image: torch.Tensor, render_size: int, mesh_size: int, export_video: bool, export_mesh: bool):
+        print("infer_single called")
+        mesh_thres = 1.0
+        chunk_size = 2
+        batch_size = 1
+        source_camera = self._default_source_camera(batch_size).to(self.device)
+        render_cameras = self._default_render_cameras(batch_size).to(self.device)
+        with torch.no_grad():
+            planes = self.model.forward(image, source_camera)
+            results = {}
+            if export_video:
+                print("Starting export_video")
+                frames = []
+                for i in range(0, render_cameras.shape[1], chunk_size):
+                    print(f"Processing chunk {i} to {i + chunk_size}")
+                    frames.append(
+                        self.model.synthesizer(
+                            planes,
+                            render_cameras[:, i:i+chunk_size],
+                            render_size,
+                            render_size,
+                            0,
+                            0
+                        )
+                    )
+                frames = {
+                    k: torch.cat([r[k] for r in frames], dim=1)
+                    for k in frames[0].keys()
+                }
+                results.update({
+                    'frames': frames,
+                })
+                print("Finished export_video")
+            if export_mesh:
+                print("Starting export_mesh")
+                grid_out = self.model.synthesizer.forward_grid(
+                    planes=planes,
+                    grid_size=mesh_size,
+                )
+                vtx, faces = mcubes.marching_cubes(grid_out['sigma'].float().squeeze(0).squeeze(-1).cpu().numpy(), mesh_thres)
+                vtx = vtx / (mesh_size - 1) * 2 - 1
+                vtx_tensor = torch.tensor(vtx, dtype=torch.float32, device=self.device).unsqueeze(0)
+                vtx_colors = self.model.synthesizer.forward_points(planes, vtx_tensor)['rgb'].float().squeeze(0).cpu().numpy()
+                vtx_colors = (vtx_colors * 255).astype(np.uint8)
+                mesh = trimesh.Trimesh(vertices=vtx, faces=faces, vertex_colors=vtx_colors)
+                results.update({
+                    'mesh': mesh,
+                })
+                print("Finished export_mesh")
+            return results
+    def infer(self, source_image: str, dump_path: str, source_size: int, render_size: int, mesh_size: int, export_video: bool, export_mesh: bool):
+        print("infer called")
+        session = new_session("isnet-general-use")
+        rembg_remove = partial(remove, session=session)
+        image_name = os.path.basename(source_image)
+        uid = image_name.split('.')[0]
+        image = kiui.read_image(source_image, mode='uint8')
+        image = rembg_remove(image)
+        mask = rembg_remove(image, only_mask=True)
+        image = recenter(image, mask, border_ratio=0.20)
+        os.makedirs(dump_path, exist_ok=True)
+        image = torch.tensor(np.array(image)).permute(2, 0, 1).unsqueeze(0) / 255.0
+        if image.shape[1] == 4:
+            image = image[:, :3, ...] * image[:, 3:, ...] + (1 - image[:, 3:, ...])
+        image = torch.nn.functional.interpolate(image, size=(source_size, source_size), mode='bicubic', align_corners=True)
+        image = torch.clamp(image, 0, 1)
+        save_image(image, os.path.join(dump_path, f'{uid}.png'))
+        results = self.infer_single(
+            image.cuda(),
+            render_size=render_size,
+            mesh_size=mesh_size,
+            export_video=export_video,
+            export_mesh=export_mesh,
+        )
+        if 'frames' in results:
+            renderings = results['frames']
+            for k, v in renderings.items():
+                if k == 'images_rgb':
+                    self.images_to_video(
+                        v[0],
+                        os.path.join(dump_path, f'{uid}.mp4'),
+                        fps=40,
+                    )
+                    print(f"Export video success to {dump_path}")
+        if 'mesh' in results:
+            mesh = results['mesh']
+            mesh.export(os.path.join(dump_path, f'{uid}.obj'), 'obj')
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model_name', type=str, default='lrm-base-obj-v1')
+    parser.add_argument('--source_path', type=str, default='./assets/cat.png')
+    parser.add_argument('--dump_path', type=str, default='./results/single_image')
+    parser.add_argument('--source_size', type=int, default=512)
+    parser.add_argument('--render_size', type=int, default=384)
+    parser.add_argument('--mesh_size', type=int, default=512)
+    parser.add_argument('--export_video', action='store_true')
+    parser.add_argument('--export_mesh', action='store_true')
+    parser.add_argument('--resume', type=str, required=True, help='Path to a checkpoint to resume training from')
+    args = parser.parse_args()
+    with LRMInferrer(model_name=args.model_name, resume=args.resume) as inferrer:
+        with torch.autocast(device_type="cuda", cache_enabled=False, dtype=torch.float32):
+            print("Start inference for image:", args.source_path)
+            inferrer.infer(
+                source_image=args.source_path,
+                dump_path=args.dump_path,
+                source_size=args.source_size,
+                render_size=args.render_size,
+                mesh_size=args.mesh_size,
+                export_video=args.export_video,
+                export_mesh=args.export_mesh,
+            )
+            print("Finished inference for image:", args.source_path)

lrm/models/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.