lsnu committed
Commit 912c7e2 (verified) · 1 parent: e104169

Add files using upload-large-folder tool

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +11 -0
  2. third_party/PointFlowMatch/.gitignore +10 -0
  3. third_party/PointFlowMatch/LICENSE +674 -0
  4. third_party/PointFlowMatch/README.md +79 -0
  5. third_party/PointFlowMatch/conf/collect_demos_train.yaml +12 -0
  6. third_party/PointFlowMatch/conf/collect_demos_valid.yaml +12 -0
  7. third_party/PointFlowMatch/conf/eval.yaml +20 -0
  8. third_party/PointFlowMatch/conf/model/flow_so3delta.yaml +29 -0
  9. third_party/PointFlowMatch/conf/model/flow_target.yaml +29 -0
  10. third_party/PointFlowMatch/conf/train.yaml +53 -0
  11. third_party/PointFlowMatch/conf/trainer_eval.yaml +5 -0
  12. third_party/PointFlowMatch/pfp/__init__.py +30 -0
  13. third_party/PointFlowMatch/pfp/__pycache__/__init__.cpython-310.pyc +0 -0
  14. third_party/PointFlowMatch/pfp/backbones/__pycache__/pointnet.cpython-310.pyc +0 -0
  15. third_party/PointFlowMatch/pfp/backbones/mlp_3dp.py +42 -0
  16. third_party/PointFlowMatch/pfp/backbones/pointmlp.py +503 -0
  17. third_party/PointFlowMatch/pfp/backbones/pointnet.py +237 -0
  18. third_party/PointFlowMatch/pfp/backbones/resnet_dp.py +33 -0
  19. third_party/PointFlowMatch/pfp/common/__pycache__/fm_utils.cpython-310.pyc +0 -0
  20. third_party/PointFlowMatch/pfp/common/__pycache__/o3d_utils.cpython-310.pyc +0 -0
  21. third_party/PointFlowMatch/pfp/common/__pycache__/se3_utils.cpython-310.pyc +0 -0
  22. third_party/PointFlowMatch/pfp/common/__pycache__/visualization.cpython-310.pyc +0 -0
  23. third_party/PointFlowMatch/pfp/common/fm_utils.py +17 -0
  24. third_party/PointFlowMatch/pfp/common/o3d_utils.py +37 -0
  25. third_party/PointFlowMatch/pfp/common/se3_utils.py +180 -0
  26. third_party/PointFlowMatch/pfp/common/visualization.py +178 -0
  27. third_party/PointFlowMatch/pfp/data/__pycache__/dataset_pcd.cpython-310.pyc +0 -0
  28. third_party/PointFlowMatch/pfp/data/__pycache__/replay_buffer.cpython-310.pyc +0 -0
  29. third_party/PointFlowMatch/pfp/data/dataset_images.py +61 -0
  30. third_party/PointFlowMatch/pfp/data/dataset_pcd.py +105 -0
  31. third_party/PointFlowMatch/pfp/data/replay_buffer.py +38 -0
  32. third_party/PointFlowMatch/pfp/envs/__pycache__/base_env.cpython-310.pyc +0 -0
  33. third_party/PointFlowMatch/pfp/envs/__pycache__/rlbench_env.cpython-310.pyc +0 -0
  34. third_party/PointFlowMatch/pfp/envs/__pycache__/rlbench_runner.cpython-310.pyc +0 -0
  35. third_party/PointFlowMatch/pfp/envs/base_env.py +23 -0
  36. third_party/PointFlowMatch/pfp/envs/rlbench_env.py +247 -0
  37. third_party/PointFlowMatch/pfp/envs/rlbench_runner.py +46 -0
  38. third_party/PointFlowMatch/pfp/policy/__pycache__/base_policy.cpython-310.pyc +0 -0
  39. third_party/PointFlowMatch/pfp/policy/__pycache__/fm_policy.cpython-310.pyc +0 -0
  40. third_party/PointFlowMatch/pfp/policy/base_policy.py +79 -0
  41. third_party/PointFlowMatch/pfp/policy/ddim_policy.py +237 -0
  42. third_party/PointFlowMatch/pfp/policy/fm_5p_policy.py +290 -0
  43. third_party/PointFlowMatch/pfp/policy/fm_policy.py +298 -0
  44. third_party/PointFlowMatch/pfp/policy/fm_se3_policy.py +270 -0
  45. third_party/PointFlowMatch/pfp/policy/fm_so3_policy.py +341 -0
  46. third_party/PointFlowMatch/pfp/policy/fm_so3delta_policy.py +332 -0
  47. third_party/PointFlowMatch/pfp/policy/fm_target_policy.py +326 -0
  48. third_party/PointFlowMatch/pyproject.toml +46 -0
  49. third_party/PointFlowMatch/sandbox/augmentation.py +62 -0
  50. third_party/PointFlowMatch/sandbox/learning_rate.py +27 -0
.gitattributes CHANGED
@@ -245,3 +245,14 @@ third_party/AnyBimanual/third_party/RLBench/rlbench/assets/chopping_board.ttm fi
  third_party/AnyBimanual/third_party/RLBench/rlbench/assets/door.ttm filter=lfs diff=lfs merge=lfs -text
  third_party/AnyBimanual/third_party/RLBench/rlbench/assets/carrot.ttm filter=lfs diff=lfs merge=lfs -text
  third_party/AnyBimanual/third_party/RLBench/rlbench/assets/banana.ttm filter=lfs diff=lfs merge=lfs -text
+ third_party/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/marble1.png filter=lfs diff=lfs merge=lfs -text
+ third_party/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/wood1.png filter=lfs diff=lfs merge=lfs -text
+ third_party/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/franka_panda.png filter=lfs diff=lfs merge=lfs -text
+ third_party/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link7.stl filter=lfs diff=lfs merge=lfs -text
+ third_party/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link6.stl filter=lfs diff=lfs merge=lfs -text
+ third_party/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link5.stl filter=lfs diff=lfs merge=lfs -text
+ third_party/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link4.stl filter=lfs diff=lfs merge=lfs -text
+ third_party/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link3.stl filter=lfs diff=lfs merge=lfs -text
+ third_party/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link2.stl filter=lfs diff=lfs merge=lfs -text
+ third_party/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/hand.stl filter=lfs diff=lfs merge=lfs -text
+ third_party/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link1.stl filter=lfs diff=lfs merge=lfs -text
third_party/PointFlowMatch/.gitignore ADDED
@@ -0,0 +1,10 @@
+ **/__pycache__/**
+ **/outputs/**
+ **/multirun/**
+ **/wandb/**
+ **/ckpt/**
+ **/demos/**
+ **.html
+ **/toy_circle/results/**
+ *plot.png
+ *.svg
third_party/PointFlowMatch/LICENSE ADDED
@@ -0,0 +1,674 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+ Preamble
9
+
10
+ The GNU General Public License is a free, copyleft license for
11
+ software and other kinds of works.
12
+
13
+ The licenses for most software and other practical works are designed
14
+ to take away your freedom to share and change the works. By contrast,
15
+ the GNU General Public License is intended to guarantee your freedom to
16
+ share and change all versions of a program--to make sure it remains free
17
+ software for all its users. We, the Free Software Foundation, use the
18
+ GNU General Public License for most of our software; it applies also to
19
+ any other work released this way by its authors. You can apply it to
20
+ your programs, too.
21
+
22
+ When we speak of free software, we are referring to freedom, not
23
+ price. Our General Public Licenses are designed to make sure that you
24
+ have the freedom to distribute copies of free software (and charge for
25
+ them if you wish), that you receive source code or can get it if you
26
+ want it, that you can change the software or use pieces of it in new
27
+ free programs, and that you know you can do these things.
28
+
29
+ To protect your rights, we need to prevent others from denying you
30
+ these rights or asking you to surrender the rights. Therefore, you have
31
+ certain responsibilities if you distribute copies of the software, or if
32
+ you modify it: responsibilities to respect the freedom of others.
33
+
34
+ For example, if you distribute copies of such a program, whether
35
+ gratis or for a fee, you must pass on to the recipients the same
36
+ freedoms that you received. You must make sure that they, too, receive
37
+ or can get the source code. And you must show them these terms so they
38
+ know their rights.
39
+
40
+ Developers that use the GNU GPL protect your rights with two steps:
41
+ (1) assert copyright on the software, and (2) offer you this License
42
+ giving you legal permission to copy, distribute and/or modify it.
43
+
44
+ For the developers' and authors' protection, the GPL clearly explains
45
+ that there is no warranty for this free software. For both users' and
46
+ authors' sake, the GPL requires that modified versions be marked as
47
+ changed, so that their problems will not be attributed erroneously to
48
+ authors of previous versions.
49
+
50
+ Some devices are designed to deny users access to install or run
51
+ modified versions of the software inside them, although the manufacturer
52
+ can do so. This is fundamentally incompatible with the aim of
53
+ protecting users' freedom to change the software. The systematic
54
+ pattern of such abuse occurs in the area of products for individuals to
55
+ use, which is precisely where it is most unacceptable. Therefore, we
56
+ have designed this version of the GPL to prohibit the practice for those
57
+ products. If such problems arise substantially in other domains, we
58
+ stand ready to extend this provision to those domains in future versions
59
+ of the GPL, as needed to protect the freedom of users.
60
+
61
+ Finally, every program is threatened constantly by software patents.
62
+ States should not allow patents to restrict development and use of
63
+ software on general-purpose computers, but in those that do, we wish to
64
+ avoid the special danger that patents applied to a free program could
65
+ make it effectively proprietary. To prevent this, the GPL assures that
66
+ patents cannot be used to render the program non-free.
67
+
68
+ The precise terms and conditions for copying, distribution and
69
+ modification follow.
70
+
71
+ TERMS AND CONDITIONS
72
+
73
+ 0. Definitions.
74
+
75
+ "This License" refers to version 3 of the GNU General Public License.
76
+
77
+ "Copyright" also means copyright-like laws that apply to other kinds of
78
+ works, such as semiconductor masks.
79
+
80
+ "The Program" refers to any copyrightable work licensed under this
81
+ License. Each licensee is addressed as "you". "Licensees" and
82
+ "recipients" may be individuals or organizations.
83
+
84
+ To "modify" a work means to copy from or adapt all or part of the work
85
+ in a fashion requiring copyright permission, other than the making of an
86
+ exact copy. The resulting work is called a "modified version" of the
87
+ earlier work or a work "based on" the earlier work.
88
+
89
+ A "covered work" means either the unmodified Program or a work based
90
+ on the Program.
91
+
92
+ To "propagate" a work means to do anything with it that, without
93
+ permission, would make you directly or secondarily liable for
94
+ infringement under applicable copyright law, except executing it on a
95
+ computer or modifying a private copy. Propagation includes copying,
96
+ distribution (with or without modification), making available to the
97
+ public, and in some countries other activities as well.
98
+
99
+ To "convey" a work means any kind of propagation that enables other
100
+ parties to make or receive copies. Mere interaction with a user through
101
+ a computer network, with no transfer of a copy, is not conveying.
102
+
103
+ An interactive user interface displays "Appropriate Legal Notices"
104
+ to the extent that it includes a convenient and prominently visible
105
+ feature that (1) displays an appropriate copyright notice, and (2)
106
+ tells the user that there is no warranty for the work (except to the
107
+ extent that warranties are provided), that licensees may convey the
108
+ work under this License, and how to view a copy of this License. If
109
+ the interface presents a list of user commands or options, such as a
110
+ menu, a prominent item in the list meets this criterion.
111
+
112
+ 1. Source Code.
113
+
114
+ The "source code" for a work means the preferred form of the work
115
+ for making modifications to it. "Object code" means any non-source
116
+ form of a work.
117
+
118
+ A "Standard Interface" means an interface that either is an official
119
+ standard defined by a recognized standards body, or, in the case of
120
+ interfaces specified for a particular programming language, one that
121
+ is widely used among developers working in that language.
122
+
123
+ The "System Libraries" of an executable work include anything, other
124
+ than the work as a whole, that (a) is included in the normal form of
125
+ packaging a Major Component, but which is not part of that Major
126
+ Component, and (b) serves only to enable use of the work with that
127
+ Major Component, or to implement a Standard Interface for which an
128
+ implementation is available to the public in source code form. A
129
+ "Major Component", in this context, means a major essential component
130
+ (kernel, window system, and so on) of the specific operating system
131
+ (if any) on which the executable work runs, or a compiler used to
132
+ produce the work, or an object code interpreter used to run it.
133
+
134
+ The "Corresponding Source" for a work in object code form means all
135
+ the source code needed to generate, install, and (for an executable
136
+ work) run the object code and to modify the work, including scripts to
137
+ control those activities. However, it does not include the work's
138
+ System Libraries, or general-purpose tools or generally available free
139
+ programs which are used unmodified in performing those activities but
140
+ which are not part of the work. For example, Corresponding Source
141
+ includes interface definition files associated with source files for
142
+ the work, and the source code for shared libraries and dynamically
143
+ linked subprograms that the work is specifically designed to require,
144
+ such as by intimate data communication or control flow between those
145
+ subprograms and other parts of the work.
146
+
147
+ The Corresponding Source need not include anything that users
148
+ can regenerate automatically from other parts of the Corresponding
149
+ Source.
150
+
151
+ The Corresponding Source for a work in source code form is that
152
+ same work.
153
+
154
+ 2. Basic Permissions.
155
+
156
+ All rights granted under this License are granted for the term of
157
+ copyright on the Program, and are irrevocable provided the stated
158
+ conditions are met. This License explicitly affirms your unlimited
159
+ permission to run the unmodified Program. The output from running a
160
+ covered work is covered by this License only if the output, given its
161
+ content, constitutes a covered work. This License acknowledges your
162
+ rights of fair use or other equivalent, as provided by copyright law.
163
+
164
+ You may make, run and propagate covered works that you do not
165
+ convey, without conditions so long as your license otherwise remains
166
+ in force. You may convey covered works to others for the sole purpose
167
+ of having them make modifications exclusively for you, or provide you
168
+ with facilities for running those works, provided that you comply with
169
+ the terms of this License in conveying all material for which you do
170
+ not control copyright. Those thus making or running the covered works
171
+ for you must do so exclusively on your behalf, under your direction
172
+ and control, on terms that prohibit them from making any copies of
173
+ your copyrighted material outside their relationship with you.
174
+
175
+ Conveying under any other circumstances is permitted solely under
176
+ the conditions stated below. Sublicensing is not allowed; section 10
177
+ makes it unnecessary.
178
+
179
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180
+
181
+ No covered work shall be deemed part of an effective technological
182
+ measure under any applicable law fulfilling obligations under article
183
+ 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184
+ similar laws prohibiting or restricting circumvention of such
185
+ measures.
186
+
187
+ When you convey a covered work, you waive any legal power to forbid
188
+ circumvention of technological measures to the extent such circumvention
189
+ is effected by exercising rights under this License with respect to
190
+ the covered work, and you disclaim any intention to limit operation or
191
+ modification of the work as a means of enforcing, against the work's
192
+ users, your or third parties' legal rights to forbid circumvention of
193
+ technological measures.
194
+
195
+ 4. Conveying Verbatim Copies.
196
+
197
+ You may convey verbatim copies of the Program's source code as you
198
+ receive it, in any medium, provided that you conspicuously and
199
+ appropriately publish on each copy an appropriate copyright notice;
200
+ keep intact all notices stating that this License and any
201
+ non-permissive terms added in accord with section 7 apply to the code;
202
+ keep intact all notices of the absence of any warranty; and give all
203
+ recipients a copy of this License along with the Program.
204
+
205
+ You may charge any price or no price for each copy that you convey,
206
+ and you may offer support or warranty protection for a fee.
207
+
208
+ 5. Conveying Modified Source Versions.
209
+
210
+ You may convey a work based on the Program, or the modifications to
211
+ produce it from the Program, in the form of source code under the
212
+ terms of section 4, provided that you also meet all of these conditions:
213
+
214
+ a) The work must carry prominent notices stating that you modified
215
+ it, and giving a relevant date.
216
+
217
+ b) The work must carry prominent notices stating that it is
218
+ released under this License and any conditions added under section
219
+ 7. This requirement modifies the requirement in section 4 to
220
+ "keep intact all notices".
221
+
222
+ c) You must license the entire work, as a whole, under this
223
+ License to anyone who comes into possession of a copy. This
224
+ License will therefore apply, along with any applicable section 7
225
+ additional terms, to the whole of the work, and all its parts,
226
+ regardless of how they are packaged. This License gives no
227
+ permission to license the work in any other way, but it does not
228
+ invalidate such permission if you have separately received it.
229
+
230
+ d) If the work has interactive user interfaces, each must display
231
+ Appropriate Legal Notices; however, if the Program has interactive
232
+ interfaces that do not display Appropriate Legal Notices, your
233
+ work need not make them do so.
234
+
235
+ A compilation of a covered work with other separate and independent
236
+ works, which are not by their nature extensions of the covered work,
237
+ and which are not combined with it such as to form a larger program,
238
+ in or on a volume of a storage or distribution medium, is called an
239
+ "aggregate" if the compilation and its resulting copyright are not
240
+ used to limit the access or legal rights of the compilation's users
241
+ beyond what the individual works permit. Inclusion of a covered work
242
+ in an aggregate does not cause this License to apply to the other
243
+ parts of the aggregate.
244
+
245
+ 6. Conveying Non-Source Forms.
246
+
247
+ You may convey a covered work in object code form under the terms
248
+ of sections 4 and 5, provided that you also convey the
249
+ machine-readable Corresponding Source under the terms of this License,
250
+ in one of these ways:
251
+
252
+ a) Convey the object code in, or embodied in, a physical product
253
+ (including a physical distribution medium), accompanied by the
254
+ Corresponding Source fixed on a durable physical medium
255
+ customarily used for software interchange.
256
+
257
+ b) Convey the object code in, or embodied in, a physical product
258
+ (including a physical distribution medium), accompanied by a
259
+ written offer, valid for at least three years and valid for as
260
+ long as you offer spare parts or customer support for that product
261
+ model, to give anyone who possesses the object code either (1) a
262
+ copy of the Corresponding Source for all the software in the
263
+ product that is covered by this License, on a durable physical
264
+ medium customarily used for software interchange, for a price no
265
+ more than your reasonable cost of physically performing this
266
+ conveying of source, or (2) access to copy the
267
+ Corresponding Source from a network server at no charge.
268
+
269
+ c) Convey individual copies of the object code with a copy of the
270
+ written offer to provide the Corresponding Source. This
271
+ alternative is allowed only occasionally and noncommercially, and
272
+ only if you received the object code with such an offer, in accord
273
+ with subsection 6b.
274
+
275
+ d) Convey the object code by offering access from a designated
276
+ place (gratis or for a charge), and offer equivalent access to the
277
+ Corresponding Source in the same way through the same place at no
278
+ further charge. You need not require recipients to copy the
279
+ Corresponding Source along with the object code. If the place to
280
+ copy the object code is a network server, the Corresponding Source
281
+ may be on a different server (operated by you or a third party)
282
+ that supports equivalent copying facilities, provided you maintain
283
+ clear directions next to the object code saying where to find the
284
+ Corresponding Source. Regardless of what server hosts the
285
+ Corresponding Source, you remain obligated to ensure that it is
286
+ available for as long as needed to satisfy these requirements.
287
+
288
+ e) Convey the object code using peer-to-peer transmission, provided
289
+ you inform other peers where the object code and Corresponding
290
+ Source of the work are being offered to the general public at no
291
+ charge under subsection 6d.
292
+
293
+ A separable portion of the object code, whose source code is excluded
294
+ from the Corresponding Source as a System Library, need not be
295
+ included in conveying the object code work.
296
+
297
+ A "User Product" is either (1) a "consumer product", which means any
298
+ tangible personal property which is normally used for personal, family,
299
+ or household purposes, or (2) anything designed or sold for incorporation
300
+ into a dwelling. In determining whether a product is a consumer product,
301
+ doubtful cases shall be resolved in favor of coverage. For a particular
302
+ product received by a particular user, "normally used" refers to a
303
+ typical or common use of that class of product, regardless of the status
304
+ of the particular user or of the way in which the particular user
305
+ actually uses, or expects or is expected to use, the product. A product
306
+ is a consumer product regardless of whether the product has substantial
307
+ commercial, industrial or non-consumer uses, unless such uses represent
308
+ the only significant mode of use of the product.
309
+
310
+ "Installation Information" for a User Product means any methods,
311
+ procedures, authorization keys, or other information required to install
312
+ and execute modified versions of a covered work in that User Product from
313
+ a modified version of its Corresponding Source. The information must
314
+ suffice to ensure that the continued functioning of the modified object
315
+ code is in no case prevented or interfered with solely because
316
+ modification has been made.
317
+
318
+ If you convey an object code work under this section in, or with, or
319
+ specifically for use in, a User Product, and the conveying occurs as
320
+ part of a transaction in which the right of possession and use of the
321
+ User Product is transferred to the recipient in perpetuity or for a
322
+ fixed term (regardless of how the transaction is characterized), the
323
+ Corresponding Source conveyed under this section must be accompanied
324
+ by the Installation Information. But this requirement does not apply
325
+ if neither you nor any third party retains the ability to install
326
+ modified object code on the User Product (for example, the work has
327
+ been installed in ROM).
328
+
329
+ The requirement to provide Installation Information does not include a
330
+ requirement to continue to provide support service, warranty, or updates
331
+ for a work that has been modified or installed by the recipient, or for
332
+ the User Product in which it has been modified or installed. Access to a
333
+ network may be denied when the modification itself materially and
334
+ adversely affects the operation of the network or violates the rules and
335
+ protocols for communication across the network.
336
+
337
+ Corresponding Source conveyed, and Installation Information provided,
338
+ in accord with this section must be in a format that is publicly
339
+ documented (and with an implementation available to the public in
340
+ source code form), and must require no special password or key for
341
+ unpacking, reading or copying.
342
+
343
+ 7. Additional Terms.
344
+
345
+ "Additional permissions" are terms that supplement the terms of this
346
+ License by making exceptions from one or more of its conditions.
347
+ Additional permissions that are applicable to the entire Program shall
348
+ be treated as though they were included in this License, to the extent
349
+ that they are valid under applicable law. If additional permissions
350
+ apply only to part of the Program, that part may be used separately
351
+ under those permissions, but the entire Program remains governed by
352
+ this License without regard to the additional permissions.
353
+
354
+ When you convey a copy of a covered work, you may at your option
355
+ remove any additional permissions from that copy, or from any part of
356
+ it. (Additional permissions may be written to require their own
357
+ removal in certain cases when you modify the work.) You may place
358
+ additional permissions on material, added by you to a covered work,
359
+ for which you have or can give appropriate copyright permission.
360
+
361
+ Notwithstanding any other provision of this License, for material you
362
+ add to a covered work, you may (if authorized by the copyright holders of
363
+ that material) supplement the terms of this License with terms:
364
+
365
+ a) Disclaiming warranty or limiting liability differently from the
366
+ terms of sections 15 and 16 of this License; or
367
+
368
+ b) Requiring preservation of specified reasonable legal notices or
369
+ author attributions in that material or in the Appropriate Legal
370
+ Notices displayed by works containing it; or
371
+
372
+ c) Prohibiting misrepresentation of the origin of that material, or
373
+ requiring that modified versions of such material be marked in
374
+ reasonable ways as different from the original version; or
375
+
376
+ d) Limiting the use for publicity purposes of names of licensors or
377
+ authors of the material; or
378
+
379
+ e) Declining to grant rights under trademark law for use of some
380
+ trade names, trademarks, or service marks; or
381
+
382
+ f) Requiring indemnification of licensors and authors of that
383
+ material by anyone who conveys the material (or modified versions of
384
+ it) with contractual assumptions of liability to the recipient, for
385
+ any liability that these contractual assumptions directly impose on
386
+ those licensors and authors.
387
+
388
+ All other non-permissive additional terms are considered "further
389
+ restrictions" within the meaning of section 10. If the Program as you
390
+ received it, or any part of it, contains a notice stating that it is
391
+ governed by this License along with a term that is a further
392
+ restriction, you may remove that term. If a license document contains
393
+ a further restriction but permits relicensing or conveying under this
394
+ License, you may add to a covered work material governed by the terms
395
+ of that license document, provided that the further restriction does
396
+ not survive such relicensing or conveying.
397
+
398
+ If you add terms to a covered work in accord with this section, you
399
+ must place, in the relevant source files, a statement of the
400
+ additional terms that apply to those files, or a notice indicating
401
+ where to find the applicable terms.
402
+
403
+ Additional terms, permissive or non-permissive, may be stated in the
404
+ form of a separately written license, or stated as exceptions;
405
+ the above requirements apply either way.
406
+
407
+ 8. Termination.
408
+
409
+ You may not propagate or modify a covered work except as expressly
410
+ provided under this License. Any attempt otherwise to propagate or
411
+ modify it is void, and will automatically terminate your rights under
412
+ this License (including any patent licenses granted under the third
413
+ paragraph of section 11).
414
+
415
+ However, if you cease all violation of this License, then your
416
+ license from a particular copyright holder is reinstated (a)
417
+ provisionally, unless and until the copyright holder explicitly and
418
+ finally terminates your license, and (b) permanently, if the copyright
419
+ holder fails to notify you of the violation by some reasonable means
420
+ prior to 60 days after the cessation.
421
+
422
+ Moreover, your license from a particular copyright holder is
423
+ reinstated permanently if the copyright holder notifies you of the
424
+ violation by some reasonable means, this is the first time you have
425
+ received notice of violation of this License (for any work) from that
426
+ copyright holder, and you cure the violation prior to 30 days after
427
+ your receipt of the notice.
428
+
429
+ Termination of your rights under this section does not terminate the
430
+ licenses of parties who have received copies or rights from you under
431
+ this License. If your rights have been terminated and not permanently
432
+ reinstated, you do not qualify to receive new licenses for the same
433
+ material under section 10.
434
+
435
+ 9. Acceptance Not Required for Having Copies.
436
+
437
+ You are not required to accept this License in order to receive or
438
+ run a copy of the Program. Ancillary propagation of a covered work
439
+ occurring solely as a consequence of using peer-to-peer transmission
440
+ to receive a copy likewise does not require acceptance. However,
441
+ nothing other than this License grants you permission to propagate or
442
+ modify any covered work. These actions infringe copyright if you do
443
+ not accept this License. Therefore, by modifying or propagating a
444
+ covered work, you indicate your acceptance of this License to do so.
445
+
446
+ 10. Automatic Licensing of Downstream Recipients.
447
+
448
+ Each time you convey a covered work, the recipient automatically
449
+ receives a license from the original licensors, to run, modify and
450
+ propagate that work, subject to this License. You are not responsible
451
+ for enforcing compliance by third parties with this License.
452
+
453
+ An "entity transaction" is a transaction transferring control of an
454
+ organization, or substantially all assets of one, or subdividing an
455
+ organization, or merging organizations. If propagation of a covered
456
+ work results from an entity transaction, each party to that
457
+ transaction who receives a copy of the work also receives whatever
458
+ licenses to the work the party's predecessor in interest had or could
459
+ give under the previous paragraph, plus a right to possession of the
460
+ Corresponding Source of the work from the predecessor in interest, if
461
+ the predecessor has it or can get it with reasonable efforts.
462
+
463
+ You may not impose any further restrictions on the exercise of the
464
+ rights granted or affirmed under this License. For example, you may
465
+ not impose a license fee, royalty, or other charge for exercise of
466
+ rights granted under this License, and you may not initiate litigation
467
+ (including a cross-claim or counterclaim in a lawsuit) alleging that
468
+ any patent claim is infringed by making, using, selling, offering for
469
+ sale, or importing the Program or any portion of it.
470
+
471
+ 11. Patents.
472
+
473
+ A "contributor" is a copyright holder who authorizes use under this
474
+ License of the Program or a work on which the Program is based. The
475
+ work thus licensed is called the contributor's "contributor version".
476
+
477
+ A contributor's "essential patent claims" are all patent claims
478
+ owned or controlled by the contributor, whether already acquired or
479
+ hereafter acquired, that would be infringed by some manner, permitted
480
+ by this License, of making, using, or selling its contributor version,
481
+ but do not include claims that would be infringed only as a
482
+ consequence of further modification of the contributor version. For
483
+ purposes of this definition, "control" includes the right to grant
484
+ patent sublicenses in a manner consistent with the requirements of
485
+ this License.
486
+
487
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
488
+ patent license under the contributor's essential patent claims, to
489
+ make, use, sell, offer for sale, import and otherwise run, modify and
490
+ propagate the contents of its contributor version.
491
+
492
+ In the following three paragraphs, a "patent license" is any express
493
+ agreement or commitment, however denominated, not to enforce a patent
494
+ (such as an express permission to practice a patent or covenant not to
495
+ sue for patent infringement). To "grant" such a patent license to a
496
+ party means to make such an agreement or commitment not to enforce a
497
+ patent against the party.
498
+
499
+ If you convey a covered work, knowingly relying on a patent license,
500
+ and the Corresponding Source of the work is not available for anyone
501
+ to copy, free of charge and under the terms of this License, through a
502
+ publicly available network server or other readily accessible means,
503
+ then you must either (1) cause the Corresponding Source to be so
504
+ available, or (2) arrange to deprive yourself of the benefit of the
505
+ patent license for this particular work, or (3) arrange, in a manner
506
+ consistent with the requirements of this License, to extend the patent
507
+ license to downstream recipients. "Knowingly relying" means you have
508
+ actual knowledge that, but for the patent license, your conveying the
509
+ covered work in a country, or your recipient's use of the covered work
510
+ in a country, would infringe one or more identifiable patents in that
511
+ country that you have reason to believe are valid.
512
+
513
+ If, pursuant to or in connection with a single transaction or
514
+ arrangement, you convey, or propagate by procuring conveyance of, a
515
+ covered work, and grant a patent license to some of the parties
516
+ receiving the covered work authorizing them to use, propagate, modify
517
+ or convey a specific copy of the covered work, then the patent license
518
+ you grant is automatically extended to all recipients of the covered
519
+ work and works based on it.
520
+
521
+ A patent license is "discriminatory" if it does not include within
522
+ the scope of its coverage, prohibits the exercise of, or is
523
+ conditioned on the non-exercise of one or more of the rights that are
524
+ specifically granted under this License. You may not convey a covered
525
+ work if you are a party to an arrangement with a third party that is
526
+ in the business of distributing software, under which you make payment
527
+ to the third party based on the extent of your activity of conveying
528
+ the work, and under which the third party grants, to any of the
529
+ parties who would receive the covered work from you, a discriminatory
530
+ patent license (a) in connection with copies of the covered work
531
+ conveyed by you (or copies made from those copies), or (b) primarily
532
+ for and in connection with specific products or compilations that
533
+ contain the covered work, unless you entered into that arrangement,
534
+ or that patent license was granted, prior to 28 March 2007.
535
+
536
+ Nothing in this License shall be construed as excluding or limiting
537
+ any implied license or other defenses to infringement that may
538
+ otherwise be available to you under applicable patent law.
539
+
540
+ 12. No Surrender of Others' Freedom.
541
+
542
+ If conditions are imposed on you (whether by court order, agreement or
543
+ otherwise) that contradict the conditions of this License, they do not
544
+ excuse you from the conditions of this License. If you cannot convey a
545
+ covered work so as to satisfy simultaneously your obligations under this
546
+ License and any other pertinent obligations, then as a consequence you may
547
+ not convey it at all. For example, if you agree to terms that obligate you
548
+ to collect a royalty for further conveying from those to whom you convey
549
+ the Program, the only way you could satisfy both those terms and this
550
+ License would be to refrain entirely from conveying the Program.
551
+
552
+ 13. Use with the GNU Affero General Public License.
553
+
554
+ Notwithstanding any other provision of this License, you have
555
+ permission to link or combine any covered work with a work licensed
556
+ under version 3 of the GNU Affero General Public License into a single
557
+ combined work, and to convey the resulting work. The terms of this
558
+ License will continue to apply to the part which is the covered work,
559
+ but the special requirements of the GNU Affero General Public License,
560
+ section 13, concerning interaction through a network will apply to the
561
+ combination as such.
562
+
563
+ 14. Revised Versions of this License.
564
+
565
+ The Free Software Foundation may publish revised and/or new versions of
566
+ the GNU General Public License from time to time. Such new versions will
567
+ be similar in spirit to the present version, but may differ in detail to
568
+ address new problems or concerns.
569
+
570
+ Each version is given a distinguishing version number. If the
571
+ Program specifies that a certain numbered version of the GNU General
572
+ Public License "or any later version" applies to it, you have the
573
+ option of following the terms and conditions either of that numbered
574
+ version or of any later version published by the Free Software
575
+ Foundation. If the Program does not specify a version number of the
576
+ GNU General Public License, you may choose any version ever published
577
+ by the Free Software Foundation.
578
+
579
+ If the Program specifies that a proxy can decide which future
580
+ versions of the GNU General Public License can be used, that proxy's
581
+ public statement of acceptance of a version permanently authorizes you
582
+ to choose that version for the Program.
583
+
584
+ Later license versions may give you additional or different
585
+ permissions. However, no additional obligations are imposed on any
586
+ author or copyright holder as a result of your choosing to follow a
587
+ later version.
588
+
589
+ 15. Disclaimer of Warranty.
590
+
591
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592
+ APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593
+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594
+ OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596
+ PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597
+ IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598
+ ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599
+
600
+ 16. Limitation of Liability.
601
+
602
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604
+ THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605
+ GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606
+ USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608
+ PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609
+ EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610
+ SUCH DAMAGES.
611
+
612
+ 17. Interpretation of Sections 15 and 16.
613
+
614
+ If the disclaimer of warranty and limitation of liability provided
615
+ above cannot be given local legal effect according to their terms,
616
+ reviewing courts shall apply local law that most closely approximates
617
+ an absolute waiver of all civil liability in connection with the
618
+ Program, unless a warranty or assumption of liability accompanies a
619
+ copy of the Program in return for a fee.
620
+
621
+ END OF TERMS AND CONDITIONS
622
+
623
+ How to Apply These Terms to Your New Programs
624
+
625
+ If you develop a new program, and you want it to be of the greatest
626
+ possible use to the public, the best way to achieve this is to make it
627
+ free software which everyone can redistribute and change under these terms.
628
+
629
+ To do so, attach the following notices to the program. It is safest
630
+ to attach them to the start of each source file to most effectively
631
+ state the exclusion of warranty; and each file should have at least
632
+ the "copyright" line and a pointer to where the full notice is found.
633
+
634
+ <one line to give the program's name and a brief idea of what it does.>
635
+ Copyright (C) <year> <name of author>
636
+
637
+ This program is free software: you can redistribute it and/or modify
638
+ it under the terms of the GNU General Public License as published by
639
+ the Free Software Foundation, either version 3 of the License, or
640
+ (at your option) any later version.
641
+
642
+ This program is distributed in the hope that it will be useful,
643
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
644
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645
+ GNU General Public License for more details.
646
+
647
+ You should have received a copy of the GNU General Public License
648
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
649
+
650
+ Also add information on how to contact you by electronic and paper mail.
651
+
652
+ If the program does terminal interaction, make it output a short
653
+ notice like this when it starts in an interactive mode:
654
+
655
+ <program> Copyright (C) <year> <name of author>
656
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657
+ This is free software, and you are welcome to redistribute it
658
+ under certain conditions; type `show c' for details.
659
+
660
+ The hypothetical commands `show w' and `show c' should show the appropriate
661
+ parts of the General Public License. Of course, your program's commands
662
+ might be different; for a GUI interface, you would use an "about box".
663
+
664
+ You should also get your employer (if you work as a programmer) or school,
665
+ if any, to sign a "copyright disclaimer" for the program, if necessary.
666
+ For more information on this, and how to apply and follow the GNU GPL, see
667
+ <https://www.gnu.org/licenses/>.
668
+
669
+ The GNU General Public License does not permit incorporating your program
670
+ into proprietary programs. If your program is a subroutine library, you
671
+ may consider it more useful to permit linking proprietary applications with
672
+ the library. If this is what you want to do, use the GNU Lesser General
673
+ Public License instead of this License. But first, please read
674
+ <https://www.gnu.org/licenses/why-not-lgpl.html>.
third_party/PointFlowMatch/README.md ADDED
@@ -0,0 +1,79 @@
+ # PointFlowMatch: Learning Robotic Manipulation Policies from Point Clouds with Conditional Flow Matching
+
+ Repository providing the source code for the paper "Learning Robotic Manipulation Policies from Point Clouds with Conditional Flow Matching". See the [project website](http://pointflowmatch.cs.uni-freiburg.de/). Please cite the paper as follows:
+
+     @article{chisari2024learning,
+         title={Learning Robotic Manipulation Policies from Point Clouds with Conditional Flow Matching},
+         shorttitle={PointFlowMatch},
+         author={Chisari, Eugenio and Heppert, Nick and Argus, Max and Welschehold, Tim and Brox, Thomas and Valada, Abhinav},
+         journal={Conference on Robot Learning (CoRL)},
+         year={2024}
+     }
+
+ ## Installation
+
+ - Add env variables to your `.bashrc`
+
+ ```bash
+ export COPPELIASIM_ROOT=${HOME}/CoppeliaSim
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$COPPELIASIM_ROOT
+ export QT_QPA_PLATFORM_PLUGIN_PATH=$COPPELIASIM_ROOT
+ ```
+
+ - Install dependencies
+
+ ```bash
+ conda create --name pfp_env python=3.10
+ conda activate pfp_env
+ bash bash/install_deps.sh
+ bash bash/install_rlbench.sh
+
+ # Get diffusion_policy from my branch
+ cd ..
+ git clone git@github.com:chisarie/diffusion_policy.git && cd diffusion_policy && git checkout develop/eugenio
+ pip install -e ../diffusion_policy
+
+ # 3dp install
+ cd ..
+ git clone git@github.com:YanjieZe/3D-Diffusion-Policy.git && cd 3D-Diffusion-Policy
+ cd 3D-Diffusion-Policy && pip install -e . && cd ..
+
+ # If locally (doesn't work on Ubuntu 18):
+ pip install rerun-sdk==0.15.1
+ ```
+
+ ## Pretrained Weights Download
+
+ Here you can find the pretrained checkpoints of our PointFlowMatch policies for different RLBench environments. Download and unzip them in the `ckpt` folder.
+
+ | unplug charger | close door | open box | open fridge | frame hanger | open oven | books on shelf | shoes out of box |
+ | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- |
+ | [1717446544-didactic-woodpecker](http://pointflowmatch.cs.uni-freiburg.de/download/1717446544-didactic-woodpecker.zip) | [1717446607-uppish-grebe](http://pointflowmatch.cs.uni-freiburg.de/download/1717446607-uppish-grebe.zip) | [1717446558-qualified-finch](http://pointflowmatch.cs.uni-freiburg.de/download/1717446558-qualified-finch.zip) | [1717446565-astute-stingray](http://pointflowmatch.cs.uni-freiburg.de/download/1717446565-astute-stingray.zip) | [1717446708-analytic-cuckoo](http://pointflowmatch.cs.uni-freiburg.de/download/1717446708-analytic-cuckoo.zip) | [1717446706-natural-scallop](http://pointflowmatch.cs.uni-freiburg.de/download/1717446706-natural-scallop.zip) | [1717446594-astute-panda](http://pointflowmatch.cs.uni-freiburg.de/download/1717446594-astute-panda.zip) | [1717447341-indigo-quokka](http://pointflowmatch.cs.uni-freiburg.de/download/1717447341-indigo-quokka.zip) |
+
+ ## Evaluation
+
+ To reproduce the results from the paper, run:
+
+ ```bash
+ python scripts/evaluate.py log_wandb=True env_runner.env_config.vis=False policy.ckpt_name=<ckpt_name>
+ ```
+
+ Where `<ckpt_name>` is the folder name of the selected checkpoint. Each checkpoint will be automatically evaluated on the correct environment.
+
+ ## Training
+
+ To train your own policies instead of using the pretrained checkpoints, you first need to collect demonstrations:
+
+ ```bash
+ bash bash/collect_data.sh
+ ```
+
+ Then, you can train your own policies:
+
+ ```bash
+ python scripts/train.py log_wandb=True dataloader.num_workers=8 task_name=<task_name> +experiment=<experiment_name>
+ ```
+
+ Valid task names are all those supported by RLBench. In this work, we used the following tasks: `unplug_charger`, `close_door`, `open_box`, `open_fridge`, `take_frame_off_hanger`, `open_oven`, `put_books_on_bookshelf`, `take_shoes_out_of_box`.
+
+ Valid experiment names are the following, and they represent the different baselines we tested: `adaflow`, `diffusion_policy`, `dp3`, `pointflowmatch`, `pointflowmatch_images`, `pointflowmatch_ddim`, `pointflowmatch_so3`.
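As a quick aside on the "Pretrained Weights Download" step of the README above: the checkpoints are plain zip archives served from the URLs in the table, so they can also be fetched programmatically. The sketch below uses only the Python standard library; the run name is one entry from the table, and the assumption that each archive unpacks into its own run folder inside `ckpt/` follows the README's "download and unzip them in the `ckpt` folder" instruction rather than any documented archive layout.

```python
# Minimal sketch (not part of the repository): download one pretrained checkpoint into ckpt/.
import io
import pathlib
import urllib.request
import zipfile

name = "1717446544-didactic-woodpecker"  # unplug_charger run from the README table
url = f"http://pointflowmatch.cs.uni-freiburg.de/download/{name}.zip"
ckpt_root = pathlib.Path("ckpt")
ckpt_root.mkdir(exist_ok=True)
with urllib.request.urlopen(url) as resp:
    # Assumption: the archive contains a top-level folder named after the run.
    zipfile.ZipFile(io.BytesIO(resp.read())).extractall(ckpt_root)
```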
third_party/PointFlowMatch/conf/collect_demos_train.yaml ADDED
@@ -0,0 +1,12 @@
+ mode: train
+ seed: 1234
+ num_episodes: 100
+ save_data: False
+
+
+ env_config:
+   task_name: take_lid_off_saucepan
+   voxel_size: 0.01
+   n_points: 5500
+   headless: True
+   vis: True
third_party/PointFlowMatch/conf/collect_demos_valid.yaml ADDED
@@ -0,0 +1,12 @@
+ mode: valid
+ seed: 5678
+ num_episodes: 10
+ save_data: False
+
+
+ env_config:
+   task_name: open_fridge
+   voxel_size: 0.01
+   n_points: 5500
+   headless: True
+   vis: False
third_party/PointFlowMatch/conf/eval.yaml ADDED
@@ -0,0 +1,20 @@
+ seed: 5678
+ log_wandb: False
+
+ env_runner:
+   num_episodes: 100
+   max_episode_length: 200
+   verbose: True
+   env_config:
+     voxel_size: 0.01
+     headless: True
+     vis: True
+
+
+ policy:
+   ckpt_name: 1717446544-didactic-woodpecker
+   ckpt_episode: ep1500 # latest, ep1500, ep1000
+   num_k_infer: 50
+   # Uncomment the following to override settings used during training
+   # flow_schedule: linear # linear | cosine | exp
+   # exp_scale: 4.0
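The commented-out keys at the bottom of `conf/eval.yaml` hint at how evaluation settings can override what was used during training. A minimal sketch of inspecting and overriding this config outside the provided scripts, assuming OmegaConf (Hydra's config library) is installed; the override values are illustrative, not defaults from the repository:

```python
# Sketch only: load conf/eval.yaml and apply dotted overrides,
# mirroring what scripts/evaluate.py accepts on the command line.
from omegaconf import OmegaConf

cfg = OmegaConf.load("conf/eval.yaml")
overrides = OmegaConf.from_dotlist(
    ["policy.num_k_infer=10", "policy.ckpt_episode=latest"]  # illustrative values
)
cfg = OmegaConf.merge(cfg, overrides)
print(cfg.policy.ckpt_name, cfg.policy.num_k_infer)
```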
third_party/PointFlowMatch/conf/model/flow_so3delta.yaml ADDED
@@ -0,0 +1,29 @@
+ _target_: pfp.policy.fm_so3delta_policy.FMSO3DeltaPolicy
+ x_dim: ${x_dim}
+ y_dim: ${y_dim}
+ n_obs_steps: ${n_obs_steps}
+ n_pred_steps: ${n_pred_steps}
+ num_k_infer: 10
+ norm_pcd_center: [0.4, 0.0, 1.4]
+ augment_data: False
+ loss_type: l2 # l2 | l1
+ flow_schedule: exp # linear | cosine | exp
+ exp_scale: 4.0
+
+ obs_encoder: ${backbone}
+
+ diffusion_net:
+   _target_: diffusion_policy.model.diffusion.conditional_unet1d.ConditionalUnet1D
+   input_dim: ${y_dim}
+   # output_dim: 10
+   global_cond_dim: "${eval: '${x_dim} * ${n_obs_steps}'}"
+   diffusion_step_embed_dim: 256
+   down_dims: [256, 512, 1024]
+   kernel_size: 5
+   n_groups: 8
+   cond_predict_scale: True
+
+ loss_weights:
+   xyz: 10.0
+   rot6d: 10.0
+   grip: 1.0
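The `_target_` keys in this model config follow Hydra's object-instantiation convention: once the full config is composed (so that interpolations such as `${x_dim}`, `${n_obs_steps}`, and `${backbone}` resolve against `conf/train.yaml` and the selected backbone), the policy and its nested sub-modules can be built from it. A hedged sketch, assuming the repository uses `hydra.utils.instantiate` in the usual way; the composed `cfg` object and the resolver setup are assumptions, not shown in this diff:

```python
# Sketch only: build the policy from a composed Hydra config.
# Assumes `cfg` was produced by Hydra (e.g. via @hydra.main over conf/train.yaml),
# so all ${...} interpolations in the model node are resolvable.
from hydra.utils import instantiate

policy = instantiate(cfg.model)  # recursively builds FMSO3DeltaPolicy, its obs_encoder and diffusion_net
```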
third_party/PointFlowMatch/conf/model/flow_target.yaml ADDED
@@ -0,0 +1,29 @@
+ _target_: pfp.policy.fm_target_policy.FMTargetPolicy
+ x_dim: ${x_dim}
+ y_dim: ${y_dim}
+ n_obs_steps: ${n_obs_steps}
+ n_pred_steps: ${n_pred_steps}
+ num_k_infer: 10
+ time_conditioning: False
+ norm_pcd_center: [0.4, 0.0, 1.4]
+ augment_data: False
+ loss_type: l2 # l2 | l1
+ flow_schedule: exp # linear | cosine | exp
+ exp_scale: 4.0
+
+ obs_encoder: ${backbone}
+
+ diffusion_net:
+   _target_: diffusion_policy.model.diffusion.conditional_unet1d.ConditionalUnet1D
+   input_dim: ${y_dim}
+   global_cond_dim: "${eval: '${x_dim} * ${n_obs_steps}'}"
+   diffusion_step_embed_dim: "${eval: '256 if ${model.time_conditioning} else 0'}"
+   down_dims: [256, 512, 1024]
+   kernel_size: 5
+   n_groups: 8
+   cond_predict_scale: True
+
+ loss_weights:
+   xyz: 10.0
+   rot6d: 10.0
+   grip: 1.0
third_party/PointFlowMatch/conf/train.yaml ADDED
@@ -0,0 +1,53 @@
+ seed: 1234
+ epochs: 1500
+ log_wandb: False
+ task_name: unplug_charger
+ obs_features_dim: 256
+ y_dim: 10 # (xyz, rot6d, g)
+ x_dim: "${eval: '${obs_features_dim} + ${y_dim}'}"
+ n_obs_steps: 2
+ n_pred_steps: 32 # Must be divisible by 4
+ use_ema: True
+ save_each_n_epochs: 500
+ obs_mode: pcd # pcd | rgb
+ run_name: null # set this to continue training from previous ckpt
+
+
+ # env_runner:
+ #   num_episodes: 20
+ #   max_episode_length: 200
+ #   task_name: ${task_name}
+ #   env_config:
+ #     seed: 1996
+ #     lowdim_obs: False
+
+
+ dataset:
+   n_obs_steps: ${n_obs_steps}
+   n_pred_steps: ${n_pred_steps}
+   subs_factor: 3
+   use_pc_color: False
+   n_points: 4096
+
+
+ dataloader:
+   batch_size: 128
+   num_workers: 0
+   # pin_memory: True
+
+
+ optimizer:
+   _target_: torch.optim.AdamW
+   lr: 3.0e-5
+   betas: [0.95, 0.999]
+   eps: 1.0e-8
+   weight_decay: 1.0e-6
+
+ lr_scheduler:
+   name: cosine # constant | cosine | linear | ...
+   num_warmup_steps: 5000
+
+
+ defaults:
+   - model: flow
+   - backbone: pointnet
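Two details of this training config are worth unpacking. The `defaults` list is composed by Hydra and pulls in `conf/model/flow.yaml` and `conf/backbone/pointnet.yaml` (neither is among the 50 files shown in this view). The `x_dim` entry uses an `${eval: ...}` interpolation, which is not built into OmegaConf; projects using this pattern register a custom resolver before the config is read. A hedged sketch of how `x_dim` would resolve under that assumption (where exactly the repository registers the resolver is not visible in this diff):

```python
# Sketch only: make the ${eval: ...} interpolation in conf/train.yaml resolvable.
from omegaconf import OmegaConf

OmegaConf.register_new_resolver("eval", eval)  # assumed to happen once at startup
cfg = OmegaConf.load("conf/train.yaml")        # note: the `defaults` list is composed by Hydra, not by load()
print(cfg.x_dim)                               # eval('256 + 10') -> 266 with the values above
```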
third_party/PointFlowMatch/conf/trainer_eval.yaml ADDED
@@ -0,0 +1,5 @@
+ seed: 5678
+ log_wandb: False
+ run_name: 1716560279-subtle-kestrel # previous ckpt
+ model:
+   num_k_infer: 5
third_party/PointFlowMatch/pfp/__init__.py ADDED
@@ -0,0 +1,30 @@
+ import torch
+ import random
+ import pathlib
+ import numpy as np
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class DATA_DIRS:
+     ROOT = pathlib.Path(__file__).parents[1] / "demos"
+     PFP = ROOT / "sim"
+     PFP_REAL = ROOT / "real"
+
+
+ @dataclass
+ class REPO_DIRS:
+     ROOT = pathlib.Path(__file__).parents[1]
+     CKPT = ROOT / "ckpt"
+     URDFS = ROOT / "urdfs"
+
+
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+ def set_seeds(seed=0):
+     """Sets all seeds."""
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     np.random.seed(seed)
+     random.seed(seed)
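A small usage sketch of these package-level helpers. The checkpoint folder name comes from the README's pretrained-weights table; the task sub-folder under the demo directory is a hypothetical layout, not something this diff documents:

```python
# Sketch only: typical use of the pfp package-level constants and set_seeds.
from pfp import DATA_DIRS, DEVICE, REPO_DIRS, set_seeds

set_seeds(1234)                                               # seeds torch, CUDA, numpy and random
ckpt_dir = REPO_DIRS.CKPT / "1717446544-didactic-woodpecker"  # unzipped checkpoint folder
demo_dir = DATA_DIRS.PFP / "unplug_charger"                   # hypothetical per-task demo folder
print(DEVICE, ckpt_dir, demo_dir)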
third_party/PointFlowMatch/pfp/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.09 kB).
third_party/PointFlowMatch/pfp/backbones/__pycache__/pointnet.cpython-310.pyc ADDED
Binary file (7.6 kB).
third_party/PointFlowMatch/pfp/backbones/mlp_3dp.py ADDED
@@ -0,0 +1,42 @@
+ import torch
+ import torch.nn as nn
+ from diffusion_policy_3d.model.vision.pointnet_extractor import (
+     PointNetEncoderXYZRGB,
+     PointNetEncoderXYZ,
+ )
+
+
+ class MLP3DP(nn.Module):
+     def __init__(self, in_channels: int, out_channels: int):
+         super().__init__()
+         if in_channels == 3:
+             self.backbone = PointNetEncoderXYZ(
+                 in_channels=in_channels,
+                 out_channels=out_channels,
+                 use_layernorm=True,
+                 final_norm="layernorm",
+                 normal_channel=False,
+             )
+         elif in_channels == 6:
+             self.backbone = PointNetEncoderXYZRGB(
+                 in_channels=in_channels,
+                 out_channels=out_channels,
+                 use_layernorm=True,
+                 final_norm="layernorm",
+                 normal_channel=False,
+             )
+         else:
+             raise ValueError("Invalid number of input channels for MLP3DP")
+         return
+
+     def forward(self, pcd: torch.Tensor, robot_state_obs: torch.Tensor = None) -> torch.Tensor:
+         B = pcd.shape[0]
+         # Flatten the batch and time dimensions
+         pcd = pcd.float().reshape(-1, *pcd.shape[2:])
+         robot_state_obs = robot_state_obs.float().reshape(-1, *robot_state_obs.shape[2:])
+         # Encode all point clouds (across time steps and batch size)
+         encoded_pcd = self.backbone(pcd)
+         nx = torch.cat([encoded_pcd, robot_state_obs], dim=1)
+         # Reshape back to the batch dimension. Now the features of each time step are concatenated
+         nx = nx.reshape(B, -1)
+         return nx
third_party/PointFlowMatch/pfp/backbones/pointmlp.py ADDED
@@ -0,0 +1,503 @@
1
+ """ Adapted from https://github.com/ma-xu/pointMLP-pytorch/blob/main/classification_ScanObjectNN/models/pointmlp.py """
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+ from pytorch3d.ops import sample_farthest_points, knn_points
7
+
8
+
9
+ def get_activation(activation):
10
+ if activation.lower() == "gelu":
11
+ return nn.GELU()
12
+ elif activation.lower() == "rrelu":
13
+ return nn.RReLU(inplace=True)
14
+ elif activation.lower() == "selu":
15
+ return nn.SELU(inplace=True)
16
+ elif activation.lower() == "silu":
17
+ return nn.SiLU(inplace=True)
18
+ elif activation.lower() == "hardswish":
19
+ return nn.Hardswish(inplace=True)
20
+ elif activation.lower() == "leakyrelu":
21
+ return nn.LeakyReLU(inplace=True)
22
+ else:
23
+ return nn.ReLU(inplace=True)
24
+
25
+
26
+ def square_distance(src, dst):
27
+ """
28
+ Calculate the squared Euclidean distance between each pair of points.
29
+ src^T * dst = xn * xm + yn * ym + zn * zm;
30
+ sum(src^2, dim=-1) = xn*xn + yn*yn + zn*zn;
31
+ sum(dst^2, dim=-1) = xm*xm + ym*ym + zm*zm;
32
+ dist = (xn - xm)^2 + (yn - ym)^2 + (zn - zm)^2
33
+ = sum(src**2,dim=-1)+sum(dst**2,dim=-1)-2*src^T*dst
34
+ Input:
35
+ src: source points, [B, N, C]
36
+ dst: target points, [B, M, C]
37
+ Output:
38
+ dist: per-point square distance, [B, N, M]
39
+ """
40
+ B, N, _ = src.shape
41
+ _, M, _ = dst.shape
42
+ dist = -2 * torch.matmul(src, dst.permute(0, 2, 1))
43
+ dist += torch.sum(src**2, -1).view(B, N, 1)
44
+ dist += torch.sum(dst**2, -1).view(B, 1, M)
45
+ return dist
46
+
47
+
48
+ def index_points(points, idx):
49
+ """
50
+ Input:
51
+ points: input points data, [B, N, C]
52
+ idx: sample index data, [B, S]
53
+ Return:
54
+ new_points:, indexed points data, [B, S, C]
55
+ """
56
+ device = points.device
57
+ B = points.shape[0]
58
+ view_shape = list(idx.shape)
59
+ view_shape[1:] = [1] * (len(view_shape) - 1)
60
+ repeat_shape = list(idx.shape)
61
+ repeat_shape[0] = 1
62
+ batch_indices = (
63
+ torch.arange(B, dtype=torch.long).to(device).view(view_shape).repeat(repeat_shape)
64
+ )
65
+ new_points = points[batch_indices, idx, :]
66
+ return new_points
67
+
68
+
69
+ def farthest_point_sample(xyz, npoint):
70
+ """
71
+ Input:
72
+ xyz: pointcloud data, [B, N, 3]
73
+ npoint: number of samples
74
+ Return:
75
+ centroids: sampled pointcloud index, [B, npoint]
76
+ """
77
+ device = xyz.device
78
+ B, N, C = xyz.shape
79
+ centroids = torch.zeros(B, npoint, dtype=torch.long).to(device)
80
+ distance = torch.ones(B, N).to(device) * 1e10
81
+ farthest = torch.randint(0, N, (B,), dtype=torch.long).to(device)
82
+ batch_indices = torch.arange(B, dtype=torch.long).to(device)
83
+ for i in range(npoint):
84
+ centroids[:, i] = farthest
85
+ centroid = xyz[batch_indices, farthest, :].view(B, 1, 3)
86
+ dist = torch.sum((xyz - centroid) ** 2, -1)
87
+ distance = torch.min(distance, dist)
88
+ farthest = torch.max(distance, -1)[1]
89
+ return centroids
90
+
91
+
92
+ def query_ball_point(radius, nsample, xyz, new_xyz):
93
+ """
94
+ Input:
95
+ radius: local region radius
96
+ nsample: max sample number in local region
97
+ xyz: all points, [B, N, 3]
98
+ new_xyz: query points, [B, S, 3]
99
+ Return:
100
+ group_idx: grouped points index, [B, S, nsample]
101
+ """
102
+ device = xyz.device
103
+ B, N, C = xyz.shape
104
+ _, S, _ = new_xyz.shape
105
+ group_idx = torch.arange(N, dtype=torch.long).to(device).view(1, 1, N).repeat([B, S, 1])
106
+ sqrdists = square_distance(new_xyz, xyz)
107
+ group_idx[sqrdists > radius**2] = N
108
+ group_idx = group_idx.sort(dim=-1)[0][:, :, :nsample]
109
+ group_first = group_idx[:, :, 0].view(B, S, 1).repeat([1, 1, nsample])
110
+ mask = group_idx == N
111
+ group_idx[mask] = group_first[mask]
112
+ return group_idx
113
+
114
+
115
+ def knn_point(nsample, xyz, new_xyz):
116
+ """
117
+ Input:
118
+ nsample: max sample number in local region
119
+ xyz: all points, [B, N, C]
120
+ new_xyz: query points, [B, S, C]
121
+ Return:
122
+ group_idx: grouped points index, [B, S, nsample]
123
+ """
124
+ sqrdists = square_distance(new_xyz, xyz)
125
+ _, group_idx = torch.topk(sqrdists, nsample, dim=-1, largest=False, sorted=False)
126
+ return group_idx
127
+
128
+
129
+ class LocalGrouper(nn.Module):
130
+ def __init__(self, channel, groups, kneighbors, use_xyz=True, normalize="center", **kwargs):
131
+ """
132
+ Give xyz[b,p,3] and fea[b,p,d], return new_xyz[b,g,3] and new_fea[b,g,k,d]
133
+ :param groups: groups number
134
+ :param kneighbors: k-neighbors
135
+ :param kwargs: others
136
+ """
137
+ super(LocalGrouper, self).__init__()
138
+ self.groups = groups
139
+ self.kneighbors = kneighbors
140
+ self.use_xyz = use_xyz
141
+ if normalize is not None:
142
+ self.normalize = normalize.lower()
143
+ else:
144
+ self.normalize = None
145
+ if self.normalize not in ["center", "anchor"]:
146
+ print(
147
+ "Unrecognized normalize parameter (self.normalize), set to None. Should be one of [center, anchor]."
148
+ )
149
+ self.normalize = None
150
+ if self.normalize is not None:
151
+ add_channel = 3 if self.use_xyz else 0
152
+ self.affine_alpha = nn.Parameter(torch.ones([1, 1, 1, channel + add_channel]))
153
+ self.affine_beta = nn.Parameter(torch.zeros([1, 1, 1, channel + add_channel]))
154
+
155
+ def forward(self, xyz, points):
156
+ B, N, C = xyz.shape
157
+ S = self.groups
158
+ xyz = xyz.contiguous() # xyz [batch, points, xyz]
159
+
160
+ # fps_idx = torch.multinomial(torch.linspace(0, N - 1, steps=N).repeat(B, 1).to(xyz.device), num_samples=self.groups, replacement=False).long()
161
+ # fps_idx = farthest_point_sample(xyz, self.groups).long()
162
+ # fps_idx = pointnet2_utils.furthest_point_sample(xyz, self.groups).long() # [B, npoint]
163
+ new_xyz, fps_idx = sample_farthest_points(xyz, K=self.groups)
164
+ # new_xyz = index_points(xyz, fps_idx) # [B, npoint, 3]
165
+ new_points = index_points(points, fps_idx) # [B, npoint, d]
166
+
167
+ # idx = knn_point(self.kneighbors, xyz, new_xyz)
168
+ _, idx, _ = knn_points(new_xyz, xyz, K=self.kneighbors, return_nn=False)
169
+ # idx = query_ball_point(radius, nsample, xyz, new_xyz)
170
+ grouped_points = index_points(points, idx) # [B, npoint, k, d]
171
+ if self.use_xyz:
172
+ grouped_xyz = index_points(xyz, idx) # [B, npoint, k, 3]
173
+ grouped_points = torch.cat([grouped_points, grouped_xyz], dim=-1) # [B, npoint, k, d+3]
174
+ if self.normalize is not None:
175
+ if self.normalize == "center":
176
+ mean = torch.mean(grouped_points, dim=2, keepdim=True)
177
+ if self.normalize == "anchor":
178
+ mean = torch.cat([new_points, new_xyz], dim=-1) if self.use_xyz else new_points
179
+ mean = mean.unsqueeze(dim=-2) # [B, npoint, 1, d+3]
180
+ std = (
181
+ torch.std((grouped_points - mean).reshape(B, -1), dim=-1, keepdim=True)
182
+ .unsqueeze(dim=-1)
183
+ .unsqueeze(dim=-1)
184
+ )
185
+ grouped_points = (grouped_points - mean) / (std + 1e-5)
186
+ grouped_points = self.affine_alpha * grouped_points + self.affine_beta
187
+
188
+ new_points = torch.cat(
189
+ [grouped_points, new_points.view(B, S, 1, -1).repeat(1, 1, self.kneighbors, 1)], dim=-1
190
+ )
191
+ return new_xyz, new_points
192
+
193
+
194
+ class ConvBNReLU1D(nn.Module):
195
+ def __init__(self, in_channels, out_channels, kernel_size=1, bias=True, activation="relu"):
196
+ super(ConvBNReLU1D, self).__init__()
197
+ self.act = get_activation(activation)
198
+ self.net = nn.Sequential(
199
+ nn.Conv1d(
200
+ in_channels=in_channels,
201
+ out_channels=out_channels,
202
+ kernel_size=kernel_size,
203
+ bias=bias,
204
+ ),
205
+ nn.BatchNorm1d(out_channels),
206
+ self.act,
207
+ )
208
+
209
+ def forward(self, x):
210
+ return self.net(x)
211
+
212
+
213
+ class ConvBNReLURes1D(nn.Module):
214
+ def __init__(
215
+ self, channel, kernel_size=1, groups=1, res_expansion=1.0, bias=True, activation="relu"
216
+ ):
217
+ super(ConvBNReLURes1D, self).__init__()
218
+ self.act = get_activation(activation)
219
+ self.net1 = nn.Sequential(
220
+ nn.Conv1d(
221
+ in_channels=channel,
222
+ out_channels=int(channel * res_expansion),
223
+ kernel_size=kernel_size,
224
+ groups=groups,
225
+ bias=bias,
226
+ ),
227
+ nn.BatchNorm1d(int(channel * res_expansion)),
228
+ self.act,
229
+ )
230
+ if groups > 1:
231
+ self.net2 = nn.Sequential(
232
+ nn.Conv1d(
233
+ in_channels=int(channel * res_expansion),
234
+ out_channels=channel,
235
+ kernel_size=kernel_size,
236
+ groups=groups,
237
+ bias=bias,
238
+ ),
239
+ nn.BatchNorm1d(channel),
240
+ self.act,
241
+ nn.Conv1d(
242
+ in_channels=channel, out_channels=channel, kernel_size=kernel_size, bias=bias
243
+ ),
244
+ nn.BatchNorm1d(channel),
245
+ )
246
+ else:
247
+ self.net2 = nn.Sequential(
248
+ nn.Conv1d(
249
+ in_channels=int(channel * res_expansion),
250
+ out_channels=channel,
251
+ kernel_size=kernel_size,
252
+ bias=bias,
253
+ ),
254
+ nn.BatchNorm1d(channel),
255
+ )
256
+
257
+ def forward(self, x):
258
+ return self.act(self.net2(self.net1(x)) + x)
259
+
260
+
261
+ class PreExtraction(nn.Module):
262
+ def __init__(
263
+ self,
264
+ channels,
265
+ out_channels,
266
+ blocks=1,
267
+ groups=1,
268
+ res_expansion=1,
269
+ bias=True,
270
+ activation="relu",
271
+ use_xyz=True,
272
+ ):
273
+ """
274
+ input: [b,g,k,d]: output:[b,d,g]
275
+ :param channels:
276
+ :param blocks:
277
+ """
278
+ super(PreExtraction, self).__init__()
279
+ in_channels = 3 + 2 * channels if use_xyz else 2 * channels
280
+ self.transfer = ConvBNReLU1D(in_channels, out_channels, bias=bias, activation=activation)
281
+ operation = []
282
+ for _ in range(blocks):
283
+ operation.append(
284
+ ConvBNReLURes1D(
285
+ out_channels,
286
+ groups=groups,
287
+ res_expansion=res_expansion,
288
+ bias=bias,
289
+ activation=activation,
290
+ )
291
+ )
292
+ self.operation = nn.Sequential(*operation)
293
+
294
+ def forward(self, x):
295
+ b, n, s, d = x.size() # torch.Size([32, 512, 32, 6])
296
+ x = x.permute(0, 1, 3, 2)
297
+ x = x.reshape(-1, d, s)
298
+ x = self.transfer(x)
299
+ batch_size, _, _ = x.size()
300
+ x = self.operation(x) # [b, d, k]
301
+ x = F.adaptive_max_pool1d(x, 1).view(batch_size, -1)
302
+ x = x.reshape(b, n, -1).permute(0, 2, 1)
303
+ return x
304
+
305
+
306
+ class PosExtraction(nn.Module):
307
+ def __init__(self, channels, blocks=1, groups=1, res_expansion=1, bias=True, activation="relu"):
308
+ """
309
+ input[b,d,g]; output[b,d,g]
310
+ :param channels:
311
+ :param blocks:
312
+ """
313
+ super(PosExtraction, self).__init__()
314
+ operation = []
315
+ for _ in range(blocks):
316
+ operation.append(
317
+ ConvBNReLURes1D(
318
+ channels,
319
+ groups=groups,
320
+ res_expansion=res_expansion,
321
+ bias=bias,
322
+ activation=activation,
323
+ )
324
+ )
325
+ self.operation = nn.Sequential(*operation)
326
+
327
+ def forward(self, x): # [b, d, g]
328
+ return self.operation(x)
329
+
330
+
331
+ class Model(nn.Module):
332
+ def __init__(
333
+ self,
334
+ points=1024,
335
+ input_channels=3,
336
+ embed_dim=64,
337
+ groups=1,
338
+ res_expansion=1.0,
339
+ activation="relu",
340
+ bias=True,
341
+ use_xyz=True,
342
+ normalize="center",
343
+ dim_expansion=[2, 2, 2, 2],
344
+ pre_blocks=[2, 2, 2, 2],
345
+ pos_blocks=[2, 2, 2, 2],
346
+ k_neighbors=[32, 32, 32, 32],
347
+ reducers=[2, 2, 2, 2],
348
+ **kwargs,
349
+ ):
350
+ super(Model, self).__init__()
351
+ self.stages = len(pre_blocks)
352
+ self.points = points
353
+ self.embedding = ConvBNReLU1D(input_channels, embed_dim, bias=bias, activation=activation)
354
+ assert (
355
+ len(pre_blocks)
356
+ == len(k_neighbors)
357
+ == len(reducers)
358
+ == len(pos_blocks)
359
+ == len(dim_expansion)
360
+ ), "Please check stage number consistent for pre_blocks, pos_blocks k_neighbors, reducers."
361
+ self.local_grouper_list = nn.ModuleList()
362
+ self.pre_blocks_list = nn.ModuleList()
363
+ self.pos_blocks_list = nn.ModuleList()
364
+ last_channel = embed_dim
365
+ anchor_points = self.points
366
+ for i in range(len(pre_blocks)):
367
+ out_channel = last_channel * dim_expansion[i]
368
+ pre_block_num = pre_blocks[i]
369
+ pos_block_num = pos_blocks[i]
370
+ kneighbor = k_neighbors[i]
371
+ reduce = reducers[i]
372
+ anchor_points = anchor_points // reduce
373
+ # append local_grouper_list
374
+ local_grouper = LocalGrouper(
375
+ last_channel, anchor_points, kneighbor, use_xyz, normalize
376
+ ) # [b,g,k,d]
377
+ self.local_grouper_list.append(local_grouper)
378
+ # append pre_block_list
379
+ pre_block_module = PreExtraction(
380
+ last_channel,
381
+ out_channel,
382
+ pre_block_num,
383
+ groups=groups,
384
+ res_expansion=res_expansion,
385
+ bias=bias,
386
+ activation=activation,
387
+ use_xyz=use_xyz,
388
+ )
389
+ self.pre_blocks_list.append(pre_block_module)
390
+ # append pos_block_list
391
+ pos_block_module = PosExtraction(
392
+ out_channel,
393
+ pos_block_num,
394
+ groups=groups,
395
+ res_expansion=res_expansion,
396
+ bias=bias,
397
+ activation=activation,
398
+ )
399
+ self.pos_blocks_list.append(pos_block_module)
400
+
401
+ last_channel = out_channel
402
+
403
+ self.act = get_activation(activation)
404
+ return
405
+
406
+ def forward(self, x):
407
+ xyz = x.permute(0, 2, 1)
408
+ batch_size, _, _ = x.size()
409
+ x = self.embedding(x) # B,D,N
410
+ for i in range(self.stages):
411
+ # Give xyz[b, p, 3] and fea[b, p, d], return new_xyz[b, g, 3] and new_fea[b, g, k, d]
412
+ xyz, x = self.local_grouper_list[i](xyz, x.permute(0, 2, 1)) # [b,g,3] [b,g,k,d]
413
+ x = self.pre_blocks_list[i](x) # [b,d,g]
414
+ x = self.pos_blocks_list[i](x) # [b,d,g]
415
+
416
+ x = F.adaptive_max_pool1d(x, 1).squeeze(dim=-1)
417
+ return x
418
+
419
+
420
+ class PointMLP(Model):
421
+ def __init__(self, points: int, input_channels: int, embed_dim: int, **kwargs):
422
+ super().__init__()
423
+ assert input_channels == 3 or input_channels == 6, "Input channels must be 3 or 6"
424
+ self.backbone = Model(
425
+ points=points,
426
+ input_channels=input_channels,
427
+ embed_dim=embed_dim // 16,
428
+ groups=1,
429
+ res_expansion=1.0,
430
+ activation="relu",
431
+ bias=False,
432
+ use_xyz=False,
433
+ normalize="anchor",
434
+ dim_expansion=[2, 2, 2, 2],
435
+ pre_blocks=[2, 2, 2, 2],
436
+ pos_blocks=[2, 2, 2, 2],
437
+ k_neighbors=[24, 24, 24, 24],
438
+ reducers=[2, 2, 2, 2],
439
+ **kwargs,
440
+ )
441
+ return
442
+
443
+ def forward(self, pcd: torch.Tensor, robot_state_obs: torch.Tensor = None) -> torch.Tensor:
444
+ B = pcd.shape[0]
445
+ # Flatten the batch and time dimensions
446
+ pcd = pcd.float().reshape(-1, *pcd.shape[2:])
447
+ robot_state_obs = robot_state_obs.float().reshape(-1, *robot_state_obs.shape[2:])
448
+ # Permute [B, P, 3] -> [B, 3, P]
449
+ pcd = pcd.permute(0, 2, 1)
450
+ # Encode all point clouds (across time steps and batch size)
451
+ encoded_pcd = self.backbone(pcd)
452
+ nx = torch.cat([encoded_pcd, robot_state_obs], dim=1)
453
+ # Reshape back to the batch dimension. Now the features of each time step are concatenated
454
+ nx = nx.reshape(B, -1)
455
+ return nx
456
+
457
+
458
+ class PointMLPElite(nn.Module):
459
+ def __init__(self, points: int, input_channels: int, embed_dim: int, **kwargs):
460
+ super().__init__()
461
+ assert input_channels == 3 or input_channels == 6, "Input channels must be 3 or 6"
462
+ self.backbone = Model(
463
+ points=points,
464
+ input_channels=input_channels,
465
+ embed_dim=embed_dim // 16,
466
+ groups=1,
467
+ res_expansion=0.25,
468
+ activation="relu",
469
+ bias=False,
470
+ use_xyz=False,
471
+ normalize="anchor",
472
+ dim_expansion=[2, 2, 2, 1],
473
+ pre_blocks=[1, 1, 2, 1],
474
+ pos_blocks=[1, 1, 2, 1],
475
+ k_neighbors=[24, 24, 24, 24],
476
+ reducers=[2, 2, 2, 2],
477
+ **kwargs,
478
+ )
479
+ return
480
+
481
+ def forward(self, pcd: torch.Tensor, robot_state_obs: torch.Tensor = None) -> torch.Tensor:
482
+ B = pcd.shape[0]
483
+ # Flatten the batch and time dimensions
484
+ pcd = pcd.float().reshape(-1, *pcd.shape[2:])
485
+ robot_state_obs = robot_state_obs.float().reshape(-1, *robot_state_obs.shape[2:])
486
+ # Permute [B, P, 3] -> [B, 3, P]
487
+ pcd = pcd.permute(0, 2, 1)
488
+ # Encode all point clouds (across time steps and batch size)
489
+ encoded_pcd = self.backbone(pcd)
490
+ nx = torch.cat([encoded_pcd, robot_state_obs], dim=1)
491
+ # Reshape back to the batch dimension. Now the features of each time step are concatenated
492
+ nx = nx.reshape(B, -1)
493
+ return nx
494
+
495
+
496
+ if __name__ == "__main__":
497
+ num_points = 1024
498
+ embed_dim = 512
499
+ data = torch.rand(2, 3, num_points)
500
+ print("===> testing pointMLP ...")
501
+ model = PointMLP(points=num_points, input_channels=3, embed_dim=embed_dim)
502
+ out = model.backbone(data)
503
+ print(out.shape)
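The smoke test above exercises the raw backbone only; the wrapper's forward additionally expects an observation-time dimension and a robot-state vector. A rough shape walkthrough (the sizes are illustrative, not taken from the configs):

```python
# Sketch: calling the PointMLP wrapper the way the policy encoders do, with a time dimension.
import torch

B, T, P = 2, 2, 1024                      # batch, observation steps, points per cloud
wrapper = PointMLP(points=P, input_channels=3, embed_dim=512)
pcd = torch.rand(B, T, P, 3)              # xyz point clouds, one per observation step
robot_state = torch.rand(B, T, 10)        # pfp robot state per observation step
features = wrapper(pcd, robot_state)      # -> (B, T * (backbone_dim + 10)), time steps concatenated
print(features.shape)
```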
third_party/PointFlowMatch/pfp/backbones/pointnet.py ADDED
@@ -0,0 +1,237 @@
1
+ """ Adapted from https://github.com/dyson-ai/hdp/blob/main/rk_diffuser/models/pointnet.py """
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+ import torch.nn.parallel
8
+ import torch.utils.data
9
+ from torch.autograd import Variable
10
+ from diffusion_policy.common.pytorch_util import replace_submodules
11
+
12
+
13
+ class STN3d(nn.Module):
14
+ def __init__(self):
15
+ super(STN3d, self).__init__()
16
+ self.conv1 = torch.nn.Conv1d(3, 64, 1)
17
+ self.conv2 = torch.nn.Conv1d(64, 128, 1)
18
+ self.conv3 = torch.nn.Conv1d(128, 1024, 1)
19
+ self.fc1 = nn.Linear(1024, 512)
20
+ self.fc2 = nn.Linear(512, 256)
21
+ self.fc3 = nn.Linear(256, 9)
22
+ self.relu = nn.ReLU()
23
+
24
+ self.bn1 = nn.BatchNorm1d(64)
25
+ self.bn2 = nn.BatchNorm1d(128)
26
+ self.bn3 = nn.BatchNorm1d(1024)
27
+ # self.bn4 = nn.BatchNorm1d(512)
28
+ # self.bn5 = nn.BatchNorm1d(256)
29
+
30
+ self.bn4 = nn.LayerNorm(512)
31
+ self.bn5 = nn.LayerNorm(256)
32
+
33
+ def forward(self, x):
34
+ batchsize = x.size()[0]
35
+ x = F.relu(self.bn1(self.conv1(x)))
36
+ x = F.relu(self.bn2(self.conv2(x)))
37
+ x = F.relu(self.bn3(self.conv3(x)))
38
+ x = torch.max(x, 2, keepdim=True)[0]
39
+ x = x.view(-1, 1024)
40
+
41
+ x = F.relu(self.bn4(self.fc1(x)))
42
+ x = F.relu(self.bn5(self.fc2(x)))
43
+ x = self.fc3(x)
44
+
45
+ iden = (
46
+ Variable(torch.from_numpy(np.array([1, 0, 0, 0, 1, 0, 0, 0, 1]).astype(np.float32)))
47
+ .view(1, 9)
48
+ .repeat(batchsize, 1)
49
+ )
50
+ if x.is_cuda:
51
+ iden = iden.cuda()
52
+ x = x + iden
53
+ x = x.view(-1, 3, 3)
54
+ return x
55
+
56
+
57
+ class STNkd(nn.Module):
58
+ def __init__(self, k=64):
59
+ super(STNkd, self).__init__()
60
+ self.conv1 = torch.nn.Conv1d(k, 64, 1)
61
+ self.conv2 = torch.nn.Conv1d(64, 128, 1)
62
+ self.conv3 = torch.nn.Conv1d(128, 1024, 1)
63
+ self.fc1 = nn.Linear(1024, 512)
64
+ self.fc2 = nn.Linear(512, 256)
65
+ self.fc3 = nn.Linear(256, k * k)
66
+ self.relu = nn.ReLU()
67
+
68
+ self.bn1 = nn.BatchNorm1d(64)
69
+ self.bn2 = nn.BatchNorm1d(128)
70
+ self.bn3 = nn.BatchNorm1d(1024)
71
+ # self.bn4 = nn.BatchNorm1d(512)
72
+ # self.bn5 = nn.BatchNorm1d(256)
73
+
74
+ self.bn4 = nn.LayerNorm(512)
75
+ self.bn5 = nn.LayerNorm(256)
76
+
77
+ self.k = k
78
+
79
+ def forward(self, x):
80
+ batchsize = x.size()[0]
81
+ x = F.relu(self.bn1(self.conv1(x)))
82
+ x = F.relu(self.bn2(self.conv2(x)))
83
+ x = F.relu(self.bn3(self.conv3(x)))
84
+ x = torch.max(x, 2, keepdim=True)[0]
85
+ x = x.view(-1, 1024)
86
+
87
+ x = F.relu(self.bn4(self.fc1(x)))
88
+ x = F.relu(self.bn5(self.fc2(x)))
89
+ x = self.fc3(x)
90
+
91
+ iden = (
92
+ Variable(torch.from_numpy(np.eye(self.k).flatten().astype(np.float32)))
93
+ .view(1, self.k * self.k)
94
+ .repeat(batchsize, 1)
95
+ )
96
+ if x.is_cuda:
97
+ iden = iden.cuda()
98
+ x = x + iden
99
+ x = x.view(-1, self.k, self.k)
100
+ return x
101
+
102
+
103
+ class PointNetfeat(nn.Module):
104
+ def __init__(self, input_channels: int, input_transform: bool, feature_transform=False):
105
+ super(PointNetfeat, self).__init__()
106
+ self.input_transform = input_transform
107
+ if self.input_transform:
108
+ self.stn = STNkd(k=input_channels)
109
+ self.conv1 = torch.nn.Conv1d(input_channels, 64, 1)
110
+ self.conv2 = torch.nn.Conv1d(64, 128, 1)
111
+ self.conv3 = torch.nn.Conv1d(128, 1024, 1)
112
+ self.bn1 = nn.BatchNorm1d(64)
113
+ self.bn2 = nn.BatchNorm1d(128)
114
+ self.bn3 = nn.BatchNorm1d(1024)
115
+ self.feature_transform = feature_transform
116
+ if self.feature_transform:
117
+ self.fstn = STNkd(k=64)
118
+
119
+ def forward(self, x):
120
+ b = x.size(0)
121
+ if len(x.shape) == 4:
122
+ x = x.view(b, -1, 3).permute(0, 2, 1).contiguous()
123
+
124
+ if self.input_transform:
125
+ trans = self.stn(x)
126
+ x = x.transpose(2, 1)
127
+ x = torch.bmm(x, trans)
128
+ x = x.transpose(2, 1)
129
+ else:
130
+ trans = None
131
+
132
+ x = F.relu(self.bn1(self.conv1(x)))
133
+
134
+ if self.feature_transform:
135
+ trans_feat = self.fstn(x)
136
+ x = x.transpose(2, 1)
137
+ x = torch.bmm(x, trans_feat)
138
+ x = x.transpose(2, 1)
139
+ else:
140
+ trans_feat = None
141
+
142
+ x = F.relu(self.bn2(self.conv2(x)))
143
+ x = self.bn3(self.conv3(x))
144
+ x = torch.max(x, 2, keepdim=True)[0]
145
+ x = x.view(-1, 1024)
146
+ return x
147
+
148
+
149
+ class PointNetCls(nn.Module):
150
+ def __init__(self, k=2, feature_transform=False):
151
+ super(PointNetCls, self).__init__()
152
+ self.feature_transform = feature_transform
153
+ self.feat = PointNetfeat(global_feat=True, feature_transform=feature_transform)
154
+ self.fc1 = nn.Linear(1024, 512)
155
+ self.fc2 = nn.Linear(512, 256)
156
+ self.fc3 = nn.Linear(256, k)
157
+ self.dropout = nn.Dropout(p=0.3)
158
+ self.bn1 = nn.BatchNorm1d(512)
159
+ self.bn2 = nn.BatchNorm1d(256)
160
+ self.relu = nn.ReLU()
161
+
162
+ def forward(self, x):
163
+ x, trans, trans_feat = self.feat(x)
164
+ x = F.relu(self.bn1(self.fc1(x)))
165
+ x = F.relu(self.bn2(self.dropout(self.fc2(x))))
166
+ x = self.fc3(x)
167
+ return F.log_softmax(x, dim=1), trans, trans_feat
168
+
169
+
170
+ class PointNetDenseCls(nn.Module):
171
+ def __init__(self, k=2, feature_transform=False):
172
+ super(PointNetDenseCls, self).__init__()
173
+ self.k = k
174
+ self.feature_transform = feature_transform
175
+ self.feat = PointNetfeat(global_feat=False, feature_transform=feature_transform)
176
+ self.conv1 = torch.nn.Conv1d(1088, 512, 1)
177
+ self.conv2 = torch.nn.Conv1d(512, 256, 1)
178
+ self.conv3 = torch.nn.Conv1d(256, 128, 1)
179
+ self.conv4 = torch.nn.Conv1d(128, self.k, 1)
180
+ self.bn1 = nn.BatchNorm1d(512)
181
+ self.bn2 = nn.BatchNorm1d(256)
182
+ self.bn3 = nn.BatchNorm1d(128)
183
+
184
+ def forward(self, x):
185
+ batchsize = x.size()[0]
186
+ n_pts = x.size()[2]
187
+ x, trans, trans_feat = self.feat(x)
188
+ x = F.relu(self.bn1(self.conv1(x)))
189
+ x = F.relu(self.bn2(self.conv2(x)))
190
+ x = F.relu(self.bn3(self.conv3(x)))
191
+ x = self.conv4(x)
192
+ x = x.transpose(2, 1).contiguous()
193
+ x = F.log_softmax(x.view(-1, self.k), dim=-1)
194
+ x = x.view(batchsize, n_pts, self.k)
195
+ return x, trans, trans_feat
196
+
197
+
198
+ class PointNetBackbone(nn.Module):
199
+ def __init__(
200
+ self,
201
+ embed_dim: int,
202
+ input_channels: int,
203
+ input_transform: bool,
204
+ use_group_norm: bool = False,
205
+ ):
206
+ super().__init__()
207
+ assert input_channels in [3, 6], "Input channels must be 3 or 6"
208
+ self.backbone = nn.Sequential(
209
+ PointNetfeat(input_channels, input_transform),
210
+ nn.Mish(),
211
+ nn.Linear(1024, 512),
212
+ nn.Mish(),
213
+ nn.Linear(512, embed_dim),
214
+ )
215
+ if use_group_norm:
216
+ self.backbone = replace_submodules(
217
+ root_module=self.backbone,
218
+ predicate=lambda x: isinstance(x, nn.BatchNorm1d),
219
+ func=lambda x: nn.GroupNorm(
220
+ num_groups=x.num_features // 16, num_channels=x.num_features
221
+ ),
222
+ )
223
+ return
224
+
225
+ def forward(self, pcd: torch.Tensor, robot_state_obs: torch.Tensor = None) -> torch.Tensor:
226
+ B = pcd.shape[0]
227
+ # Flatten the batch and time dimensions
228
+ pcd = pcd.float().reshape(-1, *pcd.shape[2:])
229
+ robot_state_obs = robot_state_obs.float().reshape(-1, *robot_state_obs.shape[2:])
230
+ # Permute [B, P, C] -> [B, C, P]
231
+ pcd = pcd.permute(0, 2, 1)
232
+ # Encode all point clouds (across time steps and batch size)
233
+ encoded_pcd = self.backbone(pcd)
234
+ nx = torch.cat([encoded_pcd, robot_state_obs], dim=1)
235
+ # Reshape back to the batch dimension. Now the features of each time step are concatenated
236
+ nx = nx.reshape(B, -1)
237
+ return nx
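PointNetBackbone is the `backbone: pointnet` option selected in conf/train.yaml. A brief sketch of the GroupNorm variant, which avoids relying on batch statistics (useful with very small effective batches; the sizes below are illustrative):

```python
# Sketch: the GroupNorm variant of the PointNet encoder, with the (B, T, P, C) inputs the policies use.
import torch

encoder = PointNetBackbone(embed_dim=256, input_channels=3, input_transform=False, use_group_norm=True)
pcd = torch.rand(1, 2, 4096, 3)     # batch of 1, two observation steps, 4096 xyz points
state = torch.rand(1, 2, 10)        # pfp robot state per observation step
features = encoder(pcd, state)      # -> (1, 2 * (256 + 10)) = (1, 532)
```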
third_party/PointFlowMatch/pfp/backbones/resnet_dp.py ADDED
@@ -0,0 +1,33 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from diffusion_policy.model.vision.model_getter import get_resnet
4
+ from diffusion_policy.model.vision.multi_image_obs_encoder import MultiImageObsEncoder
5
+
6
+
7
+ class ResnetDP(nn.Module):
8
+ def __init__(self, shape_meta: dict):
9
+ super().__init__()
10
+ rgb_model = get_resnet(name="resnet18")
11
+ self.backbone = MultiImageObsEncoder(
12
+ shape_meta=shape_meta,
13
+ rgb_model=rgb_model,
14
+ crop_shape=(76, 76),
15
+ random_crop=True,
16
+ use_group_norm=True,
17
+ share_rgb_model=False,
18
+ imagenet_norm=True,
19
+ )
20
+ return
21
+
22
+ def forward(self, images: torch.Tensor, robot_state_obs: torch.Tensor = None) -> torch.Tensor:
23
+ B = images.shape[0]
24
+ # Flatten the batch and time dimensions
25
+ images = images.reshape(-1, *images.shape[2:]).permute(0, 1, 4, 2, 3)
26
+ robot_state_obs = robot_state_obs.float().reshape(-1, *robot_state_obs.shape[2:])
27
+ # Encode all observations (across time steps and batch size)
28
+ obs_dict = {f"img_{i}": images[:, i] for i in range(images.shape[1])}
29
+ obs_dict["robot_state"] = robot_state_obs
30
+ nx = self.backbone(obs_dict)
31
+ # Reshape back to the batch dimension. Now the features of each time step are concatenated
32
+ nx = nx.reshape(B, -1)
33
+ return nx
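MultiImageObsEncoder is driven by a shape_meta dict. Below is a hypothetical one matching the `img_0`/`img_1`/`robot_state` keys assembled in forward above; the shapes and the action entry are assumptions following the usual diffusion_policy layout, not values taken from this repo:

```python
# Hypothetical shape_meta for ResnetDP; key names mirror the obs_dict built in forward above,
# while the shapes and the action entry are illustrative assumptions.
shape_meta = {
    "obs": {
        "img_0": {"shape": [3, 128, 128], "type": "rgb"},
        "img_1": {"shape": [3, 128, 128], "type": "rgb"},
        "robot_state": {"shape": [10], "type": "low_dim"},
    },
    "action": {"shape": [10]},
}
encoder = ResnetDP(shape_meta=shape_meta)
```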
third_party/PointFlowMatch/pfp/common/__pycache__/fm_utils.cpython-310.pyc ADDED
Binary file (724 Bytes). View file
 
third_party/PointFlowMatch/pfp/common/__pycache__/o3d_utils.cpython-310.pyc ADDED
Binary file (1.46 kB). View file
 
third_party/PointFlowMatch/pfp/common/__pycache__/se3_utils.cpython-310.pyc ADDED
Binary file (6.09 kB). View file
 
third_party/PointFlowMatch/pfp/common/__pycache__/visualization.cpython-310.pyc ADDED
Binary file (7.11 kB). View file
 
third_party/PointFlowMatch/pfp/common/fm_utils.py ADDED
@@ -0,0 +1,17 @@
1
+ import torch
2
+
3
+
4
+ def get_timesteps(schedule: str, k_steps: int, exp_scale: float = 1.0):
5
+ t = torch.linspace(0, 1, k_steps + 1)[:-1]
6
+ if schedule == "linear":
7
+ dt = torch.ones(k_steps) / k_steps
8
+ elif schedule == "cosine":
9
+ dt = torch.cos(t * torch.pi) + 1
10
+ dt /= torch.sum(dt)
11
+ elif schedule == "exp":
12
+ dt = torch.exp(-t * exp_scale)
13
+ dt /= torch.sum(dt)
14
+ else:
15
+ raise ValueError(f"Invalid schedule: {schedule}")
16
+ t0 = torch.cat((torch.zeros(1), torch.cumsum(dt, dim=0)[:-1]))
17
+ return t0, dt
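A quick worked example of the schedules above (values rounded):

```python
# Worked example: integration grids returned by get_timesteps for 4 steps.
t0, dt = get_timesteps("linear", k_steps=4)
# t0 ~ [0.00, 0.25, 0.50, 0.75], dt ~ [0.25, 0.25, 0.25, 0.25]   (dt always sums to 1)

t0, dt = get_timesteps("exp", k_steps=4, exp_scale=4.0)
# dt ~ [0.64, 0.24, 0.09, 0.03], t0 ~ [0.00, 0.64, 0.88, 0.97]   (coarse early steps, fine late steps)
```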
third_party/PointFlowMatch/pfp/common/o3d_utils.py ADDED
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+ import functools
3
+ import numpy as np
4
+ import open3d as o3d
5
+
6
+
7
+ def make_pcd(
8
+ xyz: np.ndarray,
9
+ rgb: np.ndarray,
10
+ ) -> o3d.geometry.PointCloud:
11
+ points = o3d.utility.Vector3dVector(xyz.reshape(-1, 3))
12
+ colors = o3d.utility.Vector3dVector(rgb.reshape(-1, 3).astype(np.float64) / 255)
13
+ pcd = o3d.geometry.PointCloud(points)
14
+ pcd.colors = colors
15
+ return pcd
16
+
17
+
18
+ def merge_pcds(
19
+ voxel_size: float,
20
+ n_points: int,
21
+ pcds: list[o3d.geometry.PointCloud],
22
+ ws_aabb: o3d.geometry.AxisAlignedBoundingBox,
23
+ ) -> o3d.geometry.PointCloud:
24
+ merged_pcd = functools.reduce(lambda a, b: a + b, pcds, o3d.geometry.PointCloud())
25
+ merged_pcd = merged_pcd.crop(ws_aabb)
26
+ downsampled_pcd = merged_pcd.voxel_down_sample(voxel_size=voxel_size)
27
+ if len(downsampled_pcd.points) > n_points:
28
+ ratio = n_points / len(downsampled_pcd.points)
29
+ downsampled_pcd = downsampled_pcd.random_down_sample(ratio)
30
+ if len(downsampled_pcd.points) < n_points:
31
+ # Append zeros to make the point cloud have the desired number of points
32
+ num_missing_points = n_points - len(downsampled_pcd.points)
33
+ zeros = np.zeros((num_missing_points, 3))
34
+ zeros_pcd = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(zeros))
35
+ zeros_pcd.colors = o3d.utility.Vector3dVector(zeros)
36
+ downsampled_pcd += zeros_pcd
37
+ return downsampled_pcd
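A small usage sketch of merge_pcds, mimicking how the env code fuses the per-camera clouds; the workspace bounds, voxel size and point counts here are illustrative:

```python
# Sketch: fuse several camera clouds into one fixed-size observation cloud.
import numpy as np
import open3d as o3d

ws_aabb = o3d.geometry.AxisAlignedBoundingBox(min_bound=(0.0, 0.0, 0.0), max_bound=(1.0, 1.0, 1.0))
cam_clouds = [
    make_pcd(np.random.rand(1000, 3), np.random.randint(0, 255, (1000, 3))) for _ in range(3)
]
obs_pcd = merge_pcds(voxel_size=0.01, n_points=4096, pcds=cam_clouds, ws_aabb=ws_aabb)
xyz = np.asarray(obs_pcd.points)   # cropped, voxel-downsampled and zero-padded towards n_points
```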
third_party/PointFlowMatch/pfp/common/se3_utils.py ADDED
@@ -0,0 +1,180 @@
1
+ import torch
2
+ import numpy as np
3
+ from spatialmath.base import r2q
4
+ from spatialmath.base.transforms3d import isrot
5
+
6
+ try:
7
+ from pytorch3d.ops import corresponding_points_alignment
8
+ except ImportError:
9
+ print("pytorch3d not installed")
10
+ from pfp import DEVICE
11
+
12
+
13
+ def transform_th(transform: torch.Tensor, points: torch.Tensor) -> torch.Tensor:
14
+ """Apply a 4x4 transformation matrix to a set of points."""
15
+ new_points = points @ transform[..., :3, :3].mT + transform[..., :3, 3]
16
+ return new_points
17
+
18
+
19
+ def vec_projection_np(v: np.ndarray, e: np.ndarray) -> np.ndarray:
20
+ """Project vector v onto unit vector e."""
21
+ proj = np.sum(v * e, axis=-1, keepdims=True) * e
22
+ return proj
23
+
24
+
25
+ def vec_projection_th(v: torch.Tensor, e: torch.Tensor) -> torch.Tensor:
26
+ """Project vector v onto unit vector e."""
27
+ proj = torch.sum(v * e, dim=-1, keepdim=True) * e
28
+ return proj
29
+
30
+
31
+ def grahm_schmidt_np(v1: np.ndarray, v2: np.ndarray) -> np.ndarray:
32
+ """Compute orthonormal basis from two vectors."""
33
+ v1 = v1.astype(np.float64)
34
+ v2 = v2.astype(np.float64)
35
+ u1 = v1
36
+ e1 = u1 / np.linalg.norm(u1, axis=-1, keepdims=True)
37
+ u2 = v2 - vec_projection_np(v2, e1)
38
+ e2 = u2 / np.linalg.norm(u2, axis=-1, keepdims=True)
39
+ e3 = np.cross(e1, e2, axis=-1)
40
+ rot_matrix = np.concatenate([e1[..., None], e2[..., None], e3[..., None]], axis=-1)
41
+ return rot_matrix
42
+
43
+
44
+ def grahm_schmidt_th(v1: torch.Tensor, v2: torch.Tensor) -> torch.Tensor:
45
+ """Compute orthonormal basis from two vectors."""
46
+ u1 = v1
47
+ e1 = u1 / torch.norm(u1, dim=-1, keepdim=True)
48
+ u2 = v2 - vec_projection_th(v2, e1)
49
+ e2 = u2 / torch.norm(u2, dim=-1, keepdim=True)
50
+ e3 = torch.cross(e1, e2, dim=-1)
51
+ rot_matrix = torch.cat(
52
+ [e1.unsqueeze(dim=-1), e2.unsqueeze(dim=-1), e3.unsqueeze(dim=-1)], dim=-1
53
+ )
54
+ return rot_matrix
55
+
56
+
57
+ def pfp_to_pose_np(robot_states: np.ndarray) -> np.ndarray:
58
+ """Convert pfp state (T, 10) to 4x4 poses (T, 4, 4)."""
59
+ T = robot_states.shape[0]
60
+ poses = np.eye(4)[np.newaxis, ...]
61
+ poses = np.tile(poses, (T, 1, 1))
62
+ poses[:, :3, 3] = robot_states[:, :3]
63
+ poses[:, :3, :3] = grahm_schmidt_np(robot_states[:, 3:6], robot_states[:, 6:9])
64
+ return poses
65
+
66
+
67
+ def pfp_to_pose_th(robot_states: torch.Tensor) -> torch.Tensor:
68
+ """Convert pfp state (B, T, 10) to 4x4 poses (B, T, 4, 4) and gripper (B, T, 1)."""
69
+ B = robot_states.shape[0]
70
+ T = robot_states.shape[1]
71
+ poses = (
72
+ torch.eye(4, device=robot_states.device)
73
+ .unsqueeze(0)
74
+ .unsqueeze(0)
75
+ .expand(B, T, 4, 4)
76
+ .contiguous()
77
+ )
78
+ poses[..., :3, 3] = robot_states[..., :3]
79
+ poses[..., :3, :3] = grahm_schmidt_th(robot_states[..., 3:6], robot_states[..., 6:9])
80
+ gripper = robot_states[..., -1:]
81
+ return poses, gripper
82
+
83
+
84
+ def rot6d_to_quat_np(rot6d: np.ndarray, order: str = "xyzs") -> np.ndarray:
85
+ """Convert 6d rotation matrix to quaternion."""
86
+ rot = grahm_schmidt_np(rot6d[:3], rot6d[3:])
87
+ quat = r2q(rot, order=order)
88
+ return quat
89
+
90
+
91
+ def rot6d_to_rot_np(rot6d: np.ndarray) -> np.ndarray:
92
+ """Convert 6d rotation matrix to 3x3 rotation matrix."""
93
+ rot = grahm_schmidt_np(rot6d[:3], rot6d[3:])
94
+ return rot
95
+
96
+
97
+ def check_valid_rot(rot: np.ndarray) -> bool:
98
+ """Check if the 3x3 rotation matrix is valid."""
99
+ valid = isrot(rot, check=True, tol=1e10)
100
+ return valid
101
+
102
+
103
+ def get_canonical_5p_th() -> torch.Tensor:
104
+ """Return the (5,3) canonical 5points representation of the franka hand."""
105
+ gripper_width = 0.08
106
+ left_y = 0.5 * gripper_width
107
+ right_y = -0.5 * gripper_width
108
+ mid_z = -0.041
109
+ top_z = -0.1034
110
+ a = [0, 0, top_z]
111
+ b = [0, left_y, mid_z]
112
+ c = [0, right_y, mid_z]
113
+ d = [0, left_y, 0]
114
+ e = [0, right_y, 0]
115
+ pose_5p = torch.tensor([a, b, c, d, e])
116
+ return pose_5p
117
+
118
+
119
+ def pfp_to_state5p_th(robot_states: torch.Tensor) -> torch.Tensor:
120
+ """
121
+ Convert pfp state (B, T, 10) to 5points representation (B, T, 16).
122
+ 5p: [x0, y0, z0, x1, y1, z1, x2, y2, z2, x3, y3, z3, x4, y4, z4, gripper]
123
+ """
124
+ device = robot_states.device
125
+ poses, gripper = pfp_to_pose_th(robot_states)
126
+ canonical_5p = get_canonical_5p_th().to(device)
127
+ canonical_5p_homog = torch.cat([canonical_5p, torch.ones(5, 1, device=device)], dim=-1)
128
+ poses_5p_homog = (poses @ canonical_5p_homog.mT).mT
129
+ poses_5p = poses_5p_homog[..., :3].contiguous().flatten(start_dim=-2)
130
+ state5p = torch.cat([poses_5p, gripper], dim=-1)
131
+ return state5p
132
+
133
+
134
+ def state5p_to_pfp_th(state5p: torch.Tensor) -> torch.Tensor:
135
+ """
136
+ Convert 5points representation (B, T, 16) to pfp state (B, T, 10) using svd projection.
137
+ """
138
+ device = state5p.device
139
+ leading_dims = state5p.shape[0:2]
140
+ # Flatten the batch and time dimensions
141
+ state5p = state5p.reshape(-1, *state5p.shape[2:])
142
+ poses_5p, gripper = state5p[..., :-1], state5p[..., -1:]
143
+ poses_5p = poses_5p.reshape(-1, 5, 3)
144
+ canonical_5p = get_canonical_5p_th().expand(poses_5p.shape[0], 5, 3).to(device)
145
+ with torch.cuda.amp.autocast(enabled=False):
146
+ result = corresponding_points_alignment(canonical_5p, poses_5p)
147
+ rotations = result.R.mT
148
+ translations = result.T
149
+ pfp_state = torch.cat([translations, rotations[..., 0], rotations[..., 1], gripper], dim=-1)
150
+ # Reshape back to the batch and time dimensions
151
+ pfp_state = pfp_state.reshape(*leading_dims, -1)
152
+ return pfp_state
153
+
154
+
155
+ def init_random_traj_th(B: int, T: int, noise_scale: float) -> torch.Tensor:
156
+ """
157
+ B: batch size
158
+ T: number of time steps
159
+ """
160
+ # Position
161
+ random_xyz = torch.randn((B, 1, 3), device=DEVICE) * noise_scale
162
+ direction = torch.randn((B, 1, 3), device=DEVICE)
163
+ direction = direction / torch.norm(direction, dim=-1, keepdim=True)
164
+ t = torch.linspace(0, 1, T, device=DEVICE).unsqueeze(0).unsqueeze(-1)
165
+ random_xyz = random_xyz + t * direction
166
+
167
+ # Rotation 6d
168
+ random_r1 = torch.randn((B, 1, 3), device=DEVICE)
169
+ random_r1 = random_r1 / torch.norm(random_r1, dim=-1, keepdim=True)
170
+ random_r2 = torch.randn((B, 1, 3), device=DEVICE)
171
+ random_r2 = random_r2 - vec_projection_th(random_r2, random_r1)
172
+ random_r2 = random_r2 / torch.norm(random_r2, dim=-1, keepdim=True)
173
+ random_r6d = torch.cat([random_r1, random_r2], dim=-1)
174
+ random_r6d = random_r6d.expand(B, T, 6)
175
+
176
+ # Gripper
177
+ gripper = torch.ones((B, T, 1), device=DEVICE)
178
+
179
+ random_traj = torch.cat([random_xyz, random_r6d, gripper], dim=-1)
180
+ return random_traj
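Throughout the codebase the 10-D state is laid out as [xyz (3), first two rotation-matrix columns (6), gripper (1)]. A small round-trip sketch of the helpers above, using an identity orientation and an illustrative trajectory length:

```python
# Sketch: pfp state -> 4x4 poses and the 5-point gripper representation.
import numpy as np
import torch

traj = np.zeros((8, 10), dtype=np.float32)
traj[:, 3:6] = [1.0, 0.0, 0.0]   # first rotation column
traj[:, 6:9] = [0.0, 1.0, 0.0]   # second rotation column -> identity orientation after Gram-Schmidt
traj[:, 9] = 1.0                 # gripper open
poses = pfp_to_pose_np(traj)     # (8, 4, 4) homogeneous poses

state5p = pfp_to_state5p_th(torch.from_numpy(traj)[None])   # (1, 8, 16) keypoint form
```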
third_party/PointFlowMatch/pfp/common/visualization.py ADDED
@@ -0,0 +1,178 @@
1
+ from __future__ import annotations
2
+ import trimesh
3
+ import numpy as np
4
+ import open3d as o3d
5
+ from yourdfpy.urdf import URDF
6
+ from pfp.common.se3_utils import pfp_to_pose_np
7
+
8
+ try:
9
+ import rerun as rr
10
+ except ImportError:
11
+ print("WARNING: Rerun not installed. Visualization will not work.")
12
+
13
+
14
+ class RerunViewer:
15
+ def __init__(self, name: str, addr: str = None):
16
+ rr.init(name)
17
+ if addr is None:
18
+ addr = "127.0.0.1"
19
+ port = ":9876"
20
+ rr.connect(addr + port)
21
+ RerunViewer.clear()
22
+ return
23
+
24
+ @staticmethod
25
+ def add_obs_dict(obs_dict: dict, timestep: int = None):
26
+ if timestep is not None:
27
+ rr.set_time_sequence("timestep", timestep)
28
+ RerunViewer.add_rgb("rgb", obs_dict["image"])
29
+ RerunViewer.add_depth("depth", obs_dict["depth"])
30
+ RerunViewer.add_np_pointcloud(
31
+ "vis/pointcloud",
32
+ points=obs_dict["point_cloud"][:, :3],
33
+ colors_uint8=obs_dict["point_cloud"][:, 3:],
34
+ )
35
+ return
36
+
37
+ @staticmethod
38
+ def add_o3d_pointcloud(name: str, pointcloud: o3d.geometry.PointCloud, radii: float = None):
39
+ points = np.asanyarray(pointcloud.points)
40
+ colors = np.asanyarray(pointcloud.colors) if pointcloud.has_colors() else None
41
+ colors_uint8 = (colors * 255).astype(np.uint8) if pointcloud.has_colors() else None
42
+ RerunViewer.add_np_pointcloud(name, points, colors_uint8, radii)
43
+ return
44
+
45
+ @staticmethod
46
+ def add_np_pointcloud(
47
+ name: str, points: np.ndarray, colors_uint8: np.ndarray = None, radii: float = None
48
+ ):
49
+ rr_points = rr.Points3D(positions=points, colors=colors_uint8, radii=radii)
50
+ rr.log(name, rr_points)
51
+ return
52
+
53
+ @staticmethod
54
+ def add_axis(name: str, pose: np.ndarray, size: float = 0.004, timeless: bool = False):
55
+ mesh = trimesh.creation.axis(origin_size=size, transform=pose)
56
+ RerunViewer.add_mesh_trimesh(name, mesh, timeless)
57
+ return
58
+
59
+ @staticmethod
60
+ def add_aabb(name: str, centers: np.ndarray, extents: np.ndarray, timeless=False):
61
+ rr.log(name, rr.Boxes3D(centers=centers, sizes=extents), timeless=timeless)
62
+ return
63
+
64
+ @staticmethod
65
+ def add_mesh_trimesh(name: str, mesh: trimesh.Trimesh, timeless: bool = False):
66
+ # Handle colors
67
+ if mesh.visual.kind in ["vertex", "face"]:
68
+ vertex_colors = mesh.visual.vertex_colors
69
+ elif mesh.visual.kind == "texture":
70
+ vertex_colors = mesh.visual.to_color().vertex_colors
71
+ else:
72
+ vertex_colors = None
73
+ # Log mesh
74
+ rr_mesh = rr.Mesh3D(
75
+ vertex_positions=mesh.vertices,
76
+ vertex_colors=vertex_colors,
77
+ vertex_normals=mesh.vertex_normals,
78
+ indices=mesh.faces,
79
+ )
80
+ rr.log(name, rr_mesh, timeless=timeless)
81
+ return
82
+
83
+ @staticmethod
84
+ def add_mesh_list_trimesh(name: str, meshes: list[trimesh.Trimesh]):
85
+ for i, mesh in enumerate(meshes):
86
+ RerunViewer.add_mesh_trimesh(name + f"/{i}", mesh)
87
+ return
88
+
89
+ @staticmethod
90
+ def add_rgb(name: str, rgb_uint8: np.ndarray):
91
+ if rgb_uint8.shape[0] == 3:
92
+ # CHW -> HWC
93
+ rgb_uint8 = np.transpose(rgb_uint8, (1, 2, 0))
94
+ rr.log(name, rr.Image(rgb_uint8))
95
+
96
+ @staticmethod
97
+ def add_depth(name: str, depth: np.ndarray):
98
+ rr.log(name, rr.DepthImage(depth))
99
+
100
+ @staticmethod
101
+ def add_traj(name: str, traj: np.ndarray):
102
+ """
103
+ name: str
104
+ traj: np.ndarray (T, 10)
105
+ """
106
+ poses = pfp_to_pose_np(traj)
107
+ for i, pose in enumerate(poses):
108
+ RerunViewer.add_axis(name + f"/{i}t", pose)
109
+ return
110
+
111
+ @staticmethod
112
+ def clear():
113
+ rr.log("vis", rr.Clear(recursive=True))
114
+ return
115
+
116
+
117
+ class RerunTraj:
118
+ def __init__(self) -> None:
119
+ self.traj_shape = None
120
+ return
121
+
122
+ def add_traj(self, name: str, traj: np.ndarray, size: float = 0.004):
123
+ """
124
+ name: str
125
+ traj: np.ndarray (T, 10)
126
+ """
127
+ if self.traj_shape is None or self.traj_shape != traj.shape:
128
+ self.traj_shape = traj.shape
129
+ for i in range(traj.shape[0]):
130
+ RerunViewer.add_axis(name + f"/{i}t", np.eye(4), size)
131
+ poses = pfp_to_pose_np(traj)
132
+ for i, pose in enumerate(poses):
133
+ rr.log(
134
+ name + f"/{i}t",
135
+ rr.Transform3D(mat3x3=pose[:3, :3], translation=pose[:3, 3]),
136
+ )
137
+ return
138
+
139
+
140
+ class RerunURDF:
141
+ def __init__(self, name: str, urdf_path: str, meshes_root: str):
142
+ self.name = name
143
+ self.urdf: URDF = URDF.load(urdf_path, mesh_dir=meshes_root)
144
+ return
145
+
146
+ def update_vis(
147
+ self,
148
+ joint_state: list | np.ndarray,
149
+ root_pose: np.ndarray = np.eye(4),
150
+ name_suffix: str = "",
151
+ ):
152
+ self._update_joints(joint_state)
153
+ scene = self.urdf.scene
154
+ trimeshes = self._scene_to_trimeshes(scene)
155
+ trimeshes = [t.apply_transform(root_pose) for t in trimeshes]
156
+ RerunViewer.add_mesh_list_trimesh(self.name + name_suffix, trimeshes)
157
+ return
158
+
159
+ def _update_joints(self, joint_state: list | np.ndarray):
160
+ assert len(joint_state) == len(self.urdf.actuated_joints), "Wrong number of joint values."
161
+ self.urdf.update_cfg(joint_state)
162
+ return
163
+
164
+ def _scene_to_trimeshes(self, scene: trimesh.Scene) -> list[trimesh.Trimesh]:
165
+ """
166
+ Convert a trimesh.Scene to a list of trimesh.Trimesh.
167
+
168
+ Skips objects that are not an instance of trimesh.Trimesh.
169
+ """
170
+ trimeshes = []
171
+ scene_dump = scene.dump()
172
+ geometries = [scene_dump] if not isinstance(scene_dump, list) else scene_dump
173
+ for geometry in geometries:
174
+ if isinstance(geometry, trimesh.Trimesh):
175
+ trimeshes.append(geometry)
176
+ elif isinstance(geometry, trimesh.Scene):
177
+ trimeshes.extend(self._scene_to_trimeshes(geometry))
178
+ return trimeshes
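A minimal logging sketch with the viewer helpers above; it assumes rerun is installed and a viewer is listening on the default port, and the data is random and purely illustrative:

```python
# Sketch: stream a point cloud and a predicted trajectory to Rerun.
import numpy as np

viewer = RerunViewer("pfp_debug")                             # connects to 127.0.0.1:9876
viewer.add_np_pointcloud("vis/pointcloud", np.random.rand(2048, 3), radii=0.002)

traj_vis = RerunTraj()
traj_vis.add_traj("vis/pred_traj", np.random.rand(16, 10))    # (T, 10) pfp states
```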
third_party/PointFlowMatch/pfp/data/__pycache__/dataset_pcd.cpython-310.pyc ADDED
Binary file (3.68 kB). View file
 
third_party/PointFlowMatch/pfp/data/__pycache__/replay_buffer.cpython-310.pyc ADDED
Binary file (2.92 kB). View file
 
third_party/PointFlowMatch/pfp/data/dataset_images.py ADDED
@@ -0,0 +1,61 @@
1
+ from __future__ import annotations
2
+ import torch
3
+ import numpy as np
4
+ from diffusion_policy.common.sampler import SequenceSampler
5
+ from pfp.data.replay_buffer import RobotReplayBuffer
6
+ from pfp import DATA_DIRS
7
+
8
+
9
+ class RobotDatasetImages(torch.utils.data.Dataset):
10
+ def __init__(
11
+ self,
12
+ data_path: str,
13
+ n_obs_steps: int,
14
+ n_pred_steps: int,
15
+ subs_factor: int = 1, # 1 means no subsampling
16
+ **kwargs,
17
+ ) -> None:
18
+ """
19
+ The sampled sequence covers n_obs_steps observation steps followed by n_pred_steps prediction steps (each expanded by subs_factor before subsampling).
20
+ """
21
+ replay_buffer = RobotReplayBuffer.create_from_path(data_path, mode="r")
22
+ data_keys = ["robot_state", "images"]
23
+ data_key_first_k = {"images": n_obs_steps * subs_factor}
24
+ self.sampler = SequenceSampler(
25
+ replay_buffer=replay_buffer,
26
+ sequence_length=(n_obs_steps + n_pred_steps) * subs_factor - (subs_factor - 1),
27
+ pad_before=(n_obs_steps - 1) * subs_factor,
28
+ pad_after=(n_pred_steps - 1) * subs_factor + (subs_factor - 1),
29
+ keys=data_keys,
30
+ key_first_k=data_key_first_k,
31
+ )
32
+ self.n_obs_steps = n_obs_steps
33
+ self.n_prediction_steps = n_pred_steps
34
+ self.subs_factor = subs_factor
35
+ self.rng = np.random.default_rng()
36
+ return
37
+
38
+ def __len__(self) -> int:
39
+ return len(self.sampler)
40
+
41
+ def __getitem__(self, idx: int) -> tuple[torch.Tensor, ...]:
42
+ sample: dict[str, np.ndarray] = self.sampler.sample_sequence(idx)
43
+ cur_step_i = self.n_obs_steps * self.subs_factor
44
+ images = sample["images"][: cur_step_i : self.subs_factor]
45
+ robot_state_obs = sample["robot_state"][: cur_step_i : self.subs_factor]
46
+ robot_state_pred = sample["robot_state"][cur_step_i :: self.subs_factor]
47
+ return images, robot_state_obs, robot_state_pred
48
+
49
+
50
+ if __name__ == "__main__":
51
+ dataset = RobotDatasetImages(
52
+ data_path=DATA_DIRS.PFP / "open_fridge" / "train",
53
+ n_obs_steps=2,
54
+ n_pred_steps=8,
55
+ subs_factor=5,
56
+ )
57
+ i = 20
58
+ obs, robot_state_obs, robot_state_pred = dataset[i]
59
+ print("robot_state_obs: ", robot_state_obs)
60
+ print("robot_state_pred: ", robot_state_pred)
61
+ print("done")
third_party/PointFlowMatch/pfp/data/dataset_pcd.py ADDED
@@ -0,0 +1,105 @@
1
+ from __future__ import annotations
2
+ import torch
3
+ import numpy as np
4
+ import pypose as pp
5
+ from diffusion_policy.common.sampler import SequenceSampler
6
+ from pfp.data.replay_buffer import RobotReplayBuffer
7
+ from pfp.common.se3_utils import transform_th
8
+ from pfp import DATA_DIRS
9
+
10
+
11
+ def rand_range(low: float, high: float, size: tuple[int], device) -> torch.Tensor:
12
+ return torch.rand(size, device=device) * (high - low) + low
13
+
14
+
15
+ def augment_pcd_data(batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
16
+ pcd, robot_state_obs, robot_state_pred = batch
17
+ BT_robot_obs = robot_state_obs.shape[:-1]
18
+ BT_robot_pred = robot_state_pred.shape[:-1]
19
+
20
+ # sigma=(sigma_transl, sigma_rot_rad)
21
+ transform = pp.randn_SE3(sigma=(0.1, 0.2), device=pcd.device).matrix()
22
+
23
+ pcd[..., :3] = transform_th(transform, pcd[..., :3])
24
+ robot_obs_pseudoposes = robot_state_obs[..., :9].reshape(*BT_robot_obs, 3, 3)
25
+ robot_pred_pseudoposes = robot_state_pred[..., :9].reshape(*BT_robot_pred, 3, 3)
26
+ robot_obs_pseudoposes = transform_th(transform, robot_obs_pseudoposes)
27
+ robot_pred_pseudoposes = transform_th(transform, robot_pred_pseudoposes)
28
+ robot_state_obs[..., :9] = robot_obs_pseudoposes.reshape(*BT_robot_obs, 9)
29
+ robot_state_pred[..., :9] = robot_pred_pseudoposes.reshape(*BT_robot_pred, 9)
30
+
31
+ # We shuffle the points, i.e. shuffle pcd along dim=2 (B, T, P, 3)
32
+ idx = torch.randperm(pcd.shape[2])
33
+ pcd = pcd[:, :, idx, :]
34
+ return pcd, robot_state_obs, robot_state_pred
35
+
36
+
37
+ class RobotDatasetPcd(torch.utils.data.Dataset):
38
+ def __init__(
39
+ self,
40
+ data_path: str,
41
+ n_obs_steps: int,
42
+ n_pred_steps: int,
43
+ use_pc_color: bool,
44
+ n_points: int,
45
+ subs_factor: int = 1, # 1 means no subsampling
46
+ ) -> None:
47
+ """
48
+ The sampled sequence covers n_obs_steps observation steps followed by n_pred_steps prediction steps (each expanded by subs_factor before subsampling).
49
+ """
50
+ replay_buffer = RobotReplayBuffer.create_from_path(data_path, mode="r")
51
+ data_keys = ["robot_state", "pcd_xyz"]
52
+ data_key_first_k = {"pcd_xyz": n_obs_steps * subs_factor}
53
+ if use_pc_color:
54
+ data_keys.append("pcd_color")
55
+ data_key_first_k["pcd_color"] = n_obs_steps * subs_factor
56
+ self.sampler = SequenceSampler(
57
+ replay_buffer=replay_buffer,
58
+ sequence_length=(n_obs_steps + n_pred_steps) * subs_factor - (subs_factor - 1),
59
+ pad_before=(n_obs_steps - 1) * subs_factor,
60
+ pad_after=(n_pred_steps - 1) * subs_factor + (subs_factor - 1),
61
+ keys=data_keys,
62
+ key_first_k=data_key_first_k,
63
+ )
64
+ self.n_obs_steps = n_obs_steps
65
+ self.n_prediction_steps = n_pred_steps
66
+ self.subs_factor = subs_factor
67
+ self.use_pc_color = use_pc_color
68
+ self.n_points = n_points
69
+ self.rng = np.random.default_rng()
70
+ return
71
+
72
+ def __len__(self) -> int:
73
+ return len(self.sampler)
74
+
75
+ def __getitem__(self, idx: int) -> tuple[torch.Tensor, ...]:
76
+ sample: dict[str, np.ndarray] = self.sampler.sample_sequence(idx)
77
+ cur_step_i = self.n_obs_steps * self.subs_factor
78
+ pcd = sample["pcd_xyz"][: cur_step_i : self.subs_factor]
79
+ if self.use_pc_color:
80
+ pcd_color = sample["pcd_color"][: cur_step_i : self.subs_factor]
81
+ pcd_color = pcd_color.astype(np.float32) / 255.0
82
+ pcd = np.concatenate([pcd, pcd_color], axis=-1)
83
+ robot_state_obs = sample["robot_state"][: cur_step_i : self.subs_factor].astype(np.float32)
84
+ robot_state_pred = sample["robot_state"][cur_step_i :: self.subs_factor].astype(np.float32)
85
+ # Random sample pcd points
86
+ if pcd.shape[1] > self.n_points:
87
+ random_indices = np.random.choice(pcd.shape[1], self.n_points, replace=False)
88
+ pcd = pcd[:, random_indices]
89
+ return pcd, robot_state_obs, robot_state_pred
90
+
91
+
92
+ if __name__ == "__main__":
93
+ dataset = RobotDatasetPcd(
94
+ data_path=DATA_DIRS.PFP / "open_fridge" / "train",
95
+ n_obs_steps=2,
96
+ n_pred_steps=8,
97
+ subs_factor=5,
98
+ use_pc_color=False,
99
+ n_points=4096,
100
+ )
101
+ i = 20
102
+ obs, robot_state_obs, robot_state_pred = dataset[i]
103
+ print("robot_state_obs: ", robot_state_obs)
104
+ print("robot_state_pred: ", robot_state_pred)
105
+ print("done")
third_party/PointFlowMatch/pfp/data/replay_buffer.py ADDED
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+ import zarr
3
+ import numpy as np
4
+ from diffusion_policy.common.replay_buffer import ReplayBuffer
5
+ from diffusion_policy.codecs.imagecodecs_numcodecs import register_codec, Jpeg2k
6
+
7
+ register_codec(Jpeg2k)
8
+
9
+
10
+ class RobotReplayBuffer(ReplayBuffer):
11
+ def __init__(self, root: zarr.Group):
12
+ super().__init__(root)
13
+ self.jpeg_compressor = Jpeg2k()
14
+ return
15
+
16
+ def add_episode_from_list(self, data_list: list[dict[str, np.ndarray]], **kwargs):
17
+ """
18
+ data_list is a list of dictionaries, where each dictionary contains the data for one step.
19
+ """
20
+ data_dict = dict()
21
+ for key in data_list[0].keys():
22
+ data_dict[key] = np.stack([x[key] for x in data_list])
23
+ self.add_episode(data_dict, **kwargs)
24
+ return
25
+
26
+ def add_episode_from_list_compressed(self, data_list: list[dict[str, np.ndarray]], **kwargs):
27
+ """
28
+ data_list is a list of dictionaries, where each dictionary contains the data for one step.
29
+ WARNING: decoding (i.e. reading) is broken.
30
+ """
31
+ data_dict = {key: np.stack([x[key] for x in data_list]) for key in data_list[0].keys()}
32
+ # get the keys starting with 'rgb*'
33
+ rgb_keys = [key for key in data_dict.keys() if key.startswith("rgb")]
34
+ rgb_shapes = [data_list[0][key].shape for key in rgb_keys]
35
+ chunks = {rgb_keys[i]: (1, *rgb_shapes[i]) for i in range(len(rgb_keys))}
36
+ compressors = {key: self.jpeg_compressor for key in rgb_keys}
37
+ self.add_episode(data_dict, chunks, compressors, **kwargs)
38
+ return
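A small sketch of writing one demo episode with the uncompressed path; the on-disk location, write mode and array shapes below are illustrative assumptions:

```python
# Sketch: store one episode of robot states and point clouds in a zarr-backed replay buffer.
import numpy as np

buffer = RobotReplayBuffer.create_from_path("demos/sim/open_fridge/train", mode="a")
episode = [
    {
        "robot_state": np.zeros(10, dtype=np.float32),
        "pcd_xyz": np.zeros((4096, 3), dtype=np.float32),
    }
    for _ in range(100)
]
buffer.add_episode_from_list(episode)
```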
third_party/PointFlowMatch/pfp/envs/__pycache__/base_env.cpython-310.pyc ADDED
Binary file (907 Bytes). View file
 
third_party/PointFlowMatch/pfp/envs/__pycache__/rlbench_env.cpython-310.pyc ADDED
Binary file (7.99 kB). View file
 
third_party/PointFlowMatch/pfp/envs/__pycache__/rlbench_runner.cpython-310.pyc ADDED
Binary file (1.6 kB). View file
 
third_party/PointFlowMatch/pfp/envs/base_env.py ADDED
@@ -0,0 +1,23 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
+ class BaseEnv(ABC):
5
+ """
6
+ The base abstract class for all envs.
7
+ """
8
+
9
+ @abstractmethod
10
+ def reset(self):
11
+ pass
12
+
13
+ @abstractmethod
14
+ def reset_rng(self):
15
+ pass
16
+
17
+ @abstractmethod
18
+ def step(self, action):
19
+ pass
20
+
21
+ @abstractmethod
22
+ def get_obs(self):
23
+ pass
third_party/PointFlowMatch/pfp/envs/rlbench_env.py ADDED
@@ -0,0 +1,247 @@
1
+ import time
2
+ import numpy as np
3
+ import open3d as o3d
4
+ import spatialmath.base as sm
5
+ from pyrep.const import RenderMode
6
+ from pfp.envs.base_env import BaseEnv
7
+ from pyrep.errors import IKError
8
+ from rlbench.environment import Environment
9
+ from rlbench.backend.observation import Observation
10
+ from rlbench.backend.exceptions import InvalidActionError
11
+ from rlbench.action_modes.action_mode import MoveArmThenGripper
12
+ from rlbench.action_modes.gripper_action_modes import Discrete
13
+ from rlbench.action_modes.arm_action_modes import EndEffectorPoseViaPlanning
14
+ from rlbench.observation_config import ObservationConfig, CameraConfig
15
+ from rlbench.utils import name_to_task_class
16
+ from pfp.common.visualization import RerunViewer as RV
17
+ from pfp.common.o3d_utils import make_pcd, merge_pcds
18
+ from pfp.common.se3_utils import rot6d_to_quat_np, pfp_to_pose_np
19
+
20
+ try:
21
+ import rerun as rr
22
+ except ImportError:
23
+ print("WARNING: Rerun not installed. Visualization will not work.")
24
+
25
+
26
+ class RLBenchEnv(BaseEnv):
27
+ """
28
+ DT = 0.05 (50ms/20Hz)
29
+ robot_state = [px, py, pz, r00, r10, r20, r01, r11, r21, gripper]
30
+ The pose is the tool-tip (TCP) frame, with x pointing backwards, y pointing left, and z pointing down.
31
+ """
32
+
33
+ def __init__(
34
+ self,
35
+ task_name: str,
36
+ voxel_size: float,
37
+ n_points: int,
38
+ use_pc_color: bool,
39
+ headless: bool,
40
+ vis: bool,
41
+ obs_mode: str = "pcd",
42
+ ):
43
+ assert obs_mode in ["pcd", "rgb"], "Invalid obs_mode"
44
+ self.obs_mode = obs_mode
45
+ # image_size=(128, 128)
46
+ self.voxel_size = voxel_size
47
+ self.n_points = n_points
48
+ self.use_pc_color = use_pc_color
49
+ camera_config = CameraConfig(
50
+ rgb=True,
51
+ depth=False,
52
+ mask=False,
53
+ point_cloud=True,
54
+ image_size=(128, 128),
55
+ render_mode=RenderMode.OPENGL,
56
+ )
57
+ obs_config = ObservationConfig(
58
+ camera_configs={
59
+ "over_shoulder_left": camera_config,
60
+ "over_shoulder_right": camera_config,
61
+ "overhead": camera_config,
62
+ "wrist": camera_config,
63
+ "front": camera_config,
64
+ },
65
+ gripper_matrix=True,
66
+ gripper_joint_positions=True,
67
+ )
68
+ # EE pose is (X,Y,Z,Qx,Qy,Qz,Qw)
69
+ action_mode = MoveArmThenGripper(
70
+ arm_action_mode=EndEffectorPoseViaPlanning(), gripper_action_mode=Discrete()
71
+ )
72
+ self.env = Environment(
73
+ action_mode,
74
+ obs_config=obs_config,
75
+ headless=headless,
76
+ )
77
+ self.env.launch()
78
+ self.task = self.env.get_task(name_to_task_class(task_name))
79
+ self.robot_position = self.env._robot.arm.get_position()
80
+ self.ws_aabb = o3d.geometry.AxisAlignedBoundingBox(
81
+ min_bound=(self.robot_position[0] + 0.1, -0.65, self.robot_position[2] - 0.05),
82
+ max_bound=(1, 0.65, 2),
83
+ )
84
+ self.vis = vis
85
+ self.last_obs = None
86
+ if self.vis:
87
+ RV.add_axis("vis/origin", np.eye(4), size=0.01, timeless=True)
88
+ RV.add_aabb(
89
+ "vis/ws_aabb", self.ws_aabb.get_center(), self.ws_aabb.get_extent(), timeless=True
90
+ )
91
+ return
92
+
93
+ def reset(self):
94
+ self.task.reset()
95
+ return
96
+
97
+ def reset_rng(self):
98
+ return
99
+
100
+ def step(self, robot_state: np.ndarray):
101
+ ee_position = robot_state[:3]
102
+ ee_quat = rot6d_to_quat_np(robot_state[3:9])
103
+ gripper = robot_state[-1:]
104
+ action = np.concatenate([ee_position, ee_quat, gripper])
105
+ reward, terminate = self._step_safe(action)
106
+ return reward, terminate
107
+
108
+ def _step_safe(self, action: np.ndarray, recursion_depth=0):
109
+ if recursion_depth > 15:
110
+ print("Warning: Recursion depth limit reached.")
111
+ return 0.0, True
112
+ try:
113
+ _, reward, terminate = self.task.step(action)
114
+ except (IKError, InvalidActionError, AttributeError, RuntimeError) as e:
115
+ print(e)
116
+ cur_position = self.last_obs.gripper_pose[:3]
117
+ des_position = action[:3]
118
+ new_position = cur_position + (des_position - cur_position) * 0.25
119
+
120
+ cur_quat = self.last_obs.gripper_pose[3:]
121
+ cur_quat = np.array([cur_quat[3], cur_quat[0], cur_quat[1], cur_quat[2]])  # xyzw -> wxyz for spatialmath
122
+ des_quat = action[3:7]
123
+ des_quat = np.array([des_quat[3], des_quat[0], des_quat[1], des_quat[2]])  # xyzw -> wxyz
124
+ new_quat = sm.qslerp(cur_quat, des_quat, 0.25, shortest=True)
125
+ new_quat = np.array([new_quat[1], new_quat[2], new_quat[3], new_quat[0]])  # wxyz -> xyzw for RLBench
126
+
127
+ new_action = np.concatenate([new_position, new_quat, action[-1:]])
128
+ reward, terminate = self._step_safe(new_action, recursion_depth + 1)
129
+ return reward, terminate
130
+
131
+ def get_obs(self) -> tuple[np.ndarray, ...]:
132
+ obs_rlbench = self.task.get_observation()
133
+ self.last_obs = obs_rlbench
134
+ robot_state = self.get_robot_state(obs_rlbench)
135
+ if self.obs_mode == "pcd":
136
+ pcd_o3d = self.get_pcd(obs_rlbench)
137
+ pcd = np.asarray(pcd_o3d.points)
138
+ if self.use_pc_color:
139
+ pcd_color = np.asarray(pcd_o3d.colors, dtype=np.float32)
140
+ pcd = np.concatenate([pcd, pcd_color], axis=-1)
141
+ obs = pcd
142
+ elif self.obs_mode == "rgb":
143
+ obs = self.get_images(obs_rlbench)
144
+ return robot_state, obs
145
+
146
+ def get_robot_state(self, obs: Observation) -> np.ndarray:
147
+ ee_position = obs.gripper_matrix[:3, 3]
148
+ ee_rot6d = obs.gripper_matrix[:3, :2].flatten(order="F")
149
+ gripper = np.array([obs.gripper_open])
150
+ robot_state = np.concatenate([ee_position, ee_rot6d, gripper])
151
+ return robot_state
152
+
153
+ def get_pcd(self, obs: Observation) -> o3d.geometry.PointCloud:
154
+ perception = obs.perception_data
155
+ right_pcd = make_pcd(
156
+ perception["over_shoulder_right_point_cloud"], perception["over_shoulder_right_rgb"]
157
+ )
158
+ left_pcd = make_pcd(
159
+ perception["over_shoulder_left_point_cloud"], perception["over_shoulder_left_rgb"]
160
+ )
161
+ overhead_pcd = make_pcd(perception["overhead_point_cloud"], perception["overhead_rgb"])
162
+ front_pcd = make_pcd(perception["front_point_cloud"], perception["front_rgb"])
163
+ wrist_pcd = make_pcd(perception["wrist_point_cloud"], perception["wrist_rgb"])
164
+ pcd_list = [right_pcd, left_pcd, overhead_pcd, front_pcd, wrist_pcd]
165
+ pcd = merge_pcds(self.voxel_size, self.n_points, pcd_list, self.ws_aabb)
166
+ return pcd
167
+
168
+ def get_images(self, obs: Observation) -> np.ndarray:
169
+ perception = obs.perception_data
170
+ images = np.stack(
171
+ (
172
+ perception["over_shoulder_right_rgb"],
173
+ perception["over_shoulder_left_rgb"],
174
+ perception["overhead_rgb"],
175
+ perception["front_rgb"],
176
+ perception["wrist_rgb"],
177
+ )
178
+ )
179
+ return images
180
+
181
+ def vis_step(self, robot_state: np.ndarray, obs: np.ndarray, prediction: np.ndarray = None):
182
+ """
183
+ robot_state: the current robot state (10,)
184
+ obs: either pcd or images
185
+ - pcd: the current point cloud (N, 6) or (N, 3)
186
+ - images: the current images (5, H, W, 3)
187
+ prediction: the full trajectory of robot states (T, 10)
188
+ """
189
+ VIS_FLOW = False
190
+ if not self.vis:
191
+ return
192
+ rr.set_time_seconds("time", time.time())
193
+
194
+ # Point cloud
195
+ if self.obs_mode == "pcd":
196
+ pcd = obs
197
+ pcd_xyz = pcd[:, :3]
198
+ pcd_color = (pcd[:, 3:6] * 255).astype(np.uint8) if self.use_pc_color else None
199
+ RV.add_np_pointcloud("vis/pcd_obs", points=pcd_xyz, colors_uint8=pcd_color, radii=0.003)
200
+
201
+ # RGB images
202
+ elif self.obs_mode == "rgb":
203
+ images = obs
204
+ for i, img in enumerate(images):
205
+ RV.add_rgb(f"vis/rgb_obs_{i}", img)
206
+
207
+ # EE State
208
+ ee_pose = pfp_to_pose_np(robot_state[np.newaxis, ...]).squeeze()
209
+ RV.add_axis("vis/ee_state", ee_pose)
210
+ rr.log("plot/gripper_state", rr.Scalar(robot_state[-1]))
211
+
212
+ if prediction is None:
213
+ return
214
+
215
+ # EE predictions
216
+ final_pred = prediction[-1]
217
+ if VIS_FLOW:
218
+ for traj in prediction:
219
+ RV.add_traj("vis/traj_k", traj)
220
+ else:
221
+ RV.add_traj("vis/ee_pred", final_pred)
222
+
223
+ # Gripper action prediction
224
+ rr.log("plot/gripper_pred", rr.Scalar(final_pred[0, -1]))
225
+ return
226
+
227
+ def close(self):
228
+ self.env.shutdown()
229
+ return
230
+
231
+
232
+ if __name__ == "__main__":
233
+ env = RLBenchEnv(
234
+ "close_microwave",
235
+ voxel_size=0.01,
236
+ n_points=5500,
237
+ use_pc_color=False,
238
+ headless=True,
239
+ vis=True,
240
+ )
241
+ env.reset()
242
+ for i in range(1000):
243
+ robot_state, pcd = env.get_obs()
244
+ next_robot_state = robot_state.copy()
245
+ next_robot_state[:3] += np.array([-0.005, 0.005, 0.0])
246
+ env.step(next_robot_state)
247
+ env.close()
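For reference, the 10-D robot state documented in the RLBenchEnv docstring packs the EE position, the first two rotation-matrix columns (column-major), and the gripper opening. A minimal numpy sketch mirroring get_robot_state above; the helper name is illustrative and not part of the repo:

import numpy as np

def pack_robot_state(ee_pose: np.ndarray, gripper_open: float) -> np.ndarray:
    """Pack a 4x4 homogeneous EE pose + gripper opening into the 10-D state."""
    position = ee_pose[:3, 3]                    # px, py, pz
    rot6d = ee_pose[:3, :2].flatten(order="F")   # r00, r10, r20, r01, r11, r21
    return np.concatenate([position, rot6d, [gripper_open]])  # shape (10,)

state = pack_robot_state(np.eye(4), 1.0)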
third_party/PointFlowMatch/pfp/envs/rlbench_runner.py ADDED
@@ -0,0 +1,46 @@
1
+ import wandb
2
+ from tqdm import tqdm
3
+ from pfp.envs.rlbench_env import RLBenchEnv
4
+ from pfp.policy.base_policy import BasePolicy
5
+
6
+
7
+ class RLBenchRunner:
8
+ def __init__(
9
+ self,
10
+ num_episodes: int,
11
+ max_episode_length: int,
12
+ env_config: dict,
13
+ verbose=False,
14
+ ) -> None:
15
+ self.env: RLBenchEnv = RLBenchEnv(**env_config)
16
+ self.num_episodes = num_episodes
17
+ self.max_episode_length = max_episode_length
18
+ self.verbose = verbose
19
+ return
20
+
21
+ def run(self, policy: BasePolicy):
22
+ wandb.define_metric("success", summary="mean")
23
+ wandb.define_metric("steps", summary="mean")
24
+ success_list: list[bool] = []
25
+ steps_list: list[int] = []
26
+ self.env.reset_rng()
27
+ for episode in tqdm(range(self.num_episodes)):
28
+ policy.reset_obs()
29
+ self.env.reset()
30
+ for step in range(self.max_episode_length):
31
+ robot_state, obs = self.env.get_obs()
32
+ prediction = policy.predict_action(obs, robot_state)
33
+ self.env.vis_step(robot_state, obs, prediction)
34
+ next_robot_state = prediction[-1, 0] # Last K step, first T step
35
+ reward, terminate = self.env.step(next_robot_state)
36
+ success = bool(reward)
37
+ if success or terminate:
38
+ break
39
+ success_list.append(success)
40
+ if success:
41
+ steps_list.append(step)
42
+ if self.verbose:
43
+ print(f"Steps: {step}")
44
+ print(f"Success: {success}")
45
+ wandb.log({"episode": episode, "success": int(success), "steps": step})
46
+ return success_list, steps_list
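A rough usage sketch for the runner, assuming an initialized wandb run and an existing checkpoint (the checkpoint name, episode tag, and hyperparameters below are placeholders, not values from the repo):

import wandb
from pfp.envs.rlbench_runner import RLBenchRunner
from pfp.policy.fm_policy import FMPolicy

env_config = dict(
    task_name="close_microwave",
    voxel_size=0.01,
    n_points=4096,
    use_pc_color=False,
    headless=True,
    vis=False,
)
wandb.init(project="pfp-eval", mode="offline")  # runner logs success/steps per episode
runner = RLBenchRunner(num_episodes=10, max_episode_length=150, env_config=env_config)
policy = FMPolicy.load_from_checkpoint("<ckpt_name>", "<ckpt_episode>", num_k_infer=10)
success_list, steps_list = runner.run(policy)
print(f"success rate: {sum(success_list) / len(success_list):.2f}")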
third_party/PointFlowMatch/pfp/policy/__pycache__/base_policy.cpython-310.pyc ADDED
Binary file (3.05 kB). View file
 
third_party/PointFlowMatch/pfp/policy/__pycache__/fm_policy.cpython-310.pyc ADDED
Binary file (9.49 kB). View file
 
third_party/PointFlowMatch/pfp/policy/base_policy.py ADDED
@@ -0,0 +1,79 @@
1
+ import torch
2
+ import numpy as np
3
+ from collections import deque
4
+ from abc import ABC, abstractmethod
5
+ from pfp import DEVICE
6
+
7
+
8
+ class BasePolicy(ABC):
9
+ """
10
+ The base abstract class for all policies.
11
+ """
12
+
13
+ def __init__(self, n_obs_steps: int, subs_factor: int = 1) -> None:
14
+ maxlen = n_obs_steps * subs_factor - (subs_factor - 1)
15
+ self.obs_list = deque(maxlen=maxlen)
16
+ self.robot_state_list = deque(maxlen=maxlen)
17
+ self.subs_factor = subs_factor
18
+ return
19
+
20
+ def reset_obs(self):
21
+ self.obs_list.clear()
22
+ self.robot_state_list.clear()
23
+ return
24
+
25
+ def update_obs_lists(self, obs: np.ndarray, robot_state: np.ndarray):
26
+ self.obs_list.append(obs)
27
+ if len(self.obs_list) < self.obs_list.maxlen:
28
+ self.obs_list.extendleft(
29
+ [self.obs_list[0]] * (self.obs_list.maxlen - len(self.obs_list))
30
+ )
31
+ self.robot_state_list.append(robot_state)
32
+ if len(self.robot_state_list) < self.robot_state_list.maxlen:
33
+ n = self.robot_state_list.maxlen - len(self.robot_state_list)
34
+ self.robot_state_list.extendleft([self.robot_state_list[0]] * n)
35
+ return
36
+
37
+ def sample_stacked_obs(self) -> tuple[np.ndarray, ...]:
38
+ obs_stacked = np.stack(self.obs_list, axis=0)[:: self.subs_factor]
39
+ robot_state_stacked = np.stack(self.robot_state_list, axis=0)[:: self.subs_factor]
40
+ return obs_stacked, robot_state_stacked
41
+
42
+ def predict_action(self, obs: np.ndarray, robot_state: np.ndarray) -> np.ndarray:
43
+ self.update_obs_lists(obs, robot_state)
44
+ obs_stacked, robot_state_stacked = self.sample_stacked_obs()
45
+ action = self.infer_from_np(obs_stacked, robot_state_stacked)
46
+ return action
47
+
48
+ def infer_from_np(self, obs: np.ndarray, robot_state: np.ndarray) -> np.ndarray:
49
+ obs_th = torch.tensor(obs, device=DEVICE).unsqueeze(0)
50
+ robot_state_th = torch.tensor(robot_state, device=DEVICE).unsqueeze(0)
51
+ obs_th = self._norm_obs(obs_th)
52
+ robot_state_th = self._norm_robot_state(robot_state_th)
53
+ ny = self.infer_y(
54
+ obs_th,
55
+ robot_state_th,
56
+ return_traj=True,
57
+ )
58
+ ny = self._denorm_robot_state(ny)
59
+ ny = ny.squeeze().detach().cpu().numpy()
60
+ # Return the full trajectory (both integration time K and horizon T)
61
+ return ny # (K, T, 10)
62
+
63
+ @abstractmethod
64
+ def _norm_obs(self, obs: torch.Tensor) -> torch.Tensor:
65
+ pass
66
+
67
+ @abstractmethod
68
+ def _norm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
69
+ pass
70
+
71
+ @abstractmethod
72
+ def _denorm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
73
+ pass
74
+
75
+ @abstractmethod
76
+ def infer_y(
77
+ self, obs: torch.Tensor, robot_state: torch.Tensor, return_traj: bool
78
+ ) -> torch.Tensor:
79
+ pass
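The history buffering above keeps the newest n_obs_steps * subs_factor - (subs_factor - 1) frames, left-pads with the oldest frame until the deque is full, and then subsamples with stride subs_factor. A small standalone illustration of that indexing (plain Python, not part of the repo):

from collections import deque
import numpy as np

n_obs_steps, subs_factor = 2, 3
maxlen = n_obs_steps * subs_factor - (subs_factor - 1)   # -> 4
buf = deque(maxlen=maxlen)
for t in range(6):
    buf.append(np.array([t]))
    if len(buf) < maxlen:
        buf.extendleft([buf[0]] * (maxlen - len(buf)))    # repeat oldest frame as padding
    stacked = np.stack(buf, axis=0)[:: subs_factor]       # 2 obs, subs_factor steps apart
    print(t, stacked.ravel())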
third_party/PointFlowMatch/pfp/policy/ddim_policy.py ADDED
@@ -0,0 +1,237 @@
1
+ from __future__ import annotations
2
+ import copy
3
+ import hydra
4
+ import torch
5
+ import torch.nn as nn
6
+ from omegaconf import OmegaConf
7
+ from composer.models import ComposerModel
8
+ from diffusers.schedulers.scheduling_ddim import DDIMScheduler
9
+ from pfp.policy.base_policy import BasePolicy
10
+ from pfp import DEVICE, REPO_DIRS
11
+
12
+
13
+ class DDIMPolicy(ComposerModel, BasePolicy):
14
+ """Class to train the DDIM diffusion model"""
15
+
16
+ def __init__(
17
+ self,
18
+ x_dim: int,
19
+ y_dim: int,
20
+ n_obs_steps: int,
21
+ n_pred_steps: int,
22
+ num_k_train: int,
23
+ num_k_infer: int,
24
+ obs_encoder: nn.Module,
25
+ diffusion_net: nn.Module,
26
+ noise_scheduler_train: DDIMScheduler,
27
+ augment_data: bool = False,
28
+ loss_weights: dict[int] = None,
29
+ norm_pcd_center: list = None,
30
+ ) -> None:
31
+ ComposerModel.__init__(self)
32
+ BasePolicy.__init__(self, n_obs_steps)
33
+ self.x_dim = x_dim
34
+ self.y_dim = y_dim
35
+ self.n_obs_steps = n_obs_steps
36
+ self.n_pred_steps = n_pred_steps
37
+ self.num_k_train = num_k_train
38
+ self.num_k_infer = num_k_infer
39
+ self.obs_encoder = obs_encoder
40
+ self.diffusion_net = diffusion_net
41
+ self.norm_pcd_center = norm_pcd_center
42
+ self.augment_data = augment_data
43
+ # It's easier to have two different schedulers for training and eval/inference
44
+ self.noise_scheduler_train = noise_scheduler_train
45
+ self.noise_scheduler_infer = copy.deepcopy(noise_scheduler_train)
46
+ self.noise_scheduler_infer.set_timesteps(num_k_infer)
47
+ self.ny_shape = (n_pred_steps, y_dim)
48
+ self.l_w = loss_weights
49
+ return
50
+
51
+ def set_num_k_infer(self, num_k_infer: int):
52
+ self.num_k_infer = num_k_infer
53
+ self.noise_scheduler_infer.set_timesteps(num_k_infer)
54
+ return
55
+
56
+ def _norm_obs(self, pcd: torch.Tensor) -> torch.Tensor:
57
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
58
+ pcd[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
59
+ return pcd
60
+
61
+ def _norm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
62
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
63
+ robot_state[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
64
+ robot_state[..., 9] -= torch.tensor(0.5, device=DEVICE)
65
+ return robot_state
66
+
67
+ def _denorm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
68
+ robot_state[..., :3] += torch.tensor(self.norm_pcd_center, device=DEVICE)
69
+ robot_state[..., 9] += torch.tensor(0.5, device=DEVICE)
70
+ return robot_state
71
+
72
+ def _norm_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
73
+ pcd, robot_state_obs, robot_state_pred = batch
74
+ pcd = self._norm_obs(pcd)
75
+ robot_state_obs = self._norm_robot_state(robot_state_obs)
76
+ robot_state_pred = self._norm_robot_state(robot_state_pred)
77
+ return pcd, robot_state_obs, robot_state_pred
78
+
79
+ def _rand_range(self, low: float, high: float, size: tuple[int]) -> torch.Tensor:
80
+ return torch.rand(size, device=DEVICE) * (high - low) + low
81
+
82
+ def _augment_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
83
+ pcd, robot_state_obs, robot_state_pred = batch
84
+
85
+ # xyz1 = self._rand_range(low=0.8, high=1.2, size=(3,))
86
+ xyz2 = self._rand_range(low=-0.2, high=0.2, size=(3,))
87
+ pcd[..., :3] = pcd[..., :3] + xyz2 # * xyz1 + xyz2
88
+ robot_state_obs[..., :3] = robot_state_obs[..., :3] + xyz2 # * xyz1 + xyz2
89
+ robot_state_pred[..., :3] = robot_state_pred[..., :3] + xyz2 # * xyz1 + xyz2
90
+
91
+ # We shuffle the points, i.e. shuffle pcd along dim=2 (B, T, P, 3)
92
+ idx = torch.randperm(pcd.shape[2])
93
+ pcd = pcd[:, :, idx, :]
94
+ return pcd, robot_state_obs, robot_state_pred
95
+
96
+ # ########### TRAIN ###########
97
+
98
+ def forward(self, batch):
99
+ """batch: the output of the dataloader"""
100
+ return 0
101
+
102
+ def loss(self, outputs, batch: tuple[torch.Tensor, ...]) -> torch.Tensor:
103
+ """
104
+ outputs: the output of the forward pass
105
+ batch: the output of the dataloader
106
+ """
107
+ with torch.no_grad():
108
+ batch = self._norm_data(batch)
109
+ if self.augment_data:
110
+ batch = self._augment_data(batch)
111
+ pcd, robot_state_obs, robot_state_pred = batch
112
+ noise_pred, noise = self.train_noise(pcd, robot_state_obs, robot_state_pred)
113
+ loss_xyz = nn.functional.mse_loss(noise_pred[..., :3], noise[..., :3])
114
+ loss_rot6d = nn.functional.mse_loss(noise_pred[..., 3:9], noise[..., 3:9])
115
+ loss_grip = nn.functional.mse_loss(noise_pred[..., 9], noise[..., 9])
116
+ loss = (
117
+ self.l_w["xyz"] * loss_xyz
118
+ + self.l_w["rot6d"] * loss_rot6d
119
+ + self.l_w["grip"] * loss_grip
120
+ )
121
+ self.logger.log_metrics(
122
+ {
123
+ "loss/train/xyz": loss_xyz.item(),
124
+ "loss/train/rot6d": loss_rot6d.item(),
125
+ "loss/train/grip": loss_grip.item(),
126
+ }
127
+ )
128
+ return loss
129
+
130
+ def train_noise(
131
+ self, pcd: torch.Tensor, robot_state_obs: torch.Tensor, robot_state_pred: torch.Tensor
132
+ ) -> tuple[torch.Tensor, ...]:
133
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
134
+ ny: torch.Tensor = robot_state_pred
135
+ B = nx.shape[0]
136
+ noise = torch.randn(ny.shape).to(DEVICE)
137
+ timesteps = torch.randint(0, self.num_k_train, (B,)).long().to(DEVICE)
138
+ noisy_y = self.noise_scheduler_train.add_noise(ny, noise, timesteps)
139
+ noise_pred = self.diffusion_net(noisy_y, timesteps.float(), global_cond=nx)
140
+ return noise_pred, noise
141
+
142
+ # ########### EVAL ###########
143
+
144
+ def eval_forward(self, batch: tuple[torch.Tensor, ...], outputs=None) -> torch.Tensor:
145
+ """
146
+ batch: the output of the eval dataloader
147
+ outputs: the output of the forward pass
148
+ """
149
+ batch = self._norm_data(batch)
150
+ pcd, robot_state_obs, robot_state_pred = batch
151
+ pred_y = self.infer_y(pcd, robot_state_obs)
152
+ mse_xyz = nn.functional.mse_loss(pred_y[..., :3], robot_state_pred[..., :3])
153
+ mse_rot6d = nn.functional.mse_loss(pred_y[..., 3:9], robot_state_pred[..., 3:9])
154
+ mse_grip = nn.functional.mse_loss(pred_y[..., 9], robot_state_pred[..., 9])
155
+ self.logger.log_metrics(
156
+ {
157
+ "metrics/eval/mse_xyz": mse_xyz.item(),
158
+ "metrics/eval/mse_rot6d": mse_rot6d.item(),
159
+ "metrics/eval/mse_grip": mse_grip.item(),
160
+ }
161
+ )
162
+ return pred_y
163
+
164
+ def infer_y(
165
+ self,
166
+ pcd: torch.Tensor,
167
+ robot_state_obs: torch.Tensor,
168
+ noise=None,
169
+ return_traj=False,
170
+ ) -> torch.Tensor:
171
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
172
+ if noise is None:
173
+ B = nx.shape[0]
174
+ noise = torch.randn((B, *self.ny_shape), device=DEVICE)
175
+
176
+ ny = noise
177
+ traj = [ny]
178
+ for k in self.noise_scheduler_infer.timesteps:
179
+ noise_pred = self.diffusion_net(ny, k, global_cond=nx)
180
+ if self.num_k_infer == 1:
181
+ print("one step generation")
182
+ ny = self.noise_scheduler_infer.step(
183
+ model_output=noise_pred,
184
+ timestep=k,
185
+ sample=ny,
186
+ ).pred_original_sample
187
+ else:
188
+ ny = self.noise_scheduler_infer.step(
189
+ model_output=noise_pred,
190
+ timestep=k,
191
+ sample=ny,
192
+ ).prev_sample
193
+ traj.append(ny)
194
+ if return_traj:
195
+ return torch.stack(traj)
196
+ return traj[-1]
197
+
198
+ @classmethod
199
+ def load_from_checkpoint(
200
+ cls,
201
+ ckpt_name: str,
202
+ ckpt_episode: str,
203
+ num_k_infer: int = None,
204
+ **kwargs,
205
+ ):
206
+ ckpt_dir = REPO_DIRS.CKPT / ckpt_name
207
+ ckpt_path_list = list(ckpt_dir.glob(f"{ckpt_episode}*"))
208
+ assert len(ckpt_path_list) > 0, f"No checkpoint found in {ckpt_dir} with {ckpt_episode}"
209
+ assert len(ckpt_path_list) < 2, f"Multiple ckpts found in {ckpt_dir} with {ckpt_episode}"
210
+ ckpt_fpath = ckpt_path_list[0]
211
+
212
+ state_dict = torch.load(ckpt_fpath, map_location=DEVICE)
213
+ cfg = OmegaConf.load(ckpt_dir / "config.yaml")
214
+ assert cfg.model._target_.split(".")[-1] == cls.__name__
215
+ model: DDIMPolicy = hydra.utils.instantiate(cfg.model)
216
+ model.load_state_dict(state_dict["state"]["model"])
217
+ model.to(DEVICE)
218
+ model.eval()
219
+ if num_k_infer is not None:
220
+ model.set_num_k_infer(num_k_infer)
221
+ return model
222
+
223
+
224
+ class DDIMPolicyImage(DDIMPolicy):
225
+
226
+ def _norm_obs(self, image: torch.Tensor) -> torch.Tensor:
227
+ """
228
+ Image normalization is already done in the backbone, so here we just make it float
229
+ """
230
+ image = image.float() / 255.0
231
+ return image
232
+
233
+
234
+ if __name__ == "__main__":
235
+ ckpt_name = "1714199471-peculiar-earthworm"
236
+ model = DDIMPolicy.load_from_checkpoint(ckpt_name, num_k_infer=10)
237
+ print(model.obs_list)
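The policy expects a diffusers DDIMScheduler for noise_scheduler_train (a copy is re-timestepped for inference). A minimal construction that is compatible with how the scheduler is used here; the hyperparameters are illustrative rather than taken from the repo configs:

from diffusers.schedulers.scheduling_ddim import DDIMScheduler

noise_scheduler_train = DDIMScheduler(
    num_train_timesteps=100,           # should match num_k_train
    beta_schedule="squaredcos_cap_v2",
    clip_sample=True,
    prediction_type="epsilon",         # the network regresses the added noise
)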
third_party/PointFlowMatch/pfp/policy/fm_5p_policy.py ADDED
@@ -0,0 +1,290 @@
1
+ from __future__ import annotations
2
+ import hydra
3
+ import torch
4
+ import torch.nn as nn
5
+ import pypose as pp
6
+ from omegaconf import OmegaConf
7
+ from composer.models import ComposerModel
8
+ from pfp.policy.base_policy import BasePolicy
9
+ from pfp import DEVICE, REPO_DIRS
10
+ from pfp.common.fm_utils import get_timesteps
11
+ from pfp.common.se3_utils import pfp_to_state5p_th, state5p_to_pfp_th
12
+
13
+
14
+ class FM5PPolicy(ComposerModel, BasePolicy):
15
+ def __init__(
16
+ self,
17
+ x_dim: int,
18
+ y_dim: int,
19
+ n_obs_steps: int,
20
+ n_pred_steps: int,
21
+ num_k_infer: int,
22
+ time_conditioning: bool,
23
+ obs_encoder: nn.Module,
24
+ diffusion_net: nn.Module,
25
+ augment_data: bool = False,
26
+ loss_weights: dict[int] = None,
27
+ pos_emb_scale: int = 20,
28
+ norm_pcd_center: list = None,
29
+ noise_type: str = "gaussian",
30
+ noise_scale: float = 1.0,
31
+ loss_type: str = "l2",
32
+ flow_schedule: str = "linear",
33
+ exp_scale: float = None,
34
+ ) -> None:
35
+ ComposerModel.__init__(self)
36
+ BasePolicy.__init__(self, n_obs_steps)
37
+ self.x_dim = x_dim
38
+ self.y_dim = y_dim
39
+ self.n_obs_steps = n_obs_steps
40
+ self.n_pred_steps = n_pred_steps
41
+ self.pos_emb_scale = pos_emb_scale
42
+ self.num_k_infer = num_k_infer
43
+ self.time_conditioning = time_conditioning
44
+ self.obs_encoder = obs_encoder
45
+ self.diffusion_net = diffusion_net
46
+ self.norm_pcd_center = norm_pcd_center
47
+ self.augment_data = augment_data
48
+ self.noise_type = noise_type
49
+ self.noise_scale = noise_scale
50
+ self.ny_shape = (n_pred_steps, y_dim)
51
+ self.l_w = loss_weights
52
+ self.flow_schedule = flow_schedule
53
+ self.exp_scale = exp_scale
54
+ if loss_type == "l2":
55
+ self.loss_fun = nn.MSELoss()
56
+ elif loss_type == "l1":
57
+ self.loss_fun = nn.L1Loss()
58
+ else:
59
+ raise NotImplementedError
60
+ return
61
+
62
+ def set_num_k_infer(self, num_k_infer: int):
63
+ self.num_k_infer = num_k_infer
64
+ return
65
+
66
+ def set_flow_schedule(self, flow_schedule: str, exp_scale: float):
67
+ self.flow_schedule = flow_schedule
68
+ self.exp_scale = exp_scale
69
+ return
70
+
71
+ def _norm_obs(self, pcd: torch.Tensor) -> torch.Tensor:
72
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
73
+ pcd[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
74
+ return pcd
75
+
76
+ def _norm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
77
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
78
+ robot_state[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
79
+ robot_state[..., 9] -= torch.tensor(0.5, device=DEVICE)
80
+ return robot_state
81
+
82
+ def _denorm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
83
+ robot_state[..., :3] += torch.tensor(self.norm_pcd_center, device=DEVICE)
84
+ robot_state[..., 9] += torch.tensor(0.5, device=DEVICE)
85
+ return robot_state
86
+
87
+ def _norm_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
88
+ pcd, robot_state_obs, robot_state_pred = batch
89
+ pcd = self._norm_obs(pcd)
90
+ robot_state_obs = self._norm_robot_state(robot_state_obs)
91
+ robot_state_pred = self._norm_robot_state(robot_state_pred)
92
+ return pcd, robot_state_obs, robot_state_pred
93
+
94
+ def _rand_range(self, low: float, high: float, size: tuple[int]) -> torch.Tensor:
95
+ return torch.rand(size, device=DEVICE) * (high - low) + low
96
+
97
+ def _augment_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
98
+ pcd, robot_state_obs, robot_state_pred = batch
99
+
100
+ # xyz1 = self._rand_range(low=0.8, high=1.2, size=(3,))
101
+ xyz2 = self._rand_range(low=-0.2, high=0.2, size=(3,))
102
+ pcd[..., :3] = pcd[..., :3] + xyz2 # * xyz1 + xyz2
103
+ robot_state_obs[..., :3] = robot_state_obs[..., :3] + xyz2 # * xyz1 + xyz2
104
+ robot_state_pred[..., :3] = robot_state_pred[..., :3] + xyz2 # * xyz1 + xyz2
105
+
106
+ # We shuffle the points, i.e. shuffle pcd along dim=2 (B, T, P, 3)
107
+ idx = torch.randperm(pcd.shape[2])
108
+ pcd = pcd[:, :, idx, :]
109
+ return pcd, robot_state_obs, robot_state_pred
110
+
111
+ def _init_noise(self, batch_size: int) -> torch.Tensor:
112
+ B = batch_size
113
+ T = self.n_pred_steps
114
+ noise_poses = pp.randn_SE3((B, T), device=DEVICE).matrix()
115
+ noise_gripper = torch.randn((B, T, 1), device=DEVICE)
116
+ noise_pfp = torch.cat(
117
+ [
118
+ noise_poses[..., :3, 3],
119
+ noise_poses[..., :3, 0],
120
+ noise_poses[..., :3, 1],
121
+ noise_gripper,
122
+ ],
123
+ dim=-1,
124
+ )
125
+ noise_5p = pfp_to_state5p_th(noise_pfp)
126
+ return noise_5p
127
+
128
+ def _init_target(self, ny: torch.Tensor) -> torch.Tensor:
129
+ """
130
+ ny: (B, T, 10) -> xyz, rot6d, grip
131
+ """
132
+ target_5p = pfp_to_state5p_th(ny)
133
+ return target_5p
134
+
135
+ # ############### Training ################
136
+
137
+ def forward(self, batch):
138
+ """batch is the output of the dataloader"""
139
+ return 0
140
+
141
+ def loss(self, outputs, batch: tuple[torch.Tensor, ...]) -> torch.Tensor:
142
+ """
143
+ outputs: the output of the forward pass
144
+ batch: the output of the dataloader
145
+ """
146
+ with torch.no_grad():
147
+ batch = self._norm_data(batch)
148
+ if self.augment_data:
149
+ batch = self._augment_data(batch)
150
+ pcd, robot_state_obs, robot_state_pred = batch
151
+ loss_5p, loss_grip = self.calculate_loss(pcd, robot_state_obs, robot_state_pred)
152
+ loss = self.l_w["5p"] * loss_5p + self.l_w["grip"] * loss_grip
153
+ self.logger.log_metrics(
154
+ {
155
+ "loss/train/5p": loss_5p.item(),
156
+ "loss/train/grip": loss_grip.item(),
157
+ }
158
+ )
159
+ return loss
160
+
161
+ def calculate_loss(
162
+ self, pcd: torch.Tensor, robot_state_obs: torch.Tensor, robot_state_pred: torch.Tensor
163
+ ):
164
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
165
+ ny: torch.Tensor = robot_state_pred
166
+
167
+ B = ny.shape[0]
168
+ T = ny.shape[1]
169
+
170
+ # Sample random time step
171
+ t_shape = [1] * len(ny.shape)
172
+ t_shape[0] = ny.shape[0] # B
173
+ t = torch.rand(t_shape, device=DEVICE)
174
+
175
+ # Initialize start and end poses + gripper state
176
+ z0_5p = self._init_noise(B)
177
+ z1_5p = self._init_target(ny)
178
+
179
+ # Move to intermediate step
180
+ z_t = t * z1_5p + (1.0 - t) * z0_5p
181
+
182
+ # Calculate relative change between them
183
+ target_vel = z1_5p - z0_5p
184
+
185
+ # Do prediction
186
+ timesteps = t.squeeze() * self.pos_emb_scale if self.time_conditioning else None
187
+ pred_vel = self.diffusion_net(z_t, timesteps, global_cond=nx)
188
+ assert pred_vel.shape == (B, T, 16)
189
+
190
+ # Calculate loss
191
+ loss_5p = self.loss_fun(pred_vel[..., :15], target_vel[..., :15])
192
+ loss_grip = self.loss_fun(pred_vel[..., 15], target_vel[..., 15])
193
+ return loss_5p, loss_grip
194
+
195
+ # ############### Inference ################
196
+
197
+ def eval_forward(self, batch: tuple[torch.Tensor, ...], outputs=None) -> torch.Tensor:
198
+ """
199
+ batch: the output of the eval dataloader
200
+ outputs: the output of the forward pass
201
+ """
202
+ batch = self._norm_data(batch)
203
+ pcd, robot_state_obs, robot_state_pred = batch
204
+
205
+ # Eval loss
206
+ loss_5p, loss_grip = self.calculate_loss(pcd, robot_state_obs, robot_state_pred)
207
+ loss_total = self.l_w["5p"] * loss_5p + self.l_w["grip"] * loss_grip
208
+ self.logger.log_metrics(
209
+ {
210
+ "loss/eval/5p": loss_5p.item(),
211
+ "loss/eval/grip": loss_grip.item(),
212
+ "loss/eval/total": loss_total.item(),
213
+ }
214
+ )
215
+
216
+ # Eval metrics
217
+ pred_y = self.infer_y(pcd, robot_state_obs)
218
+ mse_xyz = nn.functional.mse_loss(pred_y[..., :3], robot_state_pred[..., :3])
219
+ mse_rot6d = nn.functional.mse_loss(pred_y[..., 3:9], robot_state_pred[..., 3:9])
220
+ mse_grip = nn.functional.mse_loss(pred_y[..., 9], robot_state_pred[..., 9])
221
+ self.logger.log_metrics(
222
+ {
223
+ "metrics/eval/mse_xyz": mse_xyz.item(),
224
+ "metrics/eval/mse_rot6d": mse_rot6d.item(),
225
+ "metrics/eval/mse_grip": mse_grip.item(),
226
+ }
227
+ )
228
+ return pred_y
229
+
230
+ def infer_y(
231
+ self,
232
+ pcd: torch.Tensor,
233
+ robot_state_obs: torch.Tensor,
234
+ noise=None,
235
+ return_traj=False,
236
+ ) -> torch.Tensor:
237
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
238
+ B = nx.shape[0]
239
+ z = self._init_noise(B) if noise is None else noise
240
+ traj = [state5p_to_pfp_th(z)]
241
+ t0, dt = get_timesteps(self.flow_schedule, self.num_k_infer, exp_scale=self.exp_scale)
242
+ for i in range(self.num_k_infer):
243
+ timesteps = torch.ones((B), device=DEVICE) * t0[i]
244
+ timesteps *= self.pos_emb_scale
245
+ vel_pred = self.diffusion_net(z, timesteps, global_cond=nx)
246
+ z = z.detach().clone() + vel_pred * dt[i]
247
+ traj.append(state5p_to_pfp_th(z))
248
+
249
+ if return_traj:
250
+ return torch.stack(traj)
251
+ return traj[-1]
252
+
253
+ @classmethod
254
+ def load_from_checkpoint(
255
+ cls,
256
+ ckpt_name: str,
257
+ ckpt_episode: str,
258
+ num_k_infer: int,
259
+ flow_schedule: str = None,
260
+ exp_scale: float = None,
261
+ ):
262
+ ckpt_dir = REPO_DIRS.CKPT / ckpt_name
263
+ ckpt_path_list = list(ckpt_dir.glob(f"{ckpt_episode}*"))
264
+ assert len(ckpt_path_list) > 0, f"No checkpoint found in {ckpt_dir} with {ckpt_episode}"
265
+ assert len(ckpt_path_list) < 2, f"Multiple ckpts found in {ckpt_dir} with {ckpt_episode}"
266
+ ckpt_fpath = ckpt_path_list[0]
267
+
268
+ state_dict = torch.load(ckpt_fpath, map_location=DEVICE)
269
+ cfg = OmegaConf.load(ckpt_dir / "config.yaml")
270
+ # cfg.model.obs_encoder.encoder.random_crop = False
271
+ assert cfg.model._target_.split(".")[-1] == cls.__name__
272
+ model: FM5PPolicy = hydra.utils.instantiate(cfg.model)
273
+ model.load_state_dict(state_dict["state"]["model"])
274
+ model.to(DEVICE)
275
+ model.eval()
276
+ if flow_schedule is not None:
277
+ model.set_flow_schedule(flow_schedule, exp_scale)
278
+ if num_k_infer is not None:
279
+ model.set_num_k_infer(num_k_infer)
280
+ return model
281
+
282
+
283
+ class FM5PPolicyImage(FM5PPolicy):
284
+
285
+ def _norm_obs(self, image: torch.Tensor) -> torch.Tensor:
286
+ """
287
+ Image normalization is already done in the backbone, so here we just make it float
288
+ """
289
+ image = image.float() / 255.0
290
+ return image
third_party/PointFlowMatch/pfp/policy/fm_policy.py ADDED
@@ -0,0 +1,298 @@
1
+ from __future__ import annotations
2
+ import hydra
3
+ import torch
4
+ import torch.nn as nn
5
+ import pypose as pp
6
+ from omegaconf import OmegaConf
7
+ from pfp.policy.base_policy import BasePolicy
8
+ from pfp import DEVICE, REPO_DIRS
9
+ from pfp.common.se3_utils import init_random_traj_th
10
+ from pfp.common.fm_utils import get_timesteps
11
+ from pfp.data.dataset_pcd import augment_pcd_data
12
+
13
+ try:
14
+ from composer.models import ComposerModel
15
+ except Exception:
16
+ class _NullLogger:
17
+ def log_metrics(self, *args, **kwargs):
18
+ return
19
+
20
+ class ComposerModel(nn.Module):
21
+ def __init__(self):
22
+ super().__init__()
23
+ self.logger = _NullLogger()
24
+
25
+
26
+ class FMPolicy(ComposerModel, BasePolicy):
27
+ def __init__(
28
+ self,
29
+ x_dim: int,
30
+ y_dim: int,
31
+ n_obs_steps: int,
32
+ n_pred_steps: int,
33
+ num_k_infer: int,
34
+ time_conditioning: bool,
35
+ obs_encoder: nn.Module,
36
+ diffusion_net: nn.Module,
37
+ augment_data: bool = False,
38
+ loss_weights: dict[int] = None,
39
+ pos_emb_scale: int = 20,
40
+ norm_pcd_center: list = None,
41
+ noise_type: str = "gaussian",
42
+ noise_scale: float = 1.0,
43
+ loss_type: str = "l2",
44
+ flow_schedule: str = "linear",
45
+ exp_scale: float = None,
46
+ snr_sampler: str = "uniform",
47
+ subs_factor: int = 1,
48
+ ) -> None:
49
+ ComposerModel.__init__(self)
50
+ BasePolicy.__init__(self, n_obs_steps, subs_factor)
51
+ self.x_dim = x_dim
52
+ self.y_dim = y_dim
53
+ self.n_obs_steps = n_obs_steps
54
+ self.n_pred_steps = n_pred_steps
55
+ self.pos_emb_scale = pos_emb_scale
56
+ self.num_k_infer = num_k_infer
57
+ self.time_conditioning = time_conditioning
58
+ self.obs_encoder = obs_encoder
59
+ self.diffusion_net = diffusion_net
60
+ self.norm_pcd_center = norm_pcd_center
61
+ self.augment_data = augment_data
62
+ self.noise_type = noise_type
63
+ self.noise_scale = noise_scale
64
+ self.ny_shape = (n_pred_steps, y_dim)
65
+ self.l_w = loss_weights
66
+ self.flow_schedule = flow_schedule
67
+ self.exp_scale = exp_scale
68
+ self.snr_sampler = snr_sampler
69
+ if loss_type == "l2":
70
+ self.loss_fun = nn.MSELoss()
71
+ elif loss_type == "l1":
72
+ self.loss_fun = nn.L1Loss()
73
+ else:
74
+ raise NotImplementedError
75
+ return
76
+
77
+ def set_num_k_infer(self, num_k_infer: int):
78
+ self.num_k_infer = num_k_infer
79
+ return
80
+
81
+ def set_flow_schedule(self, flow_schedule: str, exp_scale: float):
82
+ self.flow_schedule = flow_schedule
83
+ self.exp_scale = exp_scale
84
+ return
85
+
86
+ def _norm_obs(self, pcd: torch.Tensor) -> torch.Tensor:
87
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
88
+ pcd[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
89
+ return pcd
90
+
91
+ def _norm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
92
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
93
+ robot_state[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
94
+ robot_state[..., 9] -= torch.tensor(0.5, device=DEVICE)
95
+ return robot_state
96
+
97
+ def _denorm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
98
+ robot_state[..., :3] += torch.tensor(self.norm_pcd_center, device=DEVICE)
99
+ robot_state[..., 9] += torch.tensor(0.5, device=DEVICE)
100
+ return robot_state
101
+
102
+ def _norm_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
103
+ pcd, robot_state_obs, robot_state_pred = batch
104
+ pcd = self._norm_obs(pcd)
105
+ robot_state_obs = self._norm_robot_state(robot_state_obs)
106
+ robot_state_pred = self._norm_robot_state(robot_state_pred)
107
+ return pcd, robot_state_obs, robot_state_pred
108
+
109
+ def _augment_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
110
+ return augment_pcd_data(batch)
111
+
112
+ def _init_noise(self, batch_size: int) -> torch.Tensor:
113
+ B = batch_size
114
+ T = self.n_pred_steps
115
+ if self.noise_type == "gaussian":
116
+ noise = torch.randn((batch_size, *self.ny_shape), device=DEVICE)
117
+ return noise * self.noise_scale
118
+ elif self.noise_type == "trajectory":
119
+ return init_random_traj_th(batch_size, self.n_pred_steps, self.noise_scale)
120
+ elif self.noise_type == "igso3":
121
+ noise_pos = torch.randn((B, T, 3), device=DEVICE)
122
+ noise_rot = pp.randn_SO3((B, T), device=DEVICE).matrix()
123
+ noise_gripper = torch.randn((B, T, 1), device=DEVICE)
124
+ noise = torch.cat(
125
+ [noise_pos, noise_rot[..., :3, 0], noise_rot[..., :3, 1], noise_gripper], dim=-1
126
+ )
127
+ return noise
128
+ else:
129
+ raise NotImplementedError
130
+
131
+ def _sample_snr(self, batch_size: int) -> torch.Tensor:
132
+ if self.snr_sampler == "uniform":
133
+ return torch.rand((batch_size, 1, 1), device=DEVICE)
134
+ elif self.snr_sampler == "logit_normal":
135
+ return torch.sigmoid(torch.randn((batch_size, 1, 1), device=DEVICE))
136
+ else:
137
+ raise NotImplementedError
138
+
139
+ # ############### Training ################
140
+
141
+ def forward(self, batch):
142
+ """batch is the output of the dataloader"""
143
+ return 0
144
+
145
+ def loss(self, outputs, batch: tuple[torch.Tensor, ...]) -> torch.Tensor:
146
+ """
147
+ outputs: the output of the forward pass
148
+ batch: the output of the dataloader
149
+ """
150
+ with torch.no_grad():
151
+ batch = self._norm_data(batch)
152
+ if self.augment_data:
153
+ batch = self._augment_data(batch)
154
+ pcd, robot_state_obs, robot_state_pred = batch
155
+ loss_xyz, loss_rot6d, loss_grip = self.calculate_loss(
156
+ pcd, robot_state_obs, robot_state_pred
157
+ )
158
+ loss = (
159
+ self.l_w["xyz"] * loss_xyz
160
+ + self.l_w["rot6d"] * loss_rot6d
161
+ + self.l_w["grip"] * loss_grip
162
+ )
163
+ self.logger.log_metrics(
164
+ {
165
+ "loss/train/xyz": loss_xyz.item(),
166
+ "loss/train/rot6d": loss_rot6d.item(),
167
+ "loss/train/grip": loss_grip.item(),
168
+ }
169
+ )
170
+ return loss
171
+
172
+ def calculate_loss(
173
+ self, pcd: torch.Tensor, robot_state_obs: torch.Tensor, robot_state_pred: torch.Tensor
174
+ ):
175
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
176
+ ny: torch.Tensor = robot_state_pred
177
+
178
+ B = ny.shape[0]
179
+ t = self._sample_snr(B)
180
+ z0 = self._init_noise(ny.shape[0])
181
+ z1 = ny
182
+ z_t = t * z1 + (1.0 - t) * z0
183
+ target_vel = z1 - z0
184
+ timesteps = t.squeeze() * self.pos_emb_scale if self.time_conditioning else None
185
+ pred_vel = self.diffusion_net(z_t, timesteps, global_cond=nx)
186
+ loss_xyz = self.loss_fun(pred_vel[..., :3], target_vel[..., :3])
187
+ loss_rot6d = self.loss_fun(pred_vel[..., 3:9], target_vel[..., 3:9])
188
+ loss_grip = self.loss_fun(pred_vel[..., 9], target_vel[..., 9])
189
+ return loss_xyz, loss_rot6d, loss_grip
190
+
191
+ # ############### Inference ################
192
+
193
+ def eval_forward(self, batch: tuple[torch.Tensor, ...], outputs=None) -> torch.Tensor:
194
+ """
195
+ batch: the output of the eval dataloader
196
+ outputs: the output of the forward pass
197
+ """
198
+ batch = self._norm_data(batch)
199
+ pcd, robot_state_obs, robot_state_pred = batch
200
+
201
+ # Eval loss
202
+ loss_xyz, loss_rot6d, loss_grip = self.calculate_loss(
203
+ pcd, robot_state_obs, robot_state_pred
204
+ )
205
+ loss_total = (
206
+ self.l_w["xyz"] * loss_xyz
207
+ + self.l_w["rot6d"] * loss_rot6d
208
+ + self.l_w["grip"] * loss_grip
209
+ )
210
+ self.logger.log_metrics(
211
+ {
212
+ "loss/eval/xyz": loss_xyz.item(),
213
+ "loss/eval/rot6d": loss_rot6d.item(),
214
+ "loss/eval/grip": loss_grip.item(),
215
+ "loss/eval/total": loss_total.item(),
216
+ }
217
+ )
218
+
219
+ # Eval metrics
220
+ pred_y = self.infer_y(pcd, robot_state_obs)
221
+ mse_xyz = nn.functional.mse_loss(pred_y[..., :3], robot_state_pred[..., :3])
222
+ mse_rot6d = nn.functional.mse_loss(pred_y[..., 3:9], robot_state_pred[..., 3:9])
223
+ mse_grip = nn.functional.mse_loss(pred_y[..., 9], robot_state_pred[..., 9])
224
+ self.logger.log_metrics(
225
+ {
226
+ "metrics/eval/mse_xyz": mse_xyz.item(),
227
+ "metrics/eval/mse_rot6d": mse_rot6d.item(),
228
+ "metrics/eval/mse_grip": mse_grip.item(),
229
+ }
230
+ )
231
+ return pred_y
232
+
233
+ def infer_y(
234
+ self,
235
+ pcd: torch.Tensor,
236
+ robot_state_obs: torch.Tensor,
237
+ noise=None,
238
+ return_traj=False,
239
+ ) -> torch.Tensor:
240
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
241
+ B = nx.shape[0]
242
+ z = self._init_noise(B) if noise is None else noise
243
+ traj = [z]
244
+ t0, dt = get_timesteps(self.flow_schedule, self.num_k_infer, exp_scale=self.exp_scale)
245
+ for i in range(self.num_k_infer):
246
+ timesteps = torch.ones((B), device=DEVICE) * t0[i]
247
+ timesteps *= self.pos_emb_scale
248
+ vel_pred = self.diffusion_net(z, timesteps, global_cond=nx)
249
+ z = z.detach().clone() + vel_pred * dt[i]
250
+ traj.append(z)
251
+
252
+ if return_traj:
253
+ return torch.stack(traj)
254
+ return traj[-1]
255
+
256
+ @classmethod
257
+ def load_from_checkpoint(
258
+ cls,
259
+ ckpt_name: str,
260
+ ckpt_episode: str,
261
+ num_k_infer: int,
262
+ flow_schedule: str = None,
263
+ exp_scale: float = None,
264
+ subs_factor: int = 1,
265
+ ):
266
+ ckpt_dir = REPO_DIRS.CKPT / ckpt_name
267
+ ckpt_path_list = list(ckpt_dir.glob(f"{ckpt_episode}*"))
268
+ assert len(ckpt_path_list) > 0, f"No checkpoint found in {ckpt_dir} with {ckpt_episode}"
269
+ assert len(ckpt_path_list) < 2, f"Multiple ckpts found in {ckpt_dir} with {ckpt_episode}"
270
+ ckpt_fpath = ckpt_path_list[0]
271
+
272
+ state_dict = torch.load(ckpt_fpath, map_location=DEVICE, weights_only=False)
273
+ cfg = OmegaConf.load(ckpt_dir / "config.yaml")
274
+ # cfg.model.obs_encoder.encoder.random_crop = False
275
+ cfg.model.subs_factor = subs_factor
276
+ assert cfg.model._target_.split(".")[-1] == cls.__name__
277
+ model: FMPolicy = hydra.utils.instantiate(cfg.model)
278
+ model.load_state_dict(state_dict["state"]["model"])
279
+ model.to(DEVICE)
280
+ model.eval()
281
+ if flow_schedule is not None:
282
+ model.set_flow_schedule(flow_schedule, exp_scale)
283
+ if num_k_infer is not None:
284
+ model.set_num_k_infer(num_k_infer)
285
+ return model
286
+
287
+
288
+ class FMPolicyImage(FMPolicy):
289
+
290
+ def _norm_obs(self, image: torch.Tensor) -> torch.Tensor:
291
+ """
292
+ Image normalization is already done in the backbone, so here we just make it float
293
+ """
294
+ image = image.float() / 255.0
295
+ return image
296
+
297
+ def _augment_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
298
+ raise NotImplementedError
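calculate_loss and infer_y above implement standard (rectified) flow matching: interpolate linearly between a noise sample and the target, regress the constant velocity, then Euler-integrate the learned field at inference. A self-contained 1-D toy version of the same scheme, with a tiny MLP standing in for diffusion_net, for intuition only:

import torch
import torch.nn as nn

torch.manual_seed(0)
net = nn.Sequential(nn.Linear(2, 64), nn.ReLU(), nn.Linear(64, 1))  # input: (z_t, t)
opt = torch.optim.Adam(net.parameters(), lr=1e-3)
for _ in range(2000):
    z1 = torch.randn(256, 1) * 0.1 + 2.0        # "data" samples ~ N(2, 0.1)
    z0 = torch.randn(256, 1)                    # noise samples ~ N(0, 1)
    t = torch.rand(256, 1)
    z_t = t * z1 + (1.0 - t) * z0               # linear interpolation, as in calculate_loss
    target_vel = z1 - z0                        # constant-velocity target
    loss = nn.functional.mse_loss(net(torch.cat([z_t, t], dim=-1)), target_vel)
    opt.zero_grad()
    loss.backward()
    opt.step()

z = torch.randn(4096, 1)                        # start from noise
K = 10
for i in range(K):                              # Euler integration, as in infer_y (linear schedule)
    t = torch.full((4096, 1), i / K)
    z = z + net(torch.cat([z, t], dim=-1)) * (1.0 / K)
print(z.mean().item(), z.std().item())          # should approach ~2.0 and ~0.1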
third_party/PointFlowMatch/pfp/policy/fm_se3_policy.py ADDED
@@ -0,0 +1,270 @@
1
+ from __future__ import annotations
2
+ import hydra
3
+ import torch
4
+ import torch.nn as nn
5
+ import pypose as pp
6
+ from omegaconf import OmegaConf
7
+ from composer.models import ComposerModel
8
+ from pfp.policy.base_policy import BasePolicy
9
+ from pfp import DEVICE, REPO_DIRS
10
+ from pfp.common.se3_utils import pfp_to_pose_th
11
+ from pfp.common.fm_utils import get_timesteps
12
+
13
+
14
+ class FMSE3Policy(ComposerModel, BasePolicy):
15
+ def __init__(
16
+ self,
17
+ x_dim: int,
18
+ y_dim: int,
19
+ n_obs_steps: int,
20
+ n_pred_steps: int,
21
+ num_k_infer: int,
22
+ obs_encoder: nn.Module,
23
+ diffusion_net: nn.Module,
24
+ augment_data: bool,
25
+ loss_weights: dict[int],
26
+ norm_pcd_center: list,
27
+ loss_type: str,
28
+ pos_emb_scale: int = 20,
29
+ flow_schedule: str = "linear",
30
+ exp_scale: float = None,
31
+ ) -> None:
32
+ ComposerModel.__init__(self)
33
+ BasePolicy.__init__(self, n_obs_steps)
34
+ self.x_dim = x_dim
35
+ self.y_dim = y_dim
36
+ self.n_obs_steps = n_obs_steps
37
+ self.n_pred_steps = n_pred_steps
38
+ self.pos_emb_scale = pos_emb_scale
39
+ self.num_k_infer = num_k_infer
40
+ self.obs_encoder = obs_encoder
41
+ self.diffusion_net = diffusion_net
42
+ self.norm_pcd_center = norm_pcd_center
43
+ self.augment_data = augment_data
44
+ self.ny_shape = (n_pred_steps, y_dim)
45
+ self.l_w = loss_weights
46
+ self.flow_schedule = flow_schedule
47
+ self.exp_scale = exp_scale
48
+ if loss_type == "l2":
49
+ self.loss_fun = nn.MSELoss()
50
+ elif loss_type == "l1":
51
+ self.loss_fun = nn.L1Loss()
52
+ else:
53
+ raise NotImplementedError
54
+ return
55
+
56
+ def set_num_k_infer(self, num_k_infer: int):
57
+ self.num_k_infer = num_k_infer
58
+ return
59
+
60
+ def set_flow_schedule(self, flow_schedule: str, exp_scale: float):
61
+ self.flow_schedule = flow_schedule
62
+ self.exp_scale = exp_scale
63
+ return
64
+
65
+ def _norm_obs(self, pcd: torch.Tensor) -> torch.Tensor:
66
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
67
+ pcd[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
68
+ return pcd
69
+
70
+ def _norm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
71
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
72
+ robot_state[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
73
+ robot_state[..., 9] -= torch.tensor(0.5, device=DEVICE)
74
+ return robot_state
75
+
76
+ def _denorm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
77
+ robot_state[..., :3] += torch.tensor(self.norm_pcd_center, device=DEVICE)
78
+ robot_state[..., 9] += torch.tensor(0.5, device=DEVICE)
79
+ return robot_state
80
+
81
+ def _norm_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
82
+ pcd, robot_state_obs, robot_state_pred = batch
83
+ pcd = self._norm_obs(pcd)
84
+ robot_state_obs = self._norm_robot_state(robot_state_obs)
85
+ robot_state_pred = self._norm_robot_state(robot_state_pred)
86
+ return pcd, robot_state_obs, robot_state_pred
87
+
88
+ def _rand_range(self, low: float, high: float, size: tuple[int]) -> torch.Tensor:
89
+ return torch.rand(size, device=DEVICE) * (high - low) + low
90
+
91
+ def _augment_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
92
+ pcd, robot_state_obs, robot_state_pred = batch
93
+
94
+ # xyz1 = self._rand_range(low=0.8, high=1.2, size=(3,))
95
+ xyz2 = self._rand_range(low=-0.2, high=0.2, size=(3,))
96
+ pcd[..., :3] = pcd[..., :3] + xyz2 # * xyz1 + xyz2
97
+ robot_state_obs[..., :3] = robot_state_obs[..., :3] + xyz2 # * xyz1 + xyz2
98
+ robot_state_pred[..., :3] = robot_state_pred[..., :3] + xyz2 # * xyz1 + xyz2
99
+
100
+ # We shuffle the points, i.e. shuffle pcd along dim=2 (B, T, P, 3)
101
+ idx = torch.randperm(pcd.shape[2])
102
+ pcd = pcd[:, :, idx, :]
103
+ return pcd, robot_state_obs, robot_state_pred
104
+
105
+ def _init_noise(self, batch_size: int) -> tuple[pp.SE3, torch.Tensor]:
106
+ B = batch_size
107
+ T = self.n_pred_steps
108
+ noise_pp = pp.randn_SE3((B, T), device=DEVICE)
109
+ noise_gripper = torch.zeros((B, T, 1), device=DEVICE)
110
+ return noise_pp, noise_gripper
111
+
112
+ def _init_target(self, ny: torch.Tensor) -> tuple[pp.SE3, torch.Tensor]:
113
+ """
114
+ ny: (B, T, 10) -> xyz, rot6d, grip
115
+ """
116
+ poses_th, gripper_th = pfp_to_pose_th(ny) # (B, T, 4, 4)
117
+ poses_pp = pp.mat2SE3(poses_th, check=False) # (B, T, 7)
118
+ return poses_pp, gripper_th
119
+
120
+ def _pp_to_pfp(self, z_pp: pp.SE3, z_gripper: torch.Tensor) -> torch.Tensor:
121
+ """
122
+ Args:
123
+ z_pp: (B, T, 7) pp.SE3 pose
124
+ z_gripper: (B, T, 1) gripper
125
+ Returns:
126
+ z: (B, T, 10) pfp state
127
+ """
128
+ z = torch.zeros((*z_pp.shape[:-1], 10), device=DEVICE)
129
+ pose = pp.matrix(z_pp)
130
+ z[..., :3] = pose[..., :3, 3]
131
+ z[..., 3:9] = pose[..., :3, :2].mT.flatten(start_dim=-2)
132
+ z[..., 9:] = z_gripper
133
+ return z
134
+
135
+ # ############### Training ################
136
+
137
+ def forward(self, batch):
138
+ """batch is the output of the dataloader"""
139
+ return 0
140
+
141
+ def loss(self, outputs, batch: tuple[torch.Tensor, ...]) -> torch.Tensor:
142
+ """
143
+ outputs: the output of the forward pass
144
+ batch: the output of the dataloader
145
+ """
146
+ with torch.no_grad():
147
+ batch = self._norm_data(batch)
148
+ if self.augment_data:
149
+ batch = self._augment_data(batch)
150
+ pcd, robot_state_obs, robot_state_pred = batch
151
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
152
+ ny: torch.Tensor = robot_state_pred
153
+
154
+ B = ny.shape[0]
155
+ T = ny.shape[1]
156
+
157
+ # Sample random time step
158
+ t_shape = (B, 1, 1)
159
+ t = torch.rand(t_shape, device=DEVICE)
160
+
161
+ # Initialize start and end poses + gripper state
162
+ z0_pp, z0_gripper = self._init_noise(B)
163
+ z1_pp, z1_gripper = self._init_target(ny)
164
+
165
+ # Calculate relative change between them
166
+ target_vel_pp = pp.Log(pp.Inv(z0_pp) @ z1_pp)
167
+ target_vel_gripper = z1_gripper - z0_gripper
168
+
169
+ # Move to intermediate step
170
+ zt_pp: pp.SE3 = z0_pp @ pp.Exp(target_vel_pp * t)
171
+ zt_gripper: torch.Tensor = z0_gripper + target_vel_gripper * t
172
+ # Convert to pfp network input representation
173
+ zt_pfp = self._pp_to_pfp(zt_pp, zt_gripper)
174
+ timesteps = t.squeeze() * self.pos_emb_scale
175
+
176
+ # Do prediction
177
+ pred_vel_pfp = self.diffusion_net(zt_pfp, timesteps, global_cond=nx)
178
+ assert pred_vel_pfp.shape == (B, T, 7)
179
+ pred_vel_pp = pred_vel_pfp[..., :6]
180
+ pred_vel_gripper = pred_vel_pfp[..., 6:]
181
+
182
+ # Calculate loss
183
+ loss_twist = self.loss_fun(pred_vel_pp, target_vel_pp)
184
+ loss_grip = self.loss_fun(pred_vel_gripper, target_vel_gripper)
185
+ loss = self.l_w["twist"] * loss_twist + self.l_w["grip"] * loss_grip
186
+ self.logger.log_metrics(
187
+ {
188
+ "loss/train/twist": loss_twist.item(),
189
+ "loss/train/grip": loss_grip.item(),
190
+ }
191
+ )
192
+ return loss
193
+
194
+ # ############### Inference ################
195
+
196
+ def eval_forward(self, batch: tuple[torch.Tensor, ...], outputs=None) -> torch.Tensor:
197
+ """
198
+ batch: the output of the eval dataloader
199
+ outputs: the output of the forward pass
200
+ """
201
+ batch = self._norm_data(batch)
202
+ pcd, robot_state_obs, robot_state_pred = batch
203
+ pred_y = self.infer_y(pcd, robot_state_obs)
204
+ mse_xyz = nn.functional.mse_loss(pred_y[..., :3], robot_state_pred[..., :3])
205
+ mse_rot6d = nn.functional.mse_loss(pred_y[..., 3:9], robot_state_pred[..., 3:9])
206
+ mse_grip = nn.functional.mse_loss(pred_y[..., 9], robot_state_pred[..., 9])
207
+ self.logger.log_metrics(
208
+ {
209
+ "metrics/eval/mse_xyz": mse_xyz.item(),
210
+ "metrics/eval/mse_rot6d": mse_rot6d.item(),
211
+ "metrics/eval/mse_grip": mse_grip.item(),
212
+ }
213
+ )
214
+ return pred_y
215
+
216
+ def infer_y(
217
+ self,
218
+ pcd: torch.Tensor,
219
+ robot_state_obs: torch.Tensor,
220
+ noise=None,
221
+ return_traj=False,
222
+ ) -> torch.Tensor:
223
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
224
+ B = nx.shape[0]
225
+ z_pp, z_gripper = self._init_noise(B) if noise is None else noise
226
+ z = self._pp_to_pfp(z_pp, z_gripper)
227
+ traj = [z]
228
+ t0, dt = get_timesteps(self.flow_schedule, self.num_k_infer, exp_scale=self.exp_scale)
229
+ for i in range(self.num_k_infer):
230
+ t = torch.ones((B), device=DEVICE) * t0[i]
231
+ timesteps = t * self.pos_emb_scale
232
+ pred_vel_pfp = self.diffusion_net(z, timesteps, global_cond=nx)
233
+ pred_vel_pp = pp.se3(pred_vel_pfp[..., :6])
234
+ pred_vel_gripper = pred_vel_pfp[..., 6:]
235
+
236
+ z_pp = z_pp @ pp.Exp(pred_vel_pp * dt[i])
237
+ z_gripper = z_gripper + pred_vel_gripper * dt[i]
238
+
239
+ z = self._pp_to_pfp(z_pp, z_gripper)
240
+ traj.append(z)
241
+ return torch.stack(traj) if return_traj else traj[-1]
242
+
243
+ @classmethod
244
+ def load_from_checkpoint(
245
+ cls,
246
+ ckpt_name: str,
247
+ ckpt_episode: str,
248
+ num_k_infer: int,
249
+ flow_schedule: str = None,
250
+ exp_scale: float = None,
251
+ ):
252
+ ckpt_dir = REPO_DIRS.CKPT / ckpt_name
253
+ ckpt_path_list = list(ckpt_dir.glob(f"{ckpt_episode}*"))
254
+ assert len(ckpt_path_list) > 0, f"No checkpoint found in {ckpt_dir} with {ckpt_episode}"
255
+ assert len(ckpt_path_list) < 2, f"Multiple ckpts found in {ckpt_dir} with {ckpt_episode}"
256
+ ckpt_fpath = ckpt_path_list[0]
257
+
258
+ state_dict = torch.load(ckpt_fpath, map_location=DEVICE)
259
+ cfg = OmegaConf.load(ckpt_dir / "config.yaml")
260
+ # cfg.model.obs_encoder.encoder.random_crop = False
261
+ assert cfg.model._target_.split(".")[-1] == cls.__name__
262
+ model: FMSE3Policy = hydra.utils.instantiate(cfg.model)
263
+ model.load_state_dict(state_dict["state"]["model"])
264
+ model.to(DEVICE)
265
+ model.eval()
266
+ if flow_schedule is not None:
267
+ model.set_flow_schedule(flow_schedule, exp_scale)
268
+ if num_k_infer is not None:
269
+ model.set_num_k_infer(num_k_infer)
270
+ return model
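The SE(3) variant replaces the Euclidean interpolation with the group geodesic: the target velocity is the body-frame twist Log(Inv(z0) @ z1) and the intermediate sample is z0 @ Exp(t * vel). A small isolated sketch of that interpolation with pypose, assuming pypose behaves as it is used above:

import torch
import pypose as pp

z0 = pp.randn_SE3(4)                  # "noise" poses
z1 = pp.randn_SE3(4)                  # "target" poses
vel = pp.Log(pp.Inv(z0) @ z1)         # body-frame twist carrying z0 onto z1
for t in torch.linspace(0.0, 1.0, 5):
    zt = z0 @ pp.Exp(vel * t)         # geodesic point: z0 at t=0, z1 at t=1
    err = pp.Log(pp.Inv(zt) @ z1).norm(dim=-1)   # residual twist norm shrinks to 0
    print(round(float(t), 2), err)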
third_party/PointFlowMatch/pfp/policy/fm_so3_policy.py ADDED
@@ -0,0 +1,341 @@
1
+ from __future__ import annotations
2
+ import hydra
3
+ import torch
4
+ import torch.nn as nn
5
+ import pypose as pp
6
+ from omegaconf import OmegaConf
7
+ from composer.models import ComposerModel
8
+ from pfp.policy.base_policy import BasePolicy
9
+ from pfp import DEVICE, REPO_DIRS
10
+ from pfp.common.se3_utils import pfp_to_pose_th
11
+ from pfp.common.fm_utils import get_timesteps
12
+
13
+
14
+ class FMSO3Policy(ComposerModel, BasePolicy):
15
+ def __init__(
16
+ self,
17
+ x_dim: int,
18
+ y_dim: int,
19
+ n_obs_steps: int,
20
+ n_pred_steps: int,
21
+ num_k_infer: int,
22
+ obs_encoder: nn.Module,
23
+ diffusion_net: nn.Module,
24
+ augment_data: bool,
25
+ loss_weights: dict[int],
26
+ norm_pcd_center: list,
27
+ loss_type: str,
28
+ pos_emb_scale: int = 20,
29
+ flow_schedule: str = "linear",
30
+ exp_scale: float = None,
31
+ snr_sampler: str = "uniform",
32
+ noise_type: str = "uniform", # uniform | biased
33
+ ) -> None:
34
+ ComposerModel.__init__(self)
35
+ BasePolicy.__init__(self, n_obs_steps)
36
+ self.x_dim = x_dim
37
+ self.y_dim = y_dim
38
+ self.n_obs_steps = n_obs_steps
39
+ self.n_pred_steps = n_pred_steps
40
+ self.pos_emb_scale = pos_emb_scale
41
+ self.num_k_infer = num_k_infer
42
+ self.obs_encoder = obs_encoder
43
+ self.diffusion_net = diffusion_net
44
+ self.norm_pcd_center = norm_pcd_center
45
+ self.augment_data = augment_data
46
+ self.ny_shape = (n_pred_steps, y_dim)
47
+ self.l_w = loss_weights
48
+ self.flow_schedule = flow_schedule
49
+ self.exp_scale = exp_scale
50
+ self.snr_sampler = snr_sampler
51
+ self.noise_type = noise_type
52
+ if loss_type == "l2":
53
+ self.loss_fun = nn.MSELoss()
54
+ elif loss_type == "l1":
55
+ self.loss_fun = nn.L1Loss()
56
+ else:
57
+ raise NotImplementedError
58
+ return
59
+
60
+ def set_num_k_infer(self, num_k_infer: int):
61
+ self.num_k_infer = num_k_infer
62
+ return
63
+
64
+ def set_flow_schedule(self, flow_schedule: str, exp_scale: float):
65
+ self.flow_schedule = flow_schedule
66
+ self.exp_scale = exp_scale
67
+ return
68
+
69
+ def _norm_obs(self, pcd: torch.Tensor) -> torch.Tensor:
70
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
71
+ pcd[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
72
+ return pcd
73
+
74
+ def _norm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
75
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
76
+ robot_state[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
77
+ robot_state[..., 9] -= torch.tensor(0.5, device=DEVICE)
78
+ return robot_state
79
+
80
+ def _denorm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
81
+ robot_state[..., :3] += torch.tensor(self.norm_pcd_center, device=DEVICE)
82
+ robot_state[..., 9] += torch.tensor(0.5, device=DEVICE)
83
+ return robot_state
84
+
85
+ def _norm_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
86
+ pcd, robot_state_obs, robot_state_pred = batch
87
+ pcd = self._norm_obs(pcd)
88
+ robot_state_obs = self._norm_robot_state(robot_state_obs)
89
+ robot_state_pred = self._norm_robot_state(robot_state_pred)
90
+ return pcd, robot_state_obs, robot_state_pred
91
+
92
+ def _rand_range(self, low: float, high: float, size: tuple[int]) -> torch.Tensor:
93
+ return torch.rand(size, device=DEVICE) * (high - low) + low
94
+
95
+ def _augment_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
96
+ pcd, robot_state_obs, robot_state_pred = batch
97
+
98
+ # xyz1 = self._rand_range(low=0.8, high=1.2, size=(3,))
99
+ xyz2 = self._rand_range(low=-0.2, high=0.2, size=(3,))
100
+ pcd[..., :3] = pcd[..., :3] + xyz2 # * xyz1 + xyz2
101
+ robot_state_obs[..., :3] = robot_state_obs[..., :3] + xyz2 # * xyz1 + xyz2
102
+ robot_state_pred[..., :3] = robot_state_pred[..., :3] + xyz2 # * xyz1 + xyz2
103
+
104
+ # We shuffle the points, i.e. shuffle pcd along dim=2 (B, T, P, 3)
105
+ idx = torch.randperm(pcd.shape[2])
106
+ pcd = pcd[:, :, idx, :]
107
+ return pcd, robot_state_obs, robot_state_pred
108
+
109
+ def _init_noise(
110
+ self, batch_size: int, robot_state_obs: torch.Tensor
111
+ ) -> tuple[torch.Tensor, pp.SO3, torch.Tensor]:
112
+ B = batch_size
113
+ T = self.n_pred_steps
114
+ noise_xyz = torch.randn((B, T, 3), device=DEVICE)
115
+ noise_gripper = torch.randn((B, T, 1), device=DEVICE)
116
+ if self.noise_type == "uniform":
117
+ noise_SO3 = pp.randn_SO3((B, T), device=DEVICE)
118
+ elif self.noise_type == "biased":
119
+ random_euler = torch.FloatTensor(B, T, 3).uniform_(-torch.pi / 2, torch.pi / 2)
120
+ random_so3 = pp.Log(pp.euler2SO3(random_euler.to(DEVICE)))
121
+ _, cur_SO3, _ = self._pfp_to_pp(robot_state_obs)
122
+ start_SO3 = cur_SO3[:, -1:, :].expand(B, T, 4) # Just take the current pose
123
+ noise_SO3 = start_SO3 @ pp.Exp(random_so3)
124
+ else:
125
+ raise NotImplementedError
126
+ return noise_xyz, noise_SO3, noise_gripper
127
+
128
+ def _pfp_to_pp(self, pfp_state: torch.Tensor) -> tuple[torch.Tensor, pp.SO3, torch.Tensor]:
129
+ """
130
+ pfp_state: (B, T, 10) -> xyz, rot6d, grip
131
+ """
132
+ poses_th, gripper_th = pfp_to_pose_th(pfp_state) # (B, T, 4, 4)
133
+ xyz = poses_th[..., :3, 3]
134
+ rot_SO3 = pp.mat2SO3(poses_th[..., :3, :3], check=False) # (B, T, 4)
135
+ gripper = gripper_th
136
+ return xyz, rot_SO3, gripper
137
+
138
+ def _sample_snr(self, batch_size: int) -> torch.Tensor:
139
+ if self.snr_sampler == "uniform":
140
+ return torch.rand((batch_size, 1, 1), device=DEVICE)
141
+ elif self.snr_sampler == "logit_normal":
142
+ return torch.sigmoid(torch.randn((batch_size, 1, 1), device=DEVICE))
143
+ else:
144
+ raise NotImplementedError
145
+
146
+ def _pp_to_pfp(
147
+ self, z_xyz: torch.Tensor, z_SO3: pp.SO3, z_gripper: torch.Tensor
148
+ ) -> torch.Tensor:
149
+ """
150
+ Args:
151
+ z_xyz: (B, T, 3) xyz
152
+ z_SO3: (B, T, 4) pp.SO3 rotation
153
+ z_gripper: (B, T, 1) gripper
154
+ Returns:
155
+ z: (B, T, 10) pfp state
156
+ """
157
+ B, T, _ = z_xyz.shape
158
+ z = torch.zeros((B, T, 10), device=DEVICE)
159
+ rot = pp.matrix(z_SO3)
160
+ z[..., :3] = z_xyz
161
+ z[..., 3:9] = rot[..., :3, :2].mT.flatten(start_dim=-2)
162
+ z[..., 9:] = z_gripper
163
+ return z
164
+
165
+ # ############### Training ################
166
+
167
+ def forward(self, batch):
168
+ """batch is the output of the dataloader"""
169
+ return 0
170
+
171
+ def loss(self, outputs, batch: tuple[torch.Tensor, ...]) -> torch.Tensor:
172
+ """
173
+ outputs: the output of the forward pass
174
+ batch: the output of the dataloader
175
+ """
176
+ with torch.no_grad():
177
+ batch = self._norm_data(batch)
178
+ if self.augment_data:
179
+ batch = self._augment_data(batch)
180
+ pcd, robot_state_obs, robot_state_pred = batch
181
+ loss_xyz, loss_so3, loss_grip = self.calculate_loss(pcd, robot_state_obs, robot_state_pred)
182
+ loss = (
183
+ self.l_w["xyz"] * loss_xyz + self.l_w["so3"] * loss_so3 + self.l_w["grip"] * loss_grip
184
+ )
185
+ self.logger.log_metrics(
186
+ {
187
+ "loss/train/xyz": loss_xyz.item(),
188
+ "loss/train/so3": loss_so3.item(),
189
+ "loss/train/grip": loss_grip.item(),
190
+ }
191
+ )
192
+ return loss
193
+
194
+ def calculate_loss(
195
+ self, pcd: torch.Tensor, robot_state_obs: torch.Tensor, robot_state_pred: torch.Tensor
196
+ ):
197
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
198
+ ny: torch.Tensor = robot_state_pred
199
+
200
+ B = ny.shape[0]
201
+ T = ny.shape[1]
202
+
203
+ # Sample random time step
204
+ t = self._sample_snr(B)
205
+
206
+ # Initialize start and end poses + gripper state
207
+ z0_xyz, z0_SO3, z0_gripper = self._init_noise(B, robot_state_obs)
208
+ z1_xyz, z1_SO3, z1_gripper = self._pfp_to_pp(ny)
209
+
210
+ # Calculate relative change between them
211
+ target_vel_xyz = z1_xyz - z0_xyz
212
+ target_vel_so3 = pp.Log(pp.Inv(z0_SO3) @ z1_SO3)
213
+ target_vel_gripper = z1_gripper - z0_gripper
214
+
215
+ # Move to intermediate step
216
+ zt_xyz = z0_xyz + target_vel_xyz * t
217
+ zt_SO3: pp.SO3 = z0_SO3 @ pp.Exp(target_vel_so3 * t)
218
+ zt_gripper: torch.Tensor = z0_gripper + target_vel_gripper * t
219
+
220
+ # Convert to pfp network input representation
221
+ zt_pfp = self._pp_to_pfp(zt_xyz, zt_SO3, zt_gripper)
222
+ timesteps = t.squeeze() * self.pos_emb_scale
223
+
224
+ # Do prediction
225
+ pred_vel_pfp = self.diffusion_net(zt_pfp, timesteps, global_cond=nx)
226
+ assert pred_vel_pfp.shape == (B, T, 7)
227
+ pred_vel_xyz = pred_vel_pfp[..., :3]
228
+ pred_vel_so3 = pred_vel_pfp[..., 3:6]
229
+ pred_vel_gripper = pred_vel_pfp[..., 6:]
230
+
231
+ # Calculate loss
232
+ loss_xyz = self.loss_fun(pred_vel_xyz, target_vel_xyz)
233
+ loss_so3 = self.loss_fun(pred_vel_so3, target_vel_so3)
234
+ loss_grip = self.loss_fun(pred_vel_gripper, target_vel_gripper)
235
+ return loss_xyz, loss_so3, loss_grip
236
+
237
+ # ############### Inference ################
238
+
239
+ def eval_forward(self, batch: tuple[torch.Tensor, ...], outputs=None) -> torch.Tensor:
240
+ """
241
+ batch: the output of the eval dataloader
242
+ outputs: the output of the forward pass
243
+ """
244
+ batch = self._norm_data(batch)
245
+ pcd, robot_state_obs, robot_state_pred = batch
246
+
247
+ # Eval loss
248
+ loss_xyz, loss_so3, loss_grip = self.calculate_loss(pcd, robot_state_obs, robot_state_pred)
249
+ loss_total = (
250
+ self.l_w["xyz"] * loss_xyz + self.l_w["so3"] * loss_so3 + self.l_w["grip"] * loss_grip
251
+ )
252
+ self.logger.log_metrics(
253
+ {
254
+ "loss/eval/xyz": loss_xyz.item(),
255
+ "loss/eval/so3": loss_so3.item(),
256
+ "loss/eval/grip": loss_grip.item(),
257
+ "loss/eval/total": loss_total.item(),
258
+ }
259
+ )
260
+
261
+ # Eval metrics
262
+ pred_y = self.infer_y(pcd, robot_state_obs)
263
+ mse_xyz = nn.functional.mse_loss(pred_y[..., :3], robot_state_pred[..., :3])
264
+ mse_rot6d = nn.functional.mse_loss(pred_y[..., 3:9], robot_state_pred[..., 3:9])
265
+ mse_grip = nn.functional.mse_loss(pred_y[..., 9], robot_state_pred[..., 9])
266
+ self.logger.log_metrics(
267
+ {
268
+ "metrics/eval/mse_xyz": mse_xyz.item(),
269
+ "metrics/eval/mse_rot6d": mse_rot6d.item(),
270
+ "metrics/eval/mse_grip": mse_grip.item(),
271
+ }
272
+ )
273
+ return pred_y
274
+
275
+ def infer_y(
276
+ self,
277
+ pcd: torch.Tensor,
278
+ robot_state_obs: torch.Tensor,
279
+ noise=None,
280
+ return_traj=False,
281
+ ) -> torch.Tensor:
282
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
283
+ B = nx.shape[0]
284
+ z_xyz, z_SO3, z_gripper = self._init_noise(B, robot_state_obs) if noise is None else noise
285
+ z = self._pp_to_pfp(z_xyz, z_SO3, z_gripper)
286
+ traj = [z]
287
+ t0, dt = get_timesteps(self.flow_schedule, self.num_k_infer, exp_scale=self.exp_scale)
288
+ for i in range(self.num_k_infer):
289
+ t = torch.ones((B), device=DEVICE) * t0[i]
290
+ timesteps = t * self.pos_emb_scale
291
+ pred_vel_pfp = self.diffusion_net(z, timesteps, global_cond=nx)
292
+ pred_vel_xyz = pred_vel_pfp[..., :3]
293
+ pred_vel_so3 = pp.so3(pred_vel_pfp[..., 3:6])
294
+ pred_vel_gripper = pred_vel_pfp[..., 6:]
295
+
296
+ z_xyz = z_xyz + pred_vel_xyz * dt[i]
297
+ z_SO3 = z_SO3 @ pp.Exp(pred_vel_so3 * dt[i])
298
+ z_gripper = z_gripper + pred_vel_gripper * dt[i]
299
+
300
+ z = self._pp_to_pfp(z_xyz, z_SO3, z_gripper)
301
+ traj.append(z)
302
+ return torch.stack(traj) if return_traj else traj[-1]
303
+
304
+ @classmethod
305
+ def load_from_checkpoint(
306
+ cls,
307
+ ckpt_name: str,
308
+ ckpt_episode: str,
309
+ num_k_infer: int,
310
+ flow_schedule: str = None,
311
+ exp_scale: float = None,
312
+ ):
313
+ ckpt_dir = REPO_DIRS.CKPT / ckpt_name
314
+ ckpt_path_list = list(ckpt_dir.glob(f"{ckpt_episode}*"))
315
+ assert len(ckpt_path_list) > 0, f"No checkpoint found in {ckpt_dir} with {ckpt_episode}"
316
+ assert len(ckpt_path_list) < 2, f"Multiple ckpts found in {ckpt_dir} with {ckpt_episode}"
317
+ ckpt_fpath = ckpt_path_list[0]
318
+
319
+ state_dict = torch.load(ckpt_fpath, map_location=DEVICE)
320
+ cfg = OmegaConf.load(ckpt_dir / "config.yaml")
321
+ # cfg.model.obs_encoder.encoder.random_crop = False
322
+ assert cfg.model._target_.split(".")[-1] == cls.__name__
323
+ model: FMSO3Policy = hydra.utils.instantiate(cfg.model)
324
+ model.load_state_dict(state_dict["state"]["model"])
325
+ model.to(DEVICE)
326
+ model.eval()
327
+ if flow_schedule is not None:
328
+ model.set_flow_schedule(flow_schedule, exp_scale)
329
+ if num_k_infer is not None:
330
+ model.set_num_k_infer(num_k_infer)
331
+ return model
332
+
333
+
334
+ class FMSO3PolicyImage(FMSO3Policy):
335
+
336
+ def _norm_obs(self, image: torch.Tensor) -> torch.Tensor:
337
+ """
338
+ Image normalization is handled in the backbone; here we only convert to float and rescale to [0, 1]
339
+ """
340
+ image = image.float() / 255.0
341
+ return image
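FMSO3Policy keeps translation and gripper on a straight-line probability path while rotations follow the geodesic on SO(3), built with pypose's Exp/Log maps as in `calculate_loss` above. The sketch below isolates that rotation interpolation with random stand-in data; shapes and tensors are illustrative only.

```python
# Minimal sketch of the SO(3) interpolation used by FMSO3Policy (random stand-in data).
import torch
import pypose as pp

B, T = 2, 4
z0_SO3 = pp.randn_SO3((B, T))                        # noise rotations
z1_SO3 = pp.randn_SO3((B, T))                        # stand-in for demonstration rotations
t = torch.rand((B, 1, 1))                            # flow time in [0, 1]

target_vel_so3 = pp.Log(pp.Inv(z0_SO3) @ z1_SO3)     # geodesic velocity in the Lie algebra so(3)
zt_SO3 = z0_SO3 @ pp.Exp(target_vel_so3 * t)         # interpolant at time t

# Inference mirrors this: an Euler step with a predicted velocity v of shape (B, T, 3)
# and step size dt is z_SO3 = z_SO3 @ pp.Exp(pp.so3(v) * dt).
```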
third_party/PointFlowMatch/pfp/policy/fm_so3delta_policy.py ADDED
@@ -0,0 +1,332 @@
1
+ from __future__ import annotations
2
+ import hydra
3
+ import torch
4
+ import torch.nn as nn
5
+ import pypose as pp
6
+ from omegaconf import OmegaConf
7
+ from composer.models import ComposerModel
8
+ from pfp.policy.base_policy import BasePolicy
9
+ from pfp import DEVICE, REPO_DIRS
10
+ from pfp.common.se3_utils import pfp_to_pose_th, grahm_schmidt_th
11
+ from pfp.common.fm_utils import get_timesteps
12
+
13
+
14
+ class FMSO3DeltaPolicy(ComposerModel, BasePolicy):
15
+ def __init__(
16
+ self,
17
+ x_dim: int,
18
+ y_dim: int,
19
+ n_obs_steps: int,
20
+ n_pred_steps: int,
21
+ num_k_infer: int,
22
+ obs_encoder: nn.Module,
23
+ diffusion_net: nn.Module,
24
+ augment_data: bool,
25
+ loss_weights: dict[str, float],
26
+ norm_pcd_center: list,
27
+ loss_type: str,
28
+ pos_emb_scale: int = 20,
29
+ flow_schedule: str = "linear",
30
+ exp_scale: float = None,
31
+ ) -> None:
32
+ ComposerModel.__init__(self)
33
+ BasePolicy.__init__(self, n_obs_steps)
34
+ self.x_dim = x_dim
35
+ self.y_dim = y_dim
36
+ self.n_obs_steps = n_obs_steps
37
+ self.n_pred_steps = n_pred_steps
38
+ self.pos_emb_scale = pos_emb_scale
39
+ self.num_k_infer = num_k_infer
40
+ self.obs_encoder = obs_encoder
41
+ self.diffusion_net = diffusion_net
42
+ self.norm_pcd_center = norm_pcd_center
43
+ self.augment_data = augment_data
44
+ self.ny_shape = (n_pred_steps, y_dim)
45
+ self.l_w = loss_weights
46
+ self.flow_schedule = flow_schedule
47
+ self.exp_scale = exp_scale
48
+ if loss_type == "l2":
49
+ self.loss_fun = nn.MSELoss()
50
+ elif loss_type == "l1":
51
+ self.loss_fun = nn.L1Loss()
52
+ else:
53
+ raise NotImplementedError
54
+ return
55
+
56
+ def set_num_k_infer(self, num_k_infer: int):
57
+ self.num_k_infer = num_k_infer
58
+ return
59
+
60
+ def set_flow_schedule(self, flow_schedule: str, exp_scale: float):
61
+ self.flow_schedule = flow_schedule
62
+ self.exp_scale = exp_scale
63
+ return
64
+
65
+ def _norm_obs(self, pcd: torch.Tensor) -> torch.Tensor:
66
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
67
+ pcd[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
68
+ return pcd
69
+
70
+ def _norm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
71
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
72
+ robot_state[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
73
+ robot_state[..., 9] -= torch.tensor(0.5, device=DEVICE)
74
+ return robot_state
75
+
76
+ def _denorm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
77
+ robot_state[..., :3] += torch.tensor(self.norm_pcd_center, device=DEVICE)
78
+ robot_state[..., 9] += torch.tensor(0.5, device=DEVICE)
79
+ return robot_state
80
+
81
+ def _norm_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
82
+ pcd, robot_state_obs, robot_state_pred = batch
83
+ pcd = self._norm_obs(pcd)
84
+ robot_state_obs = self._norm_robot_state(robot_state_obs)
85
+ robot_state_pred = self._norm_robot_state(robot_state_pred)
86
+ return pcd, robot_state_obs, robot_state_pred
87
+
88
+ def _rand_range(self, low: float, high: float, size: tuple[int]) -> torch.Tensor:
89
+ return torch.rand(size, device=DEVICE) * (high - low) + low
90
+
91
+ def _augment_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
92
+ pcd, robot_state_obs, robot_state_pred = batch
93
+
94
+ # xyz1 = self._rand_range(low=0.8, high=1.2, size=(3,))
95
+ xyz2 = self._rand_range(low=-0.2, high=0.2, size=(3,))
96
+ pcd[..., :3] = pcd[..., :3] + xyz2 # * xyz1 + xyz2
97
+ robot_state_obs[..., :3] = robot_state_obs[..., :3] + xyz2 # * xyz1 + xyz2
98
+ robot_state_pred[..., :3] = robot_state_pred[..., :3] + xyz2 # * xyz1 + xyz2
99
+
100
+ # We shuffle the points, i.e. shuffle pcd along dim=2 (B, T, P, 3)
101
+ idx = torch.randperm(pcd.shape[2])
102
+ pcd = pcd[:, :, idx, :]
103
+ return pcd, robot_state_obs, robot_state_pred
104
+
105
+ def _init_noise(self, batch_size: int) -> tuple[torch.Tensor, pp.SO3, torch.Tensor]:
106
+ B = batch_size
107
+ T = self.n_pred_steps
108
+ noise_xyz = torch.randn((B, T, 3), device=DEVICE)
109
+ noise_SO3 = pp.randn_SO3((B, T), device=DEVICE)
110
+ noise_gripper = torch.randn((B, T, 1), device=DEVICE)
111
+ return noise_xyz, noise_SO3, noise_gripper
112
+
113
+ def _init_target(self, ny: torch.Tensor) -> tuple[torch.Tensor, pp.SO3, torch.Tensor]:
114
+ """
115
+ ny: (B, T, 10) -> xyz, rot6d, grip
116
+ """
117
+ poses_th, gripper_th = pfp_to_pose_th(ny) # (B, T, 4, 4)
118
+ target_xyz = poses_th[..., :3, 3]
119
+ target_SO3 = pp.mat2SO3(poses_th[..., :3, :3], check=False) # (B, T, 4)
120
+ target_gripper = gripper_th
121
+ return target_xyz, target_SO3, target_gripper
122
+
123
+ def _pp_to_pfp(
124
+ self, z_xyz: torch.Tensor, z_SO3: pp.SO3, z_gripper: torch.Tensor
125
+ ) -> torch.Tensor:
126
+ """
127
+ Args:
128
+ z_xyz: (B, T, 3) xyz
129
+ z_SO3: (B, T, 4) pp.SO3 rotation
130
+ z_gripper: (B, T, 1) gripper
131
+ Returns:
132
+ z: (B, T, 10) pfp state
133
+ """
134
+ B, T, _ = z_xyz.shape
135
+ z = torch.zeros((B, T, 10), device=DEVICE)
136
+ rot = pp.matrix(z_SO3)
137
+ z[..., :3] = z_xyz
138
+ z[..., 3:9] = rot[..., :3, :2].mT.flatten(start_dim=-2)
139
+ z[..., 9:] = z_gripper
140
+ return z
141
+
142
+ # ############### Training ################
143
+
144
+ def forward(self, batch):
145
+ """batch is the output of the dataloader"""
146
+ return 0
147
+
148
+ def loss(self, outputs, batch: tuple[torch.Tensor, ...]) -> torch.Tensor:
149
+ """
150
+ outputs: the output of the forward pass
151
+ batch: the output of the dataloader
152
+ """
153
+ with torch.no_grad():
154
+ batch = self._norm_data(batch)
155
+ if self.augment_data:
156
+ batch = self._augment_data(batch)
157
+ pcd, robot_state_obs, robot_state_pred = batch
158
+ loss_xyz, loss_rot6d, loss_grip = self.calculate_loss(
159
+ pcd, robot_state_obs, robot_state_pred
160
+ )
161
+ loss = (
162
+ self.l_w["xyz"] * loss_xyz
163
+ + self.l_w["rot6d"] * loss_rot6d
164
+ + self.l_w["grip"] * loss_grip
165
+ )
166
+ self.logger.log_metrics(
167
+ {
168
+ "loss/train/xyz": loss_xyz.item(),
169
+ "loss/train/rot6d": loss_rot6d.item(),
170
+ "loss/train/grip": loss_grip.item(),
171
+ }
172
+ )
173
+ return loss
174
+
175
+ def calculate_loss(
176
+ self, pcd: torch.Tensor, robot_state_obs: torch.Tensor, robot_state_pred: torch.Tensor
177
+ ):
178
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
179
+ ny: torch.Tensor = robot_state_pred
180
+
181
+ B = ny.shape[0]
182
+ T = ny.shape[1]
183
+
184
+ # Sample random time step
185
+ t_shape = (B, 1, 1)
186
+ t = torch.rand(t_shape, device=DEVICE)
187
+
188
+ # Initialize start and end poses + gripper state
189
+ z0_xyz, z0_SO3, z0_gripper = self._init_noise(B)
190
+ z1_xyz, z1_SO3, z1_gripper = self._init_target(ny)
191
+
192
+ # Calculate relative change between them
193
+ target_vel_xyz = z1_xyz - z0_xyz
194
+ target_delta_SO3 = pp.Inv(z0_SO3) @ z1_SO3
195
+ target_delta_R = pp.matrix(target_delta_SO3)[..., :3, :2].mT.flatten(start_dim=-2)
196
+ target_vel_so3 = pp.Log(target_delta_SO3)
197
+ target_vel_gripper = z1_gripper - z0_gripper
198
+
199
+ # Move to intermediate step
200
+ zt_xyz = z0_xyz + target_vel_xyz * t
201
+ zt_SO3: pp.SO3 = z0_SO3 @ pp.Exp(target_vel_so3 * t)
202
+ zt_gripper: torch.Tensor = z0_gripper + target_vel_gripper * t
203
+
204
+ # Convert to pfp network input representation
205
+ zt_pfp = self._pp_to_pfp(zt_xyz, zt_SO3, zt_gripper)
206
+ timesteps = t.squeeze() * self.pos_emb_scale
207
+
208
+ # Do prediction
209
+ pred_vel_pfp = self.diffusion_net(zt_pfp, timesteps, global_cond=nx)
210
+ assert pred_vel_pfp.shape == (B, T, 10)
211
+ pred_vel_xyz = pred_vel_pfp[..., :3]
212
+ pred_delta_R = pred_vel_pfp[..., 3:9]
213
+ # TODO: you could do gram schmidt here as well
214
+ pred_vel_gripper = pred_vel_pfp[..., 9:]
215
+
216
+ # Calculate loss
217
+ loss_xyz = self.loss_fun(pred_vel_xyz, target_vel_xyz)
218
+ loss_rot6d = self.loss_fun(pred_delta_R, target_delta_R)
219
+ loss_grip = self.loss_fun(pred_vel_gripper, target_vel_gripper)
220
+ return loss_xyz, loss_rot6d, loss_grip
221
+
222
+ # ############### Inference ################
223
+
224
+ def eval_forward(self, batch: tuple[torch.Tensor, ...], outputs=None) -> torch.Tensor:
225
+ """
226
+ batch: the output of the eval dataloader
227
+ outputs: the output of the forward pass
228
+ """
229
+ batch = self._norm_data(batch)
230
+ pcd, robot_state_obs, robot_state_pred = batch
231
+
232
+ # Eval loss
233
+ loss_xyz, loss_rot6d, loss_grip = self.calculate_loss(
234
+ pcd, robot_state_obs, robot_state_pred
235
+ )
236
+ loss_total = (
237
+ self.l_w["xyz"] * loss_xyz
238
+ + self.l_w["rot6d"] * loss_rot6d
239
+ + self.l_w["grip"] * loss_grip
240
+ )
241
+ self.logger.log_metrics(
242
+ {
243
+ "loss/eval/xyz": loss_xyz.item(),
244
+ "loss/eval/rot6d": loss_rot6d.item(),
245
+ "loss/eval/grip": loss_grip.item(),
246
+ "loss/eval/total": loss_total.item(),
247
+ }
248
+ )
249
+
250
+ # Eval metrics
251
+ pred_y = self.infer_y(pcd, robot_state_obs)
252
+ mse_xyz = nn.functional.mse_loss(pred_y[..., :3], robot_state_pred[..., :3])
253
+ mse_rot6d = nn.functional.mse_loss(pred_y[..., 3:9], robot_state_pred[..., 3:9])
254
+ mse_grip = nn.functional.mse_loss(pred_y[..., 9], robot_state_pred[..., 9])
255
+ self.logger.log_metrics(
256
+ {
257
+ "metrics/eval/mse_xyz": mse_xyz.item(),
258
+ "metrics/eval/mse_rot6d": mse_rot6d.item(),
259
+ "metrics/eval/mse_grip": mse_grip.item(),
260
+ }
261
+ )
262
+ return pred_y
263
+
264
+ def infer_y(
265
+ self,
266
+ pcd: torch.Tensor,
267
+ robot_state_obs: torch.Tensor,
268
+ noise=None,
269
+ return_traj=False,
270
+ ) -> torch.Tensor:
271
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
272
+ B = nx.shape[0]
273
+ z_xyz, z_SO3, z_gripper = self._init_noise(B) if noise is None else noise
274
+ z = self._pp_to_pfp(z_xyz, z_SO3, z_gripper)
275
+ traj = [z]
276
+ t0, dt = get_timesteps(self.flow_schedule, self.num_k_infer, exp_scale=self.exp_scale)
277
+ for i in range(self.num_k_infer):
278
+ t = torch.ones((B), device=DEVICE) * t0[i]
279
+ timesteps = t * self.pos_emb_scale
280
+ pred_vel_pfp = self.diffusion_net(z, timesteps, global_cond=nx)
281
+ pred_vel_xyz = pred_vel_pfp[..., :3]
282
+ pred_delta_R = grahm_schmidt_th(pred_vel_pfp[..., 3:6], pred_vel_pfp[..., 6:9])
283
+ pred_delta_SO3 = pp.mat2SO3(pred_delta_R, check=False)
284
+ pred_vel_so3 = pp.Log(pred_delta_SO3)
285
+ pred_vel_gripper = pred_vel_pfp[..., 9:]
286
+
287
+ z_xyz = z_xyz + pred_vel_xyz * dt[i]
288
+ z_SO3 = z_SO3 @ pp.Exp(pred_vel_so3 * dt[i])
289
+ z_gripper = z_gripper + pred_vel_gripper * dt[i]
290
+
291
+ z = self._pp_to_pfp(z_xyz, z_SO3, z_gripper)
292
+ traj.append(z)
293
+ return torch.stack(traj) if return_traj else traj[-1]
294
+
295
+ @classmethod
296
+ def load_from_checkpoint(
297
+ cls,
298
+ ckpt_name: str,
299
+ ckpt_episode: str,
300
+ num_k_infer: int,
301
+ flow_schedule: str = None,
302
+ exp_scale: float = None,
303
+ ):
304
+ ckpt_dir = REPO_DIRS.CKPT / ckpt_name
305
+ ckpt_path_list = list(ckpt_dir.glob(f"{ckpt_episode}*"))
306
+ assert len(ckpt_path_list) > 0, f"No checkpoint found in {ckpt_dir} with {ckpt_episode}"
307
+ assert len(ckpt_path_list) < 2, f"Multiple ckpts found in {ckpt_dir} with {ckpt_episode}"
308
+ ckpt_fpath = ckpt_path_list[0]
309
+
310
+ state_dict = torch.load(ckpt_fpath, map_location=DEVICE)
311
+ cfg = OmegaConf.load(ckpt_dir / "config.yaml")
312
+ # cfg.model.obs_encoder.encoder.random_crop = False
313
+ assert cfg.model._target_.split(".")[-1] == cls.__name__
314
+ model: FMSO3DeltaPolicy = hydra.utils.instantiate(cfg.model)
315
+ model.load_state_dict(state_dict["state"]["model"])
316
+ model.to(DEVICE)
317
+ model.eval()
318
+ if flow_schedule is not None:
319
+ model.set_flow_schedule(flow_schedule, exp_scale)
320
+ if num_k_infer is not None:
321
+ model.set_num_k_infer(num_k_infer)
322
+ return model
323
+
324
+
325
+ class FMSO3DeltaPolicyImage(FMSO3DeltaPolicy):
326
+
327
+ def _norm_obs(self, image: torch.Tensor) -> torch.Tensor:
328
+ """
329
+ Image normalization is handled in the backbone; here we only convert to float and rescale to [0, 1]
330
+ """
331
+ image = image.float() / 255.0
332
+ return image
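FMSO3DeltaPolicy differs from FMSO3Policy in that the network outputs the delta rotation as a 6D (two-column) encoding rather than a so(3) velocity, and only maps it back to SO(3) at inference through `grahm_schmidt_th`. That helper lives in `pfp/common/se3_utils.py` and is not part of this diff; the sketch below is the standard Gram-Schmidt recovery of a rotation matrix from two predicted columns (Zhou et al.'s continuous 6D representation), given purely as an assumption of what such a helper typically computes.

```python
# Assumed sketch of a 6D-to-rotation Gram-Schmidt step; not the repo's grahm_schmidt_th.
import torch
import torch.nn.functional as F

def rot6d_to_matrix(a1: torch.Tensor, a2: torch.Tensor) -> torch.Tensor:
    """a1, a2: (..., 3) raw predicted columns -> (..., 3, 3) orthonormal rotation matrix."""
    b1 = F.normalize(a1, dim=-1)                                          # unit first column
    b2 = F.normalize(a2 - (b1 * a2).sum(-1, keepdim=True) * b1, dim=-1)   # remove b1 component
    b3 = torch.cross(b1, b2, dim=-1)                                      # third column
    return torch.stack([b1, b2, b3], dim=-1)                              # columns b1, b2, b3
```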
third_party/PointFlowMatch/pfp/policy/fm_target_policy.py ADDED
@@ -0,0 +1,326 @@
1
+ from __future__ import annotations
2
+ import hydra
3
+ import torch
4
+ import torch.nn as nn
5
+ import pypose as pp
6
+ from omegaconf import OmegaConf
7
+ from composer.models import ComposerModel
8
+ from pfp.policy.base_policy import BasePolicy
9
+ from pfp import DEVICE, REPO_DIRS
10
+ from pfp.common.se3_utils import pfp_to_pose_th
11
+ from pfp.common.fm_utils import get_timesteps
12
+
13
+
14
+ class FMTargetPolicy(ComposerModel, BasePolicy):
15
+ def __init__(
16
+ self,
17
+ x_dim: int,
18
+ y_dim: int,
19
+ n_obs_steps: int,
20
+ n_pred_steps: int,
21
+ num_k_infer: int,
22
+ time_conditioning: bool,
23
+ obs_encoder: nn.Module,
24
+ diffusion_net: nn.Module,
25
+ augment_data: bool,
26
+ loss_weights: dict[str, float],
27
+ norm_pcd_center: list,
28
+ loss_type: str,
29
+ pos_emb_scale: int = 20,
30
+ flow_schedule: str = "linear",
31
+ exp_scale: float = None,
32
+ ) -> None:
33
+ ComposerModel.__init__(self)
34
+ BasePolicy.__init__(self, n_obs_steps)
35
+ self.x_dim = x_dim
36
+ self.y_dim = y_dim
37
+ self.n_obs_steps = n_obs_steps
38
+ self.n_pred_steps = n_pred_steps
39
+ self.pos_emb_scale = pos_emb_scale
40
+ self.num_k_infer = num_k_infer
41
+ self.time_conditioning = time_conditioning
42
+ self.obs_encoder = obs_encoder
43
+ self.diffusion_net = diffusion_net
44
+ self.norm_pcd_center = norm_pcd_center
45
+ self.augment_data = augment_data
46
+ self.ny_shape = (n_pred_steps, y_dim)
47
+ self.l_w = loss_weights
48
+ self.flow_schedule = flow_schedule
49
+ self.exp_scale = exp_scale
50
+ if loss_type == "l2":
51
+ self.loss_fun = nn.MSELoss()
52
+ elif loss_type == "l1":
53
+ self.loss_fun = nn.L1Loss()
54
+ else:
55
+ raise NotImplementedError
56
+ return
57
+
58
+ def set_num_k_infer(self, num_k_infer: int):
59
+ self.num_k_infer = num_k_infer
60
+ return
61
+
62
+ def set_flow_schedule(self, flow_schedule: str, exp_scale: float):
63
+ self.flow_schedule = flow_schedule
64
+ self.exp_scale = exp_scale
65
+ return
66
+
67
+ def _norm_obs(self, pcd: torch.Tensor) -> torch.Tensor:
68
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
69
+ pcd[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
70
+ return pcd
71
+
72
+ def _norm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
73
+ # I only do centering here, no scaling, to keep the relative distances and interpretability
74
+ robot_state[..., :3] -= torch.tensor(self.norm_pcd_center, device=DEVICE)
75
+ robot_state[..., 9] -= torch.tensor(0.5, device=DEVICE)
76
+ return robot_state
77
+
78
+ def _denorm_robot_state(self, robot_state: torch.Tensor) -> torch.Tensor:
79
+ robot_state[..., :3] += torch.tensor(self.norm_pcd_center, device=DEVICE)
80
+ robot_state[..., 9] += torch.tensor(0.5, device=DEVICE)
81
+ return robot_state
82
+
83
+ def _norm_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
84
+ pcd, robot_state_obs, robot_state_pred = batch
85
+ pcd = self._norm_obs(pcd)
86
+ robot_state_obs = self._norm_robot_state(robot_state_obs)
87
+ robot_state_pred = self._norm_robot_state(robot_state_pred)
88
+ return pcd, robot_state_obs, robot_state_pred
89
+
90
+ def _rand_range(self, low: float, high: float, size: tuple[int]) -> torch.Tensor:
91
+ return torch.rand(size, device=DEVICE) * (high - low) + low
92
+
93
+ def _augment_data(self, batch: tuple[torch.Tensor, ...]) -> tuple[torch.Tensor, ...]:
94
+ pcd, robot_state_obs, robot_state_pred = batch
95
+
96
+ # xyz1 = self._rand_range(low=0.8, high=1.2, size=(3,))
97
+ xyz2 = self._rand_range(low=-0.2, high=0.2, size=(3,))
98
+ pcd[..., :3] = pcd[..., :3] + xyz2 # * xyz1 + xyz2
99
+ robot_state_obs[..., :3] = robot_state_obs[..., :3] + xyz2 # * xyz1 + xyz2
100
+ robot_state_pred[..., :3] = robot_state_pred[..., :3] + xyz2 # * xyz1 + xyz2
101
+
102
+ # We shuffle the points, i.e. shuffle pcd along dim=2 (B, T, P, 3)
103
+ idx = torch.randperm(pcd.shape[2])
104
+ pcd = pcd[:, :, idx, :]
105
+ return pcd, robot_state_obs, robot_state_pred
106
+
107
+ def _init_noise(self, batch_size: int) -> tuple[torch.Tensor, pp.SO3, torch.Tensor]:
108
+ B = batch_size
109
+ T = self.n_pred_steps
110
+ noise_xyz = torch.randn((B, T, 3), device=DEVICE)
111
+ noise_SO3 = pp.randn_SO3((B, T), device=DEVICE)
112
+ noise_gripper = torch.randn((B, T, 1), device=DEVICE)
113
+ return noise_xyz, noise_SO3, noise_gripper
114
+
115
+ def _pfp_to_pp(self, pfp_state: torch.Tensor) -> tuple[torch.Tensor, pp.SO3, torch.Tensor]:
116
+ """
117
+ pfp_state: (B, T, 10) -> xyz, rot6d, grip
118
+ """
119
+ poses_th, gripper_th = pfp_to_pose_th(pfp_state) # (B, T, 4, 4)
120
+ xyz = poses_th[..., :3, 3]
121
+ rot_SO3 = pp.mat2SO3(poses_th[..., :3, :3], check=False) # (B, T, 4)
122
+ gripper = gripper_th
123
+ return xyz, rot_SO3, gripper
124
+
125
+ def _pp_to_pfp(
126
+ self, z_xyz: torch.Tensor, z_SO3: pp.SO3, z_gripper: torch.Tensor
127
+ ) -> torch.Tensor:
128
+ """
129
+ Args:
130
+ z_xyz: (B, T, 3) xyz
131
+ z_SO3: (B, T, 4) pp.SO3 rotation
132
+ z_gripper: (B, T, 1) gripper
133
+ Returns:
134
+ z: (B, T, 10) pfp state
135
+ """
136
+ B, T, _ = z_xyz.shape
137
+ z = torch.zeros((B, T, 10), device=DEVICE)
138
+ rot = pp.matrix(z_SO3)
139
+ z[..., :3] = z_xyz
140
+ z[..., 3:9] = rot[..., :3, :2].mT.flatten(start_dim=-2)
141
+ z[..., 9:] = z_gripper
142
+ return z
143
+
144
+ # ############### Training ################
145
+
146
+ def forward(self, batch):
147
+ """batch is the output of the dataloader"""
148
+ return 0
149
+
150
+ def loss(self, outputs, batch: tuple[torch.Tensor, ...]) -> torch.Tensor:
151
+ """
152
+ outputs: the output of the forward pass
153
+ batch: the output of the dataloader
154
+ """
155
+ with torch.no_grad():
156
+ batch = self._norm_data(batch)
157
+ if self.augment_data:
158
+ batch = self._augment_data(batch)
159
+ pcd, robot_state_obs, robot_state_pred = batch
160
+ loss_xyz, loss_rot6d, loss_grip = self.calculate_loss(
161
+ pcd, robot_state_obs, robot_state_pred
162
+ )
163
+ loss = (
164
+ self.l_w["xyz"] * loss_xyz
165
+ + self.l_w["rot6d"] * loss_rot6d
166
+ + self.l_w["grip"] * loss_grip
167
+ )
168
+ self.logger.log_metrics(
169
+ {
170
+ "loss/train/xyz": loss_xyz.item(),
171
+ "loss/train/rot6d": loss_rot6d.item(),
172
+ "loss/train/grip": loss_grip.item(),
173
+ }
174
+ )
175
+ return loss
176
+
177
+ def calculate_loss(
178
+ self, pcd: torch.Tensor, robot_state_obs: torch.Tensor, robot_state_pred: torch.Tensor
179
+ ):
180
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
181
+ ny: torch.Tensor = robot_state_pred
182
+
183
+ B = ny.shape[0]
184
+ # T = ny.shape[1]
185
+
186
+ # Sample random time step
187
+ t_shape = (B, 1, 1)
188
+ t = torch.rand(t_shape, device=DEVICE)
189
+
190
+ # Initialize start and end poses + gripper state
191
+ z0_xyz, z0_SO3, z0_gripper = self._init_noise(B)
192
+ z1_xyz, z1_SO3, z1_gripper = self._pfp_to_pp(ny)
193
+ target_pfp = ny
194
+
195
+ # Calculate relative change between them
196
+ target_vel_xyz = z1_xyz - z0_xyz
197
+ target_vel_so3 = pp.Log(pp.Inv(z0_SO3) @ z1_SO3)
198
+ target_vel_gripper = z1_gripper - z0_gripper
199
+
200
+ # Move to intermediate step
201
+ zt_xyz = z0_xyz + target_vel_xyz * t
202
+ zt_SO3: pp.SO3 = z0_SO3 @ pp.Exp(target_vel_so3 * t)
203
+ zt_gripper: torch.Tensor = z0_gripper + target_vel_gripper * t
204
+
205
+ # Convert to pfp network input representation
206
+ zt_pfp = self._pp_to_pfp(zt_xyz, zt_SO3, zt_gripper)
207
+ timesteps = t.squeeze() * self.pos_emb_scale if self.time_conditioning else None
208
+
209
+ # Do prediction
210
+ pred_pfp = self.diffusion_net(zt_pfp, timesteps, global_cond=nx)
211
+ assert pred_pfp.shape == zt_pfp.shape
212
+ # TODO: you could do procrustes here
213
+
214
+ # Calculate loss
215
+ loss_xyz = self.loss_fun(pred_pfp[..., :3], target_pfp[..., :3])
216
+ loss_rot6d = self.loss_fun(pred_pfp[..., 3:9], target_pfp[..., 3:9])
217
+ loss_grip = self.loss_fun(pred_pfp[..., 9], target_pfp[..., 9])
218
+ return loss_xyz, loss_rot6d, loss_grip
219
+
220
+ # ############### Inference ################
221
+
222
+ def eval_forward(self, batch: tuple[torch.Tensor, ...], outputs=None) -> torch.Tensor:
223
+ """
224
+ batch: the output of the eval dataloader
225
+ outputs: the output of the forward pass
226
+ """
227
+ batch = self._norm_data(batch)
228
+ pcd, robot_state_obs, robot_state_pred = batch
229
+
230
+ # Eval loss
231
+ loss_xyz, loss_rot6d, loss_grip = self.calculate_loss(
232
+ pcd, robot_state_obs, robot_state_pred
233
+ )
234
+ loss_total = (
235
+ self.l_w["xyz"] * loss_xyz
236
+ + self.l_w["rot6d"] * loss_rot6d
237
+ + self.l_w["grip"] * loss_grip
238
+ )
239
+ self.logger.log_metrics(
240
+ {
241
+ "loss/eval/xyz": loss_xyz.item(),
242
+ "loss/eval/rot6d": loss_rot6d.item(),
243
+ "loss/eval/grip": loss_grip.item(),
244
+ "loss/eval/total": loss_total.item(),
245
+ }
246
+ )
247
+
248
+ # Eval metrics
249
+ pred_y = self.infer_y(pcd, robot_state_obs)
250
+ mse_xyz = nn.functional.mse_loss(pred_y[..., :3], robot_state_pred[..., :3])
251
+ mse_rot6d = nn.functional.mse_loss(pred_y[..., 3:9], robot_state_pred[..., 3:9])
252
+ mse_grip = nn.functional.mse_loss(pred_y[..., 9], robot_state_pred[..., 9])
253
+ self.logger.log_metrics(
254
+ {
255
+ "metrics/eval/mse_xyz": mse_xyz.item(),
256
+ "metrics/eval/mse_rot6d": mse_rot6d.item(),
257
+ "metrics/eval/mse_grip": mse_grip.item(),
258
+ }
259
+ )
260
+ return pred_y
261
+
262
+ def infer_y(
263
+ self,
264
+ pcd: torch.Tensor,
265
+ robot_state_obs: torch.Tensor,
266
+ noise=None,
267
+ return_traj=False,
268
+ ) -> torch.Tensor:
269
+ nx: torch.Tensor = self.obs_encoder(pcd, robot_state_obs)
270
+ B = nx.shape[0]
271
+ z_xyz, z_SO3, z_gripper = self._init_noise(B) if noise is None else noise
272
+ z = self._pp_to_pfp(z_xyz, z_SO3, z_gripper)
273
+ traj = [z]
274
+ t0, dt = get_timesteps(self.flow_schedule, self.num_k_infer, exp_scale=self.exp_scale)
275
+ for i in range(self.num_k_infer):
276
+ t = torch.ones((B), device=DEVICE) * t0[i]
277
+ timesteps = t * self.pos_emb_scale if self.time_conditioning else None
278
+ pred_final_pfp = self.diffusion_net(z, timesteps, global_cond=nx)
279
+ z1_xyz, z1_SO3, z1_gripper = self._pfp_to_pp(pred_final_pfp)
280
+
281
+ z_xyz = z_xyz + (z1_xyz - z_xyz) * dt[i]
282
+ z_SO3 = z_SO3 @ pp.Exp(pp.Log(pp.Inv(z_SO3) @ z1_SO3) * dt[i])
283
+ z_gripper = z_gripper + (z1_gripper - z_gripper) * dt[i]
284
+
285
+ z = self._pp_to_pfp(z_xyz, z_SO3, z_gripper)
286
+ traj.append(z)
287
+ return torch.stack(traj) if return_traj else traj[-1]
288
+
289
+ @classmethod
290
+ def load_from_checkpoint(
291
+ cls,
292
+ ckpt_name: str,
293
+ ckpt_episode: str,
294
+ num_k_infer: int,
295
+ flow_schedule: str = None,
296
+ exp_scale: float = None,
297
+ ):
298
+ ckpt_dir = REPO_DIRS.CKPT / ckpt_name
299
+ ckpt_path_list = list(ckpt_dir.glob(f"{ckpt_episode}*"))
300
+ assert len(ckpt_path_list) > 0, f"No checkpoint found in {ckpt_dir} with {ckpt_episode}"
301
+ assert len(ckpt_path_list) < 2, f"Multiple ckpts found in {ckpt_dir} with {ckpt_episode}"
302
+ ckpt_fpath = ckpt_path_list[0]
303
+
304
+ state_dict = torch.load(ckpt_fpath, map_location=DEVICE)
305
+ cfg = OmegaConf.load(ckpt_dir / "config.yaml")
306
+ # cfg.model.obs_encoder.encoder.random_crop = False
307
+ assert cfg.model._target_.split(".")[-1] == cls.__name__
308
+ model: FMTargetPolicy = hydra.utils.instantiate(cfg.model)
309
+ model.load_state_dict(state_dict["state"]["model"])
310
+ model.to(DEVICE)
311
+ model.eval()
312
+ if flow_schedule is not None:
313
+ model.set_flow_schedule(flow_schedule, exp_scale)
314
+ if num_k_infer is not None:
315
+ model.set_num_k_infer(num_k_infer)
316
+ return model
317
+
318
+
319
+ class FMTargetPolicyImage(FMTargetPolicy):
320
+
321
+ def _norm_obs(self, image: torch.Tensor) -> torch.Tensor:
322
+ """
323
+ Image normalization is handled in the backbone; here we only convert to float and rescale to [0, 1]
324
+ """
325
+ image = image.float() / 255.0
326
+ return image
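Unlike the velocity-predicting variants above, FMTargetPolicy regresses the final state directly (`pred_pfp` is compared against `target_pfp` in `calculate_loss`), and `infer_y` integrates by repeatedly stepping a fraction `dt` of the remaining gap toward the current endpoint prediction. A toy sketch of that update for the translation part, with a fixed fake prediction in place of the network, is given below.

```python
# Toy sketch (not repo code): endpoint-style Euler update as in FMTargetPolicy.infer_y,
# shown for translation only; the real policy re-predicts z1_hat at every step.
import torch

z = torch.zeros(3)                        # current sample
z1_hat = torch.tensor([1.0, 2.0, 3.0])    # stand-in for the network's predicted endpoint
for dt in torch.full((10,), 0.1):         # a linear schedule: 10 steps of dt = 0.1
    z = z + (z1_hat - z) * dt             # each step removes a fraction dt of the remaining gap
print(z)                                  # contracts toward z1_hat
```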
third_party/PointFlowMatch/pyproject.toml ADDED
@@ -0,0 +1,46 @@
1
+ # https://packaging.python.org/en/latest/specifications/declaring-project-metadata/#declaring-project-metadata
2
+ # https://packaging.python.org/en/latest/tutorials/packaging-projects/
3
+
4
+ [build-system]
5
+ requires = ["hatchling"]
6
+ build-backend = "hatchling.build"
7
+
8
+ [tool.setuptools]
9
+ py-modules = ["pfp"]
10
+
11
+ [tool.black]
12
+ line-length = 100
13
+
14
+ [project]
15
+ name = "pfp"
16
+ version = "0.0.1"
17
+ authors = [{ name = "Eugenio Chisari", email = "eugenio.chisari@gmail.com" }]
18
+ description = ""
19
+ readme = "README.md"
20
+ requires-python = ">=3.8"
21
+ dependencies = [
22
+ "numpy==1.23.5",
23
+ "spatialmath-python==1.1.9",
24
+ "prompt-toolkit==3.0.36",
25
+ "ipython<=8.17.2",
26
+ "trimesh==4.3.2",
27
+ "open3d==0.18.0",
28
+ "numba<=0.59.1",
29
+ "zarr<=2.17.2",
30
+ "matplotlib<=3.8.4",
31
+ "torch<=2.1.2",
32
+ "torchvision<=0.16.2",
33
+ "einops==0.7.0",
34
+ "diffusers==0.27.2",
35
+ "composer<=0.21.3",
36
+ "hydra-core==1.3.2",
37
+ "wandb<=0.17.3",
38
+ "av==8.1.0",
39
+ "yourdfpy==0.0.56",
40
+ "geomstats[pytorch]==2.7.0",
41
+ "imagecodecs"
42
+ ]
43
+
44
+
45
+ [project.optional-dependencies]
46
+ dev = []
third_party/PointFlowMatch/sandbox/augmentation.py ADDED
@@ -0,0 +1,62 @@
1
+ import copy
2
+ import numpy as np
3
+ from torch.utils.data import DataLoader
4
+ from pfp import DATA_DIRS
5
+ from pfp.data.dataset_pcd import RobotDatasetPcd, augment_pcd_data
6
+ from pfp.common.visualization import RerunViewer as RV
7
+ from pfp.common.visualization import RerunTraj
8
+ import rerun as rr
9
+
10
+ rr_traj = {
11
+ "original_robot_obs": RerunTraj(),
12
+ "augmented_robot_obs": RerunTraj(),
13
+ "original_prediction": RerunTraj(),
14
+ "augmented_prediction": RerunTraj(),
15
+ }
16
+
17
+
18
+ def vis_batch(name, batch):
19
+ pcd, robot_state_obs, robot_state_pred = batch
20
+ pcd = pcd[0, -1].cpu().numpy()
21
+ robot_state_obs = robot_state_obs[0].cpu().numpy()
22
+ robot_state_pred = robot_state_pred[0].cpu().numpy()
23
+ RV.add_np_pointcloud(
24
+ f"vis/{name}_pcd", points=pcd[:, :3], colors_uint8=(pcd[:, 3:6] * 255).astype(np.uint8)
25
+ )
26
+ rr_traj[f"{name}_robot_obs"].add_traj(f"{name}_robot_obs", robot_state_obs, size=0.008)
27
+ rr_traj[f"{name}_prediction"].add_traj(f"{name}_prediction", robot_state_pred)
28
+ return
29
+
30
+
31
+ RV("augmentation_vis")
32
+ RV.add_axis("vis/origin", np.eye(4), timeless=True)
33
+
34
+ task_name = "sponge_on_plate"
35
+
36
+ data_path_train = DATA_DIRS.PFP_REAL / task_name / "train"
37
+ dataset_train = RobotDatasetPcd(
38
+ data_path_train,
39
+ n_obs_steps=2,
40
+ n_pred_steps=32,
41
+ subs_factor=3,
42
+ use_pc_color=False,
43
+ n_points=4096,
44
+ )
45
+ dataloader_train = DataLoader(
46
+ dataset_train,
47
+ shuffle=False,
48
+ batch_size=1,
49
+ persistent_workers=False,
50
+ )
51
+
52
+ for i, batch in enumerate(dataloader_train):
53
+ rr.set_time_sequence("step", i)
54
+ original_batch = copy.deepcopy(batch)
55
+ vis_batch("original", original_batch)
56
+
57
+ augmented_batch = copy.deepcopy(batch)
58
+ augmented_batch = augment_pcd_data(augmented_batch)
59
+ vis_batch("augmented", augmented_batch)
60
+
61
+ if i > 500:
62
+ break
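The sandbox script above renders the original and augmented batches side by side in Rerun; `augment_pcd_data` itself is defined in `pfp/data/dataset_pcd.py` and is not part of this diff. Judging from the `_augment_data` methods in the policies above, it most likely applies a shared random translation plus a point shuffle; the sketch below makes that guess explicit and is not the repo's implementation.

```python
# Assumed sketch of a pcd augmentation in the spirit of augment_pcd_data (hypothetical).
import torch

def augment_pcd_batch(pcd, robot_state_obs, robot_state_pred, shift: float = 0.2):
    offset = torch.rand(3) * 2 * shift - shift   # shared random xyz translation
    pcd, robot_state_obs, robot_state_pred = (
        x.clone() for x in (pcd, robot_state_obs, robot_state_pred)
    )
    pcd[..., :3] += offset
    robot_state_obs[..., :3] += offset
    robot_state_pred[..., :3] += offset
    idx = torch.randperm(pcd.shape[2])           # shuffle points along the P axis
    return pcd[:, :, idx, :], robot_state_obs, robot_state_pred
```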
third_party/PointFlowMatch/sandbox/learning_rate.py ADDED
@@ -0,0 +1,27 @@
1
+ import torch
2
+ from torch.optim import AdamW
3
+ from torch.optim.lr_scheduler import LambdaLR
4
+ from diffusion_policy.model.common.lr_scheduler import get_scheduler
5
+
6
+
7
+ epochs = 2000
8
+ len_dataset = 10000
9
+
10
+ params = torch.Tensor(1, 1, 1, 1)
11
+ optimizer = AdamW([params], lr=1.0e-4, betas=[0.95, 0.999], eps=1.0e-8, weight_decay=1.0e-6)
12
+ lr_scheduler: LambdaLR = get_scheduler(
13
+ "cosine",
14
+ optimizer=optimizer,
15
+ num_warmup_steps=500,
16
+ num_training_steps=(len_dataset * epochs),
17
+ # pytorch assumes stepping LRScheduler every epoch
18
+ # however huggingface diffusers steps it every batch
19
+ )
20
+
21
+ for epoch in range(epochs):
22
+ # for _ in range(len_dataset):
23
+ lr_scheduler.step()
24
+ # print(lr_scheduler.get_last_lr())
25
+
26
+ if epoch % 100 == 0:
27
+ print(f"Epoch: {epoch}, LR: {lr_scheduler.get_last_lr()}")
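One note on this last sandbox script: `num_training_steps` is sized for per-batch stepping (`len_dataset * epochs` = 20,000,000 steps), so stepping the scheduler once per epoch, as the outer loop does with the inner loop commented out, advances only 2,000 steps and the learning rate barely leaves its post-warmup peak. If the goal is to see the full cosine decay, the inner loop has to be restored; a sketch continuing the variables defined in the script above:

```python
# Sketch continuing the script above: step the scheduler once per batch so the cosine
# schedule actually traverses its num_training_steps budget (20M steps, so it runs slowly).
for epoch in range(epochs):
    for _ in range(len_dataset):
        lr_scheduler.step()
    if epoch % 100 == 0:
        print(f"Epoch: {epoch}, LR: {lr_scheduler.get_last_lr()}")
```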