xinjie.wang committed on
Commit
1d2e276
·
1 Parent(s): 5c8b822
embodied_gen/data/asset_converter.py ADDED
@@ -0,0 +1,497 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import xml.etree.ElementTree as ET
6
+ from abc import ABC, abstractmethod
7
+ from dataclasses import dataclass
8
+ from shutil import copy
9
+
10
+ import trimesh
11
+ from scipy.spatial.transform import Rotation
12
+
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ __all__ = [
18
+ "AssetConverterFactory",
19
+ "AssetType",
20
+ "MeshtoMJCFConverter",
21
+ "MeshtoUSDConverter",
22
+ "URDFtoUSDConverter",
23
+ ]
24
+
25
+
26
class AssetType(str):
    """Namespace of supported asset-format identifiers.

    Subclasses ``str`` so members compare equal to plain strings
    (e.g. ``AssetType.URDF == "urdf"``), which `AssetConverterFactory`
    relies on when callers pass either the constant or a raw string.

    NOTE: the previous ``@dataclass`` decorator was removed — the class has
    no annotated fields, so the decorator generated nothing and only
    suggested an instance-record semantics this type does not have.
    """

    MJCF = "mjcf"  # MuJoCo XML scene format
    USD = "usd"  # Pixar / Omniverse Universal Scene Description
    URDF = "urdf"  # Unified Robot Description Format
    MESH = "mesh"  # bare mesh file referenced from a URDF
34
+
35
+
36
class AssetConverterBase(ABC):
    """Abstract base class for asset format converters.

    Subclasses implement :meth:`convert`; the base provides mesh
    transform baking shared by the concrete converters, and a trivial
    context-manager protocol so every converter can be used in a
    ``with`` statement (subclasses may override to manage resources).
    """

    @abstractmethod
    def convert(self, urdf_path: str, output_path: str, **kwargs) -> str:
        """Convert the asset at ``urdf_path``, writing to ``output_path``."""
        pass

    def transform_mesh(
        self, input_mesh: str, output_mesh: str, mesh_origin: ET.Element
    ) -> None:
        """Bake the URDF ``<origin>`` transform into the mesh vertices.

        Args:
            input_mesh: Path of the mesh file to load.
            output_mesh: Path where the transformed mesh is exported
                (parent directories are created as needed).
            mesh_origin: URDF ``<origin>`` element. Its ``rpy`` and
                ``xyz`` attributes are each optional and default to
                ``"0 0 0"``, matching the URDF specification.
        """
        mesh = trimesh.load(input_mesh)
        # URDF allows omitting `rpy`/`xyz`; previously a missing attribute
        # raised AttributeError on `.split`. `split()` with no argument also
        # tolerates repeated whitespace between the three numbers.
        rpy = [float(v) for v in mesh_origin.get("rpy", "0 0 0").split()]
        offset = [float(v) for v in mesh_origin.get("xyz", "0 0 0").split()]
        rotation = Rotation.from_euler("xyz", rpy, degrees=False)
        # Row-vector convention: v' = v @ R^T + t.
        mesh.vertices = (mesh.vertices @ rotation.as_matrix().T) + offset

        os.makedirs(os.path.dirname(output_mesh), exist_ok=True)
        _ = mesh.export(output_mesh)

        return

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Never suppress exceptions raised inside the `with` body.
        return False
63
+
64
+
65
class MeshtoMJCFConverter(AssetConverterBase):
    """Convert single-link mesh URDF files into MJCF (MuJoCo XML) format.

    For each ``<link>`` in the URDF, a MJCF ``<body>`` is created with its
    visual and collision mesh geoms; mesh/texture files referenced by the
    URDF are copied next to the output MJCF so relative paths keep working.
    """

    def __init__(
        self,
        **kwargs,
    ) -> None:
        # Extra options are stored but not currently consumed; kept for
        # interface parity with the other converters.
        self.kwargs = kwargs

    def _copy_asset_file(self, src: str, dst: str) -> None:
        """Copy ``src`` to ``dst``, creating parent dirs; no-op if ``dst`` exists."""
        if os.path.exists(dst):
            return
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        copy(src, dst)

    def add_geometry(
        self,
        mujoco_element: ET.Element,
        link: ET.Element,
        body: ET.Element,
        tag: str,
        input_dir: str,
        output_dir: str,
        mesh_name: str,
        material: ET.Element | None = None,
        is_collision: bool = False,
    ) -> None:
        """Add one mesh geom to the MJCF body from the URDF link.

        Args:
            mujoco_element: MJCF ``<asset>`` element receiving the mesh entry.
            link: URDF ``<link>`` element to read from.
            body: MJCF ``<body>`` element receiving the ``<geom>``.
            tag: Which URDF child to use, ``"visual"`` or ``"collision"``.
            input_dir: Directory of the source URDF (mesh paths are relative).
            output_dir: Directory of the output MJCF.
            mesh_name: Name registered for the mesh asset in MJCF.
            material: Optional MJCF material to bind to the geom.
            is_collision: If True, enable contacts and hide the geom.
        """
        element = link.find(tag)
        geometry = element.find("geometry")
        mesh = geometry.find("mesh")
        filename = mesh.get("filename")
        scale = mesh.get("scale", "1.0 1.0 1.0")

        # Register the mesh in <asset> and reference it from the body.
        ET.SubElement(
            mujoco_element, "mesh", name=mesh_name, file=filename, scale=scale
        )
        geom = ET.SubElement(body, "geom", type="mesh", mesh=mesh_name)

        # Mesh paths are relative to the URDF; mirror them under output_dir.
        input_mesh = f"{input_dir}/{filename}"
        output_mesh = f"{output_dir}/{filename}"
        self._copy_asset_file(input_mesh, output_mesh)

        # Bake the URDF <origin> rotation/offset into the copied mesh, since
        # the geom itself is emitted without a pos/quat.
        mesh_origin = element.find("origin")
        if mesh_origin is not None:
            self.transform_mesh(input_mesh, output_mesh, mesh_origin)

        if material is not None:
            geom.set("material", material.get("name"))

        if is_collision:
            geom.set("contype", "1")
            geom.set("conaffinity", "1")
            # Fully transparent so collision geometry is not rendered.
            geom.set("rgba", "1 1 1 0")

    def add_materials(
        self,
        mujoco_element: ET.Element,
        link: ET.Element,
        tag: str,
        input_dir: str,
        output_dir: str,
        name: str,
        reflectance: float = 0.2,
    ) -> ET.Element:
        """Add a textured material to the MJCF asset from the URDF link.

        Assumes the texture lives at ``material_0.png`` next to the mesh
        file referenced by the link (the layout produced by the asset
        generation pipeline).

        Returns:
            The created MJCF ``<material>`` element.
        """
        element = link.find(tag)
        geometry = element.find("geometry")
        mesh = geometry.find("mesh")
        filename = mesh.get("filename")
        dirname = os.path.dirname(filename)

        material = ET.SubElement(
            mujoco_element,
            "material",
            name=f"material_{name}",
            texture=f"texture_{name}",
            reflectance=str(reflectance),
        )
        ET.SubElement(
            mujoco_element,
            "texture",
            name=f"texture_{name}",
            type="2d",
            file=f"{dirname}/material_0.png",
        )

        self._copy_asset_file(
            f"{input_dir}/{dirname}/material_0.png",
            f"{output_dir}/{dirname}/material_0.png",
        )

        return material

    def convert(self, urdf_path: str, mjcf_path: str):
        """Convert a URDF file to MJCF format.

        Args:
            urdf_path: Input URDF file path.
            mjcf_path: Output MJCF file path (directory created if missing).
        """
        tree = ET.parse(urdf_path)
        root = tree.getroot()

        mujoco_struct = ET.Element("mujoco")
        mujoco_struct.set("model", root.get("name"))
        mujoco_asset = ET.SubElement(mujoco_struct, "asset")
        mujoco_worldbody = ET.SubElement(mujoco_struct, "worldbody")

        input_dir = os.path.dirname(urdf_path)
        output_dir = os.path.dirname(mjcf_path)
        os.makedirs(output_dir, exist_ok=True)
        for idx, link in enumerate(root.findall("link")):
            link_name = link.get("name", "unnamed_link")
            body = ET.SubElement(mujoco_worldbody, "body", name=link_name)

            material = self.add_materials(
                mujoco_asset,
                link,
                "visual",
                input_dir,
                output_dir,
                name=str(idx),
            )
            self.add_geometry(
                mujoco_asset,
                link,
                body,
                "visual",
                input_dir,
                output_dir,
                f"visual_mesh_{idx}",
                material,
            )
            self.add_geometry(
                mujoco_asset,
                link,
                body,
                "collision",
                input_dir,
                output_dir,
                f"collision_mesh_{idx}",
                is_collision=True,
            )

        tree = ET.ElementTree(mujoco_struct)
        ET.indent(tree, space="  ", level=0)

        tree.write(mjcf_path, encoding="utf-8", xml_declaration=True)
        logger.info(f"Successfully converted {urdf_path} → {mjcf_path}")
214
+
215
+
216
class MeshtoUSDConverter(AssetConverterBase):
    """Convert the mesh referenced by a URDF file into USD format.

    Runs inside an Isaac Sim ``SimulationApp``: one can be injected via
    ``simulation_app``, otherwise a headless app is launched lazily in
    ``__enter__`` and closed in ``__exit__``. After the Isaac Lab
    ``MeshConverter`` produces the USD, :meth:`convert` post-processes the
    stage (relative texture paths, convex-decomposition collision, physics
    API schemas).
    """

    # API schemas appended to the mesh prim's `apiSchemas` metadata so the
    # asset participates in rendering and physics inside Isaac Sim.
    DEFAULT_BIND_APIS = [
        "MaterialBindingAPI",
        "PhysicsMeshCollisionAPI",
        "PhysicsCollisionAPI",
        "PhysxCollisionAPI",
        "PhysicsMassAPI",
        "PhysicsRigidBodyAPI",
        "PhysxRigidBodyAPI",
    ]

    def __init__(
        self,
        force_usd_conversion: bool = True,
        make_instanceable: bool = False,
        simulation_app=None,
        **kwargs,
    ):
        """Store MeshConverterCfg parameters and an optional external app.

        Args:
            force_usd_conversion: Forwarded to ``MeshConverterCfg``.
            make_instanceable: Forwarded to ``MeshConverterCfg``.
            simulation_app: Already-running Isaac Sim app to reuse; when
                None, ``__enter__`` launches (and later owns) a headless one.
            **kwargs: Extra ``MeshConverterCfg`` fields passed through.
        """
        self.usd_parms = dict(
            force_usd_conversion=force_usd_conversion,
            make_instanceable=make_instanceable,
            **kwargs,
        )
        if simulation_app is not None:
            self.simulation_app = simulation_app

    def __enter__(self):
        # Imported lazily: isaaclab is only needed (and installed) where
        # the simulator runs.
        from isaaclab.app import AppLauncher

        # Only launch our own app when none was injected in __init__;
        # `app_launcher` existing marks that we own the app's lifetime.
        if not hasattr(self, "simulation_app"):
            launch_args = dict(
                headless=True,
                no_splash=True,
                fast_shutdown=True,
                disable_gpu=True,
            )
            self.app_launcher = AppLauncher(launch_args)
            self.simulation_app = self.app_launcher.app

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Close the simulation app only if it was created here
        # (injected apps are the caller's responsibility).
        if hasattr(self, "app_launcher"):
            self.simulation_app.close()

        if exc_val is not None:
            logger.error(f"Exception occurred: {exc_val}.")

        # Returning False propagates any exception to the caller.
        return False

    def convert(self, urdf_path: str, output_file: str):
        """Convert a URDF file to USD and post-process collision meshes."""
        from isaaclab.sim.converters import MeshConverter, MeshConverterCfg
        from pxr import PhysxSchema, Sdf, Usd, UsdShade

        # The URDF here is only used to locate the visual mesh and its
        # optional <origin>; physics comes from the post-processing below.
        tree = ET.parse(urdf_path)
        root = tree.getroot()
        mesh_file = root.find("link/visual/geometry/mesh").get("filename")
        input_mesh = os.path.join(os.path.dirname(urdf_path), mesh_file)
        output_dir = os.path.abspath(os.path.dirname(output_file))
        output_mesh = f"{output_dir}/mesh/{os.path.basename(mesh_file)}"
        mesh_origin = root.find("link/visual/origin")
        if mesh_origin is not None:
            # Bake the origin transform into the mesh copy fed to USD.
            self.transform_mesh(input_mesh, output_mesh, mesh_origin)

        cfg = MeshConverterCfg(
            asset_path=output_mesh,
            usd_dir=output_dir,
            usd_file_name=os.path.basename(output_file),
            **self.usd_parms,
        )
        urdf_converter = MeshConverter(cfg)
        usd_path = urdf_converter.usd_path

        # Post-process the generated stage in its root layer.
        stage = Usd.Stage.Open(usd_path)
        layer = stage.GetRootLayer()
        with Usd.EditContext(stage, layer):
            for prim in stage.Traverse():
                # Change texture path to relative path so the USD stays
                # portable when the output directory is moved.
                # NOTE(review): assumes the generated material prim is
                # named "material_0" — confirm against MeshConverter output.
                if prim.GetName() == "material_0":
                    shader = UsdShade.Shader(prim).GetInput("diffuse_texture")
                    if shader.Get() is not None:
                        relative_path = shader.Get().path.replace(
                            f"{output_dir}/", ""
                        )
                        shader.Set(Sdf.AssetPath(relative_path))

                # Add convex decomposition collision and set ShrinkWrap.
                elif prim.GetName() == "mesh":
                    approx_attr = prim.GetAttribute("physics:approximation")
                    if not approx_attr:
                        approx_attr = prim.CreateAttribute(
                            "physics:approximation", Sdf.ValueTypeNames.Token
                        )
                    approx_attr.Set("convexDecomposition")

                    physx_conv_api = (
                        PhysxSchema.PhysxConvexDecompositionCollisionAPI.Apply(
                            prim
                        )
                    )
                    physx_conv_api.GetShrinkWrapAttr().Set(True)

                    # Append the default physics/material API schemas to the
                    # prim metadata without duplicating existing entries.
                    api_schemas = prim.GetMetadata("apiSchemas")
                    if api_schemas is None:
                        api_schemas = Sdf.TokenListOp()

                    api_list = list(api_schemas.GetAddedOrExplicitItems())
                    for api in self.DEFAULT_BIND_APIS:
                        if api not in api_list:
                            api_list.append(api)

                    api_schemas.appendedItems = api_list
                    prim.SetMetadata("apiSchemas", api_schemas)

        layer.Save()
        logger.info(f"Successfully converted {urdf_path} → {usd_path}")
336
+
337
+
338
class URDFtoUSDConverter(MeshtoUSDConverter):
    """Convert URDF files into USD format.

    Unlike the parent class (which converts a single mesh), this feeds the
    whole URDF to Isaac Lab's ``UrdfConverter`` and then post-processes
    "collisions" prims the same way. Reuses the parent's
    ``__enter__``/``__exit__`` SimulationApp lifecycle.

    Args:
        fix_base (bool): Whether to fix the base link.
        merge_fixed_joints (bool): Whether to merge fixed joints.
        make_instanceable (bool): Whether to make prims instanceable.
        force_usd_conversion (bool): Force conversion to USD.
        collision_from_visuals (bool): Generate collisions from visuals if not provided.
        joint_drive: Joint drive configuration forwarded to ``UrdfConverterCfg``.
        rotate_wxyz (tuple[float] | None): Optional quaternion (w, x, y, z)
            applied as the orient op of the first child Xform after conversion.
        simulation_app: Already-running Isaac Sim app to reuse.
    """

    def __init__(
        self,
        fix_base: bool = False,
        merge_fixed_joints: bool = False,
        make_instanceable: bool = True,
        force_usd_conversion: bool = True,
        collision_from_visuals: bool = True,
        joint_drive=None,
        rotate_wxyz: tuple[float] | None = None,
        simulation_app=None,
        **kwargs,
    ):
        # Intentionally does not call super().__init__: usd_parms here are
        # UrdfConverterCfg fields, not MeshConverterCfg fields.
        self.usd_parms = dict(
            fix_base=fix_base,
            merge_fixed_joints=merge_fixed_joints,
            make_instanceable=make_instanceable,
            force_usd_conversion=force_usd_conversion,
            collision_from_visuals=collision_from_visuals,
            joint_drive=joint_drive,
            **kwargs,
        )
        self.rotate_wxyz = rotate_wxyz
        if simulation_app is not None:
            self.simulation_app = simulation_app

    def convert(self, urdf_path: str, output_file: str):
        """Convert a URDF file to USD and post-process collision meshes."""
        from isaaclab.sim.converters import UrdfConverter, UrdfConverterCfg
        from pxr import Gf, PhysxSchema, Sdf, Usd, UsdGeom

        cfg = UrdfConverterCfg(
            asset_path=urdf_path,
            usd_dir=os.path.abspath(os.path.dirname(output_file)),
            usd_file_name=os.path.basename(output_file),
            **self.usd_parms,
        )

        urdf_converter = UrdfConverter(cfg)
        usd_path = urdf_converter.usd_path

        # Post-process the generated stage in its root layer.
        stage = Usd.Stage.Open(usd_path)
        layer = stage.GetRootLayer()
        with Usd.EditContext(stage, layer):
            for prim in stage.Traverse():
                # NOTE(review): assumes UrdfConverter names collision prims
                # "collisions" — confirm against the Isaac Lab version used.
                if prim.GetName() == "collisions":
                    approx_attr = prim.GetAttribute("physics:approximation")
                    if not approx_attr:
                        approx_attr = prim.CreateAttribute(
                            "physics:approximation", Sdf.ValueTypeNames.Token
                        )
                    approx_attr.Set("convexDecomposition")

                    physx_conv_api = (
                        PhysxSchema.PhysxConvexDecompositionCollisionAPI.Apply(
                            prim
                        )
                    )
                    physx_conv_api.GetShrinkWrapAttr().Set(True)

                    # Append the default physics/material API schemas to the
                    # prim metadata without duplicating existing entries.
                    api_schemas = prim.GetMetadata("apiSchemas")
                    if api_schemas is None:
                        api_schemas = Sdf.TokenListOp()

                    api_list = list(api_schemas.GetAddedOrExplicitItems())
                    for api in self.DEFAULT_BIND_APIS:
                        if api not in api_list:
                            api_list.append(api)

                    api_schemas.appendedItems = api_list
                    prim.SetMetadata("apiSchemas", api_schemas)

        # Optionally apply an extra orientation to the first child Xform
        # (e.g. to re-align an asset authored in a different up-axis).
        if self.rotate_wxyz is not None:
            inner_prim = next(
                p
                for p in stage.GetDefaultPrim().GetChildren()
                if p.IsA(UsdGeom.Xform)
            )
            xformable = UsdGeom.Xformable(inner_prim)
            # Replace any existing xform ops with a single orient op.
            xformable.ClearXformOpOrder()
            orient_op = xformable.AddOrientOp(UsdGeom.XformOp.PrecisionDouble)
            orient_op.Set(Gf.Quatd(*self.rotate_wxyz))

        layer.Save()
        logger.info(f"Successfully converted {urdf_path} → {usd_path}")
433
+
434
+
435
class AssetConverterFactory:
    """Factory class for creating asset converters based on target and source types."""

    @staticmethod
    def create(
        target_type: AssetType, source_type: AssetType = "urdf", **kwargs
    ) -> AssetConverterBase:
        """Create an asset converter instance based on target and source types.

        Args:
            target_type: Desired output format (``AssetType`` value or str).
            source_type: Input format; defaults to ``"urdf"``.
            **kwargs: Forwarded to the chosen converter's constructor.

        Raises:
            ValueError: If the (source, target) pair is unsupported.
        """
        # Dispatch table keyed on (target, source); AssetType constants are
        # str subclass values, so plain strings match as keys too.
        converter_registry = {
            (AssetType.MJCF, AssetType.URDF): MeshtoMJCFConverter,
            (AssetType.USD, AssetType.URDF): URDFtoUSDConverter,
            (AssetType.USD, AssetType.MESH): MeshtoUSDConverter,
        }
        converter_cls = converter_registry.get((target_type, source_type))
        if converter_cls is None:
            raise ValueError(
                f"Unsupported converter type: {source_type} -> {target_type}."
            )

        return converter_cls(**kwargs)
455
+
456
+
457
if __name__ == "__main__":
    # Demo entry point: choose the target format here.
    target_asset_type = AssetType.USD  # or AssetType.MJCF

    urdf_paths = [
        "outputs/embodiedgen_assets/demo_assets/remote_control/result/remote_control.urdf",
    ]

    # Per-target output paths and the matching source format.
    demo_outputs = {
        AssetType.MJCF: [
            "outputs/embodiedgen_assets/demo_assets/remote_control/mjcf/remote_control.mjcf",
        ],
        AssetType.USD: [
            "outputs/embodiedgen_assets/demo_assets/remote_control/usd/remote_control.usd",
        ],
    }
    demo_sources = {
        AssetType.MJCF: AssetType.URDF,
        AssetType.USD: AssetType.MESH,
    }

    output_files = demo_outputs[target_asset_type]
    asset_converter = AssetConverterFactory.create(
        target_type=target_asset_type,
        source_type=demo_sources[target_asset_type],
    )

    with asset_converter:
        for urdf_path, output_file in zip(urdf_paths, output_files):
            asset_converter.convert(urdf_path, output_file)

    # Example: full URDF -> USD conversion with an extra root rotation.
    # urdf_path = "outputs/embodiedgen_assets/demo_assets/remote_control/result/remote_control.urdf"
    # output_file = "outputs/embodiedgen_assets/demo_assets/remote_control/usd/remote_control.usd"
    # asset_converter = AssetConverterFactory.create(
    #     target_type=AssetType.USD,
    #     source_type=AssetType.URDF,
    #     rotate_wxyz=(0.7071, 0.7071, 0, 0),  # rotate 90 deg around the X-axis
    # )
    # with asset_converter:
    #     asset_converter.convert(urdf_path, output_file)
embodied_gen/data/backproject_v2.py CHANGED
@@ -545,7 +545,7 @@ def parse_args():
545
  "--color_path",
546
  nargs="+",
547
  type=str,
548
- help="Multiview color image in 6x512x512 file paths",
549
  )
550
  parser.add_argument(
551
  "--mesh_path",
@@ -625,7 +625,7 @@ def parse_args():
625
  action="store_true",
626
  help="Disable saving delight image",
627
  )
628
-
629
  args, unknown = parser.parse_known_args()
630
 
631
  return args
@@ -687,6 +687,14 @@ def entrypoint(
687
  num_views=1000,
688
  norm_mesh_ratio=0.5,
689
  )
 
 
 
 
 
 
 
 
690
  # Restore scale.
691
  mesh.vertices = mesh.vertices / scale
692
  mesh.vertices = mesh.vertices + center
 
545
  "--color_path",
546
  nargs="+",
547
  type=str,
548
+ help="Multiview color image in grid file paths",
549
  )
550
  parser.add_argument(
551
  "--mesh_path",
 
625
  action="store_true",
626
  help="Disable saving delight image",
627
  )
628
+ parser.add_argument("--n_max_faces", type=int, default=30000)
629
  args, unknown = parser.parse_known_args()
630
 
631
  return args
 
687
  num_views=1000,
688
  norm_mesh_ratio=0.5,
689
  )
690
+ if len(mesh.faces) > args.n_max_faces:
691
+ mesh.vertices, mesh.faces = mesh_fixer(
692
+ filter_ratio=0.8,
693
+ max_hole_size=0.04,
694
+ resolution=1024,
695
+ num_views=1000,
696
+ norm_mesh_ratio=0.5,
697
+ )
698
  # Restore scale.
699
  mesh.vertices = mesh.vertices / scale
700
  mesh.vertices = mesh.vertices + center
embodied_gen/envs/pick_embodiedgen.py CHANGED
@@ -16,7 +16,6 @@
16
 
17
  import json
18
  import os
19
- from copy import deepcopy
20
 
21
  import numpy as np
22
  import sapien
@@ -26,6 +25,7 @@ from mani_skill.envs.sapien_env import BaseEnv
26
  from mani_skill.sensors.camera import CameraConfig
27
  from mani_skill.utils import sapien_utils
28
  from mani_skill.utils.building import actors
 
29
  from mani_skill.utils.registration import register_env
30
  from mani_skill.utils.structs.actor import Actor
31
  from mani_skill.utils.structs.pose import Pose
@@ -78,6 +78,14 @@ class PickEmbodiedGen(BaseEnv):
78
  # Add small offset in z-axis to avoid collision.
79
  self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002)
80
  self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002)
 
 
 
 
 
 
 
 
81
 
82
  self.layouts = self.init_env_layouts(
83
  layout_file, num_envs, replace_objs
@@ -106,22 +114,30 @@ class PickEmbodiedGen(BaseEnv):
106
  def init_env_layouts(
107
  layout_file: str, num_envs: int, replace_objs: bool
108
  ) -> list[LayoutInfo]:
109
- layout = LayoutInfo.from_dict(json.load(open(layout_file, "r")))
110
  layouts = []
111
  for env_idx in range(num_envs):
112
  if replace_objs and env_idx > 0:
113
- layout = bfs_placement(deepcopy(layout))
114
- layouts.append(layout)
 
 
 
 
 
 
 
 
115
 
116
  return layouts
117
 
118
  @staticmethod
119
  def compute_robot_init_pose(
120
- layouts: list[LayoutInfo], num_envs: int, z_offset: float = 0.0
121
  ) -> list[list[float]]:
122
  robot_pose = []
123
  for env_idx in range(num_envs):
124
- layout = layouts[env_idx]
 
125
  robot_node = layout.relation[Scene3DItemEnum.ROBOT.value]
126
  x, y, z, qx, qy, qz, qw = layout.position[robot_node]
127
  robot_pose.append([x, y, z + z_offset, qw, qx, qy, qz])
@@ -154,25 +170,32 @@ class PickEmbodiedGen(BaseEnv):
154
  @property
155
  def _default_human_render_camera_configs(self):
156
  pose = sapien_utils.look_at(
157
- eye=[0.9, 0.0, 1.1], target=[0.0, 0.0, 0.9]
 
158
  )
159
 
160
  return CameraConfig(
161
- "render_camera", pose, 256, 256, np.deg2rad(75), 0.01, 100
 
 
 
 
 
 
162
  )
163
 
164
  def _load_agent(self, options: dict):
 
165
  super()._load_agent(options, sapien.Pose(p=[-10, 0, 10]))
166
 
167
  def _load_scene(self, options: dict):
168
  all_objects = []
169
- logger.info(f"Loading assets and decomposition mesh collisions...")
170
  for env_idx in range(self.num_envs):
171
  env_actors = load_assets_from_layout_file(
172
  self.scene,
173
  self.layouts[env_idx],
174
  z_offset=self.objs_z_offset,
175
- init_quat=self.init_quat,
176
  env_idx=env_idx,
177
  )
178
  self.env_actors[f"env{env_idx}"] = env_actors
@@ -229,7 +252,7 @@ class PickEmbodiedGen(BaseEnv):
229
  self.agent.controller.controllers["gripper"].reset()
230
 
231
  def render_gs3d_images(
232
- self, layouts: list[LayoutInfo], num_envs: int, init_quat: list[float]
233
  ) -> dict[str, np.ndarray]:
234
  sim_coord_align = (
235
  torch.tensor(SIM_COORD_ALIGN).to(torch.float32).to(self.device)
@@ -237,12 +260,18 @@ class PickEmbodiedGen(BaseEnv):
237
  cameras = self.scene.sensors.copy()
238
  cameras.update(self.scene.human_render_cameras)
239
 
240
- bg_node = layouts[0].relation[Scene3DItemEnum.BACKGROUND.value]
241
- gs_path = os.path.join(layouts[0].assets[bg_node], "gs_model.ply")
 
 
 
 
 
242
  raw_gs: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
243
  bg_images = dict()
244
  for env_idx in tqdm(range(num_envs), desc="Pre-rendering Background"):
245
- layout = layouts[env_idx]
 
246
  x, y, z, qx, qy, qz, qw = layout.position[bg_node]
247
  qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], init_quat)
248
  init_pose = torch.tensor([x, y, z, qx, qy, qz, qw])
 
16
 
17
  import json
18
  import os
 
19
 
20
  import numpy as np
21
  import sapien
 
25
  from mani_skill.sensors.camera import CameraConfig
26
  from mani_skill.utils import sapien_utils
27
  from mani_skill.utils.building import actors
28
+ from mani_skill.utils.building.ground import build_ground
29
  from mani_skill.utils.registration import register_env
30
  from mani_skill.utils.structs.actor import Actor
31
  from mani_skill.utils.structs.pose import Pose
 
78
  # Add small offset in z-axis to avoid collision.
79
  self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002)
80
  self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002)
81
+ self.camera_cfg = kwargs.pop("camera_cfg", None)
82
+ if self.camera_cfg is None:
83
+ self.camera_cfg = dict(
84
+ camera_eye=[0.9, 0.0, 1.1],
85
+ camera_target_pt=[0.0, 0.0, 0.9],
86
+ image_hw=[256, 256],
87
+ fovy_deg=75,
88
+ )
89
 
90
  self.layouts = self.init_env_layouts(
91
  layout_file, num_envs, replace_objs
 
114
  def init_env_layouts(
115
  layout_file: str, num_envs: int, replace_objs: bool
116
  ) -> list[LayoutInfo]:
 
117
  layouts = []
118
  for env_idx in range(num_envs):
119
  if replace_objs and env_idx > 0:
120
+ layout_info = bfs_placement(layout_file)
121
+ else:
122
+ layout_info = json.load(open(layout_file, "r"))
123
+ layout_info = LayoutInfo.from_dict(layout_info)
124
+
125
+ layout_path = layout_file.replace(".json", f"_env{env_idx}.json")
126
+ with open(layout_path, "w") as f:
127
+ json.dump(layout_info.to_dict(), f, indent=4)
128
+
129
+ layouts.append(layout_path)
130
 
131
  return layouts
132
 
133
  @staticmethod
134
  def compute_robot_init_pose(
135
+ layouts: list[str], num_envs: int, z_offset: float = 0.0
136
  ) -> list[list[float]]:
137
  robot_pose = []
138
  for env_idx in range(num_envs):
139
+ layout = json.load(open(layouts[env_idx], "r"))
140
+ layout = LayoutInfo.from_dict(layout)
141
  robot_node = layout.relation[Scene3DItemEnum.ROBOT.value]
142
  x, y, z, qx, qy, qz, qw = layout.position[robot_node]
143
  robot_pose.append([x, y, z + z_offset, qw, qx, qy, qz])
 
170
  @property
171
  def _default_human_render_camera_configs(self):
172
  pose = sapien_utils.look_at(
173
+ eye=self.camera_cfg["camera_eye"],
174
+ target=self.camera_cfg["camera_target_pt"],
175
  )
176
 
177
  return CameraConfig(
178
+ "render_camera",
179
+ pose,
180
+ self.camera_cfg["image_hw"][1],
181
+ self.camera_cfg["image_hw"][0],
182
+ np.deg2rad(self.camera_cfg["fovy_deg"]),
183
+ 0.01,
184
+ 100,
185
  )
186
 
187
  def _load_agent(self, options: dict):
188
+ self.ground = build_ground(self.scene)
189
  super()._load_agent(options, sapien.Pose(p=[-10, 0, 10]))
190
 
191
  def _load_scene(self, options: dict):
192
  all_objects = []
193
+ logger.info(f"Loading EmbodiedGen assets...")
194
  for env_idx in range(self.num_envs):
195
  env_actors = load_assets_from_layout_file(
196
  self.scene,
197
  self.layouts[env_idx],
198
  z_offset=self.objs_z_offset,
 
199
  env_idx=env_idx,
200
  )
201
  self.env_actors[f"env{env_idx}"] = env_actors
 
252
  self.agent.controller.controllers["gripper"].reset()
253
 
254
  def render_gs3d_images(
255
+ self, layouts: list[str], num_envs: int, init_quat: list[float]
256
  ) -> dict[str, np.ndarray]:
257
  sim_coord_align = (
258
  torch.tensor(SIM_COORD_ALIGN).to(torch.float32).to(self.device)
 
260
  cameras = self.scene.sensors.copy()
261
  cameras.update(self.scene.human_render_cameras)
262
 
263
+ # Preload the background Gaussian Splatting model.
264
+ asset_root = os.path.dirname(layouts[0])
265
+ layout = LayoutInfo.from_dict(json.load(open(layouts[0], "r")))
266
+ bg_node = layout.relation[Scene3DItemEnum.BACKGROUND.value]
267
+ gs_path = os.path.join(
268
+ asset_root, layout.assets[bg_node], "gs_model.ply"
269
+ )
270
  raw_gs: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
271
  bg_images = dict()
272
  for env_idx in tqdm(range(num_envs), desc="Pre-rendering Background"):
273
+ layout = json.load(open(layouts[env_idx], "r"))
274
+ layout = LayoutInfo.from_dict(layout)
275
  x, y, z, qx, qy, qz, qw = layout.position[bg_node]
276
  qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], init_quat)
277
  init_pose = torch.tensor([x, y, z, qx, qy, qz, qw])
embodied_gen/models/gs_model.py CHANGED
@@ -39,6 +39,8 @@ __all__ = [
39
  "GaussianOperator",
40
  ]
41
 
 
 
42
 
43
  @dataclass
44
  class RenderResult:
@@ -210,9 +212,7 @@ class GaussianBase:
210
  device=device,
211
  )
212
 
213
- def save_to_ply(
214
- self, path: str, colors: torch.Tensor = None, enable_mask: bool = False
215
- ):
216
  os.makedirs(os.path.dirname(path), exist_ok=True)
217
  numpy_data = self.get_numpy_data()
218
  means = numpy_data["_means"]
@@ -249,7 +249,6 @@ class GaussianBase:
249
  shN = shN[~invalid_mask]
250
 
251
  num_points = means.shape[0]
252
-
253
  with open(path, "wb") as f:
254
  # Write PLY header
255
  f.write(b"ply\n")
@@ -258,18 +257,11 @@ class GaussianBase:
258
  f.write(b"property float x\n")
259
  f.write(b"property float y\n")
260
  f.write(b"property float z\n")
261
- f.write(b"property float nx\n")
262
- f.write(b"property float ny\n")
263
- f.write(b"property float nz\n")
264
-
265
- if colors is not None:
266
- for j in range(colors.shape[1]):
267
- f.write(f"property float f_dc_{j}\n".encode())
268
- else:
269
- for i, data in enumerate([sh0, shN]):
270
- prefix = "f_dc" if i == 0 else "f_rest"
271
- for j in range(data.shape[1]):
272
- f.write(f"property float {prefix}_{j}\n".encode())
273
 
274
  f.write(b"property float opacity\n")
275
 
@@ -283,24 +275,19 @@ class GaussianBase:
283
  # Write vertex data
284
  for i in range(num_points):
285
  f.write(struct.pack("<fff", *means[i])) # x, y, z
286
- f.write(struct.pack("<fff", 0, 0, 0)) # nx, ny, nz (zeros)
287
 
288
- if colors is not None:
289
- color = colors.detach().cpu().numpy()
290
- for j in range(color.shape[1]):
291
- f_dc = (color[i, j] - 0.5) / 0.2820947917738781
292
- f.write(struct.pack("<f", f_dc))
293
- else:
294
- for data in [sh0, shN]:
295
- for j in range(data.shape[1]):
296
- f.write(struct.pack("<f", data[i, j]))
297
 
298
- f.write(struct.pack("<f", opacities[i])) # opacity
299
 
300
  for data in [scales, quats]:
301
  for j in range(data.shape[1]):
302
  f.write(struct.pack("<f", data[i, j]))
303
 
 
 
304
 
305
  @dataclass
306
  class GaussianOperator(GaussianBase):
@@ -508,8 +495,8 @@ class GaussianOperator(GaussianBase):
508
 
509
 
510
  if __name__ == "__main__":
511
- input_gs = "outputs/test/debug.ply"
512
- output_gs = "./debug_v3.ply"
513
  gs_model: GaussianOperator = GaussianOperator.load_from_ply(input_gs)
514
 
515
  # 绕 x 轴旋转 180°
 
39
  "GaussianOperator",
40
  ]
41
 
42
+ SH_C0 = 0.2820947917738781
43
+
44
 
45
  @dataclass
46
  class RenderResult:
 
212
  device=device,
213
  )
214
 
215
+ def save_to_ply(self, path: str, enable_mask: bool = False) -> None:
 
 
216
  os.makedirs(os.path.dirname(path), exist_ok=True)
217
  numpy_data = self.get_numpy_data()
218
  means = numpy_data["_means"]
 
249
  shN = shN[~invalid_mask]
250
 
251
  num_points = means.shape[0]
 
252
  with open(path, "wb") as f:
253
  # Write PLY header
254
  f.write(b"ply\n")
 
257
  f.write(b"property float x\n")
258
  f.write(b"property float y\n")
259
  f.write(b"property float z\n")
260
+
261
+ for i, data in enumerate([sh0, shN]):
262
+ prefix = "f_dc" if i == 0 else "f_rest"
263
+ for j in range(data.shape[1]):
264
+ f.write(f"property float {prefix}_{j}\n".encode())
 
 
 
 
 
 
 
265
 
266
  f.write(b"property float opacity\n")
267
 
 
275
  # Write vertex data
276
  for i in range(num_points):
277
  f.write(struct.pack("<fff", *means[i])) # x, y, z
 
278
 
279
+ for data in [sh0, shN]:
280
+ for j in range(data.shape[1]):
281
+ f.write(struct.pack("<f", data[i, j]))
 
 
 
 
 
 
282
 
283
+ f.write(struct.pack("<f", opacities[i].item())) # opacity
284
 
285
  for data in [scales, quats]:
286
  for j in range(data.shape[1]):
287
  f.write(struct.pack("<f", data[i, j]))
288
 
289
+ return
290
+
291
 
292
  @dataclass
293
  class GaussianOperator(GaussianBase):
 
495
 
496
 
497
  if __name__ == "__main__":
498
+ input_gs = "outputs/layouts_gens_demo/task_0000/background/gs_model.ply"
499
+ output_gs = "./gs_model.ply"
500
  gs_model: GaussianOperator = GaussianOperator.load_from_ply(input_gs)
501
 
502
  # 绕 x 轴旋转 180°
embodied_gen/scripts/compose_layout.py CHANGED
@@ -50,10 +50,7 @@ def entrypoint(**kwargs):
50
  out_scene_path = f"{output_dir}/Iscene.glb"
51
  out_layout_path = f"{output_dir}/layout.json"
52
 
53
- with open(args.layout_path, "r") as f:
54
- layout_info = LayoutInfo.from_dict(json.load(f))
55
-
56
- layout_info = bfs_placement(layout_info, seed=args.seed)
57
  with open(out_layout_path, "w") as f:
58
  json.dump(layout_info.to_dict(), f, indent=4)
59
 
@@ -63,7 +60,7 @@ def entrypoint(**kwargs):
63
  sim_cli(
64
  layout_path=out_layout_path,
65
  output_dir=output_dir,
66
- robot_name="franka" if args.insert_robot else None,
67
  )
68
 
69
  logger.info(f"Layout placement completed in {output_dir}")
 
50
  out_scene_path = f"{output_dir}/Iscene.glb"
51
  out_layout_path = f"{output_dir}/layout.json"
52
 
53
+ layout_info = bfs_placement(args.layout_path, seed=args.seed)
 
 
 
54
  with open(out_layout_path, "w") as f:
55
  json.dump(layout_info.to_dict(), f, indent=4)
56
 
 
60
  sim_cli(
61
  layout_path=out_layout_path,
62
  output_dir=output_dir,
63
+ insert_robot=args.insert_robot,
64
  )
65
 
66
  logger.info(f"Layout placement completed in {output_dir}")
embodied_gen/scripts/gen_layout.py CHANGED
@@ -119,11 +119,15 @@ def entrypoint() -> None:
119
  match_scene_path = f"{os.path.dirname(args.bg_list)}/{match_key}"
120
  bg_save_dir = os.path.join(output_root, "background")
121
  copytree(match_scene_path, bg_save_dir, dirs_exist_ok=True)
122
- layout_info.assets[bg_node] = bg_save_dir
123
 
124
  # BFS layout placement.
 
 
 
 
125
  layout_info = bfs_placement(
126
- layout_info,
127
  limit_reach_range=True if args.insert_robot else False,
128
  seed=args.seed_layout,
129
  )
 
119
  match_scene_path = f"{os.path.dirname(args.bg_list)}/{match_key}"
120
  bg_save_dir = os.path.join(output_root, "background")
121
  copytree(match_scene_path, bg_save_dir, dirs_exist_ok=True)
122
+ layout_info.assets[bg_node] = "background"
123
 
124
  # BFS layout placement.
125
+ layout_path = f"{output_root}/layout.json"
126
+ with open(layout_path, "w") as f:
127
+ json.dump(layout_info.to_dict(), f, indent=4)
128
+
129
  layout_info = bfs_placement(
130
+ layout_path,
131
  limit_reach_range=True if args.insert_robot else False,
132
  seed=args.seed_layout,
133
  )
embodied_gen/scripts/imageto3d.py CHANGED
@@ -220,8 +220,8 @@ def entrypoint(**kwargs):
220
  )
221
 
222
  color_img = Image.open(color_path)
223
- half_height = int(color_img.height * 2 / 3)
224
- crop_img = color_img.crop((0, 0, color_img.width, half_height))
225
  geo_flag, geo_result = GEO_CHECKER([crop_img], text=asset_node)
226
  logger.warning(
227
  f"{GEO_CHECKER.__class__.__name__}: {geo_result} for {seg_path}"
 
220
  )
221
 
222
  color_img = Image.open(color_path)
223
+ keep_height = int(color_img.height * 2 / 3)
224
+ crop_img = color_img.crop((0, 0, color_img.width, keep_height))
225
  geo_flag, geo_result = GEO_CHECKER([crop_img], text=asset_node)
226
  logger.warning(
227
  f"{GEO_CHECKER.__class__.__name__}: {geo_result} for {seg_path}"
embodied_gen/scripts/parallel_sim.py CHANGED
@@ -20,7 +20,7 @@ from embodied_gen.utils.monkey_patches import monkey_patch_maniskill
20
  monkey_patch_maniskill()
21
  import json
22
  from collections import defaultdict
23
- from dataclasses import dataclass
24
  from typing import Literal
25
 
26
  import gymnasium as gym
@@ -69,6 +69,18 @@ class ParallelSimConfig:
69
  reach_target_only: bool = True
70
  """Whether to only reach target without full action"""
71
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  def entrypoint(**kwargs):
74
  if kwargs is None or len(kwargs) == 0:
@@ -83,6 +95,12 @@ def entrypoint(**kwargs):
83
  enable_shadow=cfg.enable_shadow,
84
  layout_file=cfg.layout_file,
85
  control_mode=cfg.control_mode,
 
 
 
 
 
 
86
  )
87
  env = RecordEpisode(
88
  env,
 
20
  monkey_patch_maniskill()
21
  import json
22
  from collections import defaultdict
23
+ from dataclasses import dataclass, field
24
  from typing import Literal
25
 
26
  import gymnasium as gym
 
69
  reach_target_only: bool = True
70
  """Whether to only reach target without full action"""
71
 
72
+ # Camera settings
73
+ camera_eye: list[float] = field(default_factory=lambda: [0.9, 0.0, 1.1])
74
+ """Camera eye position [x, y, z] in global coordinate system"""
75
+ camera_target_pt: list[float] = field(
76
+ default_factory=lambda: [0.0, 0.0, 0.9]
77
+ )
78
+ """Camera target (look-at) point [x, y, z] in global coordinate system"""
79
+ image_hw: list[int] = field(default_factory=lambda: [256, 256])
80
+ """Rendered image height and width [height, width]"""
81
+ fovy_deg: float = 75
82
+ """Camera vertical field of view in degrees"""
83
+
84
 
85
  def entrypoint(**kwargs):
86
  if kwargs is None or len(kwargs) == 0:
 
95
  enable_shadow=cfg.enable_shadow,
96
  layout_file=cfg.layout_file,
97
  control_mode=cfg.control_mode,
98
+ camera_cfg=dict(
99
+ camera_eye=cfg.camera_eye,
100
+ camera_target_pt=cfg.camera_target_pt,
101
+ image_hw=cfg.image_hw,
102
+ fovy_deg=cfg.fovy_deg,
103
+ ),
104
  )
105
  env = RecordEpisode(
106
  env,
embodied_gen/scripts/simulate_sapien.py CHANGED
@@ -91,17 +91,15 @@ def entrypoint(**kwargs):
91
  fovy_deg=cfg.fovy_deg,
92
  )
93
  with open(cfg.layout_path, "r") as f:
94
- layout_data = json.load(f)
95
- layout_data: LayoutInfo = LayoutInfo.from_dict(layout_data)
96
 
97
  actors = load_assets_from_layout_file(
98
  scene_manager.scene,
99
- layout_data,
100
  cfg.z_offset,
101
- cfg.init_quat,
102
  )
103
  agent = load_mani_skill_robot(
104
- scene_manager.scene, layout_data, cfg.control_freq
105
  )
106
 
107
  frames = defaultdict(list)
@@ -134,8 +132,9 @@ def entrypoint(**kwargs):
134
  if "Foreground" not in cfg.render_keys:
135
  return
136
 
 
137
  bg_node = layout_data.relation[Scene3DItemEnum.BACKGROUND.value]
138
- gs_path = f"{layout_data.assets[bg_node]}/gs_model.ply"
139
  gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
140
  x, y, z, qx, qy, qz, qw = layout_data.position[bg_node]
141
  qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat)
@@ -170,7 +169,8 @@ def entrypoint(**kwargs):
170
  for node in actions:
171
  if actions[node] is None:
172
  continue
173
- for action in tqdm(actions[node]):
 
174
  grasp_frames = scene_manager.step_action(
175
  agent,
176
  torch.Tensor(action[None, ...]),
 
91
  fovy_deg=cfg.fovy_deg,
92
  )
93
  with open(cfg.layout_path, "r") as f:
94
+ layout_data: LayoutInfo = LayoutInfo.from_dict(json.load(f))
 
95
 
96
  actors = load_assets_from_layout_file(
97
  scene_manager.scene,
98
+ cfg.layout_path,
99
  cfg.z_offset,
 
100
  )
101
  agent = load_mani_skill_robot(
102
+ scene_manager.scene, cfg.layout_path, cfg.control_freq
103
  )
104
 
105
  frames = defaultdict(list)
 
132
  if "Foreground" not in cfg.render_keys:
133
  return
134
 
135
+ asset_root = os.path.dirname(cfg.layout_path)
136
  bg_node = layout_data.relation[Scene3DItemEnum.BACKGROUND.value]
137
+ gs_path = f"{asset_root}/{layout_data.assets[bg_node]}/gs_model.ply"
138
  gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
139
  x, y, z, qx, qy, qz, qw = layout_data.position[bg_node]
140
  qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat)
 
169
  for node in actions:
170
  if actions[node] is None:
171
  continue
172
+ logger.info(f"Render SIM grasping in camera {idx} for {node}...")
173
+ for action in actions[node]:
174
  grasp_frames = scene_manager.step_action(
175
  agent,
176
  torch.Tensor(action[None, ...]),
embodied_gen/scripts/textto3d.py CHANGED
@@ -187,7 +187,7 @@ def text_to_3d(**kwargs) -> dict:
187
  logger.warning(
188
  f"Node {node}, {TXTGEN_CHECKER.__class__.__name__}: {qa_result}"
189
  )
190
- results["assets"][node] = f"{node_save_dir}/result"
191
  results["quality"][node] = qa_result
192
 
193
  if qa_flag is None or qa_flag is True:
 
187
  logger.warning(
188
  f"Node {node}, {TXTGEN_CHECKER.__class__.__name__}: {qa_result}"
189
  )
190
+ results["assets"][node] = f"asset3d/{save_node}/result"
191
  results["quality"][node] = qa_result
192
 
193
  if qa_flag is None or qa_flag is True:
embodied_gen/utils/geometry.py CHANGED
@@ -14,6 +14,7 @@
14
  # implied. See the License for the specific language governing
15
  # permissions and limitations under the License.
16
 
 
17
  import os
18
  import random
19
  from collections import defaultdict, deque
@@ -32,7 +33,6 @@ from embodied_gen.utils.enum import LayoutInfo, Scene3DItemEnum
32
  from embodied_gen.utils.log import logger
33
 
34
  __all__ = [
35
- "bfs_placement",
36
  "with_seed",
37
  "matrix_to_pose",
38
  "pose_to_matrix",
@@ -222,7 +222,7 @@ def check_reachable(
222
 
223
  @with_seed("seed")
224
  def bfs_placement(
225
- layout_info: LayoutInfo,
226
  floor_margin: float = 0,
227
  beside_margin: float = 0.1,
228
  max_attempts: int = 3000,
@@ -232,6 +232,8 @@ def bfs_placement(
232
  robot_dim: float = 0.12,
233
  seed: int = None,
234
  ) -> LayoutInfo:
 
 
235
  object_mapping = layout_info.objs_mapping
236
  position = {} # node: [x, y, z, qx, qy, qz, qw]
237
  parent_bbox_xy = {}
@@ -254,6 +256,7 @@ def bfs_placement(
254
  mesh_path = (
255
  f"{layout_info.assets[node]}/mesh/{node.replace(' ', '_')}.obj"
256
  )
 
257
  mesh_info[node]["path"] = mesh_path
258
  mesh = trimesh.load(mesh_path)
259
  vertices = mesh.vertices
@@ -282,10 +285,9 @@ def bfs_placement(
282
  # For manipulated and distractor objects, apply random rotation
283
  angle_rad = np.random.uniform(0, 2 * np.pi)
284
  object_quat = compute_axis_rotation_quat(
285
- axis="y", angle_rad=angle_rad
286
  )
287
- object_quat_scipy = np.roll(object_quat, 1) # [w, x, y, z]
288
- rotation = R.from_quat(object_quat_scipy).as_matrix()
289
  vertices = np.dot(mesh.vertices, rotation.T)
290
  z1 = np.percentile(vertices[:, 1], 1)
291
  z2 = np.percentile(vertices[:, 1], 99)
 
14
  # implied. See the License for the specific language governing
15
  # permissions and limitations under the License.
16
 
17
+ import json
18
  import os
19
  import random
20
  from collections import defaultdict, deque
 
33
  from embodied_gen.utils.log import logger
34
 
35
  __all__ = [
 
36
  "with_seed",
37
  "matrix_to_pose",
38
  "pose_to_matrix",
 
222
 
223
  @with_seed("seed")
224
  def bfs_placement(
225
+ layout_file: str,
226
  floor_margin: float = 0,
227
  beside_margin: float = 0.1,
228
  max_attempts: int = 3000,
 
232
  robot_dim: float = 0.12,
233
  seed: int = None,
234
  ) -> LayoutInfo:
235
+ layout_info = LayoutInfo.from_dict(json.load(open(layout_file, "r")))
236
+ asset_dir = os.path.dirname(layout_file)
237
  object_mapping = layout_info.objs_mapping
238
  position = {} # node: [x, y, z, qx, qy, qz, qw]
239
  parent_bbox_xy = {}
 
256
  mesh_path = (
257
  f"{layout_info.assets[node]}/mesh/{node.replace(' ', '_')}.obj"
258
  )
259
+ mesh_path = os.path.join(asset_dir, mesh_path)
260
  mesh_info[node]["path"] = mesh_path
261
  mesh = trimesh.load(mesh_path)
262
  vertices = mesh.vertices
 
285
  # For manipulated and distractor objects, apply random rotation
286
  angle_rad = np.random.uniform(0, 2 * np.pi)
287
  object_quat = compute_axis_rotation_quat(
288
+ axis="z", angle_rad=angle_rad
289
  )
290
+ rotation = R.from_quat(object_quat).as_matrix()
 
291
  vertices = np.dot(mesh.vertices, rotation.T)
292
  z1 = np.percentile(vertices[:, 1], 1)
293
  z2 = np.percentile(vertices[:, 1], 99)
embodied_gen/utils/monkey_patches.py CHANGED
@@ -175,7 +175,7 @@ def monkey_patch_maniskill():
175
  seg_labels = camera.get_obs(
176
  rgb=False, depth=False, segmentation=True, position=False
177
  )["segmentation"]
178
- masks = np.where((seg_labels.cpu() > 0), 255, 0).astype(
179
  np.uint8
180
  )
181
  masks = torch.tensor(masks).to(color.device)
 
175
  seg_labels = camera.get_obs(
176
  rgb=False, depth=False, segmentation=True, position=False
177
  )["segmentation"]
178
+ masks = np.where((seg_labels.cpu() > 1), 255, 0).astype(
179
  np.uint8
180
  )
181
  masks = torch.tensor(masks).to(color.device)
embodied_gen/utils/simulation.py CHANGED
@@ -15,7 +15,6 @@
15
  # permissions and limitations under the License.
16
 
17
  import json
18
- import logging
19
  import os
20
  import xml.etree.ElementTree as ET
21
  from collections import defaultdict
@@ -62,32 +61,48 @@ __all__ = [
62
 
63
 
64
  def load_actor_from_urdf(
65
- scene: ManiSkillScene | sapien.Scene,
66
  file_path: str,
67
- pose: sapien.Pose,
68
  env_idx: int = None,
69
  use_static: bool = False,
70
  update_mass: bool = False,
 
71
  ) -> sapien.pysapien.Entity:
 
 
 
 
 
 
 
 
 
 
72
  tree = ET.parse(file_path)
73
  root = tree.getroot()
74
  node_name = root.get("name")
75
  file_dir = os.path.dirname(file_path)
76
 
77
- visual_mesh = root.find('.//visual/geometry/mesh')
78
  visual_file = visual_mesh.get("filename")
79
  visual_scale = visual_mesh.get("scale", "1.0 1.0 1.0")
80
  visual_scale = np.array([float(x) for x in visual_scale.split()])
 
81
 
82
- collision_mesh = root.find('.//collision/geometry/mesh')
83
  collision_file = collision_mesh.get("filename")
84
  collision_scale = collision_mesh.get("scale", "1.0 1.0 1.0")
85
  collision_scale = np.array([float(x) for x in collision_scale.split()])
 
 
 
 
86
 
87
  visual_file = os.path.join(file_dir, visual_file)
88
  collision_file = os.path.join(file_dir, collision_file)
89
- static_fric = root.find('.//collision/gazebo/mu1').text
90
- dynamic_fric = root.find('.//collision/gazebo/mu2').text
91
 
92
  material = physx.PhysxMaterial(
93
  static_friction=np.clip(float(static_fric), 0.1, 0.7),
@@ -106,17 +121,27 @@ def load_actor_from_urdf(
106
  # decomposition_params=dict(
107
  # threshold=0.05, max_convex_hull=64, verbose=False
108
  # ),
 
 
 
 
 
 
 
109
  )
 
 
110
 
111
- builder.add_visual_from_file(visual_file, scale=visual_scale)
112
  builder.set_initial_pose(pose)
113
  if isinstance(scene, ManiSkillScene) and env_idx is not None:
114
  builder.set_scene_idxs([env_idx])
115
 
116
- actor = builder.build(name=f"{node_name}-{env_idx}")
 
 
117
 
118
  if update_mass and hasattr(actor.components[1], "mass"):
119
- node_mass = float(root.find('.//inertial/mass').get("value"))
120
  actor.components[1].set_mass(node_mass)
121
 
122
  return actor
@@ -124,7 +149,7 @@ def load_actor_from_urdf(
124
 
125
  def load_assets_from_layout_file(
126
  scene: ManiSkillScene | sapien.Scene,
127
- layout: LayoutInfo | str,
128
  z_offset: float = 0.0,
129
  init_quat: list[float] = [0, 0, 0, 1],
130
  env_idx: int = None,
@@ -133,19 +158,18 @@ def load_assets_from_layout_file(
133
 
134
  Args:
135
  scene (sapien.Scene | ManiSkillScene): The SAPIEN or ManiSkill scene to load assets into.
136
- layout (LayoutInfo): The layout information data.
137
  z_offset (float): Offset to apply to the Z-coordinate of non-context objects.
138
  init_quat (List[float]): Initial quaternion (x, y, z, w) for orientation adjustment.
139
  env_idx (int): Environment index for multi-environment setup.
140
  """
141
- if isinstance(layout, str) and layout.endswith(".json"):
142
- layout = LayoutInfo.from_dict(json.load(open(layout, "r")))
143
-
144
  actors = dict()
145
  for node in layout.assets:
146
  file_dir = layout.assets[node]
147
  file_name = f"{node.replace(' ', '_')}.urdf"
148
- urdf_file = os.path.join(file_dir, file_name)
149
 
150
  if layout.objs_mapping[node] == Scene3DItemEnum.BACKGROUND.value:
151
  continue
 
15
  # permissions and limitations under the License.
16
 
17
  import json
 
18
  import os
19
  import xml.etree.ElementTree as ET
20
  from collections import defaultdict
 
61
 
62
 
63
  def load_actor_from_urdf(
64
+ scene: sapien.Scene | ManiSkillScene,
65
  file_path: str,
66
+ pose: sapien.Pose | None = None,
67
  env_idx: int = None,
68
  use_static: bool = False,
69
  update_mass: bool = False,
70
+ scale: float | np.ndarray = 1.0,
71
  ) -> sapien.pysapien.Entity:
72
+ def _get_local_pose(origin_tag: ET.Element | None) -> sapien.Pose:
73
+ local_pose = sapien.Pose(p=[0, 0, 0], q=[1, 0, 0, 0])
74
+ if origin_tag is not None:
75
+ xyz = list(map(float, origin_tag.get("xyz", "0 0 0").split()))
76
+ rpy = list(map(float, origin_tag.get("rpy", "0 0 0").split()))
77
+ qx, qy, qz, qw = R.from_euler("xyz", rpy, degrees=False).as_quat()
78
+ local_pose = sapien.Pose(p=xyz, q=[qw, qx, qy, qz])
79
+
80
+ return local_pose
81
+
82
  tree = ET.parse(file_path)
83
  root = tree.getroot()
84
  node_name = root.get("name")
85
  file_dir = os.path.dirname(file_path)
86
 
87
+ visual_mesh = root.find(".//visual/geometry/mesh")
88
  visual_file = visual_mesh.get("filename")
89
  visual_scale = visual_mesh.get("scale", "1.0 1.0 1.0")
90
  visual_scale = np.array([float(x) for x in visual_scale.split()])
91
+ visual_scale *= np.array(scale)
92
 
93
+ collision_mesh = root.find(".//collision/geometry/mesh")
94
  collision_file = collision_mesh.get("filename")
95
  collision_scale = collision_mesh.get("scale", "1.0 1.0 1.0")
96
  collision_scale = np.array([float(x) for x in collision_scale.split()])
97
+ collision_scale *= np.array(scale)
98
+
99
+ visual_pose = _get_local_pose(root.find(".//visual/origin"))
100
+ collision_pose = _get_local_pose(root.find(".//collision/origin"))
101
 
102
  visual_file = os.path.join(file_dir, visual_file)
103
  collision_file = os.path.join(file_dir, collision_file)
104
+ static_fric = root.find(".//collision/gazebo/mu1").text
105
+ dynamic_fric = root.find(".//collision/gazebo/mu2").text
106
 
107
  material = physx.PhysxMaterial(
108
  static_friction=np.clip(float(static_fric), 0.1, 0.7),
 
121
  # decomposition_params=dict(
122
  # threshold=0.05, max_convex_hull=64, verbose=False
123
  # ),
124
+ pose=collision_pose,
125
+ )
126
+
127
+ builder.add_visual_from_file(
128
+ visual_file,
129
+ scale=visual_scale,
130
+ pose=visual_pose,
131
  )
132
+ if pose is None:
133
+ pose = sapien.Pose(p=[0, 0, 0], q=[1, 0, 0, 0])
134
 
 
135
  builder.set_initial_pose(pose)
136
  if isinstance(scene, ManiSkillScene) and env_idx is not None:
137
  builder.set_scene_idxs([env_idx])
138
 
139
+ actor = builder.build(
140
+ name=node_name if env_idx is None else f"{node_name}-{env_idx}"
141
+ )
142
 
143
  if update_mass and hasattr(actor.components[1], "mass"):
144
+ node_mass = float(root.find(".//inertial/mass").get("value"))
145
  actor.components[1].set_mass(node_mass)
146
 
147
  return actor
 
149
 
150
  def load_assets_from_layout_file(
151
  scene: ManiSkillScene | sapien.Scene,
152
+ layout: str,
153
  z_offset: float = 0.0,
154
  init_quat: list[float] = [0, 0, 0, 1],
155
  env_idx: int = None,
 
158
 
159
  Args:
160
  scene (sapien.Scene | ManiSkillScene): The SAPIEN or ManiSkill scene to load assets into.
161
+ layout (str): The layout file path.
162
  z_offset (float): Offset to apply to the Z-coordinate of non-context objects.
163
  init_quat (List[float]): Initial quaternion (x, y, z, w) for orientation adjustment.
164
  env_idx (int): Environment index for multi-environment setup.
165
  """
166
+ asset_root = os.path.dirname(layout)
167
+ layout = LayoutInfo.from_dict(json.load(open(layout, "r")))
 
168
  actors = dict()
169
  for node in layout.assets:
170
  file_dir = layout.assets[node]
171
  file_name = f"{node.replace(' ', '_')}.urdf"
172
+ urdf_file = os.path.join(asset_root, file_dir, file_name)
173
 
174
  if layout.objs_mapping[node] == Scene3DItemEnum.BACKGROUND.value:
175
  continue
embodied_gen/utils/tags.py CHANGED
@@ -1 +1 @@
1
- VERSION = "v0.1.4"
 
1
+ VERSION = "v0.1.5"
embodied_gen/validators/urdf_convertor.py CHANGED
@@ -24,6 +24,7 @@ from xml.dom.minidom import parseString
24
 
25
  import numpy as np
26
  import trimesh
 
27
  from embodied_gen.data.convex_decomposer import decompose_convex_mesh
28
  from embodied_gen.utils.gpt_clients import GPT_CLIENT, GPTclient
29
  from embodied_gen.utils.process_media import render_asset3d
@@ -40,11 +41,13 @@ URDF_TEMPLATE = """
40
  <robot name="template_robot">
41
  <link name="template_link">
42
  <visual>
 
43
  <geometry>
44
  <mesh filename="mesh.obj" scale="1.0 1.0 1.0"/>
45
  </geometry>
46
  </visual>
47
  <collision>
 
48
  <geometry>
49
  <mesh filename="mesh.obj" scale="1.0 1.0 1.0"/>
50
  </geometry>
@@ -86,6 +89,7 @@ class URDFGenerator(object):
86
  render_dir: str = "urdf_renders",
87
  render_view_num: int = 4,
88
  decompose_convex: bool = False,
 
89
  ) -> None:
90
  if mesh_file_list is None:
91
  mesh_file_list = []
@@ -160,6 +164,8 @@ class URDFGenerator(object):
160
  ]
161
  self.attrs_name = attrs_name
162
  self.decompose_convex = decompose_convex
 
 
163
 
164
  def parse_response(self, response: str) -> dict[str, any]:
165
  lines = response.split("\n")
@@ -251,6 +257,14 @@ class URDFGenerator(object):
251
  raise ValueError("URDF template is missing 'link' element.")
252
  link.set("name", output_name)
253
 
 
 
 
 
 
 
 
 
254
  # Update visual geometry
255
  visual = link.find("visual/geometry/mesh")
256
  if visual is not None:
@@ -273,7 +287,11 @@ class URDFGenerator(object):
273
  decompose_convex_mesh(
274
  mesh_output_path, output_path, **d_params
275
  )
276
- collision_mesh = f"{self.output_mesh_dir}/{filename}"
 
 
 
 
277
  except Exception as e:
278
  logger.warning(
279
  f"Convex decomposition failed for {output_path}, {e}."
@@ -436,6 +454,7 @@ class URDFGenerator(object):
436
 
437
 
438
  if __name__ == "__main__":
 
439
  urdf_gen = URDFGenerator(GPT_CLIENT, render_view_num=4)
440
  urdf_path = urdf_gen(
441
  mesh_path="outputs/layout2/asset3d/marker/result/mesh/marker.obj",
 
24
 
25
  import numpy as np
26
  import trimesh
27
+ from scipy.spatial.transform import Rotation
28
  from embodied_gen.data.convex_decomposer import decompose_convex_mesh
29
  from embodied_gen.utils.gpt_clients import GPT_CLIENT, GPTclient
30
  from embodied_gen.utils.process_media import render_asset3d
 
41
  <robot name="template_robot">
42
  <link name="template_link">
43
  <visual>
44
+ <origin xyz="0 0 0" rpy="0 0 0"/>
45
  <geometry>
46
  <mesh filename="mesh.obj" scale="1.0 1.0 1.0"/>
47
  </geometry>
48
  </visual>
49
  <collision>
50
+ <origin xyz="0 0 0" rpy="0 0 0"/>
51
  <geometry>
52
  <mesh filename="mesh.obj" scale="1.0 1.0 1.0"/>
53
  </geometry>
 
89
  render_dir: str = "urdf_renders",
90
  render_view_num: int = 4,
91
  decompose_convex: bool = False,
92
+ rotate_xyzw: list[float] = (0.7071, 0, 0, 0.7071),
93
  ) -> None:
94
  if mesh_file_list is None:
95
  mesh_file_list = []
 
164
  ]
165
  self.attrs_name = attrs_name
166
  self.decompose_convex = decompose_convex
167
+ # Rotate 90 degrees around the X-axis from Blender to align with simulators.
168
+ self.rotate_xyzw = rotate_xyzw
169
 
170
  def parse_response(self, response: str) -> dict[str, any]:
171
  lines = response.split("\n")
 
257
  raise ValueError("URDF template is missing 'link' element.")
258
  link.set("name", output_name)
259
 
260
+ if self.rotate_xyzw is not None:
261
+ rpy = Rotation.from_quat(self.rotate_xyzw).as_euler(
262
+ "xyz", degrees=False
263
+ )
264
+ rpy = [str(round(num, 4)) for num in rpy]
265
+ link.find("visual/origin").set("rpy", " ".join(rpy))
266
+ link.find("collision/origin").set("rpy", " ".join(rpy))
267
+
268
  # Update visual geometry
269
  visual = link.find("visual/geometry/mesh")
270
  if visual is not None:
 
287
  decompose_convex_mesh(
288
  mesh_output_path, output_path, **d_params
289
  )
290
+ obj_filename = filename.replace(".ply", ".obj")
291
+ trimesh.load(output_path).export(
292
+ f"{mesh_folder}/{obj_filename}"
293
+ )
294
+ collision_mesh = f"{self.output_mesh_dir}/{obj_filename}"
295
  except Exception as e:
296
  logger.warning(
297
  f"Convex decomposition failed for {output_path}, {e}."
 
454
 
455
 
456
  if __name__ == "__main__":
457
+ # Rotate 90 degrees around the X-axis to align with simulators.
458
  urdf_gen = URDFGenerator(GPT_CLIENT, render_view_num=4)
459
  urdf_path = urdf_gen(
460
  mesh_path="outputs/layout2/asset3d/marker/result/mesh/marker.obj",