xinjie.wang committed on
Commit
1d2e276
·
1 Parent(s): 5c8b822
embodied_gen/data/asset_converter.py ADDED
@@ -0,0 +1,497 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import xml.etree.ElementTree as ET
6
+ from abc import ABC, abstractmethod
7
+ from dataclasses import dataclass
8
+ from shutil import copy
9
+
10
+ import trimesh
11
+ from scipy.spatial.transform import Rotation
12
+
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ __all__ = [
18
+ "AssetConverterFactory",
19
+ "AssetType",
20
+ "MeshtoMJCFConverter",
21
+ "MeshtoUSDConverter",
22
+ "URDFtoUSDConverter",
23
+ ]
24
+
25
+
26
class AssetType(str):
    """Namespace of supported asset-format identifiers.

    Subclasses ``str`` so members compare equal to plain strings
    (e.g. ``AssetType.URDF == "urdf"``), which `AssetConverterFactory`
    relies on when callers pass either the constant or a raw string.

    NOTE: the previous ``@dataclass`` decorator was removed — the class has
    no annotated fields, so the decorator generated nothing and only
    suggested an instance-record semantics this type does not have.
    """

    MJCF = "mjcf"  # MuJoCo XML scene format
    USD = "usd"  # Pixar / Omniverse Universal Scene Description
    URDF = "urdf"  # Unified Robot Description Format
    MESH = "mesh"  # bare mesh file referenced from a URDF
34
+
35
+
36
class AssetConverterBase(ABC):
    """Abstract base class for asset format converters.

    Subclasses implement :meth:`convert`; the base provides mesh
    transform baking shared by the concrete converters, and a trivial
    context-manager protocol so every converter can be used in a
    ``with`` statement (subclasses may override to manage resources).
    """

    @abstractmethod
    def convert(self, urdf_path: str, output_path: str, **kwargs) -> str:
        """Convert the asset at ``urdf_path``, writing to ``output_path``."""
        pass

    def transform_mesh(
        self, input_mesh: str, output_mesh: str, mesh_origin: ET.Element
    ) -> None:
        """Bake the URDF ``<origin>`` transform into the mesh vertices.

        Args:
            input_mesh: Path of the mesh file to load.
            output_mesh: Path where the transformed mesh is exported
                (parent directories are created as needed).
            mesh_origin: URDF ``<origin>`` element. Its ``rpy`` and
                ``xyz`` attributes are each optional and default to
                ``"0 0 0"``, matching the URDF specification.
        """
        mesh = trimesh.load(input_mesh)
        # URDF allows omitting `rpy`/`xyz`; previously a missing attribute
        # raised AttributeError on `.split`. `split()` with no argument also
        # tolerates repeated whitespace between the three numbers.
        rpy = [float(v) for v in mesh_origin.get("rpy", "0 0 0").split()]
        offset = [float(v) for v in mesh_origin.get("xyz", "0 0 0").split()]
        rotation = Rotation.from_euler("xyz", rpy, degrees=False)
        # Row-vector convention: v' = v @ R^T + t.
        mesh.vertices = (mesh.vertices @ rotation.as_matrix().T) + offset

        os.makedirs(os.path.dirname(output_mesh), exist_ok=True)
        _ = mesh.export(output_mesh)

        return

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Never suppress exceptions raised inside the `with` body.
        return False
63
+
64
+
65
class MeshtoMJCFConverter(AssetConverterBase):
    """Convert single-link mesh URDF files into MJCF (MuJoCo XML) format.

    For each ``<link>`` in the URDF, a MJCF ``<body>`` is created with its
    visual and collision mesh geoms; mesh/texture files referenced by the
    URDF are copied next to the output MJCF so relative paths keep working.
    """

    def __init__(
        self,
        **kwargs,
    ) -> None:
        # Extra options are stored but not currently consumed; kept for
        # interface parity with the other converters.
        self.kwargs = kwargs

    def _copy_asset_file(self, src: str, dst: str) -> None:
        """Copy ``src`` to ``dst``, creating parent dirs; no-op if ``dst`` exists."""
        if os.path.exists(dst):
            return
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        copy(src, dst)

    def add_geometry(
        self,
        mujoco_element: ET.Element,
        link: ET.Element,
        body: ET.Element,
        tag: str,
        input_dir: str,
        output_dir: str,
        mesh_name: str,
        material: ET.Element | None = None,
        is_collision: bool = False,
    ) -> None:
        """Add one mesh geom to the MJCF body from the URDF link.

        Args:
            mujoco_element: MJCF ``<asset>`` element receiving the mesh entry.
            link: URDF ``<link>`` element to read from.
            body: MJCF ``<body>`` element receiving the ``<geom>``.
            tag: Which URDF child to use, ``"visual"`` or ``"collision"``.
            input_dir: Directory of the source URDF (mesh paths are relative).
            output_dir: Directory of the output MJCF.
            mesh_name: Name registered for the mesh asset in MJCF.
            material: Optional MJCF material to bind to the geom.
            is_collision: If True, enable contacts and hide the geom.
        """
        element = link.find(tag)
        geometry = element.find("geometry")
        mesh = geometry.find("mesh")
        filename = mesh.get("filename")
        scale = mesh.get("scale", "1.0 1.0 1.0")

        # Register the mesh in <asset> and reference it from the body.
        ET.SubElement(
            mujoco_element, "mesh", name=mesh_name, file=filename, scale=scale
        )
        geom = ET.SubElement(body, "geom", type="mesh", mesh=mesh_name)

        # Mesh paths are relative to the URDF; mirror them under output_dir.
        input_mesh = f"{input_dir}/{filename}"
        output_mesh = f"{output_dir}/{filename}"
        self._copy_asset_file(input_mesh, output_mesh)

        # Bake the URDF <origin> rotation/offset into the copied mesh, since
        # the geom itself is emitted without a pos/quat.
        mesh_origin = element.find("origin")
        if mesh_origin is not None:
            self.transform_mesh(input_mesh, output_mesh, mesh_origin)

        if material is not None:
            geom.set("material", material.get("name"))

        if is_collision:
            geom.set("contype", "1")
            geom.set("conaffinity", "1")
            # Fully transparent so collision geometry is not rendered.
            geom.set("rgba", "1 1 1 0")

    def add_materials(
        self,
        mujoco_element: ET.Element,
        link: ET.Element,
        tag: str,
        input_dir: str,
        output_dir: str,
        name: str,
        reflectance: float = 0.2,
    ) -> ET.Element:
        """Add a textured material to the MJCF asset from the URDF link.

        Assumes the texture lives at ``material_0.png`` next to the mesh
        file referenced by the link (the layout produced by the asset
        generation pipeline).

        Returns:
            The created MJCF ``<material>`` element.
        """
        element = link.find(tag)
        geometry = element.find("geometry")
        mesh = geometry.find("mesh")
        filename = mesh.get("filename")
        dirname = os.path.dirname(filename)

        material = ET.SubElement(
            mujoco_element,
            "material",
            name=f"material_{name}",
            texture=f"texture_{name}",
            reflectance=str(reflectance),
        )
        ET.SubElement(
            mujoco_element,
            "texture",
            name=f"texture_{name}",
            type="2d",
            file=f"{dirname}/material_0.png",
        )

        self._copy_asset_file(
            f"{input_dir}/{dirname}/material_0.png",
            f"{output_dir}/{dirname}/material_0.png",
        )

        return material

    def convert(self, urdf_path: str, mjcf_path: str):
        """Convert a URDF file to MJCF format.

        Args:
            urdf_path: Input URDF file path.
            mjcf_path: Output MJCF file path (directory created if missing).
        """
        tree = ET.parse(urdf_path)
        root = tree.getroot()

        mujoco_struct = ET.Element("mujoco")
        mujoco_struct.set("model", root.get("name"))
        mujoco_asset = ET.SubElement(mujoco_struct, "asset")
        mujoco_worldbody = ET.SubElement(mujoco_struct, "worldbody")

        input_dir = os.path.dirname(urdf_path)
        output_dir = os.path.dirname(mjcf_path)
        os.makedirs(output_dir, exist_ok=True)
        for idx, link in enumerate(root.findall("link")):
            link_name = link.get("name", "unnamed_link")
            body = ET.SubElement(mujoco_worldbody, "body", name=link_name)

            material = self.add_materials(
                mujoco_asset,
                link,
                "visual",
                input_dir,
                output_dir,
                name=str(idx),
            )
            self.add_geometry(
                mujoco_asset,
                link,
                body,
                "visual",
                input_dir,
                output_dir,
                f"visual_mesh_{idx}",
                material,
            )
            self.add_geometry(
                mujoco_asset,
                link,
                body,
                "collision",
                input_dir,
                output_dir,
                f"collision_mesh_{idx}",
                is_collision=True,
            )

        tree = ET.ElementTree(mujoco_struct)
        ET.indent(tree, space="  ", level=0)

        tree.write(mjcf_path, encoding="utf-8", xml_declaration=True)
        logger.info(f"Successfully converted {urdf_path} → {mjcf_path}")
214
+
215
+
216
class MeshtoUSDConverter(AssetConverterBase):
    """Convert the mesh referenced by a URDF file into USD format.

    Runs inside an Isaac Sim ``SimulationApp``: one can be injected via
    ``simulation_app``, otherwise a headless app is launched lazily in
    ``__enter__`` and closed in ``__exit__``. After the Isaac Lab
    ``MeshConverter`` produces the USD, :meth:`convert` post-processes the
    stage (relative texture paths, convex-decomposition collision, physics
    API schemas).
    """

    # API schemas appended to the mesh prim's `apiSchemas` metadata so the
    # asset participates in rendering and physics inside Isaac Sim.
    DEFAULT_BIND_APIS = [
        "MaterialBindingAPI",
        "PhysicsMeshCollisionAPI",
        "PhysicsCollisionAPI",
        "PhysxCollisionAPI",
        "PhysicsMassAPI",
        "PhysicsRigidBodyAPI",
        "PhysxRigidBodyAPI",
    ]

    def __init__(
        self,
        force_usd_conversion: bool = True,
        make_instanceable: bool = False,
        simulation_app=None,
        **kwargs,
    ):
        """Store MeshConverterCfg parameters and an optional external app.

        Args:
            force_usd_conversion: Forwarded to ``MeshConverterCfg``.
            make_instanceable: Forwarded to ``MeshConverterCfg``.
            simulation_app: Already-running Isaac Sim app to reuse; when
                None, ``__enter__`` launches (and later owns) a headless one.
            **kwargs: Extra ``MeshConverterCfg`` fields passed through.
        """
        self.usd_parms = dict(
            force_usd_conversion=force_usd_conversion,
            make_instanceable=make_instanceable,
            **kwargs,
        )
        if simulation_app is not None:
            self.simulation_app = simulation_app

    def __enter__(self):
        # Imported lazily: isaaclab is only needed (and installed) where
        # the simulator runs.
        from isaaclab.app import AppLauncher

        # Only launch our own app when none was injected in __init__;
        # `app_launcher` existing marks that we own the app's lifetime.
        if not hasattr(self, "simulation_app"):
            launch_args = dict(
                headless=True,
                no_splash=True,
                fast_shutdown=True,
                disable_gpu=True,
            )
            self.app_launcher = AppLauncher(launch_args)
            self.simulation_app = self.app_launcher.app

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Close the simulation app only if it was created here
        # (injected apps are the caller's responsibility).
        if hasattr(self, "app_launcher"):
            self.simulation_app.close()

        if exc_val is not None:
            logger.error(f"Exception occurred: {exc_val}.")

        # Returning False propagates any exception to the caller.
        return False

    def convert(self, urdf_path: str, output_file: str):
        """Convert a URDF file to USD and post-process collision meshes."""
        from isaaclab.sim.converters import MeshConverter, MeshConverterCfg
        from pxr import PhysxSchema, Sdf, Usd, UsdShade

        # The URDF here is only used to locate the visual mesh and its
        # optional <origin>; physics comes from the post-processing below.
        tree = ET.parse(urdf_path)
        root = tree.getroot()
        mesh_file = root.find("link/visual/geometry/mesh").get("filename")
        input_mesh = os.path.join(os.path.dirname(urdf_path), mesh_file)
        output_dir = os.path.abspath(os.path.dirname(output_file))
        output_mesh = f"{output_dir}/mesh/{os.path.basename(mesh_file)}"
        mesh_origin = root.find("link/visual/origin")
        if mesh_origin is not None:
            # Bake the origin transform into the mesh copy fed to USD.
            self.transform_mesh(input_mesh, output_mesh, mesh_origin)

        cfg = MeshConverterCfg(
            asset_path=output_mesh,
            usd_dir=output_dir,
            usd_file_name=os.path.basename(output_file),
            **self.usd_parms,
        )
        urdf_converter = MeshConverter(cfg)
        usd_path = urdf_converter.usd_path

        # Post-process the generated stage in its root layer.
        stage = Usd.Stage.Open(usd_path)
        layer = stage.GetRootLayer()
        with Usd.EditContext(stage, layer):
            for prim in stage.Traverse():
                # Change texture path to relative path so the USD stays
                # portable when the output directory is moved.
                # NOTE(review): assumes the generated material prim is
                # named "material_0" — confirm against MeshConverter output.
                if prim.GetName() == "material_0":
                    shader = UsdShade.Shader(prim).GetInput("diffuse_texture")
                    if shader.Get() is not None:
                        relative_path = shader.Get().path.replace(
                            f"{output_dir}/", ""
                        )
                        shader.Set(Sdf.AssetPath(relative_path))

                # Add convex decomposition collision and set ShrinkWrap.
                elif prim.GetName() == "mesh":
                    approx_attr = prim.GetAttribute("physics:approximation")
                    if not approx_attr:
                        approx_attr = prim.CreateAttribute(
                            "physics:approximation", Sdf.ValueTypeNames.Token
                        )
                    approx_attr.Set("convexDecomposition")

                    physx_conv_api = (
                        PhysxSchema.PhysxConvexDecompositionCollisionAPI.Apply(
                            prim
                        )
                    )
                    physx_conv_api.GetShrinkWrapAttr().Set(True)

                    # Append the default physics/material API schemas to the
                    # prim metadata without duplicating existing entries.
                    api_schemas = prim.GetMetadata("apiSchemas")
                    if api_schemas is None:
                        api_schemas = Sdf.TokenListOp()

                    api_list = list(api_schemas.GetAddedOrExplicitItems())
                    for api in self.DEFAULT_BIND_APIS:
                        if api not in api_list:
                            api_list.append(api)

                    api_schemas.appendedItems = api_list
                    prim.SetMetadata("apiSchemas", api_schemas)

        layer.Save()
        logger.info(f"Successfully converted {urdf_path} → {usd_path}")
336
+
337
+
338
class URDFtoUSDConverter(MeshtoUSDConverter):
    """Convert URDF files into USD format.

    Unlike the parent class (which converts a single mesh), this feeds the
    whole URDF to Isaac Lab's ``UrdfConverter`` and then post-processes
    "collisions" prims the same way. Reuses the parent's
    ``__enter__``/``__exit__`` SimulationApp lifecycle.

    Args:
        fix_base (bool): Whether to fix the base link.
        merge_fixed_joints (bool): Whether to merge fixed joints.
        make_instanceable (bool): Whether to make prims instanceable.
        force_usd_conversion (bool): Force conversion to USD.
        collision_from_visuals (bool): Generate collisions from visuals if not provided.
        joint_drive: Joint drive configuration forwarded to ``UrdfConverterCfg``.
        rotate_wxyz (tuple[float] | None): Optional quaternion (w, x, y, z)
            applied as the orient op of the first child Xform after conversion.
        simulation_app: Already-running Isaac Sim app to reuse.
    """

    def __init__(
        self,
        fix_base: bool = False,
        merge_fixed_joints: bool = False,
        make_instanceable: bool = True,
        force_usd_conversion: bool = True,
        collision_from_visuals: bool = True,
        joint_drive=None,
        rotate_wxyz: tuple[float] | None = None,
        simulation_app=None,
        **kwargs,
    ):
        # Intentionally does not call super().__init__: usd_parms here are
        # UrdfConverterCfg fields, not MeshConverterCfg fields.
        self.usd_parms = dict(
            fix_base=fix_base,
            merge_fixed_joints=merge_fixed_joints,
            make_instanceable=make_instanceable,
            force_usd_conversion=force_usd_conversion,
            collision_from_visuals=collision_from_visuals,
            joint_drive=joint_drive,
            **kwargs,
        )
        self.rotate_wxyz = rotate_wxyz
        if simulation_app is not None:
            self.simulation_app = simulation_app

    def convert(self, urdf_path: str, output_file: str):
        """Convert a URDF file to USD and post-process collision meshes."""
        from isaaclab.sim.converters import UrdfConverter, UrdfConverterCfg
        from pxr import Gf, PhysxSchema, Sdf, Usd, UsdGeom

        cfg = UrdfConverterCfg(
            asset_path=urdf_path,
            usd_dir=os.path.abspath(os.path.dirname(output_file)),
            usd_file_name=os.path.basename(output_file),
            **self.usd_parms,
        )

        urdf_converter = UrdfConverter(cfg)
        usd_path = urdf_converter.usd_path

        # Post-process the generated stage in its root layer.
        stage = Usd.Stage.Open(usd_path)
        layer = stage.GetRootLayer()
        with Usd.EditContext(stage, layer):
            for prim in stage.Traverse():
                # NOTE(review): assumes UrdfConverter names collision prims
                # "collisions" — confirm against the Isaac Lab version used.
                if prim.GetName() == "collisions":
                    approx_attr = prim.GetAttribute("physics:approximation")
                    if not approx_attr:
                        approx_attr = prim.CreateAttribute(
                            "physics:approximation", Sdf.ValueTypeNames.Token
                        )
                    approx_attr.Set("convexDecomposition")

                    physx_conv_api = (
                        PhysxSchema.PhysxConvexDecompositionCollisionAPI.Apply(
                            prim
                        )
                    )
                    physx_conv_api.GetShrinkWrapAttr().Set(True)

                    # Append the default physics/material API schemas to the
                    # prim metadata without duplicating existing entries.
                    api_schemas = prim.GetMetadata("apiSchemas")
                    if api_schemas is None:
                        api_schemas = Sdf.TokenListOp()

                    api_list = list(api_schemas.GetAddedOrExplicitItems())
                    for api in self.DEFAULT_BIND_APIS:
                        if api not in api_list:
                            api_list.append(api)

                    api_schemas.appendedItems = api_list
                    prim.SetMetadata("apiSchemas", api_schemas)

        # Optionally apply an extra orientation to the first child Xform
        # (e.g. to re-align an asset authored in a different up-axis).
        if self.rotate_wxyz is not None:
            inner_prim = next(
                p
                for p in stage.GetDefaultPrim().GetChildren()
                if p.IsA(UsdGeom.Xform)
            )
            xformable = UsdGeom.Xformable(inner_prim)
            # Replace any existing xform ops with a single orient op.
            xformable.ClearXformOpOrder()
            orient_op = xformable.AddOrientOp(UsdGeom.XformOp.PrecisionDouble)
            orient_op.Set(Gf.Quatd(*self.rotate_wxyz))

        layer.Save()
        logger.info(f"Successfully converted {urdf_path} → {usd_path}")
433
+
434
+
435
class AssetConverterFactory:
    """Factory class for creating asset converters based on target and source types."""

    @staticmethod
    def create(
        target_type: AssetType, source_type: AssetType = "urdf", **kwargs
    ) -> AssetConverterBase:
        """Create an asset converter instance based on target and source types.

        Args:
            target_type: Desired output format (``AssetType`` value or str).
            source_type: Input format; defaults to ``"urdf"``.
            **kwargs: Forwarded to the chosen converter's constructor.

        Raises:
            ValueError: If the (source, target) pair is unsupported.
        """
        # Dispatch table keyed on (target, source); AssetType constants are
        # str subclass values, so plain strings match as keys too.
        converter_registry = {
            (AssetType.MJCF, AssetType.URDF): MeshtoMJCFConverter,
            (AssetType.USD, AssetType.URDF): URDFtoUSDConverter,
            (AssetType.USD, AssetType.MESH): MeshtoUSDConverter,
        }
        converter_cls = converter_registry.get((target_type, source_type))
        if converter_cls is None:
            raise ValueError(
                f"Unsupported converter type: {source_type} -> {target_type}."
            )

        return converter_cls(**kwargs)
455
+
456
+
457
if __name__ == "__main__":
    # Demo entry point: choose the target format here.
    target_asset_type = AssetType.USD  # or AssetType.MJCF

    urdf_paths = [
        "outputs/embodiedgen_assets/demo_assets/remote_control/result/remote_control.urdf",
    ]

    # Per-target output paths and the matching source format.
    demo_outputs = {
        AssetType.MJCF: [
            "outputs/embodiedgen_assets/demo_assets/remote_control/mjcf/remote_control.mjcf",
        ],
        AssetType.USD: [
            "outputs/embodiedgen_assets/demo_assets/remote_control/usd/remote_control.usd",
        ],
    }
    demo_sources = {
        AssetType.MJCF: AssetType.URDF,
        AssetType.USD: AssetType.MESH,
    }

    output_files = demo_outputs[target_asset_type]
    asset_converter = AssetConverterFactory.create(
        target_type=target_asset_type,
        source_type=demo_sources[target_asset_type],
    )

    with asset_converter:
        for urdf_path, output_file in zip(urdf_paths, output_files):
            asset_converter.convert(urdf_path, output_file)

    # Example: full URDF -> USD conversion with an extra root rotation.
    # urdf_path = "outputs/embodiedgen_assets/demo_assets/remote_control/result/remote_control.urdf"
    # output_file = "outputs/embodiedgen_assets/demo_assets/remote_control/usd/remote_control.usd"
    # asset_converter = AssetConverterFactory.create(
    #     target_type=AssetType.USD,
    #     source_type=AssetType.URDF,
    #     rotate_wxyz=(0.7071, 0.7071, 0, 0),  # rotate 90 deg around the X-axis
    # )
    # with asset_converter:
    #     asset_converter.convert(urdf_path, output_file)
embodied_gen/data/backproject_v2.py CHANGED
@@ -545,7 +545,7 @@ def parse_args():
545
  "--color_path",
546
  nargs="+",
547
  type=str,
548
- help="Multiview color image in 6x512x512 file paths",
549
  )
550
  parser.add_argument(
551
  "--mesh_path",
@@ -625,7 +625,7 @@ def parse_args():
625
  action="store_true",
626
  help="Disable saving delight image",
627
  )
628
-
629
  args, unknown = parser.parse_known_args()
630
 
631
  return args
@@ -687,6 +687,14 @@ def entrypoint(
687
  num_views=1000,
688
  norm_mesh_ratio=0.5,
689
  )
 
 
 
 
 
 
 
 
690
  # Restore scale.
691
  mesh.vertices = mesh.vertices / scale
692
  mesh.vertices = mesh.vertices + center
 
545
  "--color_path",
546
  nargs="+",
547
  type=str,
548
+ help="Multiview color image in grid file paths",
549
  )
550
  parser.add_argument(
551
  "--mesh_path",
 
625
  action="store_true",
626
  help="Disable saving delight image",
627
  )
628
+ parser.add_argument("--n_max_faces", type=int, default=30000)
629
  args, unknown = parser.parse_known_args()
630
 
631
  return args
 
687
  num_views=1000,
688
  norm_mesh_ratio=0.5,
689
  )
690
+ if len(mesh.faces) > args.n_max_faces:
691
+ mesh.vertices, mesh.faces = mesh_fixer(
692
+ filter_ratio=0.8,
693
+ max_hole_size=0.04,
694
+ resolution=1024,
695
+ num_views=1000,
696
+ norm_mesh_ratio=0.5,
697
+ )
698
  # Restore scale.
699
  mesh.vertices = mesh.vertices / scale
700
  mesh.vertices = mesh.vertices + center
embodied_gen/envs/pick_embodiedgen.py CHANGED
@@ -16,7 +16,6 @@
16
 
17
  import json
18
  import os
19
- from copy import deepcopy
20
 
21
  import numpy as np
22
  import sapien
@@ -26,6 +25,7 @@ from mani_skill.envs.sapien_env import BaseEnv
26
  from mani_skill.sensors.camera import CameraConfig
27
  from mani_skill.utils import sapien_utils
28
  from mani_skill.utils.building import actors
 
29
  from mani_skill.utils.registration import register_env
30
  from mani_skill.utils.structs.actor import Actor
31
  from mani_skill.utils.structs.pose import Pose
@@ -78,6 +78,14 @@ class PickEmbodiedGen(BaseEnv):
78
  # Add small offset in z-axis to avoid collision.
79
  self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002)
80
  self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002)
 
 
 
 
 
 
 
 
81
 
82
  self.layouts = self.init_env_layouts(
83
  layout_file, num_envs, replace_objs
@@ -106,22 +114,30 @@ class PickEmbodiedGen(BaseEnv):
106
  def init_env_layouts(
107
  layout_file: str, num_envs: int, replace_objs: bool
108
  ) -> list[LayoutInfo]:
109
- layout = LayoutInfo.from_dict(json.load(open(layout_file, "r")))
110
  layouts = []
111
  for env_idx in range(num_envs):
112
  if replace_objs and env_idx > 0:
113
- layout = bfs_placement(deepcopy(layout))
114
- layouts.append(layout)
 
 
 
 
 
 
 
 
115
 
116
  return layouts
117
 
118
  @staticmethod
119
  def compute_robot_init_pose(
120
- layouts: list[LayoutInfo], num_envs: int, z_offset: float = 0.0
121
  ) -> list[list[float]]:
122
  robot_pose = []
123
  for env_idx in range(num_envs):
124
- layout = layouts[env_idx]
 
125
  robot_node = layout.relation[Scene3DItemEnum.ROBOT.value]
126
  x, y, z, qx, qy, qz, qw = layout.position[robot_node]
127
  robot_pose.append([x, y, z + z_offset, qw, qx, qy, qz])
@@ -154,25 +170,32 @@ class PickEmbodiedGen(BaseEnv):
154
  @property
155
  def _default_human_render_camera_configs(self):
156
  pose = sapien_utils.look_at(
157
- eye=[0.9, 0.0, 1.1], target=[0.0, 0.0, 0.9]
 
158
  )
159
 
160
  return CameraConfig(
161
- "render_camera", pose, 256, 256, np.deg2rad(75), 0.01, 100
 
 
 
 
 
 
162
  )
163
 
164
  def _load_agent(self, options: dict):
 
165
  super()._load_agent(options, sapien.Pose(p=[-10, 0, 10]))
166
 
167
  def _load_scene(self, options: dict):
168
  all_objects = []
169
- logger.info(f"Loading assets and decomposition mesh collisions...")
170
  for env_idx in range(self.num_envs):
171
  env_actors = load_assets_from_layout_file(
172
  self.scene,
173
  self.layouts[env_idx],
174
  z_offset=self.objs_z_offset,
175
- init_quat=self.init_quat,
176
  env_idx=env_idx,
177
  )
178
  self.env_actors[f"env{env_idx}"] = env_actors
@@ -229,7 +252,7 @@ class PickEmbodiedGen(BaseEnv):
229
  self.agent.controller.controllers["gripper"].reset()
230
 
231
  def render_gs3d_images(
232
- self, layouts: list[LayoutInfo], num_envs: int, init_quat: list[float]
233
  ) -> dict[str, np.ndarray]:
234
  sim_coord_align = (
235
  torch.tensor(SIM_COORD_ALIGN).to(torch.float32).to(self.device)
@@ -237,12 +260,18 @@ class PickEmbodiedGen(BaseEnv):
237
  cameras = self.scene.sensors.copy()
238
  cameras.update(self.scene.human_render_cameras)
239
 
240
- bg_node = layouts[0].relation[Scene3DItemEnum.BACKGROUND.value]
241
- gs_path = os.path.join(layouts[0].assets[bg_node], "gs_model.ply")
 
 
 
 
 
242
  raw_gs: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
243
  bg_images = dict()
244
  for env_idx in tqdm(range(num_envs), desc="Pre-rendering Background"):
245
- layout = layouts[env_idx]
 
246
  x, y, z, qx, qy, qz, qw = layout.position[bg_node]
247
  qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], init_quat)
248
  init_pose = torch.tensor([x, y, z, qx, qy, qz, qw])
 
16
 
17
  import json
18
  import os
 
19
 
20
  import numpy as np
21
  import sapien
 
25
  from mani_skill.sensors.camera import CameraConfig
26
  from mani_skill.utils import sapien_utils
27
  from mani_skill.utils.building import actors
28
+ from mani_skill.utils.building.ground import build_ground
29
  from mani_skill.utils.registration import register_env
30
  from mani_skill.utils.structs.actor import Actor
31
  from mani_skill.utils.structs.pose import Pose
 
78
  # Add small offset in z-axis to avoid collision.
79
  self.objs_z_offset = kwargs.pop("objs_z_offset", 0.002)
80
  self.robot_z_offset = kwargs.pop("robot_z_offset", 0.002)
81
+ self.camera_cfg = kwargs.pop("camera_cfg", None)
82
+ if self.camera_cfg is None:
83
+ self.camera_cfg = dict(
84
+ camera_eye=[0.9, 0.0, 1.1],
85
+ camera_target_pt=[0.0, 0.0, 0.9],
86
+ image_hw=[256, 256],
87
+ fovy_deg=75,
88
+ )
89
 
90
  self.layouts = self.init_env_layouts(
91
  layout_file, num_envs, replace_objs
 
114
  def init_env_layouts(
115
  layout_file: str, num_envs: int, replace_objs: bool
116
  ) -> list[LayoutInfo]:
 
117
  layouts = []
118
  for env_idx in range(num_envs):
119
  if replace_objs and env_idx > 0:
120
+ layout_info = bfs_placement(layout_file)
121
+ else:
122
+ layout_info = json.load(open(layout_file, "r"))
123
+ layout_info = LayoutInfo.from_dict(layout_info)
124
+
125
+ layout_path = layout_file.replace(".json", f"_env{env_idx}.json")
126
+ with open(layout_path, "w") as f:
127
+ json.dump(layout_info.to_dict(), f, indent=4)
128
+
129
+ layouts.append(layout_path)
130
 
131
  return layouts
132
 
133
  @staticmethod
134
  def compute_robot_init_pose(
135
+ layouts: list[str], num_envs: int, z_offset: float = 0.0
136
  ) -> list[list[float]]:
137
  robot_pose = []
138
  for env_idx in range(num_envs):
139
+ layout = json.load(open(layouts[env_idx], "r"))
140
+ layout = LayoutInfo.from_dict(layout)
141
  robot_node = layout.relation[Scene3DItemEnum.ROBOT.value]
142
  x, y, z, qx, qy, qz, qw = layout.position[robot_node]
143
  robot_pose.append([x, y, z + z_offset, qw, qx, qy, qz])
 
170
  @property
171
  def _default_human_render_camera_configs(self):
172
  pose = sapien_utils.look_at(
173
+ eye=self.camera_cfg["camera_eye"],
174
+ target=self.camera_cfg["camera_target_pt"],
175
  )
176
 
177
  return CameraConfig(
178
+ "render_camera",
179
+ pose,
180
+ self.camera_cfg["image_hw"][1],
181
+ self.camera_cfg["image_hw"][0],
182
+ np.deg2rad(self.camera_cfg["fovy_deg"]),
183
+ 0.01,
184
+ 100,
185
  )
186
 
187
  def _load_agent(self, options: dict):
188
+ self.ground = build_ground(self.scene)
189
  super()._load_agent(options, sapien.Pose(p=[-10, 0, 10]))
190
 
191
  def _load_scene(self, options: dict):
192
  all_objects = []
193
+ logger.info(f"Loading EmbodiedGen assets...")
194
  for env_idx in range(self.num_envs):
195
  env_actors = load_assets_from_layout_file(
196
  self.scene,
197
  self.layouts[env_idx],
198
  z_offset=self.objs_z_offset,
 
199
  env_idx=env_idx,
200
  )
201
  self.env_actors[f"env{env_idx}"] = env_actors
 
252
  self.agent.controller.controllers["gripper"].reset()
253
 
254
  def render_gs3d_images(
255
+ self, layouts: list[str], num_envs: int, init_quat: list[float]
256
  ) -> dict[str, np.ndarray]:
257
  sim_coord_align = (
258
  torch.tensor(SIM_COORD_ALIGN).to(torch.float32).to(self.device)
 
260
  cameras = self.scene.sensors.copy()
261
  cameras.update(self.scene.human_render_cameras)
262
 
263
+ # Preload the background Gaussian Splatting model.
264
+ asset_root = os.path.dirname(layouts[0])
265
+ layout = LayoutInfo.from_dict(json.load(open(layouts[0], "r")))
266
+ bg_node = layout.relation[Scene3DItemEnum.BACKGROUND.value]
267
+ gs_path = os.path.join(
268
+ asset_root, layout.assets[bg_node], "gs_model.ply"
269
+ )
270
  raw_gs: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
271
  bg_images = dict()
272
  for env_idx in tqdm(range(num_envs), desc="Pre-rendering Background"):
273
+ layout = json.load(open(layouts[env_idx], "r"))
274
+ layout = LayoutInfo.from_dict(layout)
275
  x, y, z, qx, qy, qz, qw = layout.position[bg_node]
276
  qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], init_quat)
277
  init_pose = torch.tensor([x, y, z, qx, qy, qz, qw])
embodied_gen/models/gs_model.py CHANGED
@@ -39,6 +39,8 @@ __all__ = [
39
  "GaussianOperator",
40
  ]
41
 
 
 
42
 
43
  @dataclass
44
  class RenderResult:
@@ -210,9 +212,7 @@ class GaussianBase:
210
  device=device,
211
  )
212
 
213
- def save_to_ply(
214
- self, path: str, colors: torch.Tensor = None, enable_mask: bool = False
215
- ):
216
  os.makedirs(os.path.dirname(path), exist_ok=True)
217
  numpy_data = self.get_numpy_data()
218
  means = numpy_data["_means"]
@@ -249,7 +249,6 @@ class GaussianBase:
249
  shN = shN[~invalid_mask]
250
 
251
  num_points = means.shape[0]
252
-
253
  with open(path, "wb") as f:
254
  # Write PLY header
255
  f.write(b"ply\n")
@@ -258,18 +257,11 @@ class GaussianBase:
258
  f.write(b"property float x\n")
259
  f.write(b"property float y\n")
260
  f.write(b"property float z\n")
261
- f.write(b"property float nx\n")
262
- f.write(b"property float ny\n")
263
- f.write(b"property float nz\n")
264
-
265
- if colors is not None:
266
- for j in range(colors.shape[1]):
267
- f.write(f"property float f_dc_{j}\n".encode())
268
- else:
269
- for i, data in enumerate([sh0, shN]):
270
- prefix = "f_dc" if i == 0 else "f_rest"
271
- for j in range(data.shape[1]):
272
- f.write(f"property float {prefix}_{j}\n".encode())
273
 
274
  f.write(b"property float opacity\n")
275
 
@@ -283,24 +275,19 @@ class GaussianBase:
283
  # Write vertex data
284
  for i in range(num_points):
285
  f.write(struct.pack("<fff", *means[i])) # x, y, z
286
- f.write(struct.pack("<fff", 0, 0, 0)) # nx, ny, nz (zeros)
287
 
288
- if colors is not None:
289
- color = colors.detach().cpu().numpy()
290
- for j in range(color.shape[1]):
291
- f_dc = (color[i, j] - 0.5) / 0.2820947917738781
292
- f.write(struct.pack("<f", f_dc))
293
- else:
294
- for data in [sh0, shN]:
295
- for j in range(data.shape[1]):
296
- f.write(struct.pack("<f", data[i, j]))
297
 
298
- f.write(struct.pack("<f", opacities[i])) # opacity
299
 
300
  for data in [scales, quats]:
301
  for j in range(data.shape[1]):
302
  f.write(struct.pack("<f", data[i, j]))
303
 
 
 
304
 
305
  @dataclass
306
  class GaussianOperator(GaussianBase):
@@ -508,8 +495,8 @@ class GaussianOperator(GaussianBase):
508
 
509
 
510
  if __name__ == "__main__":
511
- input_gs = "outputs/test/debug.ply"
512
- output_gs = "./debug_v3.ply"
513
  gs_model: GaussianOperator = GaussianOperator.load_from_ply(input_gs)
514
 
515
  # 绕 x 轴旋转 180°
 
39
  "GaussianOperator",
40
  ]
41
 
42
+ SH_C0 = 0.2820947917738781
43
+
44
 
45
  @dataclass
46
  class RenderResult:
 
212
  device=device,
213
  )
214
 
215
+ def save_to_ply(self, path: str, enable_mask: bool = False) -> None:
 
 
216
  os.makedirs(os.path.dirname(path), exist_ok=True)
217
  numpy_data = self.get_numpy_data()
218
  means = numpy_data["_means"]
 
249
  shN = shN[~invalid_mask]
250
 
251
  num_points = means.shape[0]
 
252
  with open(path, "wb") as f:
253
  # Write PLY header
254
  f.write(b"ply\n")
 
257
  f.write(b"property float x\n")
258
  f.write(b"property float y\n")
259
  f.write(b"property float z\n")
260
+
261
+ for i, data in enumerate([sh0, shN]):
262
+ prefix = "f_dc" if i == 0 else "f_rest"
263
+ for j in range(data.shape[1]):
264
+ f.write(f"property float {prefix}_{j}\n".encode())
 
 
 
 
 
 
 
265
 
266
  f.write(b"property float opacity\n")
267
 
 
275
  # Write vertex data
276
  for i in range(num_points):
277
  f.write(struct.pack("<fff", *means[i])) # x, y, z
 
278
 
279
+ for data in [sh0, shN]:
280
+ for j in range(data.shape[1]):
281
+ f.write(struct.pack("<f", data[i, j]))
 
 
 
 
 
 
282
 
283
+ f.write(struct.pack("<f", opacities[i].item())) # opacity
284
 
285
  for data in [scales, quats]:
286
  for j in range(data.shape[1]):
287
  f.write(struct.pack("<f", data[i, j]))
288
 
289
+ return
290
+
291
 
292
  @dataclass
293
  class GaussianOperator(GaussianBase):
 
495
 
496
 
497
  if __name__ == "__main__":
498
+ input_gs = "outputs/layouts_gens_demo/task_0000/background/gs_model.ply"
499
+ output_gs = "./gs_model.ply"
500
  gs_model: GaussianOperator = GaussianOperator.load_from_ply(input_gs)
501
 
502
  # 绕 x 轴旋转 180°
embodied_gen/scripts/compose_layout.py CHANGED
@@ -50,10 +50,7 @@ def entrypoint(**kwargs):
50
  out_scene_path = f"{output_dir}/Iscene.glb"
51
  out_layout_path = f"{output_dir}/layout.json"
52
 
53
- with open(args.layout_path, "r") as f:
54
- layout_info = LayoutInfo.from_dict(json.load(f))
55
-
56
- layout_info = bfs_placement(layout_info, seed=args.seed)
57
  with open(out_layout_path, "w") as f:
58
  json.dump(layout_info.to_dict(), f, indent=4)
59
 
@@ -63,7 +60,7 @@ def entrypoint(**kwargs):
63
  sim_cli(
64
  layout_path=out_layout_path,
65
  output_dir=output_dir,
66
- robot_name="franka" if args.insert_robot else None,
67
  )
68
 
69
  logger.info(f"Layout placement completed in {output_dir}")
 
50
  out_scene_path = f"{output_dir}/Iscene.glb"
51
  out_layout_path = f"{output_dir}/layout.json"
52
 
53
+ layout_info = bfs_placement(args.layout_path, seed=args.seed)
 
 
 
54
  with open(out_layout_path, "w") as f:
55
  json.dump(layout_info.to_dict(), f, indent=4)
56
 
 
60
  sim_cli(
61
  layout_path=out_layout_path,
62
  output_dir=output_dir,
63
+ insert_robot=args.insert_robot,
64
  )
65
 
66
  logger.info(f"Layout placement completed in {output_dir}")
embodied_gen/scripts/gen_layout.py CHANGED
@@ -119,11 +119,15 @@ def entrypoint() -> None:
119
  match_scene_path = f"{os.path.dirname(args.bg_list)}/{match_key}"
120
  bg_save_dir = os.path.join(output_root, "background")
121
  copytree(match_scene_path, bg_save_dir, dirs_exist_ok=True)
122
- layout_info.assets[bg_node] = bg_save_dir
123
 
124
  # BFS layout placement.
 
 
 
 
125
  layout_info = bfs_placement(
126
- layout_info,
127
  limit_reach_range=True if args.insert_robot else False,
128
  seed=args.seed_layout,
129
  )
 
119
  match_scene_path = f"{os.path.dirname(args.bg_list)}/{match_key}"
120
  bg_save_dir = os.path.join(output_root, "background")
121
  copytree(match_scene_path, bg_save_dir, dirs_exist_ok=True)
122
+ layout_info.assets[bg_node] = "background"
123
 
124
  # BFS layout placement.
125
+ layout_path = f"{output_root}/layout.json"
126
+ with open(layout_path, "w") as f:
127
+ json.dump(layout_info.to_dict(), f, indent=4)
128
+
129
  layout_info = bfs_placement(
130
+ layout_path,
131
  limit_reach_range=True if args.insert_robot else False,
132
  seed=args.seed_layout,
133
  )
embodied_gen/scripts/imageto3d.py CHANGED
@@ -220,8 +220,8 @@ def entrypoint(**kwargs):
220
  )
221
 
222
  color_img = Image.open(color_path)
223
- half_height = int(color_img.height * 2 / 3)
224
- crop_img = color_img.crop((0, 0, color_img.width, half_height))
225
  geo_flag, geo_result = GEO_CHECKER([crop_img], text=asset_node)
226
  logger.warning(
227
  f"{GEO_CHECKER.__class__.__name__}: {geo_result} for {seg_path}"
 
220
  )
221
 
222
  color_img = Image.open(color_path)
223
+ keep_height = int(color_img.height * 2 / 3)
224
+ crop_img = color_img.crop((0, 0, color_img.width, keep_height))
225
  geo_flag, geo_result = GEO_CHECKER([crop_img], text=asset_node)
226
  logger.warning(
227
  f"{GEO_CHECKER.__class__.__name__}: {geo_result} for {seg_path}"
embodied_gen/scripts/parallel_sim.py CHANGED
@@ -20,7 +20,7 @@ from embodied_gen.utils.monkey_patches import monkey_patch_maniskill
20
  monkey_patch_maniskill()
21
  import json
22
  from collections import defaultdict
23
- from dataclasses import dataclass
24
  from typing import Literal
25
 
26
  import gymnasium as gym
@@ -69,6 +69,18 @@ class ParallelSimConfig:
69
  reach_target_only: bool = True
70
  """Whether to only reach target without full action"""
71
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  def entrypoint(**kwargs):
74
  if kwargs is None or len(kwargs) == 0:
@@ -83,6 +95,12 @@ def entrypoint(**kwargs):
83
  enable_shadow=cfg.enable_shadow,
84
  layout_file=cfg.layout_file,
85
  control_mode=cfg.control_mode,
 
 
 
 
 
 
86
  )
87
  env = RecordEpisode(
88
  env,
 
20
  monkey_patch_maniskill()
21
  import json
22
  from collections import defaultdict
23
+ from dataclasses import dataclass, field
24
  from typing import Literal
25
 
26
  import gymnasium as gym
 
69
  reach_target_only: bool = True
70
  """Whether to only reach target without full action"""
71
 
72
+ # Camera settings
73
+ camera_eye: list[float] = field(default_factory=lambda: [0.9, 0.0, 1.1])
74
+ """Camera eye position [x, y, z] in global coordinate system"""
75
+ camera_target_pt: list[float] = field(
76
+ default_factory=lambda: [0.0, 0.0, 0.9]
77
+ )
78
+ """Camera target (look-at) point [x, y, z] in global coordinate system"""
79
+ image_hw: list[int] = field(default_factory=lambda: [256, 256])
80
+ """Rendered image height and width [height, width]"""
81
+ fovy_deg: float = 75
82
+ """Camera vertical field of view in degrees"""
83
+
84
 
85
  def entrypoint(**kwargs):
86
  if kwargs is None or len(kwargs) == 0:
 
95
  enable_shadow=cfg.enable_shadow,
96
  layout_file=cfg.layout_file,
97
  control_mode=cfg.control_mode,
98
+ camera_cfg=dict(
99
+ camera_eye=cfg.camera_eye,
100
+ camera_target_pt=cfg.camera_target_pt,
101
+ image_hw=cfg.image_hw,
102
+ fovy_deg=cfg.fovy_deg,
103
+ ),
104
  )
105
  env = RecordEpisode(
106
  env,
embodied_gen/scripts/simulate_sapien.py CHANGED
@@ -91,17 +91,15 @@ def entrypoint(**kwargs):
91
  fovy_deg=cfg.fovy_deg,
92
  )
93
  with open(cfg.layout_path, "r") as f:
94
- layout_data = json.load(f)
95
- layout_data: LayoutInfo = LayoutInfo.from_dict(layout_data)
96
 
97
  actors = load_assets_from_layout_file(
98
  scene_manager.scene,
99
- layout_data,
100
  cfg.z_offset,
101
- cfg.init_quat,
102
  )
103
  agent = load_mani_skill_robot(
104
- scene_manager.scene, layout_data, cfg.control_freq
105
  )
106
 
107
  frames = defaultdict(list)
@@ -134,8 +132,9 @@ def entrypoint(**kwargs):
134
  if "Foreground" not in cfg.render_keys:
135
  return
136
 
 
137
  bg_node = layout_data.relation[Scene3DItemEnum.BACKGROUND.value]
138
- gs_path = f"{layout_data.assets[bg_node]}/gs_model.ply"
139
  gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
140
  x, y, z, qx, qy, qz, qw = layout_data.position[bg_node]
141
  qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat)
@@ -170,7 +169,8 @@ def entrypoint(**kwargs):
170
  for node in actions:
171
  if actions[node] is None:
172
  continue
173
- for action in tqdm(actions[node]):
 
174
  grasp_frames = scene_manager.step_action(
175
  agent,
176
  torch.Tensor(action[None, ...]),
 
91
  fovy_deg=cfg.fovy_deg,
92
  )
93
  with open(cfg.layout_path, "r") as f:
94
+ layout_data: LayoutInfo = LayoutInfo.from_dict(json.load(f))
 
95
 
96
  actors = load_assets_from_layout_file(
97
  scene_manager.scene,
98
+ cfg.layout_path,
99
  cfg.z_offset,
 
100
  )
101
  agent = load_mani_skill_robot(
102
+ scene_manager.scene, cfg.layout_path, cfg.control_freq
103
  )
104
 
105
  frames = defaultdict(list)
 
132
  if "Foreground" not in cfg.render_keys:
133
  return
134
 
135
+ asset_root = os.path.dirname(cfg.layout_path)
136
  bg_node = layout_data.relation[Scene3DItemEnum.BACKGROUND.value]
137
+ gs_path = f"{asset_root}/{layout_data.assets[bg_node]}/gs_model.ply"
138
  gs_model: GaussianOperator = GaussianOperator.load_from_ply(gs_path)
139
  x, y, z, qx, qy, qz, qw = layout_data.position[bg_node]
140
  qx, qy, qz, qw = quaternion_multiply([qx, qy, qz, qw], cfg.init_quat)
 
169
  for node in actions:
170
  if actions[node] is None:
171
  continue
172
+ logger.info(f"Render SIM grasping in camera {idx} for {node}...")
173
+ for action in actions[node]:
174
  grasp_frames = scene_manager.step_action(
175
  agent,
176
  torch.Tensor(action[None, ...]),
embodied_gen/scripts/textto3d.py CHANGED
@@ -187,7 +187,7 @@ def text_to_3d(**kwargs) -> dict:
187
  logger.warning(
188
  f"Node {node}, {TXTGEN_CHECKER.__class__.__name__}: {qa_result}"
189
  )
190
- results["assets"][node] = f"{node_save_dir}/result"
191
  results["quality"][node] = qa_result
192
 
193
  if qa_flag is None or qa_flag is True:
 
187
  logger.warning(
188
  f"Node {node}, {TXTGEN_CHECKER.__class__.__name__}: {qa_result}"
189
  )
190
+ results["assets"][node] = f"asset3d/{save_node}/result"
191
  results["quality"][node] = qa_result
192
 
193
  if qa_flag is None or qa_flag is True:
embodied_gen/utils/geometry.py CHANGED
@@ -14,6 +14,7 @@
14
  # implied. See the License for the specific language governing
15
  # permissions and limitations under the License.
16
 
 
17
  import os
18
  import random
19
  from collections import defaultdict, deque
@@ -32,7 +33,6 @@ from embodied_gen.utils.enum import LayoutInfo, Scene3DItemEnum
32
  from embodied_gen.utils.log import logger
33
 
34
  __all__ = [
35
- "bfs_placement",
36
  "with_seed",
37
  "matrix_to_pose",
38
  "pose_to_matrix",
@@ -222,7 +222,7 @@ def check_reachable(
222
 
223
  @with_seed("seed")
224
  def bfs_placement(
225
- layout_info: LayoutInfo,
226
  floor_margin: float = 0,
227
  beside_margin: float = 0.1,
228
  max_attempts: int = 3000,
@@ -232,6 +232,8 @@ def bfs_placement(
232
  robot_dim: float = 0.12,
233
  seed: int = None,
234
  ) -> LayoutInfo:
 
 
235
  object_mapping = layout_info.objs_mapping
236
  position = {} # node: [x, y, z, qx, qy, qz, qw]
237
  parent_bbox_xy = {}
@@ -254,6 +256,7 @@ def bfs_placement(
254
  mesh_path = (
255
  f"{layout_info.assets[node]}/mesh/{node.replace(' ', '_')}.obj"
256
  )
 
257
  mesh_info[node]["path"] = mesh_path
258
  mesh = trimesh.load(mesh_path)
259
  vertices = mesh.vertices
@@ -282,10 +285,9 @@ def bfs_placement(
282
  # For manipulated and distractor objects, apply random rotation
283
  angle_rad = np.random.uniform(0, 2 * np.pi)
284
  object_quat = compute_axis_rotation_quat(
285
- axis="y", angle_rad=angle_rad
286
  )
287
- object_quat_scipy = np.roll(object_quat, 1) # [w, x, y, z]
288
- rotation = R.from_quat(object_quat_scipy).as_matrix()
289
  vertices = np.dot(mesh.vertices, rotation.T)
290
  z1 = np.percentile(vertices[:, 1], 1)
291
  z2 = np.percentile(vertices[:, 1], 99)
 
14
  # implied. See the License for the specific language governing
15
  # permissions and limitations under the License.
16
 
17
+ import json
18
  import os
19
  import random
20
  from collections import defaultdict, deque
 
33
  from embodied_gen.utils.log import logger
34
 
35
  __all__ = [
 
36
  "with_seed",
37
  "matrix_to_pose",
38
  "pose_to_matrix",
 
222
 
223
  @with_seed("seed")
224
  def bfs_placement(
225
+ layout_file: str,
226
  floor_margin: float = 0,
227
  beside_margin: float = 0.1,
228
  max_attempts: int = 3000,
 
232
  robot_dim: float = 0.12,
233
  seed: int = None,
234
  ) -> LayoutInfo:
235
+ layout_info = LayoutInfo.from_dict(json.load(open(layout_file, "r")))
236
+ asset_dir = os.path.dirname(layout_file)
237
  object_mapping = layout_info.objs_mapping
238
  position = {} # node: [x, y, z, qx, qy, qz, qw]
239
  parent_bbox_xy = {}
 
256
  mesh_path = (
257
  f"{layout_info.assets[node]}/mesh/{node.replace(' ', '_')}.obj"
258
  )
259
+ mesh_path = os.path.join(asset_dir, mesh_path)
260
  mesh_info[node]["path"] = mesh_path
261
  mesh = trimesh.load(mesh_path)
262
  vertices = mesh.vertices
 
285
  # For manipulated and distractor objects, apply random rotation
286
  angle_rad = np.random.uniform(0, 2 * np.pi)
287
  object_quat = compute_axis_rotation_quat(
288
+ axis="z", angle_rad=angle_rad
289
  )
290
+ rotation = R.from_quat(object_quat).as_matrix()
 
291
  vertices = np.dot(mesh.vertices, rotation.T)
292
  z1 = np.percentile(vertices[:, 1], 1)
293
  z2 = np.percentile(vertices[:, 1], 99)
embodied_gen/utils/monkey_patches.py CHANGED
@@ -175,7 +175,7 @@ def monkey_patch_maniskill():
175
  seg_labels = camera.get_obs(
176
  rgb=False, depth=False, segmentation=True, position=False
177
  )["segmentation"]
178
- masks = np.where((seg_labels.cpu() > 0), 255, 0).astype(
179
  np.uint8
180
  )
181
  masks = torch.tensor(masks).to(color.device)
 
175
  seg_labels = camera.get_obs(
176
  rgb=False, depth=False, segmentation=True, position=False
177
  )["segmentation"]
178
+ masks = np.where((seg_labels.cpu() > 1), 255, 0).astype(
179
  np.uint8
180
  )
181
  masks = torch.tensor(masks).to(color.device)
embodied_gen/utils/simulation.py CHANGED
@@ -15,7 +15,6 @@
15
  # permissions and limitations under the License.
16
 
17
  import json
18
- import logging
19
  import os
20
  import xml.etree.ElementTree as ET
21
  from collections import defaultdict
@@ -62,32 +61,48 @@ __all__ = [
62
 
63
 
64
  def load_actor_from_urdf(
65
- scene: ManiSkillScene | sapien.Scene,
66
  file_path: str,
67
- pose: sapien.Pose,
68
  env_idx: int = None,
69
  use_static: bool = False,
70
  update_mass: bool = False,
 
71
  ) -> sapien.pysapien.Entity:
 
 
 
 
 
 
 
 
 
 
72
  tree = ET.parse(file_path)
73
  root = tree.getroot()
74
  node_name = root.get("name")
75
  file_dir = os.path.dirname(file_path)
76
 
77
- visual_mesh = root.find('.//visual/geometry/mesh')
78
  visual_file = visual_mesh.get("filename")
79
  visual_scale = visual_mesh.get("scale", "1.0 1.0 1.0")
80
  visual_scale = np.array([float(x) for x in visual_scale.split()])
 
81
 
82
- collision_mesh = root.find('.//collision/geometry/mesh')
83
  collision_file = collision_mesh.get("filename")
84
  collision_scale = collision_mesh.get("scale", "1.0 1.0 1.0")
85
  collision_scale = np.array([float(x) for x in collision_scale.split()])
 
 
 
 
86
 
87
  visual_file = os.path.join(file_dir, visual_file)
88
  collision_file = os.path.join(file_dir, collision_file)
89
- static_fric = root.find('.//collision/gazebo/mu1').text
90
- dynamic_fric = root.find('.//collision/gazebo/mu2').text
91
 
92
  material = physx.PhysxMaterial(
93
  static_friction=np.clip(float(static_fric), 0.1, 0.7),
@@ -106,17 +121,27 @@ def load_actor_from_urdf(
106
  # decomposition_params=dict(
107
  # threshold=0.05, max_convex_hull=64, verbose=False
108
  # ),
 
 
 
 
 
 
 
109
  )
 
 
110
 
111
- builder.add_visual_from_file(visual_file, scale=visual_scale)
112
  builder.set_initial_pose(pose)
113
  if isinstance(scene, ManiSkillScene) and env_idx is not None:
114
  builder.set_scene_idxs([env_idx])
115
 
116
- actor = builder.build(name=f"{node_name}-{env_idx}")
 
 
117
 
118
  if update_mass and hasattr(actor.components[1], "mass"):
119
- node_mass = float(root.find('.//inertial/mass').get("value"))
120
  actor.components[1].set_mass(node_mass)
121
 
122
  return actor
@@ -124,7 +149,7 @@ def load_actor_from_urdf(
124
 
125
  def load_assets_from_layout_file(
126
  scene: ManiSkillScene | sapien.Scene,
127
- layout: LayoutInfo | str,
128
  z_offset: float = 0.0,
129
  init_quat: list[float] = [0, 0, 0, 1],
130
  env_idx: int = None,
@@ -133,19 +158,18 @@ def load_assets_from_layout_file(
133
 
134
  Args:
135
  scene (sapien.Scene | ManiSkillScene): The SAPIEN or ManiSkill scene to load assets into.
136
- layout (LayoutInfo): The layout information data.
137
  z_offset (float): Offset to apply to the Z-coordinate of non-context objects.
138
  init_quat (List[float]): Initial quaternion (x, y, z, w) for orientation adjustment.
139
  env_idx (int): Environment index for multi-environment setup.
140
  """
141
- if isinstance(layout, str) and layout.endswith(".json"):
142
- layout = LayoutInfo.from_dict(json.load(open(layout, "r")))
143
-
144
  actors = dict()
145
  for node in layout.assets:
146
  file_dir = layout.assets[node]
147
  file_name = f"{node.replace(' ', '_')}.urdf"
148
- urdf_file = os.path.join(file_dir, file_name)
149
 
150
  if layout.objs_mapping[node] == Scene3DItemEnum.BACKGROUND.value:
151
  continue
 
15
  # permissions and limitations under the License.
16
 
17
  import json
 
18
  import os
19
  import xml.etree.ElementTree as ET
20
  from collections import defaultdict
 
61
 
62
 
63
  def load_actor_from_urdf(
64
+ scene: sapien.Scene | ManiSkillScene,
65
  file_path: str,
66
+ pose: sapien.Pose | None = None,
67
  env_idx: int = None,
68
  use_static: bool = False,
69
  update_mass: bool = False,
70
+ scale: float | np.ndarray = 1.0,
71
  ) -> sapien.pysapien.Entity:
72
+ def _get_local_pose(origin_tag: ET.Element | None) -> sapien.Pose:
73
+ local_pose = sapien.Pose(p=[0, 0, 0], q=[1, 0, 0, 0])
74
+ if origin_tag is not None:
75
+ xyz = list(map(float, origin_tag.get("xyz", "0 0 0").split()))
76
+ rpy = list(map(float, origin_tag.get("rpy", "0 0 0").split()))
77
+ qx, qy, qz, qw = R.from_euler("xyz", rpy, degrees=False).as_quat()
78
+ local_pose = sapien.Pose(p=xyz, q=[qw, qx, qy, qz])
79
+
80
+ return local_pose
81
+
82
  tree = ET.parse(file_path)
83
  root = tree.getroot()
84
  node_name = root.get("name")
85
  file_dir = os.path.dirname(file_path)
86
 
87
+ visual_mesh = root.find(".//visual/geometry/mesh")
88
  visual_file = visual_mesh.get("filename")
89
  visual_scale = visual_mesh.get("scale", "1.0 1.0 1.0")
90
  visual_scale = np.array([float(x) for x in visual_scale.split()])
91
+ visual_scale *= np.array(scale)
92
 
93
+ collision_mesh = root.find(".//collision/geometry/mesh")
94
  collision_file = collision_mesh.get("filename")
95
  collision_scale = collision_mesh.get("scale", "1.0 1.0 1.0")
96
  collision_scale = np.array([float(x) for x in collision_scale.split()])
97
+ collision_scale *= np.array(scale)
98
+
99
+ visual_pose = _get_local_pose(root.find(".//visual/origin"))
100
+ collision_pose = _get_local_pose(root.find(".//collision/origin"))
101
 
102
  visual_file = os.path.join(file_dir, visual_file)
103
  collision_file = os.path.join(file_dir, collision_file)
104
+ static_fric = root.find(".//collision/gazebo/mu1").text
105
+ dynamic_fric = root.find(".//collision/gazebo/mu2").text
106
 
107
  material = physx.PhysxMaterial(
108
  static_friction=np.clip(float(static_fric), 0.1, 0.7),
 
121
  # decomposition_params=dict(
122
  # threshold=0.05, max_convex_hull=64, verbose=False
123
  # ),
124
+ pose=collision_pose,
125
+ )
126
+
127
+ builder.add_visual_from_file(
128
+ visual_file,
129
+ scale=visual_scale,
130
+ pose=visual_pose,
131
  )
132
+ if pose is None:
133
+ pose = sapien.Pose(p=[0, 0, 0], q=[1, 0, 0, 0])
134
 
 
135
  builder.set_initial_pose(pose)
136
  if isinstance(scene, ManiSkillScene) and env_idx is not None:
137
  builder.set_scene_idxs([env_idx])
138
 
139
+ actor = builder.build(
140
+ name=node_name if env_idx is None else f"{node_name}-{env_idx}"
141
+ )
142
 
143
  if update_mass and hasattr(actor.components[1], "mass"):
144
+ node_mass = float(root.find(".//inertial/mass").get("value"))
145
  actor.components[1].set_mass(node_mass)
146
 
147
  return actor
 
149
 
150
  def load_assets_from_layout_file(
151
  scene: ManiSkillScene | sapien.Scene,
152
+ layout: str,
153
  z_offset: float = 0.0,
154
  init_quat: list[float] = [0, 0, 0, 1],
155
  env_idx: int = None,
 
158
 
159
  Args:
160
  scene (sapien.Scene | ManiSkillScene): The SAPIEN or ManiSkill scene to load assets into.
161
+ layout (str): The layout file path.
162
  z_offset (float): Offset to apply to the Z-coordinate of non-context objects.
163
  init_quat (List[float]): Initial quaternion (x, y, z, w) for orientation adjustment.
164
  env_idx (int): Environment index for multi-environment setup.
165
  """
166
+ asset_root = os.path.dirname(layout)
167
+ layout = LayoutInfo.from_dict(json.load(open(layout, "r")))
 
168
  actors = dict()
169
  for node in layout.assets:
170
  file_dir = layout.assets[node]
171
  file_name = f"{node.replace(' ', '_')}.urdf"
172
+ urdf_file = os.path.join(asset_root, file_dir, file_name)
173
 
174
  if layout.objs_mapping[node] == Scene3DItemEnum.BACKGROUND.value:
175
  continue
embodied_gen/utils/tags.py CHANGED
@@ -1 +1 @@
1
- VERSION = "v0.1.4"
 
1
+ VERSION = "v0.1.5"
embodied_gen/validators/urdf_convertor.py CHANGED
@@ -24,6 +24,7 @@ from xml.dom.minidom import parseString
24
 
25
  import numpy as np
26
  import trimesh
 
27
  from embodied_gen.data.convex_decomposer import decompose_convex_mesh
28
  from embodied_gen.utils.gpt_clients import GPT_CLIENT, GPTclient
29
  from embodied_gen.utils.process_media import render_asset3d
@@ -40,11 +41,13 @@ URDF_TEMPLATE = """
40
  <robot name="template_robot">
41
  <link name="template_link">
42
  <visual>
 
43
  <geometry>
44
  <mesh filename="mesh.obj" scale="1.0 1.0 1.0"/>
45
  </geometry>
46
  </visual>
47
  <collision>
 
48
  <geometry>
49
  <mesh filename="mesh.obj" scale="1.0 1.0 1.0"/>
50
  </geometry>
@@ -86,6 +89,7 @@ class URDFGenerator(object):
86
  render_dir: str = "urdf_renders",
87
  render_view_num: int = 4,
88
  decompose_convex: bool = False,
 
89
  ) -> None:
90
  if mesh_file_list is None:
91
  mesh_file_list = []
@@ -160,6 +164,8 @@ class URDFGenerator(object):
160
  ]
161
  self.attrs_name = attrs_name
162
  self.decompose_convex = decompose_convex
 
 
163
 
164
  def parse_response(self, response: str) -> dict[str, any]:
165
  lines = response.split("\n")
@@ -251,6 +257,14 @@ class URDFGenerator(object):
251
  raise ValueError("URDF template is missing 'link' element.")
252
  link.set("name", output_name)
253
 
 
 
 
 
 
 
 
 
254
  # Update visual geometry
255
  visual = link.find("visual/geometry/mesh")
256
  if visual is not None:
@@ -273,7 +287,11 @@ class URDFGenerator(object):
273
  decompose_convex_mesh(
274
  mesh_output_path, output_path, **d_params
275
  )
276
- collision_mesh = f"{self.output_mesh_dir}/{filename}"
 
 
 
 
277
  except Exception as e:
278
  logger.warning(
279
  f"Convex decomposition failed for {output_path}, {e}."
@@ -436,6 +454,7 @@ class URDFGenerator(object):
436
 
437
 
438
  if __name__ == "__main__":
 
439
  urdf_gen = URDFGenerator(GPT_CLIENT, render_view_num=4)
440
  urdf_path = urdf_gen(
441
  mesh_path="outputs/layout2/asset3d/marker/result/mesh/marker.obj",
 
24
 
25
  import numpy as np
26
  import trimesh
27
+ from scipy.spatial.transform import Rotation
28
  from embodied_gen.data.convex_decomposer import decompose_convex_mesh
29
  from embodied_gen.utils.gpt_clients import GPT_CLIENT, GPTclient
30
  from embodied_gen.utils.process_media import render_asset3d
 
41
  <robot name="template_robot">
42
  <link name="template_link">
43
  <visual>
44
+ <origin xyz="0 0 0" rpy="0 0 0"/>
45
  <geometry>
46
  <mesh filename="mesh.obj" scale="1.0 1.0 1.0"/>
47
  </geometry>
48
  </visual>
49
  <collision>
50
+ <origin xyz="0 0 0" rpy="0 0 0"/>
51
  <geometry>
52
  <mesh filename="mesh.obj" scale="1.0 1.0 1.0"/>
53
  </geometry>
 
89
  render_dir: str = "urdf_renders",
90
  render_view_num: int = 4,
91
  decompose_convex: bool = False,
92
+ rotate_xyzw: list[float] = (0.7071, 0, 0, 0.7071),
93
  ) -> None:
94
  if mesh_file_list is None:
95
  mesh_file_list = []
 
164
  ]
165
  self.attrs_name = attrs_name
166
  self.decompose_convex = decompose_convex
167
+ # Rotate 90 degrees around the X-axis from Blender to align with simulators.
168
+ self.rotate_xyzw = rotate_xyzw
169
 
170
  def parse_response(self, response: str) -> dict[str, any]:
171
  lines = response.split("\n")
 
257
  raise ValueError("URDF template is missing 'link' element.")
258
  link.set("name", output_name)
259
 
260
+ if self.rotate_xyzw is not None:
261
+ rpy = Rotation.from_quat(self.rotate_xyzw).as_euler(
262
+ "xyz", degrees=False
263
+ )
264
+ rpy = [str(round(num, 4)) for num in rpy]
265
+ link.find("visual/origin").set("rpy", " ".join(rpy))
266
+ link.find("collision/origin").set("rpy", " ".join(rpy))
267
+
268
  # Update visual geometry
269
  visual = link.find("visual/geometry/mesh")
270
  if visual is not None:
 
287
  decompose_convex_mesh(
288
  mesh_output_path, output_path, **d_params
289
  )
290
+ obj_filename = filename.replace(".ply", ".obj")
291
+ trimesh.load(output_path).export(
292
+ f"{mesh_folder}/{obj_filename}"
293
+ )
294
+ collision_mesh = f"{self.output_mesh_dir}/{obj_filename}"
295
  except Exception as e:
296
  logger.warning(
297
  f"Convex decomposition failed for {output_path}, {e}."
 
454
 
455
 
456
  if __name__ == "__main__":
457
+ # Rotate 90 degrees around the X-axis to align with simulators.
458
  urdf_gen = URDFGenerator(GPT_CLIENT, render_view_num=4)
459
  urdf_path = urdf_gen(
460
  mesh_path="outputs/layout2/asset3d/marker/result/mesh/marker.obj",