diff --git a/.gitattributes b/.gitattributes
index fc5a340abcf2e5202e33ecd96c02b09d67df318b..d4f40a3bb073a205f61f920e4d257f3de68ce09f 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -32,6 +32,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
+*.db3 filter=lfs diff=lfs merge=lfs -text
+*.db3.zstd filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
*_model filter=lfs diff=lfs merge=lfs -text
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/b2w_rhc.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/b2w_rhc.py
new file mode 100644
index 0000000000000000000000000000000000000000..5935983ef50bbc0c982d34eefeb6ed796182b215
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/b2w_rhc.py
@@ -0,0 +1,108 @@
+from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc import HybridQuadRhc
+
+import numpy as np
+
+from typing import Dict
+
+from kyonrlstepping.utils.sysutils import PathsGetter
+
+class B2WRhc(HybridQuadRhc):
+
+ def __init__(self,
+ srdf_path: str,
+ urdf_path: str,
+ robot_name: str, # used for shared memory namespaces
+ codegen_dir: str,
+ n_nodes: float = 31,
+ dt: float = 0.03,
+ injection_node: int = 10,
+ max_solver_iter = 1, # defaults to rt-iteration
+ open_loop: bool = True,
+ close_loop_all: bool = False,
+ dtype = np.float32,
+ verbose = False,
+ debug = False,
+ refs_in_hor_frame = True,
+ timeout_ms: int = 60000,
+ custom_opts: Dict = {}):
+
+ paths = PathsGetter()
+ self._files_suffix=""
+ if open_loop:
+ self._files_suffix="_open"
+ config_path=paths.RHCCONFIGPATH_B2W_CONTINUOUS+self._files_suffix+".yaml"
+
+ self._fix_wheels=False
+
+ super().__init__(srdf_path=srdf_path,
+ urdf_path=urdf_path,
+ config_path=config_path,
+ robot_name=robot_name, # used for shared memory namespaces
+ codegen_dir=codegen_dir,
+ n_nodes=n_nodes,
+ dt=dt,
+ injection_node=injection_node,
+ max_solver_iter=max_solver_iter, # defaults to rt-iteration
+ open_loop=open_loop,
+ close_loop_all=close_loop_all,
+ dtype=dtype,
+ verbose=verbose,
+ debug=debug,
+ refs_in_hor_frame=refs_in_hor_frame,
+ timeout_ms=timeout_ms,
+ custom_opts=custom_opts)
+
+ self._fail_idx_scale=1e-9
+ self._fail_idx_thresh_open_loop=1e0
+ self._fail_idx_thresh_closed_loop=10
+
+ if open_loop:
+ self._fail_idx_thresh=self._fail_idx_thresh_open_loop
+ else:
+ self._fail_idx_thresh=self._fail_idx_thresh_closed_loop
+
+ # adding some additional config files for jnt imp control
+ self._rhc_fpaths.append(paths.JNT_IMP_CONFIG_XBOT+".yaml")
+ self._rhc_fpaths.append(paths.JNT_IMP_CONFIG+".yaml")
+
+ def _set_rhc_pred_idx(self):
+ self._pred_node_idx=round((self._n_nodes-1)*2/3)
+
+ def _set_rhc_cmds_idx(self):
+ self._rhc_cmds_node_idx=2
+
+ def _config_override(self):
+ paths = PathsGetter()
+ # if ("control_wheels" in self._custom_opts) and \
+ # ("false" in self._custom_opts["control_wheels"] or \
+ # "False" in self._custom_opts["control_wheels"]):
+ # self.config_path = paths.RHCCONFIGPATH_B2W_NO_WHEELS+self._files_suffix+".yaml"
+ # self._fix_wheels=True
+
+ def _init_problem(self):
+
+ fixed_jnts_patterns=None
+ wheels_patterns=["foot_joint"]
+ if self._fix_wheels:
+ fixed_jnts_patterns=[]
+ fixed_jnts_patterns.append("foot_joint")
+ wheels_patterns=None
+
+ flight_duration_sec=0.4 # [s]
+ flight_duration=int(flight_duration_sec/self._dt)
+ post_flight_duration_sec=0.2 # [s]
+ post_flight_duration=int(post_flight_duration_sec/self._dt)
+
+ step_height=0.15
+ if ("step_height" in self._custom_opts):
+ step_height=self._custom_opts["step_height"]
+
+ super()._init_problem(fixed_jnt_patterns=fixed_jnts_patterns,
+ wheels_patterns=wheels_patterns,
+ foot_linkname="FL_foot",
+ flight_duration=flight_duration,
+ post_flight_stance=post_flight_duration,
+ step_height=step_height,
+ keep_yaw_vert=False,
+ phase_force_reg=2e-2,
+ vel_bounds_weight=1.0)
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/b2w_rhc_continuous.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/b2w_rhc_continuous.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..22018d950c1c754514e76107e24adbf7c8aac0f3
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/b2w_rhc_continuous.yaml
@@ -0,0 +1,335 @@
+solver:
+ type: ilqr
+ ipopt.linear_solver: ma57
+ ipopt.tol: 0.1
+# ilqr.merit_der_threshold: 1e-3
+# ilqr.defect_norm_threshold: 1e-3
+ ipopt.constr_viol_tol: 0.01
+ ilqr.constraint_violation_threshold: 1e-2
+# ipopt.hessian_approximation: exact
+ ipopt.print_level: 5
+ ipopt.suppress_all_output: 'yes'
+ ipopt.sb: 'yes'
+ ilqr.suppress_all_output: 'yes'
+ ilqr.codegen_enabled: true
+ ilqr.codegen_workdir: /tmp/tyhio
+ ilqr.enable_gn: true
+ ilqr.hxx_reg_base: 0.0
+ ilqr.n_threads: 0
+ print_time: 0
+
+constraints:
+ - FL_foot_ball
+ - FR_foot_ball
+ - RR_foot_ball
+ - RL_foot_ball
+
+costs:
+ - z_FL_foot_ball
+ - z_FR_foot_ball
+ - z_RR_foot_ball
+ - z_RL_foot_ball
+ # - vz_FL_foot_ball
+ # - vz_FR_foot_ball
+ # - vz_RR_foot_ball
+ # - vz_RL_foot_ball
+ # - xy_FL_foot_ball
+ # - xy_FR_foot_ball
+ # - xy_RR_foot_ball
+ # - xy_RL_foot_ball
+ # - vxy_FL_foot_ball
+ # - vxy_FR_foot_ball
+ # - vxy_RR_foot_ball
+ # - vxy_RL_foot_ball
+ - base_lin_velxy
+ - base_lin_velz
+ - base_omega
+ # - base_horizontal_capture
+ - base_capture
+ - joint_posture_capture
+ - v_reg
+ - a_reg
+
+#closed loop
+.define:
+ - &w_base_omega 20
+ - &w_base_vxy 15
+ - &w_base_vz 35
+ - &w_base_z 5
+ - &w_base_hor_capture 200.
+ - &w_base_capture 200.
+ - &w_contact_z 400.0
+ - &w_contact_vz 1000.0
+ - &w_contact_xy 80.0
+ - &w_contact_vxy 80.0
+ - &wheel_radius 0.1125
+
+base_lin_velxy:
+ type: Cartesian
+ distal_link: base
+ indices: [0, 1]
+ nodes: ${range(1, N-5)}
+ cartesian_type: velocity
+ weight: *w_base_vxy
+
+base_lin_velz:
+ type: Cartesian
+ distal_link: base
+ indices: [2]
+ nodes: ${range(1, N-5)}
+ cartesian_type: velocity
+ weight: *w_base_vz
+
+base_omega:
+ type: Cartesian
+ distal_link: base
+ indices: [3, 4, 5]
+ nodes: ${range(1, N-5)}
+ cartesian_type: velocity
+ weight: *w_base_omega
+
+base_capture:
+ type: Cartesian
+ distal_link: base
+ indices: [0, 1, 2, 3, 4, 5]
+ nodes: ${range(N-5, N+1)}
+ cartesian_type: velocity
+ weight: *w_base_capture
+
+base_horizontal_capture:
+ type: Cartesian
+ distal_link: base_link
+ indices: [3, 4]
+ nodes: ${range(N-5, N+1)}
+ # cartesian_type: position
+ weight: *w_base_hor_capture
+
+# ==================================
+rolling_contact_FL_foot:
+ type: Rolling
+ frame: FL_foot
+ radius: *wheel_radius
+
+rolling_contact_FR_foot:
+ type: Rolling
+ frame: FR_foot
+ radius: *wheel_radius
+
+rolling_contact_RR_foot:
+ type: Rolling
+ frame: RR_foot
+ radius: *wheel_radius
+
+rolling_contact_RL_foot:
+ type: Rolling
+ frame: RL_foot
+ radius: *wheel_radius
+
+# ==================================
+
+interaction_FL_foot:
+ type: VertexForce
+ frame: FL_foot_ball
+ fn_min: 50
+ enable_fc: true
+ friction_coeff: 0.2
+ vertex_frames:
+ - FL_foot
+
+interaction_FR_foot:
+ type: VertexForce
+ frame: FR_foot_ball
+ fn_min: 50
+ enable_fc: true
+ friction_coeff: 0.2
+ vertex_frames:
+ - FR_foot
+
+interaction_RR_foot:
+ type: VertexForce
+ frame: RR_foot_ball
+ fn_min: 50
+ enable_fc: true
+ friction_coeff: 0.2
+ vertex_frames:
+ - RR_foot
+
+interaction_RL_foot:
+ type: VertexForce
+ frame: RL_foot_ball
+ fn_min: 50
+ enable_fc: true
+ friction_coeff: 0.2
+ vertex_frames:
+ - RL_foot
+
+FL_foot_ball:
+ type: Contact
+ subtask: [interaction_FL_foot, rolling_contact_FL_foot]
+
+FR_foot_ball:
+ type: Contact
+ subtask: [interaction_FR_foot, rolling_contact_FR_foot]
+
+RR_foot_ball:
+ type: Contact
+ subtask: [interaction_RR_foot, rolling_contact_RR_foot]
+
+RL_foot_ball:
+ type: Contact
+ subtask: [interaction_RL_foot, rolling_contact_RL_foot]
+
+# when using continuous joints
+joint_posture_capture:
+ type: Postural
+ weight: [100.0, 80.0, 35.0,
+ 100.0, 80.0, 35.0,
+ 100.0, 80.0, 35.0,
+ 100.0, 80.0, 35.0]
+ indices: [0, 1, 2,
+ 5, 6, 7,
+ 10, 11, 12,
+ 15, 16, 17]
+ nodes: ${range(N-5, N+1)}
+
+v_reg:
+ type: Regularization
+ variable_name: v
+ nodes: ${range(0, N+1)}
+ indices: [0, 1, 2, 3, 4, 5,
+ 6, 7, 8, 9,
+ 10, 11, 12, 13,
+ 14, 15, 16, 17,
+ 18, 19, 20, 21]
+ weight: [3e-1, 3e-1, 3e-1, 3e-1, 3e-1, 3e-1,
+ 6e-1, 6e-1, 6e-1, 8e-2,
+ 6e-1, 6e-1, 6e-1, 8e-2,
+ 6e-1, 6e-1, 6e-1, 8e-2,
+ 6e-1, 6e-1, 6e-1, 8e-2]
+
+a_reg:
+ type: Regularization
+ variable_name: a
+ nodes: ${range(0, N+1)}
+ indices: [0, 1, 2, 3, 4, 5,
+ 6, 7, 8, 9,
+ 10, 11, 12, 13,
+ 14, 15, 16, 17,
+ 18, 19, 20, 21]
+ weight: [5e-1, 5e-1, 5e-1, 5e-1, 5e-1, 5e-1,
+ 8e-2, 8e-2, 8e-2, 1e-1,
+ 8e-2, 8e-2, 8e-2, 1e-1,
+ 8e-2, 8e-2, 8e-2, 1e-1,
+ 8e-2, 8e-2, 8e-2, 1e-1]
+
+z_FL_foot_ball:
+ type: Cartesian
+ distal_link: FL_foot
+ indices: [2]
+ cartesian_type: position
+ weight: *w_contact_z
+
+z_FR_foot_ball:
+ type: Cartesian
+ distal_link: FR_foot
+ indices: [2]
+ cartesian_type: position
+ weight: *w_contact_z
+
+z_RR_foot_ball:
+ type: Cartesian
+ distal_link: RR_foot
+ indices: [2]
+ cartesian_type: position
+ weight: *w_contact_z
+
+z_RL_foot_ball:
+ type: Cartesian
+ distal_link: RL_foot
+ indices: [2]
+ cartesian_type: position
+ weight: *w_contact_z
+
+vz_FL_foot_ball:
+ type: Cartesian
+ distal_link: FL_foot
+ indices: [2]
+ cartesian_type: velocity
+ weight: *w_contact_vz
+
+vz_FR_foot_ball:
+ type: Cartesian
+ distal_link: FR_foot
+ indices: [2]
+ cartesian_type: velocity
+ weight: *w_contact_vz
+
+vz_RR_foot_ball:
+ type: Cartesian
+ distal_link: RR_foot
+ indices: [2]
+ cartesian_type: velocity
+ weight: *w_contact_vz
+
+vz_RL_foot_ball:
+ type: Cartesian
+ distal_link: RL_foot
+ indices: [2]
+ cartesian_type: velocity
+ weight: *w_contact_vz
+
+xy_FL_foot_ball:
+ type: Cartesian
+ distal_link: FL_foot
+ indices: [0, 1]
+ cartesian_type: position
+ weight: *w_contact_xy
+
+xy_FR_foot_ball:
+ type: Cartesian
+ distal_link: FR_foot
+ indices: [0, 1]
+ cartesian_type: position
+ weight: *w_contact_xy
+
+xy_RR_foot_ball:
+ type: Cartesian
+ distal_link: RR_foot
+ indices: [0, 1]
+ cartesian_type: position
+ weight: *w_contact_xy
+
+xy_RL_foot_ball:
+ type: Cartesian
+ distal_link: RL_foot
+ indices: [0, 1]
+ cartesian_type: position
+ weight: *w_contact_xy
+
+vxy_FL_foot_ball:
+ type: Cartesian
+ distal_link: FL_foot
+ indices: [0, 1]
+ cartesian_type: velocity
+ weight: *w_contact_vxy
+
+vxy_FR_foot_ball:
+ type: Cartesian
+ distal_link: FR_foot
+ indices: [0, 1]
+ cartesian_type: velocity
+ weight: *w_contact_vxy
+
+vxy_RR_foot_ball:
+ type: Cartesian
+ distal_link: RR_foot
+ indices: [0, 1]
+ cartesian_type: velocity
+ weight: *w_contact_vxy
+
+vxy_RL_foot_ball:
+ type: Cartesian
+ distal_link: RL_foot
+ indices: [0, 1]
+ cartesian_type: velocity
+ weight: *w_contact_vxy
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/bundle.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/bundle.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..552ab2939e273e6b594c11a328e81a1825dda834
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/bundle.yaml
@@ -0,0 +1,92 @@
+bundle_format: augmpc_model_bundle_v1
+bundle_name: d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv
+checkpoint_file: d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv_model
+preserved_training_cfgs:
+ - ibrido_run__2026_03_28_11_09_54_ID/training_cfg_unitree_b2w.sh
+framework:
+ repos:
+ adarl:
+ commit: 72c1a8eeb0723f86d1ba8af52a4777eed7677632
+ branch: master
+ remote: https://gitlab.com/crzz/adarl.git
+ dirty: false
+ AugMPC:
+ commit: 909ab6c0bab771ee6080e6d5bc43c142477084d6
+ branch: ibrido
+ remote: git@github.com:AndrePatri/AugMPC.git
+ dirty: true
+ AugMPCEnvs:
+ commit: 9785e0bd27939ebfdbfec112a29fee189371fb97
+ branch: ibrido
+ remote: git@github.com:AndrePatri/AugMPCEnvs.git
+ dirty: true
+ casadi:
+ commit: 79cebe3b416dee22abc87de0076b80a5b97bd345
+ branch: optional_float
+ remote: git@github.com:AndrePatri/casadi.git
+ dirty: false
+ CentauroHybridMPC:
+ commit: 9262cb617a6ef1b75887b806181f32ed2566b9af
+ branch: ibrido
+ remote: git@github.com:ADVRHumanoids/CentauroHybridMPC.git
+ dirty: false
+ EigenIPC:
+ commit: 7c1c3ecd08716e61ed164a6e0bb788aa716705ca
+ branch: devel
+ remote: git@github.com:AndrePatri/EigenIPC.git
+ dirty: false
+ horizon:
+ commit: 7f9e7bc89061fd1776d7578aca5566c0d401c150
+ branch: ibrido
+ remote: git@github.com:AndrePatri/horizon.git
+ dirty: false
+ IBRIDO:
+ commit: 2cb0ec5366b4f46cbb09933c35b91b9613f42071
+ branch: main
+ remote: git@github.com:AndrePatri/IBRIDO.git
+ dirty: false
+ iit-centauro-ros-pkg:
+ commit: 6069807ebb37a6d7df39430a02685e09ed9b167a
+ branch: ibrido_ros2
+ remote: git@github.com:AndrePatri/iit-centauro-ros-pkg.git
+ dirty: false
+ iit-dagana-ros-pkg:
+ commit: f7ecd6c84a0b7f3320c1b7de6449cbcd4445d2fe
+ branch: ibrido_ros2
+ remote: git@github.com:AndrePatri/iit-dagana-ros-pkg.git
+ dirty: false
+ iit-kyon-description:
+ commit: 50fd7c8909b3ddfd1ebbe67c61d6b775b86df6b1
+ branch: ibrido_ros2
+ remote: git@github.com:ADVRHumanoids/iit-kyon-ros-pkg.git
+ dirty: false
+ iit-kyon-ros-pkg:
+ commit: 298917efffb63dbca540e0aedbd21b41bf393fbf
+ branch: ibrido_ros2_simple
+ remote: git@github.com:ADVRHumanoids/iit-kyon-ros-pkg.git
+ dirty: false
+ KyonRLStepping:
+ commit: ffbb9e1db6ed501cce16043337ff0c0702a4164a
+ branch: ibrido
+ remote: git@github.com:ADVRHumanoids/KyonRLStepping.git
+ dirty: false
+ MPCHive:
+ commit: 45b4fc692850cef607020dda2e32fd708e7fff62
+ branch: devel
+ remote: git@github.com:AndrePatri/MPCHive.git
+ dirty: false
+ MPCViz:
+ commit: 806d03efcc9d8ab1fb04991a159c19ba89bfb85b
+ branch: ros2_humble
+ remote: git@github.com:AndrePatri/MPCViz.git
+ dirty: false
+ phase_manager:
+ commit: 9925d18c0d7a55d013cbaa4998c61d85a3a8944f
+ branch: ibrido
+ remote: git@github.com:AndrePatri/phase_manager.git
+ dirty: false
+ unitree_ros:
+ commit: e5904dc2f181981dfbb23d4287c6823a7f4afed4
+ branch: ibrido
+ remote: git@github.com:AndrePatri/unitree_ros.git
+ dirty: false
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv_model b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv_model
new file mode 100644
index 0000000000000000000000000000000000000000..0a37e2718bb68aae0edb1af72f47e961278b30bc
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv_model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b2ac3966e2ada1097268085a9149415e96da2e3ad707245200dc27902db83ba
+size 1651565
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/fake_pos_tracking_env.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/fake_pos_tracking_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c5acd9248a46d3a9b2c3669fe43ae8ed502e6b6
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/fake_pos_tracking_env.py
@@ -0,0 +1,202 @@
+import os
+
+from typing import Dict
+
+import torch
+
+from EigenIPC.PyEigenIPC import VLevel
+
+from mpc_hive.utilities.math_utils_torch import world2base_frame
+
+from aug_mpc_envs.training_envs.twist_tracking_env import TwistTrackingEnv
+
+class FakePosTrackingEnv(TwistTrackingEnv):
+ """Converts random planar position goals into twist references so the agent learns to drive the robot toward targets while managing contact scheduling."""
+
+ def __init__(self,
+ namespace: str,
+ actions_dim: int = 10,
+ verbose: bool = False,
+ vlevel: VLevel = VLevel.V1,
+ use_gpu: bool = True,
+ dtype: torch.dtype = torch.float32,
+ debug: bool = True,
+ override_agent_refs: bool = False,
+ timeout_ms: int = 60000,
+ env_opts: Dict = {}):
+
+ self._add_env_opt(env_opts, "max_distance", default=10.0) # [m]
+ self._add_env_opt(env_opts, "min_distance", default=0.0) # [m]
+ self._add_env_opt(env_opts, "max_vref", default=1.0) # [m/s]
+ self._add_env_opt(env_opts, "max_dp", default=5.0) # [m] after this, v ref saturates
+ self._add_env_opt(env_opts, "max_dt", default=env_opts["max_dp"]/ env_opts["max_vref"])
+
+ TwistTrackingEnv.__init__(self,
+ namespace=namespace,
+ actions_dim=actions_dim, # twist + contact flags
+ verbose=verbose,
+ vlevel=vlevel,
+ use_gpu=use_gpu,
+ dtype=dtype,
+ debug=debug,
+ override_agent_refs=override_agent_refs,
+ timeout_ms=timeout_ms,
+ env_opts=env_opts)
+
+ def get_file_paths(self):
+ paths=TwistTrackingEnv.get_file_paths(self)
+ paths.append(os.path.abspath(__file__))
+ return paths
+
+ def _custom_post_init(self):
+ TwistTrackingEnv._custom_post_init(self)
+
+ # position targets to be reached (wrt robot's pos at ep start)
+ self._p_trgt_w=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2].detach().clone()
+ self._p_delta_w=self._p_trgt_w.detach().clone()
+ self._dp_norm=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device)
+ self._dp_versor=self._p_trgt_w.detach().clone()
+
+ self._trgt_d=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device)
+ self._trgt_theta=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device)
+
+ def _update_loc_twist_refs(self):
+ # this is called at each env substep
+
+ self._compute_twist_ref_w()
+
+ if not self._override_agent_refs:
+ agent_p_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="p",
+ gpu=self._use_gpu)
+ agent_p_ref_current[:, 0:2]=self._p_trgt_w
+
+ # then convert it to base ref local for the agent
+ robot_q = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
+ # rotate agent ref from world to robot base
+ world2base_frame(t_w=self._agent_twist_ref_current_w, q_b=robot_q,
+ t_out=self._agent_twist_ref_current_base_loc)
+ # write it to agent refs tensors
+ self._agent_refs.rob_refs.root_state.set(data_type="twist", data=self._agent_twist_ref_current_base_loc,
+ gpu=self._use_gpu)
+
+ def _compute_twist_ref_w(self, env_indxs: torch.Tensor = None):
+
+ # angular refs are not altered
+ if env_indxs is None:
+ # we update the position error using the current base position
+ self._p_delta_w[:, :]=self._p_trgt_w-\
+ self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2]
+
+ self._dp_norm[:, :]=self._p_delta_w.norm(dim=1,keepdim=True)+1e-6
+ self._dp_versor[:, :]=self._p_delta_w/self._dp_norm
+
+ # apply for vref saturation
+ to_be_saturated=self._dp_norm[:, :]>self._env_opts["max_dp"]
+ self._dp_norm[to_be_saturated.flatten(), :]=self._env_opts["max_dp"]
+
+ # we compute the twist refs for the agent depending of the position error
+ self._agent_twist_ref_current_w[:, 0:2]=self._dp_norm*self._dp_versor/self._env_opts["max_dt"]
+ self._agent_twist_ref_current_w[:, 2:3]=0 # no vertical vel
+
+ # apply pof0 using last value of bernoully coeffs
+ self._agent_twist_ref_current_w[:, 0:3] = self._agent_twist_ref_current_w[:, 0:3]*self._bernoulli_coeffs_linvel # linvel
+ self._agent_twist_ref_current_w[:, 3:6] = self._agent_twist_ref_current_w[:, 3:6]*self._bernoulli_coeffs_omega # omega
+ else:
+ self._p_delta_w[env_indxs, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[env_indxs, 0:2] -\
+ self._p_trgt_w[env_indxs, :]
+
+ # apply for vref saturation
+ to_be_saturated=torch.logical_and((self._dp_norm[:, :]>self._env_opts["max_dp"]).flatten(),env_indxs)
+ self._dp_norm[to_be_saturated.flatten(), :]=self._env_opts["max_dp"]
+
+ self._dp_norm[env_indxs, :]=self._p_delta_w[env_indxs, :].norm(dim=1,keepdim=True)+1e-6
+ self._dp_versor[env_indxs, :]=self._p_delta_w[env_indxs, :]/self._dp_norm[env_indxs, :]
+
+ self._agent_twist_ref_current_w[env_indxs, 0:2]=self._dp_norm[env_indxs, :]*self._dp_versor[env_indxs, :]/self._env_opts["max_dt"]
+ self._agent_twist_ref_current_w[env_indxs, 2:3]=0 # no vertical vel
+
+ # apply pof0 using last value of bernoully coeffs
+ self._agent_twist_ref_current_w[env_indxs, 0:3] = self._agent_twist_ref_current_w[env_indxs, 0:3]*self._bernoulli_coeffs_linvel[env_indxs, :]
+ self._agent_twist_ref_current_w[env_indxs, 3:6] = self._agent_twist_ref_current_w[env_indxs, 3:6]*self._bernoulli_coeffs_omega[env_indxs, :] # omega
+
+ def _override_refs(self,
+ env_indxs: torch.Tensor = None):
+
+ # runs at every post_step
+ self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem
+ if self._use_gpu:
+ # copies latest refs to GPU
+ self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False)
+
+ agent_p_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="p",
+ gpu=self._use_gpu)
+
+ agent_yaw_omega_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="omega",
+ gpu=self._use_gpu)
+
+ # self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] + \
+ # agent_p_ref_current[:, 0:2]
+ self._p_trgt_w[:, :]=agent_p_ref_current[:, 0:2] # set p target target from shared mem
+
+ self._agent_twist_ref_current_w[:, 5:6]=agent_yaw_omega_ref_current[:, 2:3] # set yaw ang. vel target from shared mem
+
+ def _debug_agent_refs(self):
+ if self._use_gpu:
+ self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False)
+ self._agent_refs.rob_refs.root_state.synch_all(read=False, retry = True)
+
+ def _randomize_task_refs(self,
+ env_indxs: torch.Tensor = None):
+
+ # we randomize the target position/omega in world frame
+ if env_indxs is None:
+ self._trgt_d.uniform_(self._env_opts["min_distance"], self._env_opts["max_distance"])
+ self._trgt_theta.uniform_(0.0, 2*torch.pi)
+
+ self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] +\
+ torch.cat((self._trgt_d*torch.cos(self._trgt_theta)
+ ,self._trgt_d*torch.sin(self._trgt_theta)), dim=1)
+
+ # randomize just omega
+ random_uniform=torch.full_like(self._agent_twist_ref_current_w[:, 3:6], fill_value=0.0)
+ torch.nn.init.uniform_(random_uniform, a=-1, b=1)
+ self._agent_twist_ref_current_w[:, 3:6] = random_uniform*self._twist_ref_scale[:, 3:6] + self._twist_ref_offset[:, 3:6]
+
+ # sample for all envs pof0
+ if self._env_opts["use_pof0"]: # sample from bernoulli distribution and update coefficients
+ torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"]
+ torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega)
+
+ else:
+
+ if env_indxs.any():
+ integer_idxs=torch.nonzero(env_indxs).flatten()
+
+ trgt_d_selected=self._trgt_d[integer_idxs, :]
+ trgt_d_selected.uniform_(self._env_opts["min_distance"], self._env_opts["max_distance"])
+ self._trgt_d[integer_idxs, :]=trgt_d_selected
+
+ trgt_theta_selected=self._trgt_theta[integer_idxs, :]
+ trgt_theta_selected.uniform_(0.0, 2*torch.pi)
+ self._trgt_theta[integer_idxs, :]=trgt_theta_selected
+
+ self._p_trgt_w[integer_idxs, 0:1]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[integer_idxs, 0:1] +\
+ self._trgt_d[integer_idxs, :]*torch.cos(self._trgt_theta[integer_idxs, :])
+ self._p_trgt_w[integer_idxs, 1:2]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[integer_idxs, 1:2] +\
+ self._trgt_d[integer_idxs, :]*torch.sin(self._trgt_theta[integer_idxs, :])
+
+ # randomize just omega
+ random_uniform=torch.full_like(self._agent_twist_ref_current_w[env_indxs, 3:6], fill_value=0.0)
+ torch.nn.init.uniform_(random_uniform, a=-1, b=1)
+ self._agent_twist_ref_current_w[env_indxs, 3:6] = random_uniform*self._twist_ref_scale[:, 3:6] + self._twist_ref_offset[:, 3:6]
+
+ # sample for all envs pof0, then reset to 1 for envs which are not to be randomized
+ if self._env_opts["use_pof0"]: # sample from bernoulli distribution and update coefficients
+ torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"]
+ torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega)
+ self._bernoulli_coeffs_linvel[~env_indxs, :]=1
+ self._bernoulli_coeffs_omega[~env_indxs, :]=1
+
+ self._compute_twist_ref_w(env_indxs=env_indxs) # update linear vel twist refs based on pos error
+
+
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/hybrid_quad_rhc.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/hybrid_quad_rhc.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cec4e610e97e3100c1468ee4e828fe2e64a28c4
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/hybrid_quad_rhc.py
@@ -0,0 +1,1324 @@
+from mpc_hive.controllers.rhc import RHController
+
+from aug_mpc.controllers.rhc.horizon_based.horizon_imports import *
+
+from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc_refs import HybridQuadRhcRefs
+from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import Journal, LogType
+
+import numpy as np
+
+import os
+
+import time
+
+from typing import Dict, List
+
+class HybridQuadRhc(RHController):
+
+ def __init__(self,
+ srdf_path: str,
+ urdf_path: str,
+ config_path: str,
+ robot_name: str, # used for shared memory namespaces
+ codegen_dir: str,
+ n_nodes:float = 25,
+ injection_node:int = 10,
+ dt: float = 0.02,
+ max_solver_iter = 1, # defaults to rt-iteration
+ open_loop: bool = True,
+ close_loop_all: bool = False,
+ dtype = np.float32,
+ verbose = False,
+ debug = False,
+ refs_in_hor_frame = True,
+ timeout_ms: int = 60000,
+ custom_opts: Dict = {}):
+
+ self._refs_in_hor_frame = refs_in_hor_frame
+
+ self._injection_node = injection_node
+
+ self._open_loop = open_loop
+ self._close_loop_all = close_loop_all
+
+ self._codegen_dir = codegen_dir
+ if not os.path.exists(self._codegen_dir):
+ os.makedirs(self._codegen_dir)
+ # else:
+ # # Directory already exists, delete it and recreate
+ # shutil.rmtree(self._codegen_dir)
+ # os.makedirs(self._codegen_dir)
+
+ self.step_counter = 0
+ self.sol_counter = 0
+
+ self.max_solver_iter = max_solver_iter
+
+ self._timer_start = time.perf_counter()
+ self._prb_update_time = time.perf_counter()
+ self._phase_shift_time = time.perf_counter()
+ self._task_ref_update_time = time.perf_counter()
+ self._rti_time = time.perf_counter()
+
+ self.robot_name = robot_name
+
+ self.config_path = config_path
+
+ self.urdf_path = urdf_path
+ # read urdf and srdf files
+ with open(self.urdf_path, 'r') as file:
+ self.urdf = file.read()
+ self._base_init = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0])
+
+ self._c_timelines = dict()
+ self._f_reg_timelines = dict()
+
+ self._custom_opts={"replace_continuous_joints": False,
+ "use_force_feedback": False,
+ "lin_a_feedback": False,
+ "is_open_loop": self._open_loop, # fully open (just for db)
+ "fully_closed": False, # closed loop with full feedback (just for db)
+ "closed_partial": False, # closed loop with partial feedback
+ "adaptive_is": True, # closed loop with adaptation
+ "estimate_v_root": False, # when adaptive_is or closed_partial, estimate vbase
+ "alpha_from_outside": False, # alpha set ext. from shared memory
+ "alpha_half": 1.0,
+ "only_vel_wheels": True, # whether wheels (if present) are just vel controlled
+ "use_jnt_v_feedback": False
+ }
+
+ self._custom_opts.update(custom_opts)
+
+ self._alpha_half=self._custom_opts["alpha_half"]
+
+ if self._open_loop:
+ self._custom_opts["fully_closed"]=False
+ self._custom_opts["adaptive_is"]=False
+ self._custom_opts["closed_partial"]=False
+ else:
+ self._custom_opts["is_open_loop"]=False
+ if self._custom_opts["fully_closed"]:
+ self._custom_opts["adaptive_is"]=False
+ self._custom_opts["closed_partial"]=False
+ self._custom_opts["lin_a_feedback"]=False
+ if self._custom_opts["closed_partial"]:
+ self._custom_opts["adaptive_is"]=False
+ self._custom_opts["fully_closed"]=False
+ self._custom_opts["lin_a_feedback"]=False
+ if self._custom_opts["adaptive_is"]:
+ self._custom_opts["closed_partial"]=False
+ self._custom_opts["fully_closed"]=False
+
+ super().__init__(srdf_path=srdf_path,
+ n_nodes=n_nodes,
+ dt=dt,
+ namespace=self.robot_name,
+ dtype=dtype,
+ verbose=verbose,
+ debug=debug,
+ timeout_ms=timeout_ms)
+
+ self._rhc_fpaths.append(self.config_path)
+
+ def _config_override(self):
+ pass
+
+ def _post_problem_init(self):
+
+ self.rhc_costs={}
+ self.rhc_constr={}
+
+ self._fail_idx_scale=0.0
+ self._expl_idx_window_size=int(1*self._n_nodes)
+ self._explosion_idx_buffer=np.zeros((1,self._expl_idx_window_size))
+ self._expl_idx_counter=0
+ self._expl_idx_buffer_counter=0
+
+ self._pred_node_idx=self._n_nodes-1
+
+ self._nq=self.nq()
+ self._nq_jnts=self._nq-7# assuming floating base
+ self._nv=self.nv()
+ self._nv_jnts=self._nv-6
+
+ self._alphas_q_root=np.zeros((7, 1), dtype=self._dtype)
+ self._alphas_q_jnts=np.zeros((self._nq_jnts, 1), dtype=self._dtype)
+ self._alphas_twist_root=np.zeros((6, 1), dtype=self._dtype)
+ self._alphas_v_jnts=np.zeros((self._nv_jnts, 1), dtype=self._dtype)
+ self._alphas_a=np.zeros((self._nv, 1), dtype=self._dtype)
+ self._alphas_q_root[:, :]=1.0 # default to all open
+ self._alphas_q_jnts[:, :]=1.0
+ self._alphas_twist_root[:, :]=1.0
+ self._alphas_v_jnts[:, :]=1.0
+ self._alphas_a[:, :]=1.0
+
+ def _init_problem(self,
+ fixed_jnt_patterns: List[str] = None,
+ wheels_patterns: List[str] = None,
+ foot_linkname: str = None,
+ flight_duration: int = 10,
+ post_flight_stance: int = 3,
+ step_height: float = 0.12,
+ keep_yaw_vert: bool = False,
+ yaw_vertical_weight: float = 2.0,
+ vertical_landing: bool = False,
+ vertical_land_weight: float = 1.0,
+ phase_force_reg: float = 1e-2,
+ vel_bounds_weight: float = 1.0):
+
+ self._fixed_jnt_patterns=fixed_jnt_patterns
+
+ self._config_override()
+
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f" Will use horizon config file at {self.config_path}",
+ LogType.INFO,
+ throw_when_excep=True)
+
+ self._vel_bounds_weight=vel_bounds_weight
+ self._phase_force_reg=phase_force_reg
+ self._yaw_vertical_weight=yaw_vertical_weight
+ self._vertical_land_weight=vertical_land_weight
+ # overrides parent
+ self._prb = Problem(self._n_intervals,
+ receding=True,
+ casadi_type=cs.SX)
+ self._prb.setDt(self._dt)
+
+ if "replace_continuous_joints" in self._custom_opts:
+ # continous joints are parametrized in So2
+ if self._custom_opts["replace_continuous_joints"]:
+ self.urdf = self.urdf.replace('continuous', 'revolute')
+ else:
+ self.urdf = self.urdf.replace('continuous', 'revolute')
+
+ self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf) # used for getting joint names
+ self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names())
+
+ self._init_robot_homer()
+
+ # handle fixed joints
+ fixed_joint_map={}
+ if self._fixed_jnt_patterns is not None:
+ for jnt_name in self._get_robot_jnt_names():
+ for fixed_jnt_pattern in self._fixed_jnt_patterns:
+ if fixed_jnt_pattern in jnt_name:
+ fixed_joint_map.update({f"{jnt_name}":
+ self._homer.get_homing_val(jnt_name=jnt_name)})
+ break # do not search for other pattern matches
+
+ if not len(fixed_joint_map)==0: # we need to recreate kin dyn and homers
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f"Will fix following joints: \n{str(fixed_joint_map)}",
+ LogType.INFO,
+ throw_when_excep=True)
+ # with the fixed joint map
+ self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf,fixed_joints=fixed_joint_map)
+ # assign again controlled joints names
+ self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names())
+ # updated robot homer for controlled joints
+ self._init_robot_homer()
+
+ # handle continuous joints (need to change homing and retrieve
+ # cont jnts indexes) and homing
+ self._continuous_joints=self._get_continuous_jnt_names()
+ # reduced
+ self._continuous_joints_idxs=[]
+ self._continuous_joints_idxs_cos=[]
+ self._continuous_joints_idxs_sin=[]
+ self._continuous_joints_idxs_red=[]
+ self._rev_joints_idxs=[]
+ self._rev_joints_idxs_red=[]
+ # qfull
+ self._continuous_joints_idxs_qfull=[]
+ self._continuous_joints_idxs_cos_qfull=[]
+ self._continuous_joints_idxs_sin_qfull=[]
+ self._continuous_joints_idxs_red_qfull=[]
+ self._rev_joints_idxs_qfull=[]
+ self._rev_joints_idxs_red_qfull=[]
+ jnt_homing=[""]*(len(self._homer.get_homing())+len(self._continuous_joints))
+ jnt_names=self._get_robot_jnt_names()
+ for i in range(len(jnt_names)):
+ jnt=jnt_names[i]
+ index=self._get_jnt_id(jnt)# accounting for floating joint
+ homing_idx=index-7 # homing is only for actuated joints
+ homing_value=self._homer.get_homing_val(jnt)
+ if jnt in self._continuous_joints:
+ jnt_homing[homing_idx]=np.cos(homing_value).item()
+ jnt_homing[homing_idx+1]=np.sin(homing_value).item()
+ # just actuated joints
+ self._continuous_joints_idxs.append(homing_idx) # cos
+ self._continuous_joints_idxs.append(homing_idx+1) # sin
+ self._continuous_joints_idxs_cos.append(homing_idx)
+ self._continuous_joints_idxs_sin.append(homing_idx+1)
+ self._continuous_joints_idxs_red.append(i)
+ # q full
+ self._continuous_joints_idxs_qfull.append(index) # cos
+ self._continuous_joints_idxs_qfull.append(index+1) # sin
+ self._continuous_joints_idxs_cos_qfull.append(index)
+ self._continuous_joints_idxs_sin_qfull.append(index+1)
+ self._continuous_joints_idxs_red_qfull.append(i+7)
+ else:
+ jnt_homing[homing_idx]=homing_value
+ # just actuated joints
+ self._rev_joints_idxs.append(homing_idx)
+ self._rev_joints_idxs_red.append(i)
+ # q full
+ self._rev_joints_idxs_qfull.append(index)
+ self._rev_joints_idxs_red_qfull.append(i+7)
+
+ self._jnts_q_reduced=None
+ if not len(self._continuous_joints)==0:
+ cont_joints=", ".join(self._continuous_joints)
+
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f"The following continuous joints were found: \n{cont_joints}",
+ LogType.INFO,
+ throw_when_excep=True)
+ # preallocating data
+ self._jnts_q_reduced=np.zeros((1,self.nv()-6),dtype=self._dtype)
+ self._jnts_q_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype)
+ self._full_q_reduced=np.zeros((7+len(jnt_names), self._n_nodes),dtype=self._dtype)
+ self._jnts_q_delta_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype)
+ else:
+ self._custom_opts["replace_continuous_joints"]=True
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f"No continuous joints were found.",
+ LogType.INFO,
+ throw_when_excep=True)
+
+ # retrieve wheels indexes (not considering continuous joints,
+ # ok just for v, eff, etc..)
+ self._wheel_patterns=wheels_patterns
+ self._wheels_idxs_v=self._get_wheels_jnt_v_idxs(wheel_patterns=self._wheel_patterns)
+ self._f0 = [0, 0, self._kin_dyn.mass()/4*9.81]
+
+ # we can create an init for the base
+ init = self._base_init.tolist() + jnt_homing
+
+ if foot_linkname is not None:
+ FK = self._kin_dyn.fk(foot_linkname) # just to get robot reference height
+ ground_level = FK(q=init)['ee_pos']
+ self._base_init[2] = -ground_level[2] # override init
+
+ self._model = FullModelInverseDynamics(problem=self._prb,
+ kd=self._kin_dyn,
+ q_init=self._homer.get_homing_map(),
+ base_init=self._base_init)
+
+ self._ti = TaskInterface(prb=self._prb,
+ model=self._model,
+ max_solver_iter=self.max_solver_iter,
+ debug = self._debug,
+ verbose = self._verbose,
+ codegen_workdir = self._codegen_dir)
+ self._ti.setTaskFromYaml(self.config_path)
+
+ # setting initial base pos ref if exists
+ base_pos = self._ti.getTask('base_height')
+ if base_pos is not None:
+ base_pos.setRef(np.atleast_2d(self._base_init).T)
+
+ self._pm = pymanager.PhaseManager(self._n_nodes, debug=False) # intervals or nodes?????
+
+ self._gm = GaitManager(self._ti,
+ self._pm,
+ self._injection_node,
+ keep_yaw_vert=keep_yaw_vert,
+ yaw_vertical_weight=self._yaw_vertical_weight,
+ vertical_landing=vertical_landing,
+ landing_vert_weight=self._vertical_land_weight,
+ phase_force_reg=self._phase_force_reg,
+ custom_opts=self._custom_opts,
+ flight_duration=flight_duration,
+ post_flight_stance=post_flight_stance,
+ step_height=step_height,
+ dh=0.0)
+
+ self._ti.model.q.setBounds(self._ti.model.q0, self._ti.model.q0, nodes=0)
+ self._ti.model.v.setBounds(self._ti.model.v0, self._ti.model.v0, nodes=0)
+ self._ti.model.q.setInitialGuess(self._ti.model.q0)
+ self._ti.model.v.setInitialGuess(self._ti.model.v0)
+ for _, cforces in self._ti.model.cmap.items():
+ n_contact_f=len(cforces)
+ for c in cforces:
+ c.setInitialGuess(np.array(self._f0)/n_contact_f)
+
+ vel_lims = self._model.kd.velocityLimits()
+ import horizon.utils as utils
+ self._prb.createResidual('vel_lb_barrier', self._vel_bounds_weight*utils.utils.barrier(vel_lims[7:] - self._model.v[7:]))
+ self._prb.createResidual('vel_ub_barrier', self._vel_bounds_weight*utils.utils.barrier1(-1 * vel_lims[7:] - self._model.v[7:]))
+
+ self._meas_lin_a_par=None
+ # if self._custom_opts["lin_a_feedback"]:
+ # # acceleration feedback on first node
+ # self._meas_lin_a_par=self._prb.createParameter(name="lin_a_feedback",
+ # dim=3, nodes=0)
+ # base_lin_a_prb=self._prb.getInput().getVars()[0:3]
+ # self._prb.createConstraint('lin_acceleration_feedback', base_lin_a_prb - self._meas_lin_a_par,
+ # nodes=[0])
+
+ # if not self._open_loop:
+ # # we create a residual cost to be used as an attractor to the measured state on the first node
+ # # hard constraints injecting meas. states are pure EVIL!
+ # prb_state=self._prb.getState()
+ # full_state=prb_state.getVars()
+ # state_dim=prb_state.getBounds()[0].shape[0]
+ # meas_state=self._prb.createParameter(name="measured_state",
+ # dim=state_dim, nodes=0)
+ # self._prb.createResidual('meas_state_attractor', meas_state_attractor_weight * (full_state - meas_state),
+ # nodes=[0])
+
+ self._ti.finalize()
+ self._ti.bootstrap()
+
+ self._ti.init_inv_dyn_for_res() # we initialize some objects for sol. postprocessing purposes
+ self._ti.load_initial_guess()
+
+ self.n_dofs = self._get_ndofs() # after loading the URDF and creating the controller we
+ # know n_dofs -> we assign it (by default = None)
+
+ self.n_contacts = len(self._model.cmap.keys())
+
+ # remove variables bounds (before they were necessary to have a good
+ # quality bootstrap solution)
+ self._q_inf=np.zeros((self.nq(), 1))
+ self._q_inf[:, :]=np.inf
+ self._v_inf=np.zeros((self.nv(), 1))
+ self._v_inf[:, :]=np.inf
+ self._ti.model.q.setBounds(-self._q_inf, self._q_inf, nodes=0)
+ self._ti.model.v.setBounds(-self._v_inf, self._v_inf, nodes=0)
+
+ # self.horizon_anal = analyzer.ProblemAnalyzer(self._prb)
+
+ def get_file_paths(self):
+ # can be overriden by child
+ paths = super().get_file_paths()
+ return paths
+
+ def _get_quat_remap(self):
+ # overrides parent
+ return [1, 2, 3, 0] # mapping from robot quat. to Horizon's quaternion convention
+
+ def _zmp(self, model):
+
+ num = cs.SX([0, 0])
+ den = cs.SX([0])
+ pos_contact = dict()
+ force_val = dict()
+
+ q = cs.SX.sym('q', model.nq)
+ v = cs.SX.sym('v', model.nv)
+ a = cs.SX.sym('a', model.nv)
+
+ com = model.kd.centerOfMass()(q=q, v=v, a=a)['com']
+
+ n = cs.SX([0, 0, 1])
+ for c in model.fmap.keys():
+ pos_contact[c] = model.kd.fk(c)(q=q)['ee_pos']
+ force_val[c] = cs.SX.sym('force_val', 3)
+ num += (pos_contact[c][0:2] - com[0:2]) * cs.dot(force_val[c], n)
+ den += cs.dot(force_val[c], n)
+
+ zmp = com[0:2] + (num / den)
+ input_list = []
+ input_list.append(q)
+ input_list.append(v)
+ input_list.append(a)
+
+ for elem in force_val.values():
+ input_list.append(elem)
+
+ f = cs.Function('zmp', input_list, [zmp])
+ return f
+
+ def _add_zmp(self):
+
+ input_zmp = []
+
+ input_zmp.append(self._model.q)
+ input_zmp.append(self._model.v)
+ input_zmp.append(self._model.a)
+
+ for f_var in self._model.fmap.values():
+ input_zmp.append(f_var)
+
+ c_mean = cs.SX([0, 0, 0])
+ for c_name, f_var in self._model.fmap.items():
+ fk_c_pos = self._kin_dyn.fk(c_name)(q=self._model.q)['ee_pos']
+ c_mean += fk_c_pos
+
+ c_mean /= len(self._model.cmap.keys())
+
+ zmp_nominal_weight = 10.
+ zmp_fun = self._zmp(self._model)(*input_zmp)
+
+ if 'wheel_joint_1' in self._model.kd.joint_names():
+ zmp_residual = self._prb.createIntermediateResidual('zmp', zmp_nominal_weight * (zmp_fun[0:2] - c_mean[0:2]))
+
+ def _quaternion_multiply(self,
+ q1, q2):
+ x1, y1, z1, w1 = q1
+ x2, y2, z2, w2 = q2
+ w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
+ x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
+ y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
+ z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
+ return np.array([x, y, z, w])
+
+ def _get_continuous_jnt_names(self):
+ import xml.etree.ElementTree as ET
+ root = ET.fromstring(self.urdf)
+ continuous_joints = []
+ for joint in root.findall('joint'):
+ joint_type = joint.get('type')
+ if joint_type == 'continuous':
+ joint_name = joint.get('name')
+ continuous_joints.append(joint_name)
+ return continuous_joints
+
+ def _get_wheels_jnt_v_idxs(self, wheel_patterns: List[str]):
+ jnt_names=self._get_robot_jnt_names()
+ wheels_idxs=[]
+ for i in range(len(jnt_names)):
+ jnt_name=jnt_names[i]
+ for wheel_pattern in wheel_patterns:
+ if wheel_pattern in jnt_name:
+ wheels_idxs.append(i)
+ break
+ return wheels_idxs
+
+ def _get_jnt_id(self, jnt_name):
+ return self._kin_dyn.joint_iq(jnt_name)
+
+ def _init_rhc_task_cmds(self):
+
+ rhc_refs = HybridQuadRhcRefs(gait_manager=self._gm,
+ robot_index_shm=self.controller_index,
+ robot_index_view=0, # when using optimize_mem the view if always of shape 1x...
+ namespace=self.namespace,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ use_force_feedback=self._custom_opts["use_force_feedback"],
+ optimize_mem=True)
+
+ rhc_refs.run()
+
+ rhc_refs.rob_refs.set_jnts_remapping(jnts_remapping=self._to_controller)
+ rhc_refs.rob_refs.set_q_remapping(q_remapping=self._get_quat_remap())
+
+ rhc_refs.set_default_refs(p_ref=np.atleast_2d(self._base_init)[:, 0:3],
+ q_ref=np.atleast_2d(self._base_init)[:, 3:7])
+
+ return rhc_refs
+
+ def get_vertex_fnames_from_ti(self):
+ tasks=self._ti.task_list
+ contact_f_names=[]
+ for task in tasks:
+ if isinstance(task, ContactTask):
+ interaction_task=task.dynamics_tasks[0]
+ contact_f_names.append(interaction_task.vertex_frames[0])
+ return contact_f_names
+
+ def _get_contact_names(self):
+ # should get contact names from vertex frames
+ # list(self._ti.model.cmap.keys())
+ return self.get_vertex_fnames_from_ti()
+
+ def _get_robot_jnt_names(self):
+
+ joints_names = self._kin_dyn.joint_names()
+ to_be_removed = ["universe",
+ "reference",
+ "world",
+ "floating",
+ "floating_base"]
+ for name in to_be_removed:
+ if name in joints_names:
+ joints_names.remove(name)
+
+ return joints_names
+
+ def _get_ndofs(self):
+ return len(self._model.joint_names)
+
+ def nq(self):
+ return self._kin_dyn.nq()
+
+ def nv(self):
+ return self._kin_dyn.nv()
+
+ def _get_robot_mass(self):
+
+ return self._kin_dyn.mass()
+
+ def _get_root_full_q_from_sol(self, node_idx=1):
+
+ root_q_full=self._ti.solution['q'][0:7, node_idx].reshape(1, 7).astype(self._dtype)
+
+ np.nan_to_num(root_q_full, nan=1e3, posinf=1e3, neginf=-1e3, copy=False)
+ np.clip(a=root_q_full, a_min=-1e3, a_max=1e3, out=root_q_full)
+
+ return root_q_full
+
+ def _get_full_q_from_sol(self, node_idx=1):
+
+ return self._ti.solution['q'][:, node_idx].reshape(1, -1).astype(self._dtype)
+
+ def _get_root_twist_from_sol(self, node_idx=1):
+ # provided in world frame
+ twist_base_local=self._get_v_from_sol()[0:6, node_idx].reshape(1, 6)
+ # if world_aligned:
+ # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4]
+ # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix()
+ # twist_base_local[:, 0:3] = r_base_rhc @ twist_base_local[0, 0:3]
+ # twist_base_local[:, 3:6] = r_base_rhc @ twist_base_local[0, 3:6]
+ return twist_base_local
+
+ def _get_root_a_from_sol(self, node_idx=0):
+ # provided in world frame
+ a_base_local=self._get_a_from_sol()[0:6, node_idx].reshape(1, 6)
+ # if world_aligned:
+ # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4]
+ # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix()
+ # a_base_local[:, 0:3] = r_base_rhc @ a_base_local[0, 0:3]
+ # a_base_local[:, 3:6] = r_base_rhc @ v[0, 3:6]
+ return a_base_local
+
    def _get_jnt_q_from_sol(self, node_idx=0,
        reduce: bool = True,
        clamp: bool = True):
        """Actuated-joint positions at solution node node_idx.

        Args:
            node_idx: solution node to read.
            reduce: when True and continuous joints are present, their
                (cos, sin) parametrization is mapped back to a single angle
                via arctan2.
            clamp: in the non-reduced path, wrap angles with fmod(., 2*pi);
                the reduced path always wraps the revolute joints.

        Returns:
            A 1 x n row vector (dtype self._dtype). In the reduced path this
            is the preallocated self._jnts_q_reduced buffer -- callers must
            not hold on to or mutate it.
        """
        full_jnts_q=self._ti.solution['q'][7:, node_idx:node_idx+1].reshape(1,-1).astype(self._dtype)

        # sanitize solver output: replace NaN/inf and clamp to +-1e3
        np.nan_to_num(full_jnts_q, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) # in place
        np.clip(a=full_jnts_q, a_max=1e3, a_min=-1e3, out=full_jnts_q) # in place

        if self._custom_opts["replace_continuous_joints"] or (not reduce):
            # no So2-parametrized joints (or caller wants the raw layout)
            if clamp:
                return np.fmod(full_jnts_q, 2*np.pi)
            else:
                return full_jnts_q
        else:
            # (cos, sin) pairs of the continuous joints, one row per joint
            cos_sin=full_jnts_q[:,self._continuous_joints_idxs].reshape(-1,2)
            # copy rev joint vals (wrapped to [0, 2*pi))
            self._jnts_q_reduced[:, self._rev_joints_idxs_red]=np.fmod(full_jnts_q[:, self._rev_joints_idxs], 2*np.pi).reshape(1, -1)
            # and continuous joints: angle recovered from their (cos, sin) pair
            self._jnts_q_reduced[:, self._continuous_joints_idxs_red]=np.arctan2(cos_sin[:, 1], cos_sin[:, 0]).reshape(1,-1)
            return self._jnts_q_reduced
+
+ def _get_jnt_v_from_sol(self, node_idx=1):
+
+ jnt_v_sol=self._get_v_from_sol()[6:, node_idx].reshape(1,
+ self._nv_jnts)
+ np.nan_to_num(jnt_v_sol, nan=1e5, posinf=1e5, neginf=-1e5, copy=False) # in place
+ # np.clip(a=jnt_v_sol, a_max=1e5, a_min=-1e5, out=jnt_v_sol) # in place
+
+ return jnt_v_sol
+
+ def _get_jnt_a_from_sol(self, node_idx=1):
+
+ return self._get_a_from_sol()[6:, node_idx].reshape(1,
+ self._nv_jnts)
+
+ def _get_jnt_eff_from_sol(self, node_idx=0):
+
+ efforts_on_node = self._ti.eval_efforts_on_node(node_idx=node_idx)
+
+ # if self._custom_opts["only_vel_wheels"]:
+
+ jnt_efforts=efforts_on_node[6:, 0]
+
+ if self._custom_opts["only_vel_wheels"] and self._wheels_idxs_v:
+ jnt_efforts[self._wheels_idxs_v]=0.0
+
+ return jnt_efforts.reshape(1,
+ self._nv_jnts).astype(self._dtype)
+
+ def _get_rhc_cost(self):
+
+ return self._ti.solution["opt_cost"]
+
+ def _get_rhc_constr_viol(self):
+
+ return self._ti.solution["residual_norm"]
+
+ def _get_rhc_nodes_cost(self):
+
+ cost = self._ti.solver_rti.getCostValOnNodes()
+ return cost.reshape((1, -1))
+
+ def _get_rhc_nodes_constr_viol(self):
+
+ constr_viol = self._ti.solver_rti.getConstrValOnNodes()
+ return constr_viol.reshape((1, -1))
+
+ def _get_rhc_niter_to_sol(self):
+
+ return self._ti.solution["n_iter2sol"]
+
+ def _set_ig_bootstrap(self,
+ q_state: np.ndarray = None,
+ v_state: np.ndarray = None):
+
+ xig = self._ti.solution['x_opt'].copy()
+ uig = self._ti.solution['u_opt'].copy()
+
+ # Normalize and keep quaternion in the same hemisphere as the previous
+ # solution to avoid artificial 180-deg jumps in the bootstrap warm start.
+ q_state_boot = q_state.copy()
+ q_prev = xig[3:7, 0]
+ q_now = q_state_boot[3:7, 0]
+
+ q_now_norm = np.linalg.norm(q_now)
+ if q_now_norm > 1e-9:
+ q_state_boot[3:7, :] /= q_now_norm
+ else:
+ q_state_boot[3:7, :] = np.array([[0.0], [0.0], [0.0], [1.0]], dtype=self._dtype)
+
+ q_prev_norm = np.linalg.norm(q_prev)
+ if q_prev_norm > 1e-9:
+ q_prev = q_prev / q_prev_norm
+
+ q_now = q_state_boot[3:7, 0]
+ if np.dot(q_prev, q_now) < 0.0:
+ q_state_boot[3:7, :] *= -1.0
+
+ xig[0:self._nq, :] = q_state_boot
+ xig[self._nq:self._nq + self._nv, :] = 0.0 # 0 velocity on first nodes
+ uig[0:self._nv, :]=0.0 # 0 acceleration
+
+ # assigning ig
+ self._prb.getState().setInitialGuess(xig)
+ self._prb.getInput().setInitialGuess(uig)
+ # self._prb.getVariables("a").setInitialGuess(np.zeros((self._nv, 1), dtype=self._dtype))
+ for _, cforces in self._ti.model.cmap.items():
+ n_contact_f = len(cforces)
+ if n_contact_f == 0:
+ continue
+ f_guess = np.array(self._f0, dtype=self._dtype) / n_contact_f
+ for c in cforces:
+ c.setInitialGuess(f_guess)
+
+ # print("initial guesses")
+ # print(self._nq)
+ # print(self._nv)
+ # print("q")
+ # qig=self._ti.model.q.getInitialGuess()
+ # print(qig.shape)
+ # print(qig)
+ # print("v")
+ # print(self._ti.model.v.getInitialGuess())
+ # print("a")
+ # print(self._ti.model.a.getInitialGuess())
+ # for _, cforces in self._ti.model.cmap.items():
+ # for c in cforces:
+ # print("force")
+ # print(c.getInitialGuess())
+
+ return xig, uig
+
+ def _set_ig(self):
+
+ shift_num = -1 # shift data by one node
+
+ x_opt = self._ti.solution['x_opt']
+ u_opt = self._ti.solution['u_opt']
+
+ # building ig for state
+ xig = np.roll(x_opt,
+ shift_num, axis=1) # rolling state sol.
+ for i in range(abs(shift_num)):
+ # state on last node is copied to the elements
+ # which are "lost" during the shift operation
+ xig[:, -1 - i] = x_opt[:, -1]
+ # building ig for inputs
+ uig = np.roll(u_opt,
+ shift_num, axis=1) # rolling state sol.
+ for i in range(abs(shift_num)):
+ # state on last node is copied to the elements
+ # which are "lost" during the shift operation
+ uig[:, -1 - i] = u_opt[:, -1]
+
+ # assigning ig
+ self._prb.getState().setInitialGuess(xig)
+ self._prb.getInput().setInitialGuess(uig)
+
+ return xig, uig
+
+ def _update_open_loop(self,
+ bootstrap: bool = False):
+
+ q_state, v_state, a_state=self._set_is_open()
+
+ if not bootstrap:
+ self._set_ig()
+ else:
+ self._set_ig_bootstrap(q_state=q_state, v_state=v_state)
+
+ # robot_state=xig[:, 0]
+ # # open loop update:
+ # self._prb.setInitialState(x0=robot_state) # (xig has been shifted, so node 0
+ # # is node 1 in the last opt solution)
+
+ return q_state, v_state, a_state
+
+ def _update_closed_loop(self,
+ bootstrap: bool = False):
+
+ # set initial state
+ q_state=None
+ v_state=None
+ a_state=None
+ if self._custom_opts["adaptive_is"]:
+ # adaptive closed loop
+ q_state, v_state, a_state=self._set_is_adaptive()
+ elif self._custom_opts["fully_closed"]:
+ q_state, v_state, a_state=self._set_is_full()
+ elif self._custom_opts["closed_partial"]:
+ q_state, v_state, a_state=self._set_is_partial()
+ else:
+ Journal.log(self.__class__.__name__,
+ "_update_closed_loop",
+ "Neither adaptive_is, fully_closed, or closed_partial.",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ q_state, v_state, a_state=self._set_is()
+
+ # set initial guess for controller
+ if not bootstrap:
+ self._set_ig()
+ else:
+ self._set_ig_bootstrap(q_state=q_state, v_state=v_state)
+
+ return q_state, v_state, a_state
+
+ def _set_is_open(self):
+
+ # overriding states with rhc data
+ q_full_root=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1)
+ q_jnts=self._get_jnt_q_from_sol(node_idx=1, reduce=False).reshape(-1, 1)
+
+ twist_root=self._get_root_twist_from_sol(node_idx=1).reshape(-1, 1)
+ v_jnts=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1)
+
+ # rhc variables to be set
+ q=self._prb.getVariables("q") # .setBounds()
+ root_q_full_rhc=q[0:7] # root full q
+ jnts_q_rhc=q[7:] # jnts q
+ vel=self._prb.getVariables("v")
+ root_twist_rhc=vel[0:6] # lin v.
+ jnts_v_rhc=vel[6:] # jnts v
+
+ self.rhc_refs.set_alpha(alpha=1.0) # fully open
+
+ # close state on known quantities
+ root_q_full_rhc.setBounds(lb=q_full_root,
+ ub=q_full_root, nodes=0)
+ jnts_q_rhc.setBounds(lb=q_jnts,
+ ub=q_jnts, nodes=0)
+ root_twist_rhc.setBounds(lb=twist_root,
+ ub=twist_root, nodes=0)
+ jnts_v_rhc.setBounds(lb=v_jnts,
+ ub=v_jnts, nodes=0)
+
+ # return state used for feedback
+ q_state=np.concatenate((q_full_root, q_jnts),
+ axis=0)
+ v_state=np.concatenate((twist_root, v_jnts),
+ axis=0)
+
+ return (q_state, v_state, None)
+
    def _set_is_full(self):
        """Fully closed-loop initial state: bound node 0 of the problem to the
        measured robot state (root pose, twist, joint state) from shared memory.

        Returns:
            (q_state, v_state, a_state): measured state vectors used for feedback.
        """
        # measurements (column vectors) from the shared robot state
        q_full_root = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1)

        if (not len(self._continuous_joints)==0): # we need to expand measured continuous joints to So2 (cos, sin)
            self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:]
            self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos
            self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin
            q_jnts=self._jnts_q_expanded.reshape(-1,1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_full_q_rhc=q[0:7] # root p + quaternion
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_v_rhc=vel[0:3] # lin v.
        root_omega_rhc=vel[3:6] # omega
        jnts_v_rhc=vel[6:] # jnts v
        acc=self._prb.getVariables("a")
        lin_a_prb=acc[0:3] # lin acc

        self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive

        # pin node 0 to the measured state (lb == ub)
        root_full_q_rhc.setBounds(lb=q_full_root,
            ub=q_full_root, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts,
            ub=q_jnts, nodes=0)
        root_v_rhc.setBounds(lb=v_root,
            ub=v_root, nodes=0)
        root_omega_rhc.setBounds(lb=omega,
            ub=omega, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts,
            ub=v_jnts, nodes=0)
        if self._custom_opts["lin_a_feedback"]:
            # write base lin. acceleration bounds from measurements
            lin_a_prb.setBounds(lb=a_root[0:3, :],
                ub=a_root[0:3, :],
                nodes=0)

        # return state used for feedback
        q_state=np.concatenate((q_full_root, q_jnts),
            axis=0)
        v_state=np.concatenate((v_root, omega, v_jnts),
            axis=0)
        a_state=np.concatenate((a_root, a_jnts),
            axis=0)

        return (q_state, v_state, a_state)
+
    def _set_is_partial(self):
        """Partially closed-loop initial state: mix measurements with the
        previous solution. Base position is always taken open-loop from the
        solution; base lin. velocity and joint velocities are open-loop or
        measured depending on custom_opts flags.

        Returns:
            (q_state, v_state, a_state): state vectors used for feedback.
        """
        # measurements (column vectors) from the shared robot state
        p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1)
        q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1)

        if (not len(self._continuous_joints)==0): # we need to expand measured continuous joints to So2 (cos, sin)
            self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:]
            self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos
            self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin
            q_jnts=self._jnts_q_expanded.reshape(-1,1)

        # overriding states with rhc data (-> all overridden states are open loop)
        root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1)
        root_p_from_rhc=root_q_full_from_rhc[0:3, :]
        p_root[:, :]=root_p_from_rhc # position is always open loop

        if not self._custom_opts["estimate_v_root"]:
            # open-loop base lin. velocity from the previous solution
            v_root[:, :]=self._get_root_twist_from_sol(node_idx=1)[:, 0:3].reshape(-1, 1)
        # override v jnts with the ones from controller
        if not self._custom_opts["use_jnt_v_feedback"]:
            v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_p_rhc=q[0:3] # root p
        root_q_rhc=q[3:7] # root orientation
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_v_rhc=vel[0:3] # lin v.
        root_omega_rhc=vel[3:6] # omega
        jnts_v_rhc=vel[6:] # jnts v
        acc=self._prb.getVariables("a")
        lin_a_prb=acc[0:3] # lin acc

        self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive

        # pin node 0 (lb == ub) on the mixed measured/solution state
        root_p_rhc.setBounds(lb=p_root,
            ub=p_root, nodes=0)
        root_q_rhc.setBounds(lb=q_root,
            ub=q_root, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts,
            ub=q_jnts, nodes=0)

        if self._custom_opts["estimate_v_root"]:
            root_v_rhc.setBounds(lb=-self._v_inf[0:3],
                ub=self._v_inf[0:3], nodes=0) # leaving lin v of the base free (estimated from constraints)
        else: # get it from controller
            root_v_rhc.setBounds(lb=v_root,
                ub=v_root, nodes=0)
        root_omega_rhc.setBounds(lb=omega,
            ub=omega, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts,
            ub=v_jnts, nodes=0)
        if self._custom_opts["lin_a_feedback"]:
            # write base lin. acceleration bounds from measurements
            lin_a_prb.setBounds(lb=a_root[0:3, :],
                ub=a_root[0:3, :],
                nodes=0)

        # return state used for feedback
        q_state=np.concatenate((p_root, q_root, q_jnts),
            axis=0)
        v_state=np.concatenate((v_root, omega, v_jnts),
            axis=0)
        a_state=np.concatenate((a_root, a_jnts),
            axis=0)

        return (q_state, v_state, a_state)
+
    def _set_is_adaptive(self):
        """Adaptive closed-loop initial state: blend measurements with the
        previous solution using a gain alpha in [0, 1] (0 -> fully measured,
        1 -> fully open loop). Alpha is either read from outside
        (custom_opts["alpha_from_outside"]) or auto-tuned from the
        measurement/prediction defect via a tanh schedule.

        Returns:
            (q_state, v_state, a_state): measured state vectors used for feedback.
        """
        # measurements (column vectors) from the shared robot state
        p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1)
        q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_p_rhc=q[0:3] # root p
        root_q_rhc=q[3:7] # root orientation
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_v_rhc=vel[0:3] # lin v.
        root_omega_rhc=vel[3:6] # omega
        jnts_v_rhc=vel[6:] # jnts v
        acc=self._prb.getVariables("a")
        lin_a_prb=acc[0:3] # lin acc

        # getting prediction defects (measured state minus predicted state, per quantity)
        root_q_delta=self.rhc_pred_delta.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        jnt_q_delta=self.rhc_pred_delta.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        jnt_v_delta=self.rhc_pred_delta.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root_delta = self.rhc_pred_delta.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega_root_delta = self.rhc_pred_delta.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root_delta = self.rhc_pred_delta.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        # close state on known quantities, estimate some (e.g. lin vel) and
        # go open loop if things start to explode
        alpha_now=1.0
        delta=0.0
        if self._custom_opts["alpha_from_outside"]:
            alpha_now=self.rhc_refs.get_alpha()
        else: # "autotuned" alpha
            if self._custom_opts["estimate_v_root"]: # we compute delta based on jnt v (since we use meas.)
                delta=np.max(np.abs(jnt_v_delta))
            else:
                delta=np.max(np.abs(omega_root_delta))
            # smooth 0 -> 1 transition of alpha as the defect grows past the half point
            alpha_now=(np.tanh(2*self._alpha_half*(delta-self._alpha_half))+1)/2.0

        bound_relaxation=-np.log(1-alpha_now+1e-16) # [~0, ~inf] if alpha_now [0, 1]
        self.rhc_refs.set_alpha(alpha=alpha_now) # also writes on shared mem for db
        self.rhc_refs.set_bound_relax(bound_relax=bound_relaxation) # also writes on shared mem for db

        self._alphas_q_root[:]=alpha_now # for now single alpha for everything
        self._alphas_q_jnts[:]=alpha_now
        self._alphas_twist_root[:]=alpha_now
        self._alphas_v_jnts[:]=alpha_now
        self._alphas_a[:]=alpha_now
        if not self._custom_opts["estimate_v_root"]:
            self._alphas_twist_root[0:3]=1.0 # open
            self._alphas_v_jnts[:]=1.0 # open

        # position is always open loop
        root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1)
        root_p_from_rhc=root_q_full_from_rhc[0:3, :]
        p_root[:, :]=root_p_from_rhc

        # expanding measured q if continuous joints are present
        if (not len(self._continuous_joints)==0): # we need to expand measured continuous joints to So2 (cos, sin)
            self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:]
            self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos
            self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin

            # continuous joints position is always open loop, but we need a delta vector of matching dimension
            q_jnts_from_rhc=self._get_jnt_q_from_sol(node_idx=1).reshape(-1, 1)

            self._jnts_q_delta_expanded[self._rev_joints_idxs, :]=jnt_q_delta[self._rev_joints_idxs_red ,:]

            # defect of the So2 components: cos/sin of solution angle minus cos/sin of measured angle
            self._jnts_q_delta_expanded[self._continuous_joints_idxs_cos, :]=\
                np.cos(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \
                np.cos(q_jnts[self._continuous_joints_idxs_red, :])
            self._jnts_q_delta_expanded[self._continuous_joints_idxs_sin, :]=\
                np.sin(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \
                np.sin(q_jnts[self._continuous_joints_idxs_red, :])

            q_jnts=self._jnts_q_expanded.reshape(-1,1) # overwriting with expanded jnts
            jnt_q_delta=self._jnts_q_delta_expanded.reshape(-1, 1) # overwriting with expanded jnts

            self._alphas_q_jnts[self._continuous_joints_idxs_cos, :]=1.0 # open loop
            self._alphas_q_jnts[self._continuous_joints_idxs_sin, :]=1.0 # open loop

        # pin node 0 (lb == ub) on measurement + alpha-scaled defect
        root_p_rhc.setBounds(lb=p_root,
            ub=p_root, nodes=0)
        root_q_rhc.setBounds(lb=q_root+self._alphas_q_root[3:7]*root_q_delta,
            ub=q_root+self._alphas_q_root[3:7]*root_q_delta, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts+self._alphas_q_jnts*jnt_q_delta,
            ub=q_jnts+self._alphas_q_jnts*jnt_q_delta, nodes=0)
        if self._custom_opts["estimate_v_root"]:
            root_v_rhc.setBounds(lb=-self._v_inf[0:3],
                ub=self._v_inf[0:3], nodes=0)
        else:
            root_v_rhc.setBounds(lb=v_root+self._alphas_twist_root[0:3, :]*v_root_delta,
                ub=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, nodes=0)
        root_omega_rhc.setBounds(lb=omega+self._alphas_twist_root[3:6, :]*omega_root_delta,
            ub=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts+self._alphas_v_jnts*jnt_v_delta,
            ub=v_jnts+self._alphas_v_jnts*jnt_v_delta, nodes=0)
        if self._custom_opts["lin_a_feedback"]:
            lin_a_prb.setBounds(lb=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :],
                ub=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :],
                nodes=0)

        # return state used for feedback
        q_state=np.concatenate((p_root, q_root, q_jnts),
            axis=0)
        v_state=np.concatenate((v_root, omega, v_jnts),
            axis=0)
        a_state=np.concatenate((a_root, a_jnts),
            axis=0)

        return (q_state, v_state, a_state)
+
+ def _solve(self):
+
+ if self._debug:
+ return self._db_solve(bootstrap=False)
+ else:
+ return self._min_solve(bootstrap=False)
+
+ def _bootstrap(self):
+
+ if self._debug:
+ return self._db_solve(bootstrap=True)
+ else:
+ return self._min_solve(bootstrap=True)
+
+ def _min_solve(self, bootstrap: bool = False):
+ # minimal solve version -> no debug
+ robot_qstate=None
+ robot_vstate=None
+ robot_astate=None
+ if self._open_loop:
+ robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and
+ # initial conditions using data from the solution itself
+ else:
+ robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and
+ # initial conditions using robot measurements
+
+ self._pm.shift() # shifts phases of one dt
+ if self._refs_in_hor_frame:
+ # q_base=self.robot_state.root_state.get(data_type="q",
+ # robot_idxs=self.controller_index).reshape(-1, 1)
+ # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1)
+ # using internal base pose from rhc. in case of closed loop, it will be the meas state
+ force_norm=None
+ if self._custom_opts["use_force_feedback"]:
+ contact_forces=self.robot_state.contact_wrenches.get(data_type="f",
+ robot_idxs=self.controller_index_np,
+ contact_name=None).reshape(self.n_contacts,3)
+ force_norm=np.linalg.norm(contact_forces, axis=1)
+ self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate,
+ force_norm=force_norm)
+ else:
+ self.rhc_refs.step()
+
+ try:
+ if not bootstrap:
+ converged = self._ti.rti() # RTI step
+ else:
+ converged = self._ti.bootstrap() # full solve (to convergence)
+ self.sol_counter = self.sol_counter + 1
+ return not self._check_rhc_failure()
+ except Exception as e: # fail in case of exceptions
+ return False
+
+ def _db_solve(self, bootstrap: bool = False):
+
+ self._timer_start = time.perf_counter()
+
+ robot_qstate=None
+ robot_vstate=None
+ robot_astate=None
+ if self._open_loop:
+ robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and
+ # initial conditions using data from the solution itself
+ else:
+ robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and
+ # initial conditions using robot measurements
+
+ self._prb_update_time = time.perf_counter()
+ self._pm.shift() # shifts phases of one dt
+ self._phase_shift_time = time.perf_counter()
+
+ if self._refs_in_hor_frame:
+ # q_base=self.robot_state.root_state.get(data_type="q",
+ # robot_idxs=self.controller_index).reshape(-1, 1)
+ # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1)
+ # using internal base pose from rhc. in case of closed loop, it will be the meas state
+ force_norm=None
+ if self._custom_opts["use_force_feedback"]:
+ contact_forces=self.robot_state.contact_wrenches.get(data_type="f",
+ robot_idxs=self.controller_index_np,
+ contact_name=None).reshape(self.n_contacts,3)
+ force_norm=np.linalg.norm(contact_forces, axis=1)
+ self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate,
+ force_norm=force_norm)
+ else:
+ self.rhc_refs.step()
+
+ self._task_ref_update_time = time.perf_counter()
+
+ try:
+ if not bootstrap:
+ converged = self._ti.rti() # RTI step
+ else:
+ converged = self._ti.bootstrap() # full solve bootstrap
+ self._rti_time = time.perf_counter()
+ self.sol_counter = self.sol_counter + 1
+ self._update_db_data()
+ return not self._check_rhc_failure()
+ except Exception as e: # fail in case of exceptions
+ if self._verbose:
+ solve_mode = "RTI" if not bootstrap else "Bootstrap"
+ exception = f"{solve_mode}() for controller {self.controller_index} failed" + \
+ f" with exception {type(e).__name__}"
+ Journal.log(self.__class__.__name__,
+ "solve",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = False)
+ self._update_db_data()
+ return False
+
+ def _get_fail_idx(self):
+
+ self._explosion_idx_buffer[:, self._expl_idx_buffer_counter]=self._get_explosion_idx()
+ self._expl_idx_buffer_counter+=1
+ self._expl_idx_counter+=1
+ if self._expl_idx_counter%self._expl_idx_window_size==0:
+ self._expl_idx_buffer_counter=0 # restart from 0
+
+ running_avrg=np.mean(self._explosion_idx_buffer).item()
+
+ return running_avrg
+
+ def _get_explosion_idx(self):
+ explosion_index = self._get_rhc_constr_viol() + self._get_rhc_cost()*self._fail_idx_scale
+ return explosion_index
+
+ def _update_db_data(self):
+
+ self._profiling_data_dict["problem_update_dt"] = self._prb_update_time - self._timer_start
+ self._profiling_data_dict["phases_shift_dt"] = self._phase_shift_time - self._prb_update_time
+ self._profiling_data_dict["task_ref_update"] = self._task_ref_update_time - self._phase_shift_time
+ self._profiling_data_dict["rti_solve_dt"] = self._rti_time - self._task_ref_update_time
+ self.rhc_costs.update(self._ti.solver_rti.getCostsValues())
+ self.rhc_constr.update(self._ti.solver_rti.getConstraintsValues())
+
+ def _reset(self):
+
+ # reset task interface (ig, solvers, etc..) +
+ # phase manager and sets bootstrap as solution
+ self._gm.reset()
+ self._explosion_idx_buffer[:, :]=self._get_explosion_idx() # reset with data from reset solution
+ self._expl_idx_counter=0.0
+ self._expl_idx_buffer_counter=0
+
+ def _get_cost_info(self):
+
+ cost_dict = self._ti.solver_rti.getCostsValues()
+ cost_names = list(cost_dict.keys())
+ cost_dims = [1] * len(cost_names) # costs are always scalar
+ return cost_names, cost_dims
+
+ def _get_constr_info(self):
+
+ constr_dict = self._ti.solver_rti.getConstraintsValues()
+ constr_names = list(constr_dict.keys())
+ constr_dims = [-1] * len(constr_names)
+ i = 0
+ for constr in constr_dict:
+ constr_val = constr_dict[constr]
+ constr_shape = constr_val.shape
+ constr_dims[i] = constr_shape[0]
+ i+=1
+ return constr_names, constr_dims
+
+ def _get_q_from_sol(self):
+ full_q=self._ti.solution['q'].astype(self._dtype)
+ if self._custom_opts["replace_continuous_joints"]:
+ return full_q
+ else:
+ cont_jnts=full_q[self._continuous_joints_idxs_qfull, :]
+ cos=cont_jnts[::2, :]
+ sin=cont_jnts[1::2, :]
+ # copy root
+ self._full_q_reduced[0:7, :]=full_q[0:7, :]
+ # copy rev joint vals
+ self._full_q_reduced[self._rev_joints_idxs_red_qfull, :]=full_q[self._rev_joints_idxs_qfull, :]
+ # and continuous
+ angle=np.arctan2(sin, cos)
+ self._full_q_reduced[self._continuous_joints_idxs_red_qfull, :]=angle
+ return self._full_q_reduced
+
+ def _get_v_from_sol(self):
+ return self._ti.solution['v'].astype(self._dtype)
+
+ def _get_a_from_sol(self):
+ return self._ti.solution['a'].astype(self._dtype)
+
+ def _get_a_dot_from_sol(self):
+ return None
+
+ def _get_f_from_sol(self):
+ # to be overridden by child class
+ contact_names =self._get_contacts() # we use controller-side names
+ try:
+ data=[]
+ for key in contact_names:
+ contact_f=self._ti.solution["f_" + key].astype(self._dtype)
+ np.nan_to_num(contact_f, nan=1e6, posinf=1e6, neginf=-1e6, copy=False)
+ np.clip(a=contact_f, a_max=1e6, a_min=-1e6, out=contact_f)
+ data.append(contact_f)
+ return np.concatenate(data, axis=0)
+ except:
+ return None
+
+ def _get_f_dot_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_eff_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_cost_from_sol(self,
+ cost_name: str):
+ return self.rhc_costs[cost_name]
+
+ def _get_constr_from_sol(self,
+ constr_name: str):
+ return self.rhc_constr[constr_name]
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/ibrido_run__2026_03_28_11_09_54_ID/training_cfg_unitree_b2w.sh b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/ibrido_run__2026_03_28_11_09_54_ID/training_cfg_unitree_b2w.sh
new file mode 100644
index 0000000000000000000000000000000000000000..afdd7ea0925f57b483bdac7b0bde43dad50549d4
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/ibrido_run__2026_03_28_11_09_54_ID/training_cfg_unitree_b2w.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+export EVAL=0
+export DET_EVAL=1
+export EVAL_ON_CPU=1
+export OVERRIDE_ENV=1
+export OVERRIDE_AGENT_REFS=1
+export MPATH="/root/training_data/"
+export MNAME=""
+
+export WANDB_KEY="25f235316292344cea6dfa68e7c95409b3374d03"
+export SHM_NS="unitree_b2w" # shared mem namespace used for all shared data on CPU
+export N_ENVS=800 # number of env to run in parallel
+export RNAME="B2WPartialCloopWheels" # a descriptive base name for this run
+export SEED=34 # random n generator seed to be used for this run
+export REMOTE_STEPPING=1
+export COMPRESSION_RATIO=0.6
+export ACTOR_LWIDTH=128
+export ACTOR_DEPTH=3
+export CRITIC_LWIDTH=256
+export CRITIC_DEPTH=4
+export OBS_NORM=1
+export OBS_RESCALING=0
+export CRITIC_ACTION_RESCALE=1
+export WEIGHT_NORM=1
+export LAYER_NORM=0
+export BATCH_NORM=0
+export IS_CLOSED_LOOP=1
+export DEBUG=1
+export RMDEBUG=1
+export DUMP_ENV_CHECKPOINTS=1
+export TOT_STEPS=20000000
+export USE_RND=0
+export DEMO_ENVS_PERC=0.0
+export DEMO_STOP_THRESH=2.0
+export EXPL_ENVS_PERC=0.0
+export ACTION_REPEAT=3
+export USE_SAC=1
+export USE_DUMMY=0
+export DISCOUNT_FACTOR=0.98
+export USE_PERIOD_RESETS=0
+export COMMENT='unitree b2w, CLOSED more vz tracking, new tuning, gamma 0.98, NO entropy annhealing [-0.3, -0.5], max pos 10 m, max_cmd_v 1.0, max_cmd_omega 0.5, task_track_omega_z_weight 1.0, CoT 0.1 0.1' # any training comment
+export URDF_PATH="${HOME}/ibrido_ws/src/unitree_ros/robots/b2w_description/xacro/robot.urdf.xacro" # name of the description package for the robot
+export SRDF_PATH="${HOME}/ibrido_ws/src/unitree_ros/robots/b2w_description/xacro/robot.srdf.xacro" # base path where the description package for the robot are located
+export JNT_IMP_CF_PATH="${HOME}/ibrido_ws/src/KyonRLStepping/kyonrlstepping/config/jnt_imp_config_open_b2w.yaml" # path to yaml file for jnt imp configuration
+if (( $IS_CLOSED_LOOP )); then
+ export JNT_IMP_CF_PATH="${HOME}/ibrido_ws/src/KyonRLStepping/kyonrlstepping/config/jnt_imp_config_b2w.yaml"
+fi
+
+export CLUSTER_CL_FNAME="kyonrlstepping.controllers.horizon_based.b2w_rhc_cluster_client" # base path where the description package for the robot are located
+export CLUSTER_DT=0.035
+export N_NODES=31
+export CLUSTER_DB=1
+export PHYSICS_DT=0.001
+export USE_GPU_SIM=1
+# export CODEGEN_OVERRIDE_BDIR="none"
+export CODEGEN_OVERRIDE_BDIR="${HOME}/aux_data/B2WRHCLusterClient_${SHM_NS}/CodeGen/${SHM_NS}Rhc"
+# export TRAIN_ENV_FNAME="twist_tracking_env"
+# export TRAIN_ENV_CNAME="TwistTrackingEnv"
+export TRAIN_ENV_FNAME="fake_pos_tracking_env"
+export TRAIN_ENV_CNAME="FakePosTrackingEnv"
+# export TRAIN_ENV_FNAME="fake_pos_track_env_phase_control_with_demo"
+# export TRAIN_ENV_CNAME="FakePosTrackEnvPhaseControlWithDemo"
+# export TRAIN_ENV_FNAME="fake_pos_tracking_with_demo"
+# export TRAIN_ENV_CNAME="FakePosTrackingEnvWithDemo"
+# export TRAIN_ENV_FNAME="linvel_env_with_demo"
+# export TRAIN_ENV_CNAME="TwistTrackingEnvWithDemo"
+# export TRAIN_ENV_FNAME="flight_phase_control_env"
+# export TRAIN_ENV_CNAME="FlightPhaseControl"
+export PUB_HEIGHTMAP=0
+export BAG_SDT=90.0
+export BRIDGE_DT=0.1
+export DUMP_DT=50.0
+export ENV_IDX_BAG=6
+export ENV_IDX_BAG_DEMO=-1
+export ENV_IDX_BAG_EXPL=-1
+export SRDF_PATH_ROSBAG="${HOME}/aux_data/B2WRHClusterClient_${SHM_NS}/$SHM_NS.srdf" # base path where the description package for the robot are located
+export CUSTOM_ARGS_NAMES="step_height rendering_dt wheels fixed_flights adaptive_is lin_a_feedback closed_partial use_flat_ground estimate_v_root fully_closed use_gpu control_wheels base_link_name deduce_base_link self_collide"
+export CUSTOM_ARGS_DTYPE="float float xacro bool bool bool bool bool bool bool bool bool str bool bool"
+export CUSTOM_ARGS_VALS="0.12 0.1 true true true false true true false false true true base true true"
+export SET_ULIM=1
+export ULIM_N=131072 # maximum number of open file descriptors for each process (shared memory)
+export TIMEOUT_MS=120000 # timeout after which each script autokills ([ms])
+
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/isaac_world_interface.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/isaac_world_interface.py
new file mode 100644
index 0000000000000000000000000000000000000000..51938e9ba9b4ffbdc9491e6df6326ec0223bd033
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/isaac_world_interface.py
@@ -0,0 +1,2355 @@
+# Copyright (C) 2023 Andrea Patrizi (AndrePatri, andreapatrizi1b6e6@gmail.com)
+#
+# This file is part of AugMPCEnvs and distributed under the GNU General Public License version 2.
+#
+# AugMPCEnvs is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# AugMPCEnvs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with AugMPCEnvs. If not, see <https://www.gnu.org/licenses/>.
+#
+from isaacsim import SimulationApp
+
+import carb
+
+import os
+import math
+
+import torch
+import numpy as np
+
+from typing import Dict, List
+from typing_extensions import override
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import LogType
+from EigenIPC.PyEigenIPC import Journal
+
+from aug_mpc_envs.utils.math_utils import quat_to_omega
+from aug_mpc_envs.utils.height_grid_visualizer import HeightGridVisualizer
+from aug_mpc_envs.utils.height_sensor import HeightGridSensor
+
+from aug_mpc.world_interfaces.world_interface_base import AugMPCWorldInterfaceBase
+from mpc_hive.utilities.math_utils_torch import world2base_frame,world2base_frame3D
+
+class IsaacSimEnv(AugMPCWorldInterfaceBase):
+
+ def __init__(self,
+ robot_names: List[str],
+ robot_urdf_paths: List[str],
+ robot_srdf_paths: List[str],
+ jnt_imp_config_paths: List[str],
+ n_contacts: List[int],
+ cluster_dt: List[float],
+ use_remote_stepping: List[bool],
+ name: str = "IsaacSimEnv",
+ num_envs: int = 1,
+ debug = False,
+ verbose: bool = False,
+ vlevel: VLevel = VLevel.V1,
+ n_init_step: int = 0,
+ timeout_ms: int = 60000,
+ env_opts: Dict = None,
+ use_gpu: bool = True,
+ dtype: torch.dtype = torch.float32,
+ override_low_lev_controller: bool = False):
+
+ self._render_step_counter = 0
+
+ super().__init__(name=name,
+ robot_names=robot_names,
+ robot_urdf_paths=robot_urdf_paths,
+ robot_srdf_paths=robot_srdf_paths,
+ jnt_imp_config_paths=jnt_imp_config_paths,
+ n_contacts=n_contacts,
+ cluster_dt=cluster_dt,
+ use_remote_stepping=use_remote_stepping,
+ num_envs=num_envs,
+ debug=debug,
+ verbose=verbose,
+ vlevel=vlevel,
+ n_init_step=n_init_step,
+ timeout_ms=timeout_ms,
+ env_opts=env_opts,
+ use_gpu=use_gpu,
+ dtype=dtype,
+ override_low_lev_controller=override_low_lev_controller)
+
+ def is_running(self):
+ return self._simulation_app.is_running()
+
+ def _pre_setup(self):
+ self._backend="torch"
+ enable_livestream = self._env_opts["enable_livestream"]
+ enable_viewport = self._env_opts["enable_viewport"]
+ base_isaac_exp = f'{os.environ["EXP_PATH"]}/omni.isaac.sim.python.aug_mpc_envs.kit'
+ base_isaac_exp_headless = f'{os.environ["EXP_PATH"]}/omni.isaac.sim.python.aug_mpc_envs.headless.kit'
+
+ experience=base_isaac_exp
+ if self._env_opts["headless"]:
+ info = f"Will run in headless mode."
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ info,
+ LogType.STAT,
+ throw_when_excep = True)
+ if enable_livestream:
+ experience = ""
+ elif enable_viewport:
+ exception = f"Using viewport is not supported yet."
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ else:
+ experience=base_isaac_exp_headless
+
+ self._simulation_app = SimulationApp({"headless": self._env_opts["headless"]},
+ experience=experience)
+ # all imports depending on isaac sim kits have to be done after simulationapp
+ # from omni.isaac.core.tasks.base_task import BaseTask
+ self._import_isaac_pkgs()
+ info = "Using IsaacSim experience file @ " + experience
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ info,
+ LogType.STAT,
+ throw_when_excep = True)
+ # carb.settings.get_settings().set("/persistent/omnihydra/useSceneGraphInstancing", True)
+
+ if enable_livestream:
+ info = "Livestream enabled"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ info,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ self._simulation_app.set_setting("/app/livestream/enabled", True)
+ self._simulation_app.set_setting("/app/window/drawMouse", True)
+ self._simulation_app.set_setting("/app/livestream/proto", "ws")
+ self._simulation_app.set_setting("/app/livestream/websocket/framerate_limit", 120)
+ self._simulation_app.set_setting("/ngx/enabled", False)
+ enable_extension("omni.kit.livestream.native")
+ enable_extension("omni.services.streaming.manager")
+ self._render = ((not self._env_opts["headless"]) or self._env_opts["render_to_file"]) or enable_livestream or enable_viewport
+
+ self._record = False
+ self._world = None
+ self._physics_context = None
+ self._scene = None
+ self._task = None
+ self._metadata = None
+
+ self._robots_art_views = {}
+ self._blink_rigid_prim_views = {}
+ self._robots_articulations = {}
+ self._robots_geom_prim_views = {}
+ self.omni_contact_sensors = {}
+
+ self._solver_position_iteration_count=self._env_opts["solver_position_iteration_count"]
+ self._solver_velocity_iteration_count=self._env_opts["solver_velocity_iteration_count"]
+ self._solver_stabilization_thresh=self._env_opts["stabilization_threshold"]
+ self._solver_position_iteration_counts={}
+ self._solver_velocity_iteration_counts={}
+ self._solver_stabilization_threshs={}
+ self._robot_bodynames={}
+ self._robot_n_links={}
+ self._robot_n_dofs={}
+ self._robot_dof_names={}
+ self._distr_offset={} # describes how robots within each env are distributed
+ self._spawning_radius=self._env_opts["spawning_radius"] # [m] -> default distance between roots of robots in a single
+ self._height_sensors={}
+ self._height_imgs={}
+ self._height_vis={}
+ self._height_vis_step={}
+ self._height_vis_step={}
+ self._height_vis={}
+ self._terrain_hit_margin = 1.0
+ self._terrain_hit_log_period = 1000
+ self._terrain_hit_active = {}
+ self._terrain_hit_counts = {}
+ self._terrain_hit_counts_last_logged = {}
+ for robot_name in self._robot_names:
+ self._terrain_hit_active[robot_name] = torch.zeros((self._num_envs,), device=self._device, dtype=torch.bool)
+ self._terrain_hit_counts[robot_name] = torch.zeros((self._num_envs,), device=self._device, dtype=torch.int32)
+ self._terrain_hit_counts_last_logged[robot_name] = None
+
+ def _import_isaac_pkgs(self):
+ # we use global, so that we can create the simulation app inside (and so
+ # access Isaac's kit) and also expose to all methods the imports
+ global World, omni_kit, get_context, UsdLux, Sdf, Gf, UsdPhysics, PhysicsSchemaTools, UsdShade, Vt
+ global enable_extension, set_camera_view, _urdf, move_prim, GridCloner, prim_utils
+ global get_current_stage, Scene, ArticulationView, RigidPrimView, rep
+ global OmniContactSensors, RlTerrains,OmniJntImpCntrl
+ global PhysxSchema, UsdGeom
+ global _sensor, _dynamic_control
+ global get_prim_at_path
+
+ from pxr import PhysxSchema, UsdGeom, UsdShade, Vt
+
+ from omni.isaac.core.world import World
+ from omni.usd import get_context
+ from pxr import UsdLux, Sdf, Gf, UsdPhysics, PhysicsSchemaTools
+ from omni.isaac.core.utils.extensions import enable_extension
+ from omni.isaac.core.utils.viewports import set_camera_view
+ import omni.kit as omni_kit
+ from omni.importer.urdf import _urdf
+ from omni.isaac.core.utils.prims import move_prim
+ from omni.isaac.cloner import GridCloner
+ import omni.isaac.core.utils.prims as prim_utils
+ from omni.isaac.core.utils.stage import get_current_stage
+ from omni.isaac.core.scenes.scene import Scene
+ from omni.isaac.core.articulations import ArticulationView
+ from omni.isaac.core.prims import RigidPrimView
+
+ import omni.replicator.core as rep
+
+ from omni.isaac.core.utils.prims import get_prim_at_path
+
+ from omni.isaac.sensor import _sensor
+
+ from omni.isaac.dynamic_control import _dynamic_control
+
+ from aug_mpc_envs.utils.contact_sensor import OmniContactSensors
+ from aug_mpc_envs.utils.omni_jnt_imp_cntrl import OmniJntImpCntrl
+ from aug_mpc_envs.utils.terrains import RlTerrains
+
+ def _parse_env_opts(self):
+ isaac_opts={}
+ isaac_opts["envs_ns"]="/World/envs"
+ isaac_opts["template_env_ns"]=isaac_opts["envs_ns"] + "/env_0"
+ isaac_opts["base_linkname"]="base_link"
+ isaac_opts["deduce_base_link"]=False
+ isaac_opts["ground_plane_prim_path"]="/World/terrain"
+ isaac_opts["physics_prim_path"]="/physicsScene"
+ isaac_opts["use_gpu"]=True
+ isaac_opts["use_gpu_pipeline"]=True
+ isaac_opts["device"]="cuda"
+ isaac_opts["is_fixed_base"]=False
+ isaac_opts["merge_fixed_jnts"]=True
+ isaac_opts["self_collide"]=True
+ isaac_opts["sim_device"]="cuda" if isaac_opts["use_gpu"] else "cpu"
+ isaac_opts["physics_dt"]=1e-3
+ isaac_opts["gravity"] = np.array([0.0, 0.0, -9.81])
+ isaac_opts["use_fabric"]=True# Enable/disable reading of physics buffers directly. Default is True.
+ isaac_opts["replicate_physics"]=True
+ # isaac_opts["worker_thread_count"]=4
+ isaac_opts["solver_type"]=1 # 0: PGS, 1:TGS, defaults to TGS. PGS faster but TGS more stable
+ isaac_opts["enable_stabilization"]=True
+ # isaac_opts["bounce_threshold_velocity"] = 0.2
+ # isaac_opts["friction_offset_threshold"] = 0.04
+ # isaac_opts["friction_correlation_distance"] = 0.025
+ # isaac_opts["enable_sleeping"] = True
+ # Per-actor settings ( can override in actor_options )
+ isaac_opts["solver_position_iteration_count"] = 4 # defaults to 4
+ isaac_opts["solver_velocity_iteration_count"] = 3 # defaults to 1
+ isaac_opts["sleep_threshold"] = 1e-5 # Mass-normalized kinetic energy threshold below which an actor may go to sleep.
+ # Allowed range [0, max_float).
+ isaac_opts["stabilization_threshold"] = 1e-4
+ # Per-body settings ( can override in actor_options )
+ # isaac_opts["enable_gyroscopic_forces"] = True
+ # isaac_opts["density"] = 1000 # density to be used for bodies that do not specify mass or density
+ # isaac_opts["max_depenetration_velocity"] = 100.0
+ # isaac_opts["solver_velocity_iteration_count"] = 1
+ # GPU buffers settings
+ isaac_opts["gpu_max_rigid_contact_count"] = 512 * 2048
+ isaac_opts["gpu_max_rigid_patch_count"] = 80 * 2048
+ isaac_opts["gpu_found_lost_pairs_capacity"] = 102400
+ isaac_opts["gpu_found_lost_aggregate_pairs_capacity"] = 102400
+ isaac_opts["gpu_total_aggregate_pairs_capacity"] = 102400
+ # isaac_opts["gpu_max_soft_body_contacts"] = 1024 * 1024
+ # isaac_opts["gpu_max_particle_contacts"] = 1024 * 1024
+ # isaac_opts["gpu_heap_capacity"] = 64 * 1024 * 1024
+ # isaac_opts["gpu_temp_buffer_capacity"] = 16 * 1024 * 1024
+ # isaac_opts["gpu_max_num_partitions"] = 8
+
+ isaac_opts["env_spacing"]=15.0
+ isaac_opts["spawning_height"]=1.0
+ isaac_opts["spawning_radius"]=1.0
+ isaac_opts["spawn_height_check_half_extent"]=0.2
+ isaac_opts["spawn_height_cushion"]=0.03
+
+ isaac_opts["enable_height_vis"]=False
+ isaac_opts["height_vis_radius"]=0.03
+ isaac_opts["height_vis_update_period"]=1
+ isaac_opts["collision_refinement_level"]=3 # increase cylinder tesselation for smoother wheel contacts
+
+ # rendering helpers
+ isaac_opts["render_to_file"]=False
+ isaac_opts["use_follow_camera"]=False # if True, follow robot during rendering in human mode
+ isaac_opts["render_follow_env_idx"]=0
+ isaac_opts["render_follow_robot_idx"]=0
+ isaac_opts["render_follow_offset"]=[-0.2, 3.0, 0.1]
+ isaac_opts["render_follow_target_offset"]=[-0.2, -1.0, 0.0]
+ isaac_opts["rendering_dt"]=15*isaac_opts["physics_dt"]
+ isaac_opts["camera_prim_path"]="/OmniverseKit_Persp"
+ isaac_opts["render_resolution"]=[1280, 720] # [1024, 576]
+
+ isaac_opts["render_panoramic_cam"]=True
+ isaac_opts["render_panoramic_cam_height"]=2.0
+ isaac_opts["render_panoramic_cam_target_xy"]=[10.0, 14.]
+ isaac_opts["render_panoramic_cam_target_z"]=1.2
+
+ # ground opts
+ isaac_opts["use_flat_ground"]=True
+ isaac_opts["static_friction"]=0.5
+ isaac_opts["dynamic_friction"]=0.5
+ isaac_opts["restitution"]=0.1
+ isaac_opts["ground_type"]="random"
+ isaac_opts["ground_size"]=800
+ isaac_opts["terrain_border"]=isaac_opts["ground_size"]/2
+ isaac_opts["dh_ground"]=0.03
+ isaac_opts["step_height_lb"]=0.08
+ isaac_opts["step_height_ub"]=0.15
+ isaac_opts["step_width_lb"]=0.6
+ isaac_opts["step_width_ub"]= 1.5
+ isaac_opts["contact_prims"] = []
+ isaac_opts["sensor_radii"] = 0.1
+ isaac_opts["contact_offsets"] = {}
+
+ isaac_opts["enable_livestream"] = False
+ isaac_opts["enable_viewport"] = False
+
+ isaac_opts["use_diff_vels"] = False
+
+ # random perturbations (impulse, durations, directions are sampled uniformly, force/torque computed accordingly)
+ isaac_opts["use_random_pertub"]=False
+ isaac_opts["pert_planar_only"]=True # if True, linear pushes only in xy plane and no torques
+
+ isaac_opts["pert_wrenches_rate"]=10.0 # on average 1 pert every pert_wrenches_rate seconds
+ isaac_opts["pert_wrenches_min_duration"]=0.6
+ isaac_opts["pert_wrenches_max_duration"]=3.5 # [s]
+ isaac_opts["pert_force_max_weight_scale"]=1.2 # clip force norm to scale*weight
+ isaac_opts["pert_force_min_weight_scale"]=0.2 # optional min force norm as scale*weight
+ isaac_opts["pert_torque_max_weight_scale"]=1.0 # clip torque norm to scale*weight*max_ang_impulse_lever
+
+ isaac_opts["pert_target_delta_v"]=2.0 # [m/s] desired max impulse = m*delta_v
+ isaac_opts["det_pert_rate"]=True
+
+ # max impulse (unitless scale multiplied by weight to get N*s): delta_v/g
+ isaac_opts["max_lin_impulse_norm"]=isaac_opts["pert_target_delta_v"]/9.81
+ isaac_opts["lin_impulse_mag_min"]=1.0 # [0, 1] -> min fraction of max_lin_impulse_norm when sampling
+ isaac_opts["lin_impulse_mag_max"]=1.0 # [0, 1] -> max fraction of max_lin_impulse_norm when sampling
+
+ isaac_opts["max_ang_impulse_lever"]=0.2 # [m]
+ isaac_opts["max_ang_impulse_norm"]=isaac_opts["max_lin_impulse_norm"]*isaac_opts["max_ang_impulse_lever"]
+ isaac_opts["terrain_hit_log_period"]=1000
+
+ # opts definition end
+
+ isaac_opts.update(self._env_opts) # update defaults with provided opts
+ isaac_opts["rendering_freq"]=int(isaac_opts["rendering_dt"]/isaac_opts["physics_dt"])
+ # isaac_opts["rendering_dt"]=isaac_opts["physics_dt"] # forcing rendering_dt==physics_dt
+ # for some mystic reason simulation is influenced by the rendering dt (why ??????)
+
+ # modify things
+ isaac_opts["cloning_offset"] = np.array([[0.0, 0.0, isaac_opts["spawning_height"]]]*self._num_envs)
+ if not isaac_opts["use_gpu"]: # don't use GPU at all
+ isaac_opts["use_gpu_pipeline"]=False
+ isaac_opts["device"]="cpu"
+ isaac_opts["sim_device"]="cpu"
+ else: # use GPU
+ if isaac_opts["use_gpu_pipeline"]:
+ isaac_opts["device"]="cuda"
+ isaac_opts["sim_device"]="cuda"
+ else: # cpu pipeline
+ isaac_opts["device"]="cpu"
+ isaac_opts["sim_device"]="cpu"
+ isaac_opts["use_gpu"]=False
+ # isaac_opts["sim_device"]=isaac_opts["device"]
+ # overwrite env opts in case some sim params were missing
+ self._env_opts=isaac_opts
+
+ # update device flag based on sim opts
+ self._device=isaac_opts["device"]
+ self._use_gpu=isaac_opts["use_gpu"]
+
+ def _calc_robot_distrib(self):
+
+ import math
+ # we distribute robots in a single env. along the
+ # circumference of a circle of given radius
+ n_robots = len(self._robot_names)
+ offset_baseangle = 2 * math.pi / n_robots
+ for i in range(n_robots):
+ offset_angle = offset_baseangle * (i + 1)
+ robot_offset_wrt_center = torch.tensor([self._spawning_radius * math.cos(offset_angle),
+ self._spawning_radius * math.sin(offset_angle), 0],
+ device=self._device,
+ dtype=self._dtype)
+ # list with n references to the original tensor
+ tensor_list = [robot_offset_wrt_center] * self._num_envs
+ self._distr_offset[self._robot_names[i]] = torch.stack(tensor_list, dim=0)
+
+ def _init_world(self):
+
+ self._cloner = GridCloner(spacing=self._env_opts["env_spacing"])
+ self._cloner.define_base_env(self._env_opts["envs_ns"])
+ prim_utils.define_prim(self._env_opts["template_env_ns"])
+ self._envs_prim_paths = self._cloner.generate_paths(self._env_opts["envs_ns"] + "/env",
+ self._num_envs)
+
+ # parse device based on sim_param settings
+
+ info = "Using sim device: " + str(self._env_opts["sim_device"])
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ info,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ self._world = World(
+ physics_dt=self._env_opts["physics_dt"],
+ rendering_dt=self._env_opts["physics_dt"], # == physics dt (rendering is actually done manually by this env)
+ backend=self._backend,
+ device=str(self._env_opts["sim_device"]),
+ physics_prim_path=self._env_opts["physics_prim_path"],
+ set_defaults = False, # set to True to use the defaults settings [physics_dt = 1.0/ 60.0,
+ # stage units in meters = 0.01 (i.e in cms), rendering_dt = 1.0 / 60.0, gravity = -9.81 m / s
+ # ccd_enabled, stabilization_enabled, gpu dynamics turned off,
+ # broadcast type is MBP, solver type is TGS]
+ sim_params=self._env_opts
+ )
+
+ big_info = "[World] Creating Isaac simulation " + self._name + "\n" + \
+ "use_gpu_pipeline: " + str(self._env_opts["use_gpu_pipeline"]) + "\n" + \
+ "device: " + str(self._env_opts["sim_device"]) + "\n" +\
+ "backend: " + str(self._backend) + "\n" +\
+ "integration_dt: " + str(self.physics_dt()) + "\n" + \
+ "rendering_dt: " + str(self.rendering_dt()) + "\n"
+ Journal.log(self.__class__.__name__,
+ "_init_world",
+ big_info,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ # we get the physics context to expose additional low-level ##
+ # settings of the simulation
+ self._physics_context = self._world.get_physics_context()
+ self._physics_scene_path = self._physics_context.prim_path
+ # self._physics_context.enable_gpu_dynamics(True)
+ self._physics_context.enable_stablization(True)
+ self._physics_scene_prim = self._physics_context.get_current_physics_scene_prim()
+ self._solver_type = self._physics_context.get_solver_type()
+
+ if "gpu_max_rigid_contact_count" in self._env_opts:
+ self._physics_context.set_gpu_max_rigid_contact_count(self._env_opts["gpu_max_rigid_contact_count"])
+ if "gpu_max_rigid_patch_count" in self._env_opts:
+ self._physics_context.set_gpu_max_rigid_patch_count(self._env_opts["gpu_max_rigid_patch_count"])
+ if "gpu_found_lost_pairs_capacity" in self._env_opts:
+ self._physics_context.set_gpu_found_lost_pairs_capacity(self._env_opts["gpu_found_lost_pairs_capacity"])
+ if "gpu_found_lost_aggregate_pairs_capacity" in self._env_opts:
+ self._physics_context.set_gpu_found_lost_aggregate_pairs_capacity(self._env_opts["gpu_found_lost_aggregate_pairs_capacity"])
+ if "gpu_total_aggregate_pairs_capacity" in self._env_opts:
+ self._physics_context.set_gpu_total_aggregate_pairs_capacity(self._env_opts["gpu_total_aggregate_pairs_capacity"])
+ if "gpu_max_soft_body_contacts" in self._env_opts:
+ self._physics_context.set_gpu_max_soft_body_contacts(self._env_opts["gpu_max_soft_body_contacts"])
+ if "gpu_max_particle_contacts" in self._env_opts:
+ self._physics_context.set_gpu_max_particle_contacts(self._env_opts["gpu_max_particle_contacts"])
+ if "gpu_heap_capacity" in self._env_opts:
+ self._physics_context.set_gpu_heap_capacity(self._env_opts["gpu_heap_capacity"])
+ if "gpu_temp_buffer_capacity" in self._env_opts:
+ self._physics_context.set_gpu_temp_buffer_capacity(self._env_opts["gpu_temp_buffer_capacity"])
+ if "gpu_max_num_partitions" in self._env_opts:
+ self._physics_context.set_gpu_max_num_partitions(self._env_opts["gpu_max_num_partitions"])
+
+ # overwriting defaults
+ # self._physics_context.set_gpu_max_rigid_contact_count(2 * self._physics_context.get_gpu_max_rigid_contact_count())
+ # self._physics_context.set_gpu_max_rigid_patch_count(2 * self._physics_context.get_gpu_max_rigid_patch_count())
+ # self._physics_context.set_gpu_found_lost_pairs_capacity(2 * self._physics_context.get_gpu_found_lost_pairs_capacity())
+ # self._physics_context.set_gpu_found_lost_aggregate_pairs_capacity(20 * self._physics_context.get_gpu_found_lost_aggregate_pairs_capacity())
+ # self._physics_context.set_gpu_total_aggregate_pairs_capacity(20 * self._physics_context.get_gpu_total_aggregate_pairs_capacity())
+ # self._physics_context.set_gpu_heap_capacity(2 * self._physics_context.get_gpu_heap_capacity())
+ # self._physics_context.set_gpu_temp_buffer_capacity(20 * self._physics_context.get_gpu_heap_capacity())
+ # self._physics_context.set_gpu_max_num_partitions(20 * self._physics_context.get_gpu_temp_buffer_capacity())
+
+ # GPU buffers
+ self._gpu_max_rigid_contact_count = self._physics_context.get_gpu_max_rigid_contact_count()
+ self._gpu_max_rigid_patch_count = self._physics_context.get_gpu_max_rigid_patch_count()
+ self._gpu_found_lost_pairs_capacity = self._physics_context.get_gpu_found_lost_pairs_capacity()
+ self._gpu_found_lost_aggregate_pairs_capacity = self._physics_context.get_gpu_found_lost_aggregate_pairs_capacity()
+ self._gpu_total_aggregate_pairs_capacity = self._physics_context.get_gpu_total_aggregate_pairs_capacity()
+ self._gpu_max_soft_body_contacts = self._physics_context.get_gpu_max_soft_body_contacts()
+ self._gpu_max_particle_contacts = self._physics_context.get_gpu_max_particle_contacts()
+ self._gpu_heap_capacity = self._physics_context.get_gpu_heap_capacity()
+ self._gpu_temp_buffer_capacity = self._physics_context.get_gpu_temp_buffer_capacity()
+ # self._gpu_max_num_partitions = physics_context.get_gpu_max_num_partitions() # BROKEN->method does not exist
+
+ big_info2 = "[physics context]:" + "\n" + \
+ "gpu_max_rigid_contact_count: " + str(self._gpu_max_rigid_contact_count) + "\n" + \
+ "gpu_max_rigid_patch_count: " + str(self._gpu_max_rigid_patch_count) + "\n" + \
+ "gpu_found_lost_pairs_capacity: " + str(self._gpu_found_lost_pairs_capacity) + "\n" + \
+ "gpu_found_lost_aggregate_pairs_capacity: " + str(self._gpu_found_lost_aggregate_pairs_capacity) + "\n" + \
+ "gpu_total_aggregate_pairs_capacity: " + str(self._gpu_total_aggregate_pairs_capacity) + "\n" + \
+ "gpu_max_soft_body_contacts: " + str(self._gpu_max_soft_body_contacts) + "\n" + \
+ "gpu_max_particle_contacts: " + str(self._gpu_max_particle_contacts) + "\n" + \
+ "gpu_heap_capacity: " + str(self._gpu_heap_capacity) + "\n" + \
+ "gpu_temp_buffer_capacity: " + str(self._gpu_temp_buffer_capacity) + "\n" + \
+ "use_gpu_sim: " + str(self._world.get_physics_context().use_gpu_sim) + "\n" + \
+ "use_gpu_pipeline: " + str(self._world.get_physics_context().use_gpu_pipeline) + "\n" + \
+ "use_fabric: " + str(self._world.get_physics_context().use_fabric) + "\n" + \
+ "world device: " + str(self._world.get_physics_context().device) + "\n" + \
+ "physics context device: " + str(self._world.get_physics_context().device) + "\n"
+
+ Journal.log(self.__class__.__name__,
+ "set_task",
+ big_info2,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ self._scene = self._world.scene
+ self._physics_context = self._world.get_physics_context()
+
+ self._stage = get_context().get_stage()
+
+ # strong, uniform lighting: bright sun + dome fill to cover the whole terrain
+ prim_utils.define_prim("/World/Lighting", "Xform")
+ sun_path = "/World/Lighting/SunLight"
+ dome_path = "/World/Lighting/AmbientDome"
+
+ distantLight = UsdLux.DistantLight.Define(self._stage, Sdf.Path(sun_path))
+ distantLight.CreateIntensityAttr(450.0)
+ distantLight.CreateAngleAttr(0.5) # soften shadows a bit
+ distantLight.CreateColorAttr(Gf.Vec3f(1.0, 1.0, 1.0))
+ # Shadow attr naming differs across versions; set the underlying USD attribute directly.
+ distantLight.GetPrim().CreateAttribute("shadow:enable", Sdf.ValueTypeNames.Bool).Set(True)
+
+ domeLight = UsdLux.DomeLight.Define(self._stage, Sdf.Path(dome_path))
+ domeLight.CreateIntensityAttr(200.0)
+ domeLight.CreateExposureAttr(1.0)
+ domeLight.CreateColorAttr(Gf.Vec3f(1.0, 1.0, 1.0))
+
+ self._configure_scene()
+
+ # if "enable_viewport" in sim_params:
+ # self._render = sim_params["enable_viewport"]
+
+ def _get_baselink_candidate(self,
+ robot_name: str):
+
+ stage=get_current_stage()
+ all_prims = [prim.GetPath().pathString for prim in stage.Traverse()]
+ filtered_prims = [prim for prim in all_prims if f"/{robot_name}/" in prim]
+
+ matching=min(filtered_prims, key=len) if filtered_prims else None
+
+ return matching.split('/')[-1]
+
    def _configure_scene(self) -> None:
        """Build the full simulation scene.

        Sets up the ground (a flat default plane or a procedurally generated
        terrain), imports the robot URDFs and clones them across all
        environments, creates articulation/rigid-prim views plus contact and
        height-grid sensors, configures the viewport camera and optional
        offline rendering, then resets the sim and initializes robot state
        buffers. Sets ``self.scene_setup_completed`` to True on success.
        """

        # environment
        # per-robot URDF-import flags, replicated from the env options
        self._fix_base = [self._env_opts["is_fixed_base"]] * len(self._robot_names)
        self._self_collide = [self._env_opts["self_collide"]] * len(self._robot_names)
        self._merge_fixed = [self._env_opts["merge_fixed_jnts"]] * len(self._robot_names)

        Journal.log(self.__class__.__name__,
            "_configure_scene",
            "cloning environments...",
            LogType.STAT,
            throw_when_excep = True)

        self._ground_plane_prim_paths=[]
        self._ground_plane=None
        self.terrain_generator = None
        if not self._env_opts["use_flat_ground"]:
            # ensure terrain is large enough to contain all env clones
            spacing = float(self._env_opts["env_spacing"])
            num_envs = self._num_envs
            num_per_row = max(1, int(math.sqrt(num_envs)))
            num_rows = int(math.ceil(num_envs / num_per_row))
            num_cols = int(math.ceil(num_envs / num_rows))
            row_offset = 0.5 * spacing * (num_rows - 1)
            col_offset = 0.5 * spacing * (num_cols - 1)
            margin = spacing # leave a full spacing as border cushion
            required_size = 2.0 * (max(row_offset, col_offset) + margin)

            if required_size > self._env_opts["ground_size"]:
                old_size = self._env_opts["ground_size"]
                self._env_opts["ground_size"] = required_size
                # keep the terrain border consistent with the enlarged ground
                self._env_opts["terrain_border"] = self._env_opts["ground_size"] / 2.0
                Journal.log(self.__class__.__name__,
                    "_configure_scene",
                    f"Ground size increased from {old_size} to {required_size} to fit {num_envs} envs (spacing {spacing}).",
                    LogType.WARN,
                    throw_when_excep = True)

            # symmetric height perturbation range around 0 for the generators below
            min_height=-self._env_opts["dh_ground"]
            max_height=self._env_opts["dh_ground"]
            step=max_height-min_height
            if self._env_opts["ground_type"]=="random":
                terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_random_unif"
                self._ground_plane_prim_paths.append(terrain_prim_path)
                self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path)
                self._ground_plane=self.terrain_generator.create_random_uniform_terrain(terrain_size=self._env_opts["ground_size"],
                    min_height=min_height,
                    max_height=max_height,
                    step=step,
                    position=np.array([0.0, 0.0,0.0]),
                    static_friction=self._env_opts["static_friction"],
                    dynamic_friction=self._env_opts["dynamic_friction"],
                    restitution=self._env_opts["restitution"])
            elif self._env_opts["ground_type"]=="random_patches":
                terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_random_unif_patches"
                self._ground_plane_prim_paths.append(terrain_prim_path)
                self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path)
                self._ground_plane=self.terrain_generator.create_random_patched_terrain(terrain_size=self._env_opts["ground_size"],
                    min_height=min_height,
                    max_height=max_height,
                    step=step,
                    position=np.array([0.0, 0.0,0.0]),
                    static_friction=self._env_opts["static_friction"],
                    dynamic_friction=self._env_opts["dynamic_friction"],
                    restitution=self._env_opts["restitution"],
                    patch_ratio=0.8,
                    patch_size=10
                    )
            elif self._env_opts["ground_type"]=="slopes":
                terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_slopes"
                self._ground_plane_prim_paths.append(terrain_prim_path)
                self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path)
                self._ground_plane=self.terrain_generator.create_sloped_terrain(terrain_size=self._env_opts["ground_size"],
                    slope=-0.5,
                    position=np.array([0.0, 0.0,0.0]),
                    static_friction=self._env_opts["static_friction"],
                    dynamic_friction=self._env_opts["dynamic_friction"],
                    restitution=self._env_opts["restitution"]
                    )
            elif self._env_opts["ground_type"]=="stairs":
                terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stairs"
                self._ground_plane_prim_paths.append(terrain_prim_path)
                self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path)
                self._ground_plane=self.terrain_generator.create_stairs_terrain(terrain_size=self._env_opts["ground_size"],
                    position=np.array([0.0, 0.0,0.0]),
                    static_friction=self._env_opts["static_friction"],
                    dynamic_friction=self._env_opts["dynamic_friction"],
                    restitution=self._env_opts["restitution"],
                    )
            elif self._env_opts["ground_type"]=="stepup":
                terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stepup"
                self._ground_plane_prim_paths.append(terrain_prim_path)
                self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path)
                self._ground_plane=self.terrain_generator.create_stepup_terrain(
                    terrain_size=self._env_opts["ground_size"],
                    stairs_ratio=0.3,
                    min_steps=1,
                    max_steps=1,
                    pyramid_platform_size=15.0,
                    position=np.array([0.0, 0.0, 0.0]),
                    static_friction=self._env_opts["static_friction"],
                    dynamic_friction=self._env_opts["dynamic_friction"],
                    restitution=self._env_opts["restitution"],
                    step_height=0.15
                    )
            elif self._env_opts["ground_type"]=="stepup_prim":
                terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stepup_prim"
                self._ground_plane_prim_paths.append(terrain_prim_path)
                self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path)
                self._ground_plane=self.terrain_generator.create_stepup_prim_terrain(
                    terrain_size=self._env_opts["ground_size"],
                    stairs_ratio=0.9,
                    platform_size=50.0,
                    step_height_lb=self._env_opts["step_height_lb"],
                    step_height_ub=self._env_opts["step_height_ub"],
                    min_step_width=self._env_opts.get("step_width_lb", None),
                    max_step_width=self._env_opts.get("step_width_ub", None),
                    position=np.array([0.0, 0.0, 0.0]),
                    static_friction=self._env_opts["static_friction"],
                    dynamic_friction=self._env_opts["dynamic_friction"],
                    restitution=self._env_opts["restitution"],
                    n_steps=25,
                    area_factor=0.7,
                    random_n_steps=False
                    )
                # apply a custom checker material to the terrain primitives
                # (visual-only; overlays make the texture visible on cube tiles)
                mat_path = self._ensure_lightblue_checker_material()
                self._apply_checker_material_to_terrain(terrain_root_path=terrain_prim_path, material_path=mat_path)
                self._add_checker_overlay_plane(terrain_root_path=terrain_prim_path, material_path=mat_path)
                self._add_checker_overlays_on_tiles(terrain_root_path=terrain_prim_path, material_path=mat_path)
            else:
                ground_type=self._env_opts["ground_type"]
                # throw_when_excep=True -> unknown terrain types abort here
                # despite the "will default to flat ground" wording
                Journal.log(self.__class__.__name__,
                    "_configure_scene",
                    f"Terrain type {ground_type} not supported. Will default to flat ground.",
                    LogType.EXCEP,
                    throw_when_excep = True)

            # add offsets to intial height depending on the terrain heightmap
            if self.terrain_generator is not None:
                stage = get_current_stage()
                up_axis = UsdGeom.GetStageUpAxis(stage)

                spacing = self._env_opts["env_spacing"]
                num_envs = self._num_envs

                # same grid layout as the cloner uses, recomputed here
                num_per_row = max(1, int(np.sqrt(num_envs)))
                num_rows = int(np.ceil(num_envs / num_per_row))
                num_cols = int(np.ceil(num_envs / num_rows))

                row_offset = 0.5 * spacing * (num_rows - 1)
                col_offset = 0.5 * spacing * (num_cols - 1)

                offsets = np.array(self._env_opts["cloning_offset"], dtype=float)

                for env_idx in range(num_envs):
                    row = env_idx // num_cols
                    col = env_idx % num_cols
                    x = row_offset - row * spacing
                    y = col * spacing - col_offset

                    half_extent = self._env_opts["spawn_height_check_half_extent"]
                    # NOTE(review): both branches are identical — the up-axis
                    # distinction appears unfinished; confirm intended handling
                    # for Y-up stages
                    if up_axis == UsdGeom.Tokens.z:
                        height = self.terrain_generator.get_max_height_in_rect(x, y, half_extent=half_extent)
                    else:
                        height = self.terrain_generator.get_max_height_in_rect(x, y, half_extent=half_extent)

                    # lift the spawn point above the local terrain maximum
                    offsets[env_idx][2] += height + self._env_opts["spawn_height_cushion"]

                self._env_opts["cloning_offset"] = offsets

        else:
            # flat-ground fallback: Isaac's default ground plane
            defaul_prim_path=self._env_opts["ground_plane_prim_path"]+"_default"
            self._ground_plane_prim_paths.append(defaul_prim_path)
            self._ground_plane=self._scene.add_default_ground_plane(z_position=0,
                name="terrain",
                prim_path=defaul_prim_path,
                static_friction=self._env_opts["static_friction"],
                dynamic_friction=self._env_opts["dynamic_friction"],
                restitution=self._env_opts["restitution"])
            self._terrain_collision=None

        # apply/read back the physics material on whichever ground was created
        physics_material=UsdPhysics.MaterialAPI.Apply(self._ground_plane.prim)
        physics_material.CreateDynamicFrictionAttr(self._env_opts["dynamic_friction"])
        physics_material.CreateStaticFrictionAttr(self._env_opts["static_friction"])
        physics_material.CreateRestitutionAttr(self._env_opts["restitution"])
        # self._ground_plane.apply_physics_material(physics_material)

        self._terrain_collision=PhysxSchema.PhysxCollisionAPI.Get(get_current_stage(), self._ground_plane.prim_path)
        self._terrain_material=UsdPhysics.MaterialAPI.Get(get_current_stage(), self._ground_plane.prim_path)
        self._terrain_physix_material=PhysxSchema.PhysxMaterialAPI.Get(get_current_stage(), self._ground_plane.prim_path)

        # import, clone and instrument each robot
        for i in range(len(self._robot_names)):
            robot_name = self._robot_names[i]

            urdf_path = self._robot_urdf_paths[robot_name]
            srdf_path = self._robot_srdf_paths[robot_name]
            fix_base = self._fix_base[i]
            self_collide = self._self_collide[i]
            merge_fixed = self._merge_fixed[i]

            self._generate_rob_descriptions(robot_name=robot_name,
                urdf_path=urdf_path,
                srdf_path=srdf_path)
            self._import_urdf(robot_name,
                fix_base=fix_base,
                self_collide=self_collide,
                merge_fixed=merge_fixed)

            # NOTE(review): clone() runs once per robot; with multiple robots
            # the template env is re-cloned on each iteration — confirm intended
            self._cloner.clone(
                source_prim_path=self._env_opts["template_env_ns"],
                prim_paths=self._envs_prim_paths,
                replicate_physics=self._env_opts["replicate_physics"],
                position_offsets=self._env_opts["cloning_offset"]
            ) # we can clone the environment in which all the robos are

            base_link_name=self._env_opts["base_linkname"]
            if self._env_opts["deduce_base_link"]:
                base_link_name=self._get_baselink_candidate(robot_name=robot_name)

            self._robots_art_views[robot_name] = ArticulationView(name = robot_name + "ArtView",
                prim_paths_expr = self._env_opts["envs_ns"] + "/env_.*"+ "/" + robot_name + "/" + base_link_name,
                reset_xform_properties=False)
            self._robots_articulations[robot_name] = self._scene.add(self._robots_art_views[robot_name])

            # height grid sensor (terrain may be None if using flat ground)
            self._height_sensors[robot_name] = HeightGridSensor(
                terrain_utils=self.terrain_generator if not self._env_opts["use_flat_ground"] else None,
                grid_size=int(self._env_opts["height_sensor_pixels"]),
                resolution=float(self._env_opts["height_sensor_resolution"]),
                forward_offset=float(self._env_opts["height_sensor_forward_offset"]),
                lateral_offset=float(self._env_opts["height_sensor_lateral_offset"]),
                n_envs=self._num_envs,
                device=self._device,
                dtype=self._dtype)
            # ensure shared-data flags are set if a height sensor is active
            self._env_opts["height_sensor_pixels"] = int(self._env_opts["height_sensor_pixels"])
            self._env_opts["height_sensor_resolution"] = float(self._env_opts["height_sensor_resolution"])
            self._enable_height_shared = True
            self._height_sensor_pixels = self._env_opts["height_sensor_pixels"]
            self._height_sensor_resolution = self._env_opts["height_sensor_resolution"]
            self._height_vis_step[robot_name] = 0
            if self._env_opts["enable_height_vis"]:
                self._height_vis[robot_name] = HeightGridVisualizer(
                    robot_name=robot_name,
                    num_envs=self._num_envs,
                    grid_size=int(self._env_opts["height_sensor_pixels"]),
                    resolution=float(self._env_opts["height_sensor_resolution"]),
                    marker_radius=float(self._env_opts["height_vis_radius"]),
                    forward_offset=float(self._env_opts["height_sensor_forward_offset"]),
                    lateral_offset=float(self._env_opts["height_sensor_lateral_offset"]),
                    device=self._device,
                    dtype=self._dtype)

            self._blink_rigid_prim_views[robot_name] = RigidPrimView(prim_paths_expr=self._env_opts["envs_ns"] + "/env_.*"+ "/" + robot_name + "/" + base_link_name,
                name = robot_name + "RigidPrimView") # base link prim views
            self._scene.add(self._blink_rigid_prim_views[robot_name]) # need to add so it is properly initialized when resetting world

            # self._robots_geom_prim_views[robot_name] = GeometryPrimView(name = robot_name + "GeomView",
            #     prim_paths_expr = self._env_ns + "/env*"+ "/" + robot_name,
            #     # prepare_contact_sensors = True
            # )
            # self._robots_geom_prim_views[robot_name].apply_collision_apis() # to be able to apply contact sensors

            # init contact sensors
            self._init_contact_sensors(robot_name=robot_name) # IMPORTANT: this has to be called
            # after calling the clone() method and initializing articulation views!!

        # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_1/collisions/mesh_1")
        # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_2/collisions/mesh_1")
        # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_3/collisions/mesh_1")
        # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_4/collisions/mesh_1")

        # filter collisions between default ground plane and custom terrains
        # self._cloner.filter_collisions(physicsscene_path = self._physics_context.prim_path,
        #     collision_root_path = "/World/terrain_collisions",
        #     prim_paths=[self._ground_plane_prim_paths[1]],
        #     global_paths=[self._ground_plane_prim_paths[0]]
        #     )

        # delete_prim(self._env_opts["ground_plane_prim_path"] + "/SphereLight") # we remove the default spherical light

        # set default camera viewport position and target
        camera_position=[4.2, 4.2, 1.5]
        camera_target=[0, 0, 0]
        # use a single camera prim (configurable) for both viewport and render products
        camera_prim = self._env_opts["camera_prim_path"]
        self._set_initial_camera_params(camera_position=camera_position,
            camera_target=camera_target,
            camera_prim_path=camera_prim)

        if self._env_opts["render_to_file"]:
            # base output dir
            from datetime import datetime
            timestamp = datetime.now().strftime("h%H_m%M_s%S_%d_%m_%Y")
            self._render_output_dir = f"/tmp/IsaacRenderings/{timestamp}"
            res = tuple(int(x) for x in self._env_opts["render_resolution"])

            # create render product from chosen camera prim
            self._render_product = rep.create.render_product(camera_prim,
                res, name='rendering_camera')
            self._render_writer = rep.WriterRegistry.get("BasicWriter")
            self._render_writer.initialize(output_dir=self._render_output_dir,
                rgb=True)
            self._render_writer.attach([self._render_product])
            # optional top-down capture
            if self._env_opts["render_panoramic_cam"]:
                td_height = float(self._env_opts["render_panoramic_cam_height"])
                td_dir = self._render_output_dir + "/panoramic_cam"
                td_offset = self._env_opts["render_panoramic_cam_target_xy"]
                td_target_z = float(self._env_opts["render_panoramic_cam_target_z"])
                pos = [8.0, 11.0, td_height]
                self._panoramic_cam_camera = rep.create.camera(focal_length=12,
                    name='rendering_camera_panoramic_cam',
                    clipping_range = (1, 200),
                    position = pos,
                    look_at = [td_offset[0], td_offset[1], td_target_z])
                self._panoramic_cam_render_product = rep.create.render_product(self._panoramic_cam_camera,
                    res, name='rendering_camera_panoramic_cam')
                self._panoramic_cam_writer = rep.WriterRegistry.get("BasicWriter")
                self._panoramic_cam_writer.initialize(output_dir=td_dir, rgb=True)
                self._panoramic_cam_writer.attach([self._panoramic_cam_render_product])

        self.apply_collision_filters(self._physics_context.prim_path,
            "/World/collisions")

        self._reset_sim()

        self._fill_robot_info_from_world()
        # initializes robot state data

        # update solver options
        self._update_art_solver_options()
        self._get_solver_info() # get again solver option before printing everything
        self._print_envs_info() # debug print

        # for n in range(self._n_init_steps): # run some initialization steps
        #     self._step_sim()

        self._init_robots_state()

        self.scene_setup_completed = True

        # NOTE(review): source tag says "set_up_scene" while this method is
        # _configure_scene — string kept as-is, confirm before changing
        Journal.log(self.__class__.__name__,
            "set_up_scene",
            "finished scene setup...",
            LogType.STAT,
            throw_when_excep = True)

        # low-level Isaac interfaces used elsewhere in the class
        self._is = _sensor.acquire_imu_sensor_interface()
        self._dyn_control=_dynamic_control.acquire_dynamic_control_interface()
+
+ def _set_contact_links_material(self, prim_path: str):
+ prim=get_prim_at_path(prim_path)
+ physics_material=UsdPhysics.MaterialAPI.Apply(prim)
+ physics_material.CreateDynamicFrictionAttr(0)
+ physics_material.CreateStaticFrictionAttr(0)
+ physics_material.CreateRestitutionAttr(1.0)
+ physxMaterialAPI=PhysxSchema.PhysxMaterialAPI.Apply(prim)
+ physxMaterialAPI.CreateFrictionCombineModeAttr().Set("multiply") # average, min, multiply, max
+ physxMaterialAPI.CreateRestitutionCombineModeAttr().Set("multiply")
+
+ def _get_lightblue_checker_texture_path(self) -> str:
+ tex_rel_path = os.path.join(os.path.dirname(__file__), "..", "assets", "textures", "ibrido_terrain_texture.png")
+ return os.path.abspath(tex_rel_path)
+
    def _ensure_lightblue_checker_material(self):
        """Create (or reuse) a light-blue checker material for primitive terrains.

        Builds a UsdPreviewSurface shader graph
        (st reader -> 2D UV transform -> UV texture -> PBR surface) bound to
        the texture returned by _get_lightblue_checker_texture_path().

        Returns:
            str: the material prim path ("/World/Looks/IbridoCheckerMaterial").
        """
        stage = get_current_stage()
        mat_path = "/World/Looks/IbridoCheckerMaterial"
        mat_prim = stage.GetPrimAtPath(mat_path)
        # idempotent: reuse the material if a previous call created it
        if mat_prim.IsValid():
            return mat_path

        texture_path = self._get_lightblue_checker_texture_path()

        material = UsdShade.Material.Define(stage, mat_path)

        # reads the mesh "st" primvar (UV coordinates)
        st_reader = UsdShade.Shader.Define(stage, f"{mat_path}/PrimvarReader_st")
        st_reader.CreateIdAttr("UsdPrimvarReader_float2")
        st_reader.CreateInput("varname", Sdf.ValueTypeNames.Token).Set("st")
        st_reader.CreateOutput("result", Sdf.ValueTypeNames.Float2)

        uv_transform = UsdShade.Shader.Define(stage, f"{mat_path}/UVTransform")
        uv_transform.CreateIdAttr("UsdTransform2d")
        # keep UV scale at 1 here; tiling is controlled via mesh UVs
        uv_transform.CreateInput("in", Sdf.ValueTypeNames.Float2).ConnectToSource(st_reader.GetOutput("result"))
        uv_transform.CreateInput("scale", Sdf.ValueTypeNames.Float2).Set(Gf.Vec2f(1.0, 1.0))
        uv_transform.CreateOutput("result", Sdf.ValueTypeNames.Float2)

        tex = UsdShade.Shader.Define(stage, f"{mat_path}/CheckerTex")
        tex.CreateIdAttr("UsdUVTexture")
        tex.CreateInput("file", Sdf.ValueTypeNames.Asset).Set(Sdf.AssetPath(texture_path))
        tex.CreateInput("st", Sdf.ValueTypeNames.Float2).ConnectToSource(uv_transform.GetOutput("result"))
        tex.CreateInput("wrapS", Sdf.ValueTypeNames.Token).Set("repeat")
        tex.CreateInput("wrapT", Sdf.ValueTypeNames.Token).Set("repeat")
        # nearest filtering keeps the checker edges crisp
        tex.CreateInput("minFilter", Sdf.ValueTypeNames.Token).Set("nearest")
        tex.CreateInput("magFilter", Sdf.ValueTypeNames.Token).Set("nearest")
        # disable mipmaps to avoid blurring sharp edges
        tex.CreateInput("mipFilter", Sdf.ValueTypeNames.Token).Set("nearest")
        tex.CreateInput("enableMipMap", Sdf.ValueTypeNames.Bool).Set(False)
        # light-blue fallback color used when the texture file cannot be read
        tex.CreateInput("fallback", Sdf.ValueTypeNames.Color4f).Set(Gf.Vec4f(0.69, 0.85, 1.0, 1.0))
        tex.CreateOutput("rgb", Sdf.ValueTypeNames.Float3)
        tex.CreateOutput("a", Sdf.ValueTypeNames.Float)

        pbr = UsdShade.Shader.Define(stage, f"{mat_path}/PBRShader")
        pbr.CreateIdAttr("UsdPreviewSurface")
        pbr.CreateInput("diffuseColor", Sdf.ValueTypeNames.Color3f).ConnectToSource(tex.GetOutput("rgb"))
        pbr.CreateInput("roughness", Sdf.ValueTypeNames.Float).Set(0.45)
        pbr.CreateInput("metallic", Sdf.ValueTypeNames.Float).Set(0.0)
        pbr.CreateOutput("surface", Sdf.ValueTypeNames.Token)

        material.CreateSurfaceOutput().ConnectToSource(pbr.GetOutput("surface"))

        return mat_path
+
+ def _ensure_groundplane_material(self):
+ """Guarantee a ground-plane material exists (default checker) and return its path."""
+ stage = get_current_stage()
+ mat_path = "/World/Looks/groundPlaneMaterial"
+ mat_prim = stage.GetPrimAtPath(mat_path)
+ if mat_prim.IsValid():
+ return mat_path
+
+ # create a temporary default ground plane to spawn the checker material, then delete the geom
+ tmp_gp_path = "/World/tmp_ground_for_material"
+ self._scene.add_default_ground_plane(z_position=-1000.0,
+ name="tmp_ground_for_material",
+ prim_path=tmp_gp_path,
+ static_friction=self._env_opts["static_friction"],
+ dynamic_friction=self._env_opts["dynamic_friction"],
+ restitution=self._env_opts["restitution"])
+
+ mat_prim = stage.GetPrimAtPath(mat_path)
+ prim_utils.delete_prim(tmp_gp_path)
+
+ return mat_path if mat_prim.IsValid() else None
+
+ def _apply_checker_material_to_terrain(self, terrain_root_path: str, material_path: str):
+ """Bind the checker material to all terrain prims (visual only)."""
+
+ stage = get_current_stage()
+ mat_prim = stage.GetPrimAtPath(material_path)
+ if not mat_prim.IsValid():
+ return
+ material = UsdShade.Material(mat_prim)
+ for prim in stage.Traverse():
+ path = prim.GetPath().pathString
+ if not path.startswith(terrain_root_path):
+ continue
+ binding = UsdShade.MaterialBindingAPI.Apply(prim)
+ binding.Bind(material, UsdShade.Tokens.strongerThanDescendants)
+
    def _add_checker_overlay_plane(self, terrain_root_path: str, material_path: str):
        """Create a thin visual-only mesh with UVs so the checker pattern shows up even on cube prims.

        Args:
            terrain_root_path: root prim path of the generated terrain.
            material_path: prim path of the checker material to bind.
        """
        stage = get_current_stage()
        plane_path = f"{terrain_root_path}/visual_checker"
        plane_prim = stage.GetPrimAtPath(plane_path)
        if plane_prim.IsValid():
            # if already exists, just (re)bind material
            mat_prim = stage.GetPrimAtPath(material_path)
            if mat_prim.IsValid():
                UsdShade.MaterialBindingAPI.Apply(plane_prim).Bind(UsdShade.Material(mat_prim), UsdShade.Tokens.strongerThanDescendants)
            return

        # try to read base slab dimensions/position to size the overlay
        slab_path = terrain_root_path + "_slab"
        slab_prim = stage.GetPrimAtPath(slab_path)
        # fall back to env-opt ground size if the slab prim is missing
        center = Gf.Vec3f(0.0, 0.0, 0.0)
        width = float(self._env_opts.get("ground_size", 50.0))
        length = width
        thickness = 0.1
        if slab_prim.IsValid():
            xformable = UsdGeom.Xformable(slab_prim)
            # NOTE(review): assumes the slab encodes its size in a scale op and
            # its position in a translate op — confirm against the terrain generator
            for op in xformable.GetOrderedXformOps():
                if op.GetOpType() == UsdGeom.XformOp.TypeTranslate:
                    center = Gf.Vec3f(op.Get())
                elif op.GetOpType() == UsdGeom.XformOp.TypeScale:
                    scale = op.Get()
                    width = float(scale[0])
                    length = float(scale[1])
                    thickness = float(scale[2])

        half_w = 0.5 * width
        half_l = 0.5 * length
        z = center[2] + 0.5 * thickness + 1e-3 # slightly above the slab to avoid z-fighting

        # single quad covering the slab footprint
        plane = UsdGeom.Mesh.Define(stage, plane_path)
        plane.CreatePointsAttr([
            Gf.Vec3f(center[0] - half_w, center[1] - half_l, z),
            Gf.Vec3f(center[0] + half_w, center[1] - half_l, z),
            Gf.Vec3f(center[0] + half_w, center[1] + half_l, z),
            Gf.Vec3f(center[0] - half_w, center[1] + half_l, z),
        ])
        plane.CreateFaceVertexCountsAttr([4])
        plane.CreateFaceVertexIndicesAttr([0, 1, 2, 3])
        plane.CreateDoubleSidedAttr(True)

        # increase tiling density; adjustable via env opt
        # NOTE(review): the density factor 3.0 is hard-coded here, while
        # _add_checker_overlays_on_tiles reads "checker_uv_density" — confirm
        uv_repeats = max(1, int((width / 10.0) * 3.0))
        primvars = UsdGeom.PrimvarsAPI(plane)
        st = primvars.CreatePrimvar("st", Sdf.ValueTypeNames.TexCoord2fArray, UsdGeom.Tokens.vertex)
        st.Set(Vt.Vec2fArray([
            Gf.Vec2f(0.0, 0.0),
            Gf.Vec2f(uv_repeats, 0.0),
            Gf.Vec2f(uv_repeats, uv_repeats),
            Gf.Vec2f(0.0, uv_repeats),
        ]))
        st.SetInterpolation(UsdGeom.Tokens.vertex)

        mat_prim = stage.GetPrimAtPath(material_path)
        if mat_prim.IsValid():
            material = UsdShade.Material(mat_prim)
            UsdShade.MaterialBindingAPI.Apply(plane.GetPrim()).Bind(material, UsdShade.Tokens.strongerThanDescendants)
+
    def _add_checker_overlays_on_tiles(self, terrain_root_path: str, material_path: str):
        """Add visual quads on top of each tile cube so the checker texture appears on raised steps.

        Args:
            terrain_root_path: root prim path of the generated terrain.
            material_path: prim path of the checker material to bind.
        """
        stage = get_current_stage()
        mat_prim = stage.GetPrimAtPath(material_path)
        if not mat_prim.IsValid():
            return
        material = UsdShade.Material(mat_prim)
        for prim in stage.Traverse():
            path = prim.GetPath().pathString
            # only Cube prims under the terrain root, skipping walls and the base slab
            if not path.startswith(terrain_root_path):
                continue
            if prim.GetTypeName() != "Cube":
                continue
            name = path.split("/")[-1]
            if "wall" in name or name.endswith("_slab"):
                continue
            xformable = UsdGeom.Xformable(prim)
            center = Gf.Vec3f(0.0, 0.0, 0.0)
            scale = Gf.Vec3f(1.0, 1.0, 1.0)
            # NOTE(review): assumes tile cubes encode placement via translate
            # and size via scale xform ops — confirm against the terrain generator
            for op in xformable.GetOrderedXformOps():
                if op.GetOpType() == UsdGeom.XformOp.TypeTranslate:
                    center = Gf.Vec3f(op.Get())
                elif op.GetOpType() == UsdGeom.XformOp.TypeScale:
                    scale = Gf.Vec3f(op.Get())
            width, length, height = float(scale[0]), float(scale[1]), float(scale[2])
            half_w = 0.5 * width
            half_l = 0.5 * length
            # quad sits just above the tile's top face to avoid z-fighting
            z = center[2] + 0.5 * height + 1e-3
            overlay_path = f"{path}_checker"
            if stage.GetPrimAtPath(overlay_path).IsValid():
                # overlay already present: only refresh the material binding
                UsdShade.MaterialBindingAPI.Apply(stage.GetPrimAtPath(overlay_path)).Bind(material, UsdShade.Tokens.strongerThanDescendants)
                continue
            mesh = UsdGeom.Mesh.Define(stage, overlay_path)
            mesh.CreatePointsAttr([
                Gf.Vec3f(center[0] - half_w, center[1] - half_l, z),
                Gf.Vec3f(center[0] + half_w, center[1] - half_l, z),
                Gf.Vec3f(center[0] + half_w, center[1] + half_l, z),
                Gf.Vec3f(center[0] - half_w, center[1] + half_l, z),
            ])
            mesh.CreateFaceVertexCountsAttr([4])
            mesh.CreateFaceVertexIndicesAttr([0, 1, 2, 3])
            mesh.CreateDoubleSidedAttr(True)
            # tiling density scales with tile width; tunable via env opt
            uv_repeats = max(1, int((width / 10.0) * float(self._env_opts.get("checker_uv_density", 3.0))))
            primvars = UsdGeom.PrimvarsAPI(mesh)
            st = primvars.CreatePrimvar("st", Sdf.ValueTypeNames.TexCoord2fArray, UsdGeom.Tokens.vertex)
            st.Set(Vt.Vec2fArray([
                Gf.Vec2f(0.0, 0.0),
                Gf.Vec2f(uv_repeats, 0.0),
                Gf.Vec2f(uv_repeats, uv_repeats),
                Gf.Vec2f(0.0, uv_repeats),
            ]))
            st.SetInterpolation(UsdGeom.Tokens.vertex)
            UsdShade.MaterialBindingAPI.Apply(mesh.GetPrim()).Bind(material, UsdShade.Tokens.strongerThanDescendants)
+
+ def _is_link(self, prim):
+ return prim.GetTypeName() == 'Xform'
+
+ def _is_joint(self, prim):
+ return prim.GetTypeName() == 'PhysicsRevoluteJoint'
+
+ def _create_collision_group(self, group_path, link_paths):
+ """
+ Create a collision group under the given group_path that contains the links.
+ Args:
+ group_path (str): Path to create the collision group.
+ link_paths (List[str]): List of link paths to include in this group.
+ """
+ collision_group = Sdf.PrimSpec(
+ self._stage.GetRootLayer().GetPrimAtPath(group_path),
+ group_path.split("/")[-1],
+ Sdf.SpecifierDef,
+ "PhysicsCollisionGroup"
+ )
+ # Add the links to the collision group
+ for link_path in link_paths:
+ includes_rel = Sdf.RelationshipSpec(collision_group, "collection:colliders:includes", False)
+ includes_rel.targetPathList.Append(link_path)
+
+ def _add_collision_filter(self, group_path, link1, link2):
+ """
+ Filters collision between two successive links.
+
+ Args:
+ group_path (str): Path of the collision group.
+ link1 (str): Path of the first link.
+ link2 (str): Path of the second link.
+ """
+ # Create a relationship to filter collisions between the two links
+ filtered_groups = Sdf.RelationshipSpec(
+ self._stage.GetPrimAtPath(group_path),
+ "physics:filteredGroups",
+ False
+ )
+ filtered_groups.targetPathList.Append(link1)
+ filtered_groups.targetPathList.Append(link2)
+
+ def _render_sim(self, mode="human"):
+
+ if mode == "human":
+ # follow requested robot/env
+ if self._env_opts["use_follow_camera"]:
+ ridx = int(self._env_opts["render_follow_robot_idx"])
+ eidx = int(self._env_opts["render_follow_env_idx"])
+ if ridx < len(self._robot_names) and eidx < self._num_envs:
+ rname = self._robot_names[ridx]
+ pos = self._root_p.get(rname, None)
+ if pos is not None and pos.shape[0] > eidx:
+ base = pos[eidx].detach().cpu()
+ offset = torch.as_tensor(self._env_opts["render_follow_offset"],
+ device=base.device, dtype=base.dtype)
+ target_offset = torch.as_tensor(self._env_opts["render_follow_target_offset"],
+ device=base.device, dtype=base.dtype)
+ quat = self._root_q.get(rname, None)
+ if quat is not None and quat.shape[0] > eidx:
+ q = quat[eidx].detach().cpu()
+ w, x, y, z = q.unbind(-1)
+ yaw = torch.atan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z))
+ cy, sy = torch.cos(yaw), torch.sin(yaw)
+ rot = torch.tensor([[cy, -sy], [sy, cy]], device=base.device, dtype=base.dtype)
+ offset_xy = torch.matmul(rot, offset[:2])
+ target_offset_xy = torch.matmul(rot, target_offset[:2])
+ offset = torch.stack((offset_xy[0], offset_xy[1], offset[2]))
+ target_offset = torch.stack((target_offset_xy[0], target_offset_xy[1], target_offset[2]))
+ eye = (base + offset).tolist()
+ target = (base + target_offset).tolist()
+ set_camera_view(eye=eye, target=target, camera_prim_path=self._env_opts["camera_prim_path"])
+
+ self._world.render()
+ # optional height grid visualization
+ if self._env_opts["enable_height_vis"]:
+ for robot_name, vis in self._height_vis.items():
+ # use latest stored states
+ if robot_name not in self._height_imgs or robot_name not in self._height_sensors:
+ continue
+ heights = self._height_imgs.get(robot_name, None)
+ if heights is None or heights.numel() == 0:
+ continue
+ pos_src = self._root_p.get(robot_name, None)
+ quat_src = self._root_q.get(robot_name, None)
+ if pos_src is None or quat_src is None:
+ continue
+ step = self._height_vis_step.get(robot_name, 0)
+ period = max(1, int(self._env_opts["height_vis_update_period"]))
+ if step % period == 0:
+ try:
+ vis.update(
+ base_positions=pos_src,
+ base_quats=quat_src,
+ heights=heights)
+ except Exception as exc:
+ print(f"[height_vis] update failed for {robot_name}: {exc}")
+ self._height_vis_step[robot_name] = step + 1
+ return None
+ elif mode == "rgb_array":
+ # check if viewport is enabled -- if not, then complain because we won't get any data
+ if not self._render or not self._record:
+ exception = f"Cannot render '{mode}' when rendering is not enabled. Please check the provided" + \
+ "arguments to the environment class at initialization."
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ # obtain the rgb data
+ rgb_data = self._rgb_annotator.get_data()
+ # convert to numpy array
+ rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape)
+ # return the rgb data
+ return rgb_data[:, :, :3]
+ else:
+ return None
+
+ def _create_viewport_render_product(self, resolution=None):
+ """Create a render product of the viewport for rendering."""
+
+ try:
+
+ # create render product
+ camera_prim = self._env_opts["camera_prim_path"]
+ res = resolution
+ if res is None:
+ res = tuple(int(x) for x in self._env_opts["render_resolution"])
+ self._render_product = rep.create.render_product(camera_prim, res)
+ # create rgb annotator -- used to read data from the render product
+ self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu")
+ self._rgb_annotator.attach([self._render_product])
+ self._record = True
+ except Exception as e:
+ carb.log_info("omni.replicator.core could not be imported. Skipping creation of render product.")
+ carb.log_info(str(e))
+
+ def _close(self):
+ if self._simulation_app.is_running():
+ self._simulation_app.close()
+
+ def _step_world(self):
+ self._world.step(render=False, step_sim=True)
+
+ if (self._render) and (self._render_step_counter%self._env_opts["rendering_freq"]==0):
+ # if self._env_opts["render_to_file"]:
+ # rep.orchestrator.step()
+ self._render_sim() # manually trigger rendering (World.step(render=True) for some reason
+ # will step the simulation for a dt==rendering_dt)
+ self._render_step_counter += 1
+
+ def _generate_jnt_imp_control(self, robot_name: str):
+
+ jnt_imp_controller = OmniJntImpCntrl(articulation=self._robots_articulations[robot_name],
+ device=self._device,
+ dtype=self._dtype,
+ enable_safety=True,
+ urdf_path=self._urdf_dump_paths[robot_name],
+ config_path=self._jnt_imp_config_paths[robot_name],
+ enable_profiling=False,
+ debug_checks=self._debug,
+ override_art_controller=self._override_low_lev_controller)
+
+ return jnt_imp_controller
+
+ def _reset_sim(self):
+ self._world.reset(soft=False)
+
+ def _reset_state(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None,
+ randomize: bool = False):
+
+ if env_indxs is not None:
+ if self._debug:
+ if self._use_gpu:
+ if not env_indxs.device.type == "cuda":
+ error = "Provided env_indxs should be on GPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ else:
+ if not env_indxs.device.type == "cpu":
+ error = "Provided env_indxs should be on CPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ if randomize:
+ self._randomize_yaw(robot_name=robot_name,env_indxs=env_indxs)
+
+ # root q
+ self._robots_art_views[robot_name].set_world_poses(positions = self._root_p_default[robot_name][env_indxs, :],
+ orientations=self._root_q_default[robot_name][env_indxs, :],
+ indices = env_indxs)
+ # jnts q
+ self._robots_art_views[robot_name].set_joint_positions(positions = self._jnts_q_default[robot_name][env_indxs, :],
+ indices = env_indxs)
+ # root v and omega
+ self._robots_art_views[robot_name].set_joint_velocities(velocities = self._jnts_v_default[robot_name][env_indxs, :],
+ indices = env_indxs)
+ # jnts v
+ concatenated_vel = torch.cat((self._root_v_default[robot_name][env_indxs, :],
+ self._root_omega_default[robot_name][env_indxs, :]), dim=1)
+ self._robots_art_views[robot_name].set_velocities(velocities = concatenated_vel,
+ indices = env_indxs)
+ # jnts eff
+ self._robots_art_views[robot_name].set_joint_efforts(efforts = self._jnts_eff_default[robot_name][env_indxs, :],
+ indices = env_indxs)
+ self._reset_perturbations(robot_name=robot_name, env_indxs=env_indxs)
+ else:
+
+ if randomize:
+ self._randomize_yaw(robot_name=robot_name,env_indxs=None)
+
+ # root q
+ self._robots_art_views[robot_name].set_world_poses(positions = self._root_p_default[robot_name][:, :],
+ orientations=self._root_q_default[robot_name][:, :],
+ indices = None)
+ # jnts q
+ self._robots_art_views[robot_name].set_joint_positions(positions = self._jnts_q_default[robot_name][:, :],
+ indices = None)
+ # root v and omega
+ self._robots_art_views[robot_name].set_joint_velocities(velocities = self._jnts_v_default[robot_name][:, :],
+ indices = None)
+ # jnts v
+ concatenated_vel = torch.cat((self._root_v_default[robot_name][:, :],
+ self._root_omega_default[robot_name][:, :]), dim=1)
+ self._robots_art_views[robot_name].set_velocities(velocities = concatenated_vel,
+ indices = None)
+ # jnts eff
+ self._robots_art_views[robot_name].set_joint_efforts(efforts = self._jnts_eff_default[robot_name][:, :],
+ indices = None)
+ self._reset_perturbations(robot_name=robot_name, env_indxs=None)
+
+ # we update the robots state
+ self._read_root_state_from_robot(env_indxs=env_indxs,
+ robot_name=robot_name)
+ self._read_jnts_state_from_robot(env_indxs=env_indxs,
+ robot_name=robot_name)
+
+ def _reset_perturbations(self, robot_name: str, env_indxs: torch.Tensor = None):
+ """Clear perturbation state and wrenches for selected envs."""
+ if robot_name not in self._pert_active:
+ return
+ if env_indxs is None:
+ self._pert_active[robot_name].zero_()
+ self._pert_steps_remaining[robot_name].zero_()
+ self._pert_forces_world[robot_name].zero_()
+ self._pert_torques_world[robot_name].zero_()
+ self._pert_det_counter[robot_name].zero_()
+ else:
+ self._pert_active[robot_name][env_indxs] = False
+ self._pert_steps_remaining[robot_name][env_indxs] = 0
+ self._pert_forces_world[robot_name][env_indxs, :] = 0
+ self._pert_torques_world[robot_name][env_indxs, :] = 0
+ self._pert_det_counter[robot_name][env_indxs] = 0
+
    def _process_perturbations(self):
        """Advance the random external-wrench (perturbation) state machine and apply wrenches.

        For each robot, every call:
          1. decrements step counters of active perturbations (in place);
          2. deactivates and zeroes perturbations that just expired;
          3. triggers new perturbations -- either with deterministic spacing
             ("det_pert_rate") or probabilistically -- sampling impulse direction,
             magnitude and duration, then converting impulse -> force/torque by
             dividing by the sampled duration in seconds;
          4. applies the resulting world-frame forces/torques to the base links.

        Buffers (_pert_active, _pert_steps_remaining, _pert_forces_world, ...)
        are pre-allocated per robot and updated in place to avoid per-step
        allocations.
        """

        # Iterate over each robot view
        for i in range(len(self._robot_names)):
            robot_name = self._robot_names[i]

            # Pre-fetch views for code clarity (references, not copies)
            active = self._pert_active[robot_name]
            steps_rem = self._pert_steps_remaining[robot_name]
            forces_world = self._pert_forces_world[robot_name]
            torques_world = self._pert_torques_world[robot_name]
            # planar-only mode is optional in the env opts; defaults to False
            planar_only = self._env_opts["pert_planar_only"] if "pert_planar_only" in self._env_opts else False

            # --- 1. Update Active Counters (In-Place) ---
            if active.any():
                # In-place subtraction
                steps_rem[active] -= 1

            # --- 2. Reset Finished Perturbations (In-Place) ---
            # Logic: Active AND (Steps <= 0)
            # Note: Creating 'newly_ended' boolean mask is a tiny unavoidable allocation
            newly_ended = active & (steps_rem <= 0)

            if newly_ended.any():
                # Use masked_fill_ for in-place zeroing
                active.masked_fill_(newly_ended, False)
                forces_world[newly_ended, :]=0
                torques_world[newly_ended, :]=0
                steps_rem.masked_fill_(newly_ended, 0)

            # --- 3. Trigger New Perturbations ---

            det_rate = self._env_opts["det_pert_rate"]
            if det_rate:
                # deterministic spacing: count physics steps and trigger when threshold reached (if not already active)
                det_counter = self._pert_det_counter[robot_name]
                det_counter += 1
                trigger_mask = (det_counter >= self._pert_det_steps) & (~active)
            else:
                # Reuse scratch buffer for probability check
                # Assumes self._pert_scratch is (num_envs, 1) pre-allocated
                self._pert_scratch[robot_name].uniform_(0.0, 1.0) # used for triggering new perturbations
                # Check probs against threshold
                # Flatten scratch to (N,) to match 'active' mask
                trigger_mask = (self._pert_scratch[robot_name].flatten() < self._pert_wrenches_prob) & (~active)

            if trigger_mask.any():

                # Cache weights (references)
                weight = self._weights[robot_name]

                # we now treat the configured "max_*_impulse_*" as the maximum **impulse** (N·s or N·m·s),
                # and convert impulse -> force/torque by dividing by the sampled duration (seconds).
                # Use per-robot weight scaling as before.
                lin_impulse_max = self._env_opts["max_lin_impulse_norm"] * weight
                ang_impulse_max = self._env_opts["max_ang_impulse_norm"] * weight

                # --- Force (Impulse) Direction Generation (Reuse _pert_lindir buffer) ---
                lindir = self._pert_lindir[robot_name] # (N, 3)

                if planar_only:
                    # planar push direction from random yaw
                    # NOTE: the scratch buffer is clobbered here; it is re-sampled
                    # before each later use, so reuse is safe
                    yaw_angles = self._pert_scratch[robot_name].uniform_(0.0, 2*math.pi).flatten()
                    lindir[:, 0] = torch.cos(yaw_angles)
                    lindir[:, 1] = torch.sin(yaw_angles)
                    lindir[:, 2] = 0.0
                else:
                    # 1. Fill with Standard Normal noise in-place
                    lindir.normal_()
                    # 2. Normalize in-place
                    norms = torch.norm(lindir, dim=1, keepdim=True).clamp_min_(1e-6)
                    lindir.div_(norms)

                # 3. Sample linear impulse magnitudes (reuse scratch)
                # scratch has shape (N,1) - uniform [0,1]
                self._pert_scratch[robot_name].uniform_(self._env_opts["lin_impulse_mag_min"], self._env_opts["lin_impulse_mag_max"])
                # impulse vectors = unit_dir * (rand * lin_impulse_max)

                lindir.mul_(self._pert_scratch[robot_name] * lin_impulse_max) # now contains linear impulses (N,3)

                # --- Angular (Impulse) Direction Generation (Reuse _pert_angdir buffer) ---
                angdir = self._pert_angdir[robot_name] # (N, 3)

                if planar_only:
                    angdir.zero_() # no torque when planar-only is requested
                else:
                    # 1. Fill with Standard Normal noise
                    angdir.normal_()
                    # 2. Normalize
                    norms = torch.norm(angdir, dim=1, keepdim=True).clamp_min_(1e-6)
                    angdir.div_(norms)

                # 3. Sample angular impulse magnitudes (reuse scratch)
                self._pert_scratch[robot_name].uniform_(0.0, 1.0)
                angdir.mul_(self._pert_scratch[robot_name] * ang_impulse_max) # now contains angular impulses (N,3)

                # --- Duration Generation (Reuse _pert_durations) ---
                # Keep integer steps sampling (same shape/device/dtype)
                self._pert_durations[robot_name] = torch.randint_like(
                    self._pert_durations[robot_name],
                    low=self._pert_min_steps,
                    high=self._pert_max_steps + 1
                )

                # --- convert to float
                duration_steps = self._pert_durations[robot_name].to(dtype=lindir.dtype, device=lindir.device)
                # duration in seconds (shape (N,))
                duration_seconds = duration_steps * self.physics_dt()
                # avoid divide-by-zero
                duration_seconds = duration_seconds.clamp_min_(1e-6)

                # compute per-step forces/torques = impulse / duration_seconds
                # lindir currently holds linear impulses, angdir holds angular impulses
                forces_to_apply = lindir / duration_seconds
                torques_to_apply = angdir / duration_seconds

                # Optional clipping based on robot weight (min/max)
                f_norm = torch.norm(forces_to_apply, dim=1, keepdim=True).clamp_min_(1e-9)
                target_norm = f_norm

                # clamp force norm to [weight*min_scale, weight*max_scale] when the
                # corresponding scale is positive (0 disables that bound)
                f_clip_scale_max = self._env_opts["pert_force_max_weight_scale"]
                if f_clip_scale_max > 0.0:
                    force_max = self._weights[robot_name] * f_clip_scale_max # (N,1)
                    target_norm = torch.minimum(target_norm, force_max)

                f_clip_scale_min = self._env_opts["pert_force_min_weight_scale"]
                if f_clip_scale_min > 0.0:
                    force_min = self._weights[robot_name] * f_clip_scale_min
                    target_norm = torch.maximum(target_norm, force_min)

                forces_to_apply = forces_to_apply * (target_norm / f_norm)

                # torque norm is only clipped from above (scaled by a lever arm)
                t_clip_scale = self._env_opts["pert_torque_max_weight_scale"]
                if t_clip_scale > 0.0:
                    torque_max = self._weights[robot_name] * self._env_opts["max_ang_impulse_lever"] * t_clip_scale
                    t_norm = torch.norm(torques_to_apply, dim=1, keepdim=True).clamp_min_(1e-9)
                    t_scale = torch.minimum(torch.ones_like(t_norm), torque_max / t_norm)
                    torques_to_apply = torques_to_apply * t_scale

                # --- Update State Buffers ---
                # Use boolean indexing to scatter only triggered values
                active[trigger_mask] = True
                steps_rem[trigger_mask] = self._pert_durations[robot_name][trigger_mask, :].flatten()
                forces_world[trigger_mask, :] = forces_to_apply[trigger_mask, :]
                torques_world[trigger_mask, :] = torques_to_apply[trigger_mask, :]
                if det_rate:
                    det_counter[trigger_mask] = 0

            # --- 4. Apply Wrenches (Vectorized) ---
            # Only call API if there are active perturbations to minimize overhead
            # NOTE(review): the wrench API is in fact called unconditionally each
            # step (with zeroed rows for inactive envs) -- confirm that is intended

            forces_world[~active, :]=0
            torques_world[~active, :]=0

            self._blink_rigid_prim_views[robot_name].apply_forces_and_torques_at_pos(
                forces=forces_world,
                torques=torques_world,
                positions=None, # body frame origin
                is_global=True
            )
+
+ @override
+ def _pre_step(self):
+
+ if self._env_opts["use_random_pertub"]:
+ self._process_perturbations()
+
+ success=super()._pre_step()
+
+ return success
+
+ @override
+ def _pre_step_db(self):
+
+ if self._env_opts["use_random_pertub"]:
+ self._process_perturbations()
+
+ success=super()._pre_step_db()
+
+ return success
+
+ @override
+ def _update_contact_state(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ super()._update_contact_state(robot_name, env_indxs)
+
+ if self._env_opts["use_random_pertub"]:
+ # write APPLIED perturbations to root wrench (mainly for debug)
+ self.cluster_servers[robot_name].get_state().contact_wrenches_root.set(data=self._pert_forces_world[robot_name][env_indxs, :],
+ data_type="f",
+ contact_name="root",
+ robot_idxs = env_indxs,
+ gpu=self._use_gpu)
+ self.cluster_servers[robot_name].get_state().contact_wrenches_root.set(data=self._pert_torques_world[robot_name][env_indxs, :],
+ data_type="t",
+ contact_name="root",
+ robot_idxs = env_indxs,
+ gpu=self._use_gpu)
+
    def _post_warmup_validation(self, robot_name: str):
        """Validate warmup state: base height, tilt, and (if available) contacts.

        Checks each env's base height against the terrain (plus a cushion) and
        the base tilt against a fixed threshold; logs a warning listing the
        failing envs and returns their indices (remediation code is currently
        disabled).

        Args:
            robot_name: robot whose post-warmup state is validated.

        Returns:
            torch.Tensor: 1D tensor of env indices failing either check.
        """
        # NOTE(review): 'envs' is currently unused
        envs = torch.arange(self._num_envs, device=self._device)

        # terrain height query: max terrain height in a small rectangle around
        # each (x, y); zero everywhere on flat ground / missing generator
        def _terrain_height(xy: torch.Tensor):
            if self._env_opts["use_flat_ground"] or self.terrain_generator is None:
                return torch.zeros((xy.shape[0],), device=xy.device, dtype=self._dtype)
            heights = []
            half_extent = self._env_opts["spawn_height_check_half_extent"]
            for k in range(xy.shape[0]):
                h = self.terrain_generator.get_max_height_in_rect(
                    float(xy[k, 0]), float(xy[k, 1]), half_extent=half_extent)
                heights.append(h)
            return torch.as_tensor(heights, device=xy.device, dtype=self._dtype)

        # base height check: base z must clear terrain + cushion
        base_xy = self._root_p[robot_name][:, 0:2]
        base_z = self._root_p[robot_name][:, 2]
        ground_z = _terrain_height(base_xy)
        margin = float(self._env_opts["spawn_height_cushion"])
        bad_z = base_z < (ground_z + margin)

        # tilt check (angle between base up and world up)
        q = self._root_q[robot_name]
        # quaternion to up vector (quat assumed [w, x, y, z], consistent with
        # the [w, i, j, k] convention noted in _get_root_state)
        w, x, y, z = q[:, 0], q[:, 1], q[:, 2], q[:, 3]
        up = torch.stack([
            2 * (x * z - w * y),
            2 * (y * z + w * x),
            1 - 2 * (x * x + y * y)
        ], dim=1)
        cos_tilt = up[:, 2].clamp(-1.0, 1.0)
        tilt_thresh_deg = 35.0
        bad_tilt = cos_tilt < math.cos(math.radians(tilt_thresh_deg))

        # contact check (only if sensors exist)
        # bad_contact = torch.zeros_like(base_z, dtype=torch.bool)
        # if robot_name in self.omni_contact_sensors and self.omni_contact_sensors[robot_name] is not None:
        #     counts = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device)
        #     for link in self._contact_names.get(robot_name, []):
        #         f_contact = self._get_contact_f(robot_name=robot_name,
        #                                        contact_link=link,
        #                                        env_indxs=None)
        #         if f_contact is None:
        #             continue
        #         # use normal component (assume z-up); ignore tangential forces
        #         active = f_contact[:, 2] > 1e-3
        #         counts += active.int()
        #     bad_contact = counts < 3

        failing = torch.nonzero(bad_z | bad_tilt, as_tuple=False).flatten()
        if failing.numel() > 0:
            # remediate: lift to terrain+margin, upright (preserve yaw), zero root velocities
            # (remediation is currently disabled -- failures are only reported)
            # yaw = torch.atan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z))
            # safe_z = (ground_z + margin)[failing]
            # self._root_p[robot_name][failing, 2] = safe_z
            # cos_h = torch.cos(yaw[failing] / 2)
            # sin_h = torch.sin(yaw[failing] / 2)
            # upright = torch.zeros((failing.shape[0], 4), device=self._device, dtype=self._dtype)
            # upright[:, 0] = cos_h
            # upright[:, 3] = sin_h
            # self._root_q[robot_name][failing, :] = upright
            # self._root_v[robot_name][failing, :] = 0.0
            # self._root_omega[robot_name][failing, :] = 0.0

            msgs = [] # print db message
            if bad_z.any():
                msgs.append(f"low_z envs {torch.nonzero(bad_z, as_tuple=False).flatten().tolist()}")
            if bad_tilt.any():
                msgs.append(f"tilt envs {torch.nonzero(bad_tilt, as_tuple=False).flatten().tolist()}")
            Journal.log(self.__class__.__name__,
                "_post_warmup_validation",
                f"Warmup validation failures for {robot_name}: " + "; ".join(msgs),
                LogType.WARN,
                throw_when_excep=False)
        return failing
+
+ def _import_urdf(self,
+ robot_name: str,
+ fix_base = False,
+ self_collide = False,
+ merge_fixed = True):
+
+ import_config=_urdf.ImportConfig()
+ # status,import_config=omni_kit.commands.execute("URDFCreateImportConfig")
+
+ Journal.log(self.__class__.__name__,
+ "update_root_offsets",
+ "importing robot URDF",
+ LogType.STAT,
+ throw_when_excep = True)
+ _urdf.acquire_urdf_interface()
+ # we overwrite some settings which are bound to be fixed
+ import_config.merge_fixed_joints = merge_fixed # makes sim more stable
+ # in case of fixed joints with light objects
+ import_config.import_inertia_tensor = True
+ # import_config.convex_decomp = False
+ import_config.fix_base = fix_base
+ import_config.self_collision = self_collide
+ # import_config.distance_scale = 1
+ # import_config.make_default_prim = True
+ # import_config.create_physics_scene = True
+ # import_config.default_drive_strength = 1047.19751
+ # import_config.default_position_drive_damping = 52.35988
+ # import_config.default_drive_type = _urdf.UrdfJointTargetType.JOINT_DRIVE_POSITION
+ # import URDF
+ success, robot_prim_path_default = omni_kit.commands.execute(
+ "URDFParseAndImportFile",
+ urdf_path=self._urdf_dump_paths[robot_name],
+ import_config=import_config,
+ # get_articulation_root=True,
+ )
+
+ robot_base_prim_path = self._env_opts["template_env_ns"] + "/" + robot_name
+
+ if success:
+ Journal.log(self.__class__.__name__,
+ "_import_urdf",
+ "Successfully importedf URDF into IsaacSim",
+ LogType.STAT)
+ else:
+ Journal.log(self.__class__.__name__,
+ "_import_urdf",
+ "Failed to import URDF into IsaacSim",
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ # moving default prim to base prim path for cloning
+ move_prim(robot_prim_path_default, # from
+ robot_base_prim_path) # to
+
+ robot_base_prim = prim_utils.get_prim_at_path(robot_base_prim_path)
+ children = prim_utils.get_prim_children(robot_base_prim)
+ # log imported prim children to the journal (print was getting truncated in logs)
+ Journal.log(self.__class__.__name__,
+ "_import_urdf",
+ f"Imported robot URDF children: {children}",
+ LogType.STAT)
+
+ # improve collision tesselation for cylinders (e.g., wheels) if requested
+ # self._apply_collision_refinement(robot_base_prim_path,
+ # self._env_opts["collision_refinement_level"])
+
+ return success
+
+ def _apply_collision_refinement(self, robot_base_prim_path: str, refinement_level: int):
+ """Set refinement level on collision cylinders to avoid coarse faceting."""
+ if refinement_level is None:
+ return
+ stage = get_current_stage()
+ coll_prefix = robot_base_prim_path + "/collisions"
+ count = 0
+ for prim in stage.Traverse():
+ if not prim.IsValid():
+ continue
+ path_str = prim.GetPath().pathString
+ if not path_str.startswith(coll_prefix):
+ continue
+ if prim.GetTypeName() == "Cylinder":
+ attr = prim.GetAttribute("refinementLevel")
+ if not attr.IsValid():
+ attr = prim.CreateAttribute("refinementLevel", Sdf.ValueTypeNames.Int)
+ attr.Set(int(refinement_level))
+ count += 1
+ Journal.log(self.__class__.__name__,
+ "_apply_collision_refinement",
+ f"Applied refinement level {refinement_level} to {count} cylinder collision prims under {coll_prefix}",
+ LogType.STAT)
+
+ def apply_collision_filters(self,
+ physicscene_path: str,
+ coll_root_path: str):
+
+ self._cloner.filter_collisions(physicsscene_path = physicscene_path,
+ collision_root_path = coll_root_path,
+ prim_paths=self._envs_prim_paths,
+ global_paths=self._ground_plane_prim_paths # can collide with these prims
+ )
+
+ def _read_root_state_from_robot(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ self._get_root_state(numerical_diff=self._env_opts["use_diff_vels"],
+ env_indxs=env_indxs,
+ robot_name=robot_name)
+
+ # height grid sensor readout
+ if robot_name in self._height_sensors:
+ pos_src = self._root_p[robot_name] if env_indxs is None else self._root_p[robot_name][env_indxs]
+ quat_src = self._root_q[robot_name] if env_indxs is None else self._root_q[robot_name][env_indxs]
+ heights = self._height_sensors[robot_name].read(pos_src, quat_src)
+ if self._env_opts["use_flat_ground"]:
+ heights.zero_()
+ if env_indxs is None:
+ self._height_imgs[robot_name] = heights
+ else:
+ # clone to avoid overlapping write/read views
+ self._height_imgs[robot_name][env_indxs] = heights.clone()
+
+ # print("height image")
+ # print(self._height_imgs[robot_name][0, :, : ])
+
+ def _read_jnts_state_from_robot(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ self._get_robots_jnt_state(
+ numerical_diff=self._env_opts["use_diff_vels"],
+ env_indxs=env_indxs,
+ robot_name=robot_name)
+
    def _get_root_state(self,
                robot_name: str,
                env_indxs: torch.Tensor = None,
                numerical_diff: bool = False,
                base_loc: bool = True):
        """Read the root state from the simulator into the cached buffers.

        Updates root position/orientation (world frame), linear/angular velocity
        and acceleration (either read from sim or obtained by finite
        differencing), the corresponding "previous" buffers, and optionally the
        base-local twist/acceleration and base-local gravity direction.

        Args:
            robot_name: robot to read.
            env_indxs: env indices to update (all envs if None).
            numerical_diff: if True, velocities are differentiated numerically
                from poses instead of read from the simulator.
            base_loc: if True, also rotate twist, acceleration and gravity into
                the base-local frame.
        """
        dt=self._cluster_dt[robot_name] # getting diff state always at cluster rate

        # measurements from simulator are in world frame
        if env_indxs is not None:

            pose = self._robots_art_views[robot_name].get_world_poses(
                clone = True,
                indices=env_indxs) # tuple: (pos, quat), quat is [w, i, j, k] in Isaac4.2

            self._root_p[robot_name][env_indxs, :] = pose[0]

            # warn when any robot approaches the (positive) terrain border
            # NOTE(review): only the +x/+y border is checked, not negative
            # coordinates -- confirm the terrain is one-sided
            going_to_fly=self._root_p[robot_name][env_indxs, 0:2]>(self._env_opts["terrain_border"]-0.1)
            if going_to_fly.any():
                flying=going_to_fly.sum().item()
                warn = f"N. {flying} robots ({robot_name}) are about to go out of the terrain!!"
                Journal.log(self.__class__.__name__,
                    "_get_root_state",
                    warn,
                    LogType.WARN,
                    throw_when_excep = True)
            self._root_q[robot_name][env_indxs, :] = pose[1] # root orientation
            if not numerical_diff:
                # we get velocities from the simulation. This is not good since
                # these can actually represent artifacts which do not have physical meaning.
                # It's better to obtain them by differentiation to avoid issues with controllers, etc...
                self._root_v[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_linear_velocities(
                    clone = True,
                    indices=env_indxs) # root lin. velocity
                self._root_omega[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_angular_velocities(
                    clone = True,
                    indices=env_indxs) # root ang. velocity

                # for now obtain root a numerically
                self._root_a[robot_name][env_indxs, :] = (self._root_v[robot_name][env_indxs, :] - \
                    self._root_v_prev[robot_name][env_indxs, :]) / dt
                self._root_alpha[robot_name][env_indxs, :] = (self._root_omega[robot_name][env_indxs, :] - \
                    self._root_omega_prev[robot_name][env_indxs, :]) / dt

                self._root_v_prev[robot_name][env_indxs, :] = self._root_v[robot_name][env_indxs, :]
                self._root_omega_prev[robot_name][env_indxs, :] = self._root_omega[robot_name][env_indxs, :]
            else:
                # differentiate numerically
                self._root_v[robot_name][env_indxs, :] = (self._root_p[robot_name][env_indxs, :] - \
                    self._root_p_prev[robot_name][env_indxs, :]) / dt
                self._root_omega[robot_name][env_indxs, :] = quat_to_omega(
                    self._root_q_prev[robot_name][env_indxs, :],
                    self._root_q[robot_name][env_indxs, :],
                    dt)

                self._root_a[robot_name][env_indxs, :] = (self._root_v[robot_name][env_indxs, :] - \
                    self._root_v_prev[robot_name][env_indxs, :]) / dt
                self._root_alpha[robot_name][env_indxs, :] = (self._root_omega[robot_name][env_indxs, :] - \
                    self._root_omega_prev[robot_name][env_indxs, :]) / dt

                # update "previous" data for numerical differentiation
                self._root_p_prev[robot_name][env_indxs, :] = self._root_p[robot_name][env_indxs, :]
                self._root_q_prev[robot_name][env_indxs, :] = self._root_q[robot_name][env_indxs, :]
                self._root_v_prev[robot_name][env_indxs, :] = self._root_v[robot_name][env_indxs, :]
                self._root_omega_prev[robot_name][env_indxs, :] = self._root_omega[robot_name][env_indxs, :]
            self._track_terrain_hits(robot_name=robot_name, env_indxs=env_indxs)

        else:
            # updating data for all environments
            pose = self._robots_art_views[robot_name].get_world_poses(
                clone = True) # tuple: (pos, quat)
            self._root_p[robot_name][:, :] = pose[0]
            self._root_q[robot_name][:, :] = pose[1] # root orientation

            # warn when any robot approaches the (positive) terrain border
            going_to_fly=self._root_p[robot_name][:, 0:2]>(self._env_opts["terrain_border"]-0.1)
            if going_to_fly.any():
                flying=going_to_fly.sum().item()
                warn = f"N. {flying} robots ({robot_name}) are about to go out of the terrain!!"
                Journal.log(self.__class__.__name__,
                    "_get_root_state",
                    warn,
                    LogType.WARN,
                    throw_when_excep = True)

            if not numerical_diff:
                # we get velocities from the simulation. This is not good since
                # these can actually represent artifacts which do not have physical meaning.
                # It's better to obtain them by differentiation to avoid issues with controllers, etc...
                self._root_v[robot_name][:, :] = self._robots_art_views[robot_name].get_linear_velocities(
                    clone = True) # root lin. velocity
                self._root_omega[robot_name][:, :] = self._robots_art_views[robot_name].get_angular_velocities(
                    clone = True) # root ang. velocity

                self._root_a[robot_name][:, :] = (self._root_v[robot_name][:, :] - \
                    self._root_v_prev[robot_name][:, :]) / dt
                self._root_alpha[robot_name][:, :] = (self._root_omega[robot_name][:, :] - \
                    self._root_omega_prev[robot_name][:, :]) / dt

                self._root_v_prev[robot_name][:, :] = self._root_v[robot_name][:, :]
                self._root_omega_prev[robot_name][:, :] = self._root_omega[robot_name][:, :]
            else:
                # differentiate numerically
                self._root_v[robot_name][:, :] = (self._root_p[robot_name][:, :] - \
                    self._root_p_prev[robot_name][:, :]) / dt
                self._root_omega[robot_name][:, :] = quat_to_omega(self._root_q_prev[robot_name][:, :],
                    self._root_q[robot_name][:, :],
                    dt)

                self._root_a[robot_name][:, :] = (self._root_v[robot_name][:, :] - \
                    self._root_v_prev[robot_name][:, :]) / dt
                self._root_alpha[robot_name][:, :] = (self._root_omega[robot_name][:, :] - \
                    self._root_omega_prev[robot_name][:, :]) / dt

                # update "previous" data for numerical differentiation
                self._root_p_prev[robot_name][:, :] = self._root_p[robot_name][:, :]
                self._root_q_prev[robot_name][:, :] = self._root_q[robot_name][:, :]
                self._root_v_prev[robot_name][:, :] = self._root_v[robot_name][:, :]
                self._root_omega_prev[robot_name][:, :] = self._root_omega[robot_name][:, :]
            self._track_terrain_hits(robot_name=robot_name, env_indxs=None)

        if base_loc:
            # rotate robot twist in base local
            twist_w=torch.cat((self._root_v[robot_name],
                self._root_omega[robot_name]),
                dim=1)
            twist_base_loc=torch.cat((self._root_v_base_loc[robot_name],
                self._root_omega_base_loc[robot_name]),
                dim=1)
            world2base_frame(t_w=twist_w,q_b=self._root_q[robot_name],t_out=twist_base_loc)
            self._root_v_base_loc[robot_name]=twist_base_loc[:, 0:3]
            self._root_omega_base_loc[robot_name]=twist_base_loc[:, 3:6]

            # rotate robot a in base local
            a_w=torch.cat((self._root_a[robot_name],
                self._root_alpha[robot_name]),
                dim=1)
            a_base_loc=torch.cat((self._root_a_base_loc[robot_name],
                self._root_alpha_base_loc[robot_name]),
                dim=1)
            world2base_frame(t_w=a_w,q_b=self._root_q[robot_name],t_out=a_base_loc)
            self._root_a_base_loc[robot_name]=a_base_loc[:, 0:3]
            self._root_alpha_base_loc[robot_name]=a_base_loc[:, 3:6]

            # rotate gravity in base local
            world2base_frame3D(v_w=self._gravity_normalized[robot_name],q_b=self._root_q[robot_name],
                v_out=self._gravity_normalized_base_loc[robot_name])
+
+ def _get_robots_jnt_state(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None,
+ numerical_diff: bool = False):
+
+ dt= self.physics_dt() if self._override_low_lev_controller else self._cluster_dt[robot_name]
+
+ # measurements from simulator are in world frame
+ if env_indxs is not None:
+
+ self._jnts_q[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_joint_positions(
+ clone = True,
+ indices=env_indxs) # joint positions
+ if not numerical_diff:
+ self._jnts_v[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_joint_velocities(
+ clone = True,
+ indices=env_indxs) # joint velocities
+ else:
+ # differentiate numerically
+ self._jnts_v[robot_name][env_indxs, :] = (self._jnts_q[robot_name][env_indxs, :] - \
+ self._jnts_q_prev[robot_name][env_indxs, :]) / dt
+ # update "previous" data for numerical differentiation
+ self._jnts_q_prev[robot_name][env_indxs, :] = self._jnts_q[robot_name][env_indxs, :]
+
+ self._jnts_eff[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_measured_joint_efforts(
+ clone = True,
+ joint_indices=None,
+ indices=env_indxs) # measured joint efforts (computed by joint force solver)
+
+ else:
+ self._jnts_q[robot_name][:, :] = self._robots_art_views[robot_name].get_joint_positions(
+ clone = True) # joint positions
+ if not numerical_diff:
+ self._jnts_v[robot_name][:, :] = self._robots_art_views[robot_name].get_joint_velocities(
+ clone = True) # joint velocities
+ else:
+ self._jnts_v[robot_name][:, :] = (self._jnts_q[robot_name][:, :] - \
+ self._jnts_q_prev[robot_name][:, :]) / dt
+
+ self._jnts_q_prev[robot_name][:, :] = self._jnts_q[robot_name][:, :]
+
+ self._jnts_eff[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_measured_joint_efforts(
+ clone = True) # measured joint efforts (computed by joint force solver)
+
+ def _get_contact_f(self,
+ robot_name: str,
+ contact_link: str,
+ env_indxs: torch.Tensor) -> torch.Tensor:
+
+ if self.omni_contact_sensors[robot_name] is not None:
+ return self.omni_contact_sensors[robot_name].get(dt=self.physics_dt(),
+ contact_link=contact_link,
+ env_indxs=env_indxs,
+ clone=False)
+
+ def _set_jnts_to_homing(self, robot_name: str):
+ self._robots_art_views[robot_name].set_joints_default_state(positions=self._homing,
+ velocities = torch.zeros((self._homing.shape[0], self._homing.shape[1]), \
+ dtype=self._dtype, device=self._device),
+ efforts = torch.zeros((self._homing.shape[0], self._homing.shape[1]), \
+ dtype=self._dtype, device=self._device))
+
+ def _set_root_to_defconfig(self, robot_name: str):
+ self._robots_art_views[robot_name].set_default_state(positions=self._root_p_default[robot_name],
+ orientations=self._root_q_default[robot_name])
+
+ def _alter_twist_warmup(self, robot_name: str, env_indxs: torch.Tensor = None):
+ """Zero angular velocities and joint velocities for the given robot/envs."""
+ if env_indxs is None:
+ twist = self._robots_art_views[robot_name].get_velocities(clone=True)
+ twist[:, 0] = 0.0
+ twist[:, 1] = 0.0
+ twist[:, 3:] = 0.0 # zero angular part, preserve current linear
+ self._robots_art_views[robot_name].set_velocities(velocities=twist, indices=None)
+
+ # jnt_vel=self._robots_art_views[robot_name].get_joint_velocities(
+ # indices = None, clone=True)
+ # jnt_vel[:, :] = 0.0
+
+ # self._robots_art_views[robot_name].set_joint_velocities(
+ # velocities = jnt_vel,
+ # indices = None)
+
+ else:
+ twist = self._robots_art_views[robot_name].get_velocities(clone=True, indices=env_indxs)
+ # twist[:, 0:2] = 0.0
+ twist[:, 0] = 0.0
+ twist[:, 1] = 0.0
+ twist[:, 3:] = 0.0
+ self._robots_art_views[robot_name].set_velocities(velocities=twist, indices=env_indxs)
+
+ # jnt_vel=self._robots_art_views[robot_name].get_joint_velocities(
+ # indices = env_indxs, clone=True)
+ # jnt_vel[:, :] = 0.0
+
+ # self._robots_art_views[robot_name].set_joint_velocities(
+ # velocities = jnt_vel,
+ # indices = env_indxs)
+
+ def _get_solver_info(self):
+ for i in range(0, len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ self._solver_position_iteration_counts[robot_name] = self._robots_art_views[robot_name].get_solver_position_iteration_counts()
+ self._solver_velocity_iteration_counts[robot_name] = self._robots_art_views[robot_name].get_solver_velocity_iteration_counts()
+ self._solver_stabilization_threshs[robot_name] = self._robots_art_views[robot_name].get_stabilization_thresholds()
+
+ def _update_art_solver_options(self):
+
+ # sets new solver iteration options for specifc articulations
+ self._get_solver_info() # gets current solver info for the articulations of the
+ # environments, so that dictionaries are filled properly
+
+ for i in range(0, len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ # increase by a factor
+ self._solver_position_iteration_counts[robot_name] = torch.full((self._num_envs,), self._solver_position_iteration_count)
+ self._solver_velocity_iteration_counts[robot_name] = torch.full((self._num_envs,), self._solver_velocity_iteration_count)
+ self._solver_stabilization_threshs[robot_name] = torch.full((self._num_envs,), self._solver_stabilization_thresh)
+ self._robots_art_views[robot_name].set_solver_position_iteration_counts(self._solver_position_iteration_counts[robot_name])
+ self._robots_art_views[robot_name].set_solver_velocity_iteration_counts(self._solver_velocity_iteration_counts[robot_name])
+ self._robots_art_views[robot_name].set_stabilization_thresholds(self._solver_stabilization_threshs[robot_name])
+ self._get_solver_info() # gets again solver info for articulation, so that it's possible to debug if
+ # the operation was successful
+
+ def _print_envs_info(self):
+
+ ground_info = f"[Ground info]" + "\n" + \
+ "static friction coeff.: " + str(self._terrain_material.GetStaticFrictionAttr().Get()) + "\n" + \
+ "dynamics friction coeff.: " + str(self._terrain_material.GetDynamicFrictionAttr().Get()) + "\n" + \
+ "restitution coeff.: " + str(self._terrain_material.GetRestitutionAttr().Get()) + "\n" +\
+ "friction comb. mode: " + str(self._terrain_physix_material.GetFrictionCombineModeAttr().Get()) + "\n" + \
+ "damping comb. mode: " + str(self._terrain_physix_material.GetDampingCombineModeAttr().Get()) + "\n" + \
+ "restitution comb. mode: " + str(self._terrain_physix_material.GetRestitutionCombineModeAttr().Get()) + "\n"
+
+ Journal.log(self.__class__.__name__,
+ "_print_envs_info",
+ ground_info,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ for i in range(0, len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ task_info = f"[{robot_name}]" + "\n" + \
+ "bodies: " + str(self._robots_art_views[robot_name].body_names) + "\n" + \
+ "n. prims: " + str(self._robots_art_views[robot_name].count) + "\n" + \
+ "prims names: " + str(self._robots_art_views[robot_name].prim_paths) + "\n" + \
+ "n. bodies: " + str(self._robots_art_views[robot_name].num_bodies) + "\n" + \
+ "n. dofs: " + str(self._robots_art_views[robot_name].num_dof) + "\n" + \
+ "dof names: " + str(self._robots_art_views[robot_name].dof_names) + "\n" + \
+ "solver_position_iteration_counts: " + str(self._solver_position_iteration_counts[robot_name]) + "\n" + \
+ "solver_velocity_iteration_counts: " + str(self._solver_velocity_iteration_counts[robot_name]) + "\n" + \
+ "stabiliz. thresholds: " + str(self._solver_stabilization_threshs[robot_name])
+ # print("dof limits: " + str(self._robots_art_views[robot_name].get_dof_limits()))
+ # print("effort modes: " + str(self._robots_art_views[robot_name].get_effort_modes()))
+ # print("dof gains: " + str(self._robots_art_views[robot_name].get_gains()))
+ # print("dof max efforts: " + str(self._robots_art_views[robot_name].get_max_efforts()))
+ # print("dof gains: " + str(self._robots_art_views[robot_name].get_gains()))
+ # print("physics handle valid: " + str(self._robots_art_views[robot_name].is_physics_handle_valid())
+ Journal.log(self.__class__.__name__,
+ "_print_envs_info",
+ task_info,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ def _fill_robot_info_from_world(self):
+ for i in range(0, len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ self._robot_bodynames[robot_name] = self._robots_art_views[robot_name].body_names
+ self._robot_n_links[robot_name] = self._robots_art_views[robot_name].num_bodies
+ self._robot_n_dofs[robot_name] = self._robots_art_views[robot_name].num_dof
+ self._robot_dof_names[robot_name] = self._robots_art_views[robot_name].dof_names
+
+ def _set_initial_camera_params(self,
+ camera_position=[10, 10, 3],
+ camera_target=[0, 0, 0],
+ camera_prim_path="/OmniverseKit_Persp"):
+ set_camera_view(eye=camera_position,
+ target=camera_target,
+ camera_prim_path=camera_prim_path)
+
+ def _init_contact_sensors(self, robot_name: str):
+ self.omni_contact_sensors[robot_name]=None
+ sensor_radii={}
+ contact_offsets={}
+ self._contact_names[robot_name]=self._env_opts["contact_prims"]
+ for contact_prim in self._env_opts["contact_prims"]:
+ sensor_radii[contact_prim]=self._env_opts["sensor_radii"]
+ contact_offsets[contact_prim]=np.array([0.0, 0.0, 0.0])
+ if not (len(self._env_opts["contact_prims"])==0):
+ self.omni_contact_sensors[robot_name]=OmniContactSensors(
+ name=robot_name,
+ n_envs=self._num_envs,
+ contact_prims=self._env_opts["contact_prims"],
+ contact_offsets=contact_offsets,
+ sensor_radii=sensor_radii,
+ device=self._device,
+ dtype=self._dtype,
+ enable_debug=self._debug,
+ filter_paths=self._ground_plane_prim_paths)
+ self.omni_contact_sensors[robot_name].create_contact_sensors(
+ self._world,
+ envs_namespace=self._env_opts["envs_ns"])
+
    def _init_robots_state(self):
        """Allocate and initialize per-robot state buffers.

        For every robot: reads current root pose/velocities and joint state
        from its articulation view; stores measured, previous and default
        copies; allocates zero-initialized buffers for accelerations, the
        height-sensor grid, perturbation wrenches and reference offsets.
        Also precomputes perturbation timing constants and robot masses and
        weights. Must be called after the articulation views are valid.
        """

        self._masses = {}
        self._weights = {}

        self._pert_active = {} # bool mask: (num_envs,)
        self._pert_steps_remaining = {} # int steps: (num_envs,)
        self._pert_forces_world = {} # (num_envs,3)
        self._pert_torques_world = {} # (num_envs,3)
        self._pert_force_local = {} # (num_envs,3) (if needed)
        self._pert_torque_local = {} # (num_envs,3)
        self._pert_lindir = {} # unit linear direction of perturbation force
        self._pert_angdir = {} # unit angular direction of perturbation torque
        self._pert_durations = {}
        self._pert_scratch = {}
        self._pert_det_counter = {}

        # convert durations in seconds to integer physics steps (min 1 step)
        self._pert_min_steps = max(1, int(math.ceil(self._env_opts["pert_wrenches_min_duration"] / self.physics_dt())))
        self._pert_max_steps = max(self._pert_min_steps, int(math.ceil(self._env_opts["pert_wrenches_max_duration"] / self.physics_dt())))

        pert_wrenches_step_rate=self._env_opts["pert_wrenches_rate"]/self.physics_dt() # 1 pert every n physics steps
        self._pert_det_steps = max(1, int(round(pert_wrenches_step_rate)))
        self._pert_wrenches_prob=min(1.0, 1.0/pert_wrenches_step_rate) # sampling prob to be used when not deterministic

        # computes per-robot spawn offsets used below for default root positions
        self._calc_robot_distrib()

        for i in range(0, len(self._robot_names)):

            robot_name = self._robot_names[i]

            pose = self._robots_art_views[robot_name].get_world_poses(
                clone = True) # tuple: (pos, quat)

            # root p (measured, previous, default)
            self._root_p[robot_name] = pose[0]
            self._root_p_prev[robot_name] = torch.clone(pose[0])
            # print(self._root_p_default[robot_name].device)
            self._root_p_default[robot_name] = torch.clone(pose[0]) + self._distr_offset[robot_name]
            # root q (measured, previous, default)
            self._root_q[robot_name] = pose[1] # root orientation
            self._root_q_prev[robot_name] = torch.clone(pose[1])
            self._root_q_default[robot_name] = torch.clone(pose[1])
            # jnt q (measured, previous, default)
            self._jnts_q[robot_name] = self._robots_art_views[robot_name].get_joint_positions(
                clone = True) # joint positions
            self._jnts_q_prev[robot_name] = self._robots_art_views[robot_name].get_joint_positions(
                clone = True)
            self._jnts_q_default[robot_name] = torch.full((self._jnts_q[robot_name].shape[0],
                                                self._jnts_q[robot_name].shape[1]),
                                                0.0,
                                                dtype=self._dtype,
                                                device=self._device)

            # root v (measured, default)
            self._root_v[robot_name] = self._robots_art_views[robot_name].get_linear_velocities(
                clone = True) # root lin. velocity
            self._root_v_base_loc[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0)
            self._root_v_prev[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0)
            self._root_v_default[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0)
            # root omega (measured, default)
            self._root_omega[robot_name] = self._robots_art_views[robot_name].get_angular_velocities(
                clone = True) # root ang. velocity
            self._root_omega_prev[robot_name] = torch.full_like(self._root_omega[robot_name], fill_value=0.0)

            self._root_omega_base_loc[robot_name] = torch.full_like(self._root_omega[robot_name], fill_value=0.0)
            self._root_omega_default[robot_name] = torch.full((self._root_omega[robot_name].shape[0], self._root_omega[robot_name].shape[1]),
                                                0.0,
                                                dtype=self._dtype,
                                                device=self._device)
            # root a (measured,)
            self._root_a[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0)
            self._root_a_base_loc[robot_name] = torch.full_like(self._root_a[robot_name], fill_value=0.0)
            self._root_alpha[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0)
            self._root_alpha_base_loc[robot_name] = torch.full_like(self._root_alpha[robot_name], fill_value=0.0)

            # height grid sensor storage: (num_envs, pixels, pixels)
            grid_sz = int(self._env_opts["height_sensor_pixels"])
            self._height_imgs[robot_name] = torch.zeros((self._num_envs, grid_sz, grid_sz),
                                                dtype=self._dtype,
                                                device=self._device)

            # joints v (measured, default)
            self._jnts_v[robot_name] = self._robots_art_views[robot_name].get_joint_velocities(
                clone = True) # joint velocities
            self._jnts_v_default[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]),
                                                0.0,
                                                dtype=self._dtype,
                                                device=self._device)

            # joints efforts (measured, default)
            self._jnts_eff[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]),
                                                0.0,
                                                dtype=self._dtype,
                                                device=self._device)
            self._jnts_eff_default[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]),
                                                0.0,
                                                dtype=self._dtype,
                                                device=self._device)
            self._root_pos_offsets[robot_name] = torch.zeros((self._num_envs, 3),
                                device=self._device) # reference position offsets

            self._root_q_offsets[robot_name] = torch.zeros((self._num_envs, 4),
                                device=self._device)
            self._root_q_offsets[robot_name][:, 0] = 1.0 # init to valid identity quaternion

            # boolean active flag per env
            self._pert_active[robot_name] = torch.zeros((self._num_envs,), dtype=torch.bool, device=self._device)
            # remaining steps as integer tensor
            self._pert_steps_remaining[robot_name] = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device)
            # world force & torque (N and N*m) stored as floats
            self._pert_forces_world[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device)
            self._pert_torques_world[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device)
            # local frame copies (if you want to store local-frame versions)
            self._pert_force_local[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device)
            self._pert_torque_local[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device)

            self._pert_lindir[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device)
            self._pert_angdir[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device)

            self._pert_durations[robot_name] = torch.zeros((self._num_envs, 1), dtype=torch.int32, device=self._device)

            self._pert_scratch[robot_name] = torch.zeros((self._num_envs, 1), dtype=self._dtype, device=self._device)
            self._pert_det_counter[robot_name] = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device)

            # total mass per env = sum of body masses
            self._masses[robot_name] = torch.sum(self._robots_art_views[robot_name].get_body_masses(clone=True), dim=1).to(dtype=self._dtype, device=self._device)

            # weight magnitude per env: m * |g_z|, shaped (num_envs, 1)
            self._weights[robot_name] = (self._masses[robot_name] * abs(self._env_opts["gravity"][2].item())).reshape((self._num_envs, 1))
+
+ def _track_terrain_hits(self, robot_name: str, env_indxs: torch.Tensor = None):
+ """Track transitions into the terrain boundary margin (1 m) and count hits per env."""
+ if self._env_opts["use_flat_ground"]:
+ return
+ border = float(self._env_opts["terrain_border"])
+ threshold = max(0.0, border - self._terrain_hit_margin)
+ state = self._terrain_hit_active[robot_name]
+ counts = self._terrain_hit_counts[robot_name]
+ if env_indxs is None:
+ pos_xy = self._root_p[robot_name][:, 0:2]
+ hitting = torch.any(torch.abs(pos_xy) > threshold, dim=1)
+ new_hits = (~state) & hitting
+ if new_hits.any():
+ counts[new_hits] += 1
+ state.copy_(hitting)
+ else:
+ pos_xy = self._root_p[robot_name][env_indxs, 0:2]
+ hitting = torch.any(torch.abs(pos_xy) > threshold, dim=1)
+ prev_state = state[env_indxs]
+ new_hits = (~prev_state) & hitting
+ if new_hits.any():
+ counts[env_indxs[new_hits]] += 1
+ state[env_indxs] = hitting
+
+ def _maybe_log_terrain_hits(self):
+ """Log boundary hits at low frequency only when counters change."""
+ if self._env_opts["use_flat_ground"]:
+ return
+ period = int(self._env_opts["terrain_hit_log_period"])
+ if period <= 0 or (self.step_counter % period != 0):
+ return
+ for robot_name in self._robot_names:
+ active = self._terrain_hit_active.get(robot_name, None)
+ if active is None:
+ continue
+ active_now = int(active.sum().item())
+ if active_now == 0:
+ continue
+ counts = self._terrain_hit_counts[robot_name]
+ last = self._terrain_hit_counts_last_logged.get(robot_name, None)
+ if last is not None and torch.equal(counts, last):
+ continue
+ total_hits = int(counts.sum().item())
+ msg = f"{active_now} {robot_name} robots within {self._terrain_hit_margin}m of terrain border. Total hits: {total_hits}."
+ Journal.log(self.__class__.__name__,
+ "_terrain_hits",
+ msg,
+ LogType.WARN,
+ throw_when_excep = True)
+ self._terrain_hit_counts_last_logged[robot_name] = counts.clone()
+
+ @override
+ def _post_world_step(self) -> bool:
+ res = super()._post_world_step()
+ self._maybe_log_terrain_hits()
+ return res
+
+ @override
+ def _post_world_step_db(self) -> bool:
+ res = super()._post_world_step_db()
+ self._maybe_log_terrain_hits()
+ return res
+
+ def current_tstep(self):
+ self._world.current_time_step_index
+
+ def world_time(self, robot_name: str):
+ return self._world.current_time
+
+ def physics_dt(self):
+ return self._world.get_physics_dt()
+
+ def rendering_dt(self):
+ return self._env_opts["rendering_dt"]
+
+ def set_physics_dt(self, physics_dt:float):
+ self._world.set_simulation_dt(physics_dt=physics_dt,rendering_dt=None)
+
+ def set_rendering_dt(self, rendering_dt:float):
+ self._world.set_simulation_dt(physics_dt=None,rendering_dt=rendering_dt)
+
+ def _robot_jnt_names(self, robot_name: str):
+ return self._robots_art_views[robot_name].dof_names
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/jnt_imp_config.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/jnt_imp_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe4c2fdf7657e65e5e558c3daf8b27cf57ee22ec
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/jnt_imp_config.yaml
@@ -0,0 +1,17 @@
+XBotInterface:
+ urdf_path: $PWD/xmj_env_files/kyon.urdf
+ srdf_path: $PWD/xmj_env_files/kyon.srdf
+
+# defaults
+motor_pd:
+ "hip_roll*": [200, 60]
+ "hip_pitch*": [200, 60]
+ "knee_pitch*": [200, 60]
+ "wheel_joint*": [0, 30]
+
+# startup pd config
+startup_motor_pd:
+ "hip_roll*": [100, 10]
+ "hip_pitch*": [100, 10]
+ "knee_pitch*": [100, 10]
+ "wheel_joint*": [0, 30]
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/jnt_imp_config_b2w.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/jnt_imp_config_b2w.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bc07eec1283f3b176a387b8b2cc404391a5d7b3a
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/jnt_imp_config_b2w.yaml
@@ -0,0 +1,17 @@
+XBotInterface:
+ urdf_path: $PWD/xmj_env_files/b2w/unitree_b2w.urdf
+ srdf_path: $PWD/xmj_env_files/b2w/unitree_b2w.srdf
+
+# defaults
+motor_pd:
+ "*hip_joint": [200, 60]
+ "*thigh_joint": [200, 60]
+ "*calf_joint": [200, 60]
+ "*foot_joint": [0, 30]
+
+# startup pd config
+startup_motor_pd:
+ "*hip_joint": [200, 60]
+ "*thigh_joint": [200, 60]
+ "*calf_joint": [200, 60]
+ "*foot_joint": [0, 30]
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_control_cluster.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_control_cluster.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c484f45d52cd3cff4da3401453a30258d1e53ab
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_control_cluster.py
@@ -0,0 +1,106 @@
+import os
+import argparse
+import multiprocessing as mp
+import importlib.util
+import inspect
+
+from aug_mpc.utils.custom_arg_parsing import generate_custom_arg_dict
+
+from EigenIPC.PyEigenIPC import Journal, LogType
+
+this_script_name = os.path.splitext(os.path.basename(os.path.abspath(__file__)))[0]
+
# Function to dynamically import a module from a specific file path
def import_env_module(env_path):
    """Load and execute the Python file at ``env_path`` as a module named
    'env_module', returning the executed module object."""
    spec = importlib.util.spec_from_file_location("env_module", env_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
+
if __name__ == "__main__":

    # CLI for spawning a cluster of RHC controllers bound to shared memory.
    parser = argparse.ArgumentParser(description="Set CPU affinity for the script.")
    parser.add_argument('--ns', type=str, help='Namespace to be used for cluster shared memory')
    parser.add_argument('--urdf_path', type=str, help='Robot description package path for URDF ')
    parser.add_argument('--srdf_path', type=str, help='Robot description package path for SRDF ')
    parser.add_argument('--size', type=int, help='cluster size', default=1)

    # Replacing argparse.BooleanOptionalAction with 'store_true' and 'store_false' for compatibility with Python 3.8
    parser.add_argument('--cloop',action='store_true', help='whether to use RHC controllers in closed loop mode')
    parser.add_argument('--cluster_dt', type=float, default=0.05, help='dt used by MPC controllers for discretization')
    parser.add_argument('--n_nodes', type=int, default=31, help='n nodes used by MPC controllers')

    parser.add_argument('--verbose',action='store_true', help='run in verbose mode')

    parser.add_argument('--enable_debug',action='store_true', help='enable debug mode for cluster client and all controllers')

    parser.add_argument('--dmpdir', type=str, help='directory where data is dumped', default="/root/aux_data")

    parser.add_argument('--no_mp_fork',action='store_true', help='whether to multiprocess with forkserver context')

    parser.add_argument('--set_affinity',action='store_true', help='set affinity to a core for each controller')

    parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="")
    parser.add_argument('--timeout_ms', type=int, help='connection timeout after which the script self-terminates', default=60000)
    parser.add_argument('--codegen_override_dir', type=str, help='Path to base dir where codegen is to be loaded', default="")

    parser.add_argument('--custom_args_names', nargs='+', default=None,
                        help='list of custom arguments names')
    parser.add_argument('--custom_args_vals', nargs='+', default=None,
                        help='list of custom arguments values')
    parser.add_argument('--custom_args_dtype', nargs='+', default=None,
                        help='list of custom arguments data types')

    parser.add_argument('--cluster_client_fname', type=str,
                        default="aug_mpc.controllers.rhc.hybrid_quad_client",
                        help="cluster client file import pattern (without extension)")

    args = parser.parse_args()

    # build the custom-options dict from the --custom_args_* lists
    # (name/value/dtype consistency is validated inside the helper)
    custom_opts = generate_custom_arg_dict(args=args)
    custom_opts.update({"cloop": args.cloop,
                "cluster_dt": args.cluster_dt,
                "n_nodes": args.n_nodes,
                "codegen_override_dir": args.codegen_override_dir})
    if args.no_mp_fork: # this needs to be in the main
        mp.set_start_method('spawn')
    else:
        # mp.set_start_method('forkserver')
        mp.set_start_method('fork')

    # import the cluster client module by dotted name and require that it
    # defines exactly one class (which is taken to be the client class)
    cluster_module=importlib.import_module(args.cluster_client_fname)
    # Get all classes defined in the module
    classes_in_module = [name for name, obj in inspect.getmembers(cluster_module, inspect.isclass)
                        if obj.__module__ == cluster_module.__name__]
    if len(classes_in_module) == 1:
        cluster_classname=classes_in_module[0]
        ClusterClient = getattr(cluster_module, cluster_classname)
        cluster_client = ClusterClient(namespace=args.ns,
                cluster_size=args.size,
                urdf_xacro_path=args.urdf_path,
                srdf_xacro_path=args.srdf_path,
                open_loop=not args.cloop,
                use_mp_fork = not args.no_mp_fork,
                verbose=args.verbose,
                debug=args.enable_debug,
                base_dump_dir=args.dmpdir,
                timeout_ms=args.timeout_ms,
                custom_opts=custom_opts,
                codegen_override=args.codegen_override_dir,
                set_affinity=args.set_affinity)
        cluster_client.run() # spawns the controllers (blocking)

    else:
        # ambiguous module: refuse to guess which class is the client
        class_list_str = ", ".join(classes_in_module)
        Journal.log("launch_control_cluster.py",
            "",
            f"Found more than one class in cluster client file {args.cluster_client_fname}. Found: {class_list_str}",
            LogType.EXCEP,
            throw_when_excep = False)
        exit()

    # control_cluster_client =
    # control_cluster_client.run() # spawns the controllers on separate processes (blocking)
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_train_env.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_train_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..1864f2e4f20d85297c2b5568666ea16d9bd8f644
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_train_env.py
@@ -0,0 +1,358 @@
+from aug_mpc.utils.determinism import deterministic_run
+
+from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo
+from mpc_hive.utilities.shared_data.cluster_data import SharedClusterInfo
+
+from EigenIPC.PyEigenIPC import VLevel, Journal, LogType
+from EigenIPC.PyEigenIPC import StringTensorServer
+
+import os, argparse, sys, types, inspect
+
+from perf_sleep.pyperfsleep import PerfSleep
+
+import importlib.util
+import torch
+import signal
+
+algo = None # global to make it accessible by signal handler
+exit_request=False
+dummy_step_exit_req=False
+
+def handle_sigint(signum, frame):
+ global exit_request, dummy_step_exit_req
+ Journal.log("launch_train_env.py",
+ "",
+ f"Received sigint. Will stop training.",
+ LogType.WARN)
+ exit_request=True
+ dummy_step_exit_req=True # in case dummy_step_loop was used
+
+# Function to dynamically import a module from a specific file path
+# def import_env_module(env_path):
+# spec = importlib.util.spec_from_file_location("env_module", env_path)
+# env_module = importlib.util.module_from_spec(spec)
+# spec.loader.exec_module(env_module)
+# return env_module
+
+def import_env_module(env_path, local_env_root: str = None):
+ """
+ env_path: full path to the child env .py file to exec
+ local_env_root: directory where local copies of aug_mpc_envs.training_envs modules live
+ """
+ if local_env_root is not None:
+ local_env_root = os.path.abspath(local_env_root)
+ # override aug_mpc_envs.training_envs package to point to the local_env_root
+ pkg_name = "aug_mpc_envs.training_envs"
+ if pkg_name not in sys.modules:
+ mod = types.ModuleType(pkg_name)
+ mod.__path__ = [local_env_root] # tell Python to look here first
+ sys.modules[pkg_name] = mod
+ else:
+ existing = getattr(sys.modules[pkg_name], "__path__", None)
+ if existing is None:
+ sys.modules[pkg_name].__path__ = [local_env_root]
+ elif local_env_root not in existing:
+ existing.insert(0, local_env_root)
+
+ # load the module as usual
+ spec = importlib.util.spec_from_file_location("env_module", env_path)
+ env_module = importlib.util.module_from_spec(spec)
+ spec.loader.exec_module(env_module)
+ return env_module
+
+def log_env_hierarchy(env_class, env_path, env_type="training"):
+ """
+ Logs the env class, its file, and full inheritance hierarchy with file paths.
+ env_class: the child env class
+ env_path: file path where the child class was loaded from
+ env_type: string label, e.g., "training", "evaluation", "resumed_training"
+ """
+ def get_bases_recursive(cls):
+ """Recursively get all base classes with their file paths."""
+ info = []
+ for base in cls.__bases__:
+ try:
+ file = inspect.getfile(base)
+ except TypeError:
+ file = "built-in or unknown"
+ info.append(f"{base.__name__} (from {file})")
+ # Recurse unless it's object
+ if base is not object:
+ info.extend(get_bases_recursive(base))
+ return info
+
+ hierarchy_info = get_bases_recursive(env_class)
+ hierarchy_str = " -> ".join(hierarchy_info) if hierarchy_info else "No parents"
+
+ Journal.log(
+ "launch_train_env.py",
+ "",
+ f"loading {env_type} env {env_class.__name__} (from {env_path}) "
+ f"with hierarchy: {hierarchy_str}",
+ LogType.INFO,
+ throw_when_excep=True
+ )
+
+def dummy_step_loop(env):
+ global dummy_step_exit_req
+ while True:
+ if dummy_step_exit_req:
+ return True
+ step_ok=env.step(action=env.safe_action) # not a busy loop because of MPC in the step
+ if not step_ok:
+ return False
+
+if __name__ == "__main__":
+
+ signal.signal(signal.SIGINT, handle_sigint)
+
+ # Parse command line arguments for CPU affinity
+ parser = argparse.ArgumentParser(description="Set CPU affinity for the script.")
+
+ parser.add_argument('--run_name', type=str, default=None, help='Name of training run')
+ parser.add_argument('--ns', type=str, help='Namespace to be used for shared memory')
+ parser.add_argument('--timeout_ms', type=int, help='Connection timeout after which the script self-terminates', default=60000)
+ parser.add_argument('--drop_dir', type=str, help='Directory root where all run data will be dumped')
+ parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="")
+ parser.add_argument('--seed', type=int, help='Seed', default=1)
+ parser.add_argument('--use_cpu',action='store_true', help='If set, all the training (data included) will be performed on CPU')
+
+ parser.add_argument('--db',action='store_true', help='Whether to enable local data logging for the algorithm (reward metrics, etc.)')
+ parser.add_argument('--env_db',action='store_true', help='Whether to enable env db data logging on shared mem (e.g. reward metrics are not available for reading anymore)')
+ parser.add_argument('--full_env_db',action='store_true', help='Whether to enable detailed episodic data storage (data over single transitions)')
+ parser.add_argument('--rmdb',action='store_true', help='Whether to enable remote debug (e.g. data logging on remote servers)')
+
+ parser.add_argument('--tot_tsteps', type=int, help='Total number of timesteps to be collected', default=int(30e6))
+ parser.add_argument('--action_repeat', type=int, help='Frame skipping (1-> no skip)', default=1)
+ parser.add_argument('--discount_factor', type=float, help='', default=0.99)
+ parser.add_argument('--obs_norm',action='store_true', help='Whether to enable the use of running normalizer in agent')
+ parser.add_argument('--obs_rescale',action='store_true', help='Whether to rescale observation depending on their expected range')
+    parser.add_argument('--add_weight_norm',action='store_true', help='Whether to add weight normalization to agent internal layers')
+    parser.add_argument('--add_layer_norm',action='store_true', help='Whether to add layer normalization to agent internal layers')
+    parser.add_argument('--add_batch_norm',action='store_true', help='Whether to add batch normalization to agent internal layers')
+
+ parser.add_argument('--act_rescale_critic',action='store_true', help='Whether to rescale actions provided to critic (if SAC) to be in range [-1, 1]')
+ parser.add_argument('--use_period_resets',action='store_true', help='')
+
+ parser.add_argument('--sac',action='store_true', help='Use SAC, otherwise PPO, unless dummy is set')
+ parser.add_argument('--dummy',action='store_true', help='Use dummy agent (useful for testing and debugging environments)')
+
+ parser.add_argument('--dump_checkpoints',action='store_true', help='Whether to dump model checkpoints during training')
+
+ parser.add_argument('--demo_envs_perc', type=float, help='[0, 1]', default=0.0)
+ parser.add_argument('--demo_stop_thresh', type=float, default=None,
+                        help='Performance threshold above which demonstration envs should be deactivated.')
+
+ parser.add_argument('--expl_envs_perc', type=float, help='[0, 1]', default=0)
+
+ parser.add_argument('--use_rnd',action='store_true', help='Whether to use RND for exploration')
+
+ parser.add_argument('--eval',action='store_true', help='Whether to perform an evaluation run')
+ parser.add_argument('--n_eval_timesteps', type=int, help='Total number of timesteps to be evaluated', default=int(1e6))
+ parser.add_argument('--det_eval',action='store_true', help='Whether to perform a deterministic eval (only action mean is used). Only valid if --eval.')
+ parser.add_argument('--allow_expl_during_eval',action='store_true', help='Whether to allow expl envs during evaluation (useful to tune exploration)')
+
+ parser.add_argument('--resume',action='store_true', help='Resume a previous training using a checkpoint')
+ parser.add_argument('--mpath', type=str, help='Model path to be used for policy evaluation', default=None)
+ parser.add_argument('--mname', type=str, help='Model name', default=None)
+ parser.add_argument('--override_env',action='store_true', help='Whether to override env when running evaluation')
+
+ parser.add_argument('--anomaly_detect',action='store_true', help='Whether to enable anomaly detection (useful for debug)')
+
+ parser.add_argument('--compression_ratio', type=float,
+                        help='If e.g. 0.8, the first layer will be of dimension [input_features_size x (input_features_size*compression_ratio)]', default=-1.0)
+ parser.add_argument('--actor_lwidth', type=int, help='Actor network layer width', default=128)
+ parser.add_argument('--critic_lwidth', type=int, help='Critic network layer width', default=256)
+ parser.add_argument('--actor_n_hlayers', type=int, help='Actor network size', default=3)
+ parser.add_argument('--critic_n_hlayers', type=int, help='Critic network size', default=4)
+
+ parser.add_argument('--env_fname', type=str, default="twist_tracking_env", help='Training env file name (without extension)')
+ parser.add_argument('--env_classname', type=str, default="TwistTrackingEnv", help='Training env class name')
+ parser.add_argument('--override_agent_actions',action='store_true', help='Whether to override agent actions with custom ones from shared mem (useful for db)')
+ parser.add_argument('--override_agent_refs',action='store_true', help='Whether to override automatically generated agent refs (useful for debug)')
+
+ parser.add_argument('--step_while_setup',action='store_true', help='Continue stepping env with default actions while setting up agent, etc..')
+ parser.add_argument('--reset_on_init',action='store_true', help='Whether to reset the environment on initialization')
+
+ args = parser.parse_args()
+ args_dict = vars(args)
+
+ if args.eval and args.resume:
+ Journal.log("launch_train_env.py",
+ "",
+ f"Cannot set both --eval and --resume flags. Exiting.",
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ deterministic_run(seed=args.seed, torch_det_algos=False)
+
+ anomaly_detect=False
+ if args.anomaly_detect:
+ torch.autograd.set_detect_anomaly(True)
+
+ if (not args.mpath is None) and (not args.mname is None):
+ mpath_full = os.path.join(args.mpath, args.mname)
+ else:
+ mpath_full=None
+
+ env_fname=args.env_fname
+ env_classname = args.env_classname
+ env_path=""
+ env_module=None
+ if (not args.eval and not args.resume) or (args.override_env):
+        # if starting a fresh training or overriding the env, load a fresh env from aug_mpc
+ env_path = f"aug_mpc_envs.training_envs.{env_fname}"
+ env_module = importlib.import_module(env_path)
+ else:
+ if args.mpath is None:
+ Journal.log("launch_train_env.py",
+ "",
+ f"no mpath provided! Cannot load env. Either provide a mpath or run with --override_env",
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ env_path = os.path.join(args.mpath, env_fname + ".py")
+ env_module = import_env_module(env_path, local_env_root=args.mpath)
+
+ EnvClass = getattr(env_module, env_classname)
+ env_type = "training" if not args.eval else "evaluation"
+ if args.resume:
+ env_type = "resumed_training"
+ log_env_hierarchy(EnvClass, env_path, env_type) # db print of env class
+
+ env = EnvClass(namespace=args.ns,
+ verbose=True,
+ vlevel=VLevel.V2,
+ use_gpu=not args.use_cpu,
+ debug=args.env_db,
+ override_agent_refs=args.override_agent_refs,
+ timeout_ms=args.timeout_ms,
+ env_opts=args_dict)
+ if not env.is_ready(): # something went wrong
+ exit()
+
+ dummy_step_thread = None
+ if args.step_while_setup:
+ import threading
+        # spawn step thread (we don't need true parallelization, a thread is fine)
+ # start the dummy stepping in a separate thread so setup can continue concurrently
+ dummy_step_thread = threading.Thread(target=dummy_step_loop, args=(env,), daemon=True)
+ dummy_step_thread.start()
+
+ # getting some sim info for debugging
+ sim_data = {}
+ sim_info_shared = SharedEnvInfo(namespace=args.ns,
+ is_server=False,
+ safe=False)
+ sim_info_shared.run()
+ sim_info_keys = sim_info_shared.param_keys
+ sim_info_data = sim_info_shared.get().flatten()
+ for i in range(len(sim_info_keys)):
+ sim_data[sim_info_keys[i]] = sim_info_data[i]
+
+    # getting some cluster info for debugging
+ cluster_data={}
+ cluste_info_shared = SharedClusterInfo(namespace=args.ns,
+ is_server=False,
+ safe=False)
+ cluste_info_shared.run()
+ cluster_info_keys = cluste_info_shared.param_keys
+ cluster_info_data = cluste_info_shared.get().flatten()
+ for i in range(len(cluster_info_keys)):
+ cluster_data[cluster_info_keys[i]] = cluster_info_data[i]
+
+ custom_args={}
+ custom_args["uname_host"]="user_host"
+ try:
+ username = os.getlogin() # add machine info to db data
+ hostname = os.uname().nodename
+ user_host = f"{username}@{hostname}"
+ custom_args["uname_host"]=user_host
+ except:
+ pass
+
+ algo=None
+ if not args.dummy:
+ if args.sac:
+ from aug_mpc.training_algs.sac.sac import SAC
+
+ algo = SAC(env=env,
+ debug=args.db,
+ remote_db=args.rmdb,
+ seed=args.seed)
+ else:
+ from aug_mpc.training_algs.ppo.ppo import PPO
+
+ algo = PPO(env=env,
+ debug=args.db,
+ remote_db=args.rmdb,
+ seed=args.seed)
+ else:
+ from aug_mpc.training_algs.dummy.dummy import Dummy
+
+ algo=Dummy(env=env,
+ debug=args.db,
+ remote_db=args.rmdb,
+ seed=args.seed)
+
+ custom_args.update(args_dict)
+ custom_args.update(cluster_data)
+ custom_args.update(sim_data)
+
+ run_name=env_classname if args.run_name is None else args.run_name
+ algo.setup(run_name=run_name,
+ ns=args.ns,
+ verbose=True,
+ drop_dir_name=args.drop_dir,
+ custom_args=custom_args,
+ comment=args.comment,
+ eval=args.eval,
+ resume=args.resume,
+ model_path=mpath_full,
+ n_eval_timesteps=args.n_eval_timesteps,
+ dump_checkpoints=args.dump_checkpoints,
+ norm_obs=args.obs_norm,
+ rescale_obs=args.obs_rescale)
+
+ full_drop_dir=algo.drop_dir()
+ shared_drop_dir = StringTensorServer(length=1,
+ basename="SharedTrainingDropDir",
+ name_space=args.ns,
+ verbose=True,
+ vlevel=VLevel.V2,
+ force_reconnection=True)
+ shared_drop_dir.run()
+
+ while True:
+ if not shared_drop_dir.write_vec([full_drop_dir], 0):
+ ns=1000000000
+ PerfSleep.thread_sleep(ns)
+ continue
+ else:
+ break
+
+ if args.step_while_setup:
+ # stop dummy step thread and give algo authority on step
+ dummy_step_exit_req=True
+ # wait for thread to join
+ if dummy_step_thread is not None:
+ dummy_step_thread.join()
+ Journal.log("launch_train_env.py",
+ "",
+ f"Dummy env step thread joined. Moving step authority to algo.",
+ LogType.INFO)
+
+ eval=args.eval
+ if args.override_agent_actions:
+ eval=True
+ if not eval:
+ while not exit_request:
+ if not algo.learn():
+ break
+ else: # eval phase
+ with torch.no_grad(): # no need for grad computation
+ while not exit_request:
+ if not algo.eval():
+ break
+
+ algo.done() # make sure to terminate training properly
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_world_interface.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_world_interface.py
new file mode 100644
index 0000000000000000000000000000000000000000..1836cedcdd2191f40b35db291e86f4ff4a7b8295
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_world_interface.py
@@ -0,0 +1,207 @@
+import os
+import argparse
+import importlib.util
+import inspect
+
+from aug_mpc.utils.rt_factor import RtFactor
+from aug_mpc.utils.custom_arg_parsing import generate_custom_arg_dict
+from aug_mpc.utils.determinism import deterministic_run
+
+from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import Journal, LogType
+
+script_name = os.path.splitext(os.path.basename(os.path.abspath(__file__)))[0]
+
+# Function to dynamically import a module from a specific file path
+def import_world_module(env_path):
+ spec = importlib.util.spec_from_file_location("world_module", env_path)
+ world_module = importlib.util.module_from_spec(spec)
+ spec.loader.exec_module(world_module)
+ return world_module
+
+if __name__ == '__main__':
+
+ parser = argparse.ArgumentParser(description="Sim. env launcher")
+ # Add arguments
+ parser.add_argument('--robot_name', type=str, help='Alias to be used for the robot and also shared memory')
+ parser.add_argument('--urdf_path', type=str, help='path to the URDF file description for each robot')
+ parser.add_argument('--srdf_path', type=str, help='path to the SRDF file description for each robot (used for homing)')
+ parser.add_argument('--jnt_imp_config_path', type=str, help='path to a valid YAML file containing information on jnt impedance gains')
+ parser.add_argument('--num_envs', type=int, default=1)
+ parser.add_argument('--n_contacts', type=int, default=4)
+ parser.add_argument('--cluster_dt', type=float, default=0.03, help='dt at which the control cluster runs')
+ parser.add_argument('--dmpdir', type=str, help='directory where data is dumped', default="/root/aux_data")
+ parser.add_argument('--remote_stepping',action='store_true',
+ help='Whether to use remote stepping for cluster triggering (to be set during training)')
+
+ # Replacing argparse.BooleanOptionalAction with 'store_true' and 'store_false' for compatibility with Python 3.8
+ parser.add_argument('--use_gpu',action='store_true', help='Whether to use gpu simulation')
+
+ parser.add_argument('--enable_debug',action='store_true', help='Whether to enable debug mode (may introduce significant overhead)')
+
+ parser.add_argument('--headless',action='store_true', help='Whether to run simulation in headless mode')
+
+ parser.add_argument('--verbose',action='store_true', help='Enable verbose mode')
+
+ parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="")
+ parser.add_argument('--timeout_ms', type=int, help='connection timeout after which the script self-terminates', default=60000)
+ parser.add_argument('--physics_dt', type=float, default=5e-4, help='')
+
+ parser.add_argument('--use_custom_jnt_imp',action='store_true',
+ help='Whether to override the default PD controller with a custom one')
+
+ parser.add_argument('--diff_vels',action='store_true',
+ help='Whether to obtain velocities by differentiation or not')
+
+ parser.add_argument('--init_timesteps', type=int, help='initialization timesteps', default=None)
+ parser.add_argument('--seed', type=int, help='seed', default=0)
+
+ parser.add_argument('--custom_args_names', nargs='+', default=None,
+ help='list of custom arguments names')
+ parser.add_argument('--custom_args_vals', nargs='+', default=None,
+ help='list of custom arguments values')
+ parser.add_argument('--custom_args_dtype', nargs='+', default=None,
+ help='list of custom arguments data types')
+
+ parser.add_argument('--world_iface_fname', type=str,
+ default="aug_mpc_envs.world_interfaces.isaac_world_interface",
+ help="world interface file import pattern (without extension)")
+
+ args = parser.parse_args()
+
+ deterministic_run(seed=args.seed, torch_det_algos=False)
+
+ default_init_duration=3.0 # [s]
+ default_init_tsteps=int(default_init_duration/args.physics_dt)
+ init_tsteps=args.init_timesteps
+ if init_tsteps is None:
+ init_tsteps=default_init_tsteps
+ # Ensure custom_args_names, custom_args_vals, and custom_args_dtype have the same length
+ custom_opt = generate_custom_arg_dict(args=args)
+
+ Journal.log("launch_world_interface.py",
+ "",
+ f"Will warmup world interface for {default_init_duration}s ({default_init_tsteps} physics steps)",
+ LogType.STAT)
+
+ robot_names = [args.robot_name]
+ robot_urdf_paths = [args.urdf_path]
+ robot_srdf_paths = [args.srdf_path]
+ control_clust_dts = [float(args.cluster_dt)]
+ use_remote_stepping = [args.remote_stepping]
+ n_contacts = [args.n_contacts]
+ jnt_imp_config_paths = [args.jnt_imp_config_path]
+ num_envs = args.num_envs
+ control_clust_dt = args.cluster_dt # [s]. Dt at which RHC controllers run
+ headless = args.headless
+
+ # simulation parameters
+ remote_env_params = {}
+ remote_env_params["physics_dt"] = args.physics_dt # physics_dt?
+ remote_env_params["n_envs"] = num_envs
+ remote_env_params["use_gpu"] = args.use_gpu
+ remote_env_params["substepping_dt"] = control_clust_dts[0]
+ remote_env_params["headless"] = headless
+ remote_env_params["debug_enabled"] = args.enable_debug
+ remote_env_params["seed"] = args.seed
+ remote_env_params.update(custom_opt)
+ # sim info to be broadcasted on shared memory
+ # adding some data to dict for debugging
+
+ shared_sim_infos = []
+ for i in range(len(robot_names)):
+ shared_sim_infos.append(SharedEnvInfo(
+ namespace=robot_names[i],
+ is_server=True,
+ env_params_dict=remote_env_params,
+ verbose=True,
+ vlevel=VLevel.V2,
+ force_reconnection=True))
+ shared_sim_infos[i].run()
+
+ world_module=importlib.import_module(args.world_iface_fname)
+ classes_in_module = [name for name, obj in inspect.getmembers(world_module, inspect.isclass)
+ if obj.__module__ == world_module.__name__]
+ if len(classes_in_module) == 1:
+ cluster_classname=classes_in_module[0]
+ WorldInterface = getattr(world_module, cluster_classname)
+ else:
+ class_list_str = ", ".join(classes_in_module)
+ Journal.log("launch_world_interface.py",
+ "",
+ f"Found more than one class in world file {args.world_iface_fname}. Found: {class_list_str}",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ exit()
+
+ world_interface = WorldInterface(robot_names=robot_names,
+ robot_urdf_paths=robot_urdf_paths,
+ robot_srdf_paths=robot_srdf_paths,
+ cluster_dt=control_clust_dts,
+ jnt_imp_config_paths=jnt_imp_config_paths,
+ n_contacts=n_contacts,
+ use_remote_stepping=use_remote_stepping,
+ name=classes_in_module[0],
+ num_envs=num_envs,
+ debug=args.enable_debug,
+ verbose=args.verbose,
+ vlevel=VLevel.V2,
+ n_init_step=init_tsteps,
+ timeout_ms=args.timeout_ms,
+ env_opts=remote_env_params,
+ use_gpu=args.use_gpu,
+ override_low_lev_controller=args.use_custom_jnt_imp) # create environment
+ # reset_ok=world_interface.reset(reset_sim=True)
+ # if not reset_ok:
+ # world_interface.close()
+ # exit()
+
+ rt_factor = RtFactor(dt_nom=world_interface.physics_dt(),
+ window_size=100)
+
+ while True:
+
+ if rt_factor.reset_due():
+ rt_factor.reset()
+
+ step_ok=world_interface.step()
+
+ if not step_ok:
+ break
+
+ rt_factor.update()
+
+ for i in range(len(robot_names)):
+ robot_name=robot_names[i]
+ n_steps = world_interface.cluster_sim_step_counters[robot_name]
+ sol_counter = world_interface.cluster_servers[robot_name].solution_counter()
+ trigger_counter = world_interface.cluster_servers[robot_name].trigger_counter()
+ shared_sim_infos[i].write(dyn_info_name=["sim_rt_factor",
+ "total_rt_factor",
+ "env_stepping_dt",
+ "world_stepping_dt",
+ "time_to_get_states_from_env",
+ "cluster_state_update_dt",
+ "cluster_sol_time",
+ "n_sim_steps",
+ "n_cluster_trigger_steps",
+ "n_cluster_sol_steps",
+ "sim_time",
+ "cluster_time"],
+ val=[rt_factor.get(),
+ rt_factor.get() * num_envs,
+ rt_factor.get_avrg_step_time(),
+ world_interface.debug_data["time_to_step_world"],
+ world_interface.debug_data["time_to_get_states_from_env"],
+ world_interface.debug_data["cluster_state_update_dt"][robot_name],
+ world_interface.debug_data["cluster_sol_time"][robot_name],
+ n_steps,
+ trigger_counter,
+ sol_counter,
+ world_interface.debug_data["sim_time"][robot_name],
+ sol_counter*world_interface.cluster_servers[robot_name].cluster_dt()
+ ])
+
+ world_interface.close()
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/__init__.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/augmpc_cluster_client.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/augmpc_cluster_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d88c7b914b4bb37297e0371dfd1acbd7eafee04
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/augmpc_cluster_client.py
@@ -0,0 +1,123 @@
+from mpc_hive.cluster_client.control_cluster_client import ControlClusterClient
+from aug_mpc.utils.xrdf_gen import generate_srdf, generate_urdf
+from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds
+from aug_mpc.utils.custom_arg_parsing import extract_custom_xacro_args, merge_xacro_cmds
+
+from EigenIPC.PyEigenIPC import Journal, LogType
+
+from typing import List, Dict
+
+import os
+
+from abc import abstractmethod
+
+class AugMpcClusterClient(ControlClusterClient):
+
+ def _import_aux_libs(self):
+ super()._import_aux_libs()
+
+ def __init__(self,
+ namespace: str,
+ urdf_xacro_path: str,
+ srdf_xacro_path: str,
+ cluster_size: int,
+ set_affinity: bool = False,
+ use_mp_fork: bool = False,
+ isolated_cores_only: bool = False,
+ core_ids_override_list: List[int] = None,
+ verbose: bool = False,
+ debug: bool = False,
+ codegen_base_dirname: str = "CodeGen",
+ base_dump_dir: str = "/tmp",
+ codegen_override: str = None,
+ custom_opts: Dict = {}):
+
+ self._base_dump_dir = base_dump_dir
+
+ self._temp_path = base_dump_dir + "/" + f"{self.__class__.__name__}" + f"_{namespace}"
+
+ self._codegen_base_dirname = codegen_base_dirname
+ self._codegen_basedir = self._temp_path + "/" + self._codegen_base_dirname
+
+ self._codegen_override = codegen_override # can be used to manually override
+ # the default codegen dir
+
+ if not os.path.exists(self._temp_path):
+ os.makedirs(self._temp_path)
+ if not os.path.exists(self._codegen_basedir):
+ os.makedirs(self._codegen_basedir)
+
+ self._urdf_xacro_path = urdf_xacro_path
+ self._srdf_xacro_path = srdf_xacro_path
+ self._urdf_path=""
+ self._srdf_path=""
+
+ super().__init__(namespace = namespace,
+ cluster_size=cluster_size,
+ isolated_cores_only = isolated_cores_only,
+ set_affinity = set_affinity,
+ use_mp_fork = use_mp_fork,
+ core_ids_override_list = core_ids_override_list,
+ verbose = verbose,
+ debug = debug,
+ custom_opts=custom_opts)
+ self._generate_srdf(namespace=namespace)
+
+ self._generate_urdf(namespace=namespace)
+
+
+ def codegen_dir(self):
+
+ return self._codegen_basedir
+
+ def codegen_dir_override(self):
+
+ return self._codegen_override
+
+ def _generate_srdf(self,namespace:str):
+
+ custom_xacro_args=extract_custom_xacro_args(self._custom_opts)
+ cmds=merge_xacro_cmds(prev_cmds=self._xrdf_cmds(),
+ new_cmds=custom_xacro_args)
+
+ # call _xrdf_cmds_override in case some cmds need to be overridden
+ override_cmds=self._xrdf_cmds_override()
+ cmds=merge_xacro_cmds(prev_cmds=cmds,
+ new_cmds=override_cmds)
+
+ self._srdf_path=generate_srdf(robot_name=namespace,
+ xacro_path=self._srdf_xacro_path,
+ dump_path=self._temp_path,
+ xrdf_cmds=cmds)
+
+ def _generate_urdf(self,namespace:str):
+
+ custom_xacro_args=extract_custom_xacro_args(self._custom_opts)
+ cmds=merge_xacro_cmds(prev_cmds=self._xrdf_cmds(),
+ new_cmds=custom_xacro_args)
+
+ # call _xrdf_cmds_override in case some cmds need to be overridden
+ override_cmds=self._xrdf_cmds_override()
+ cmds=merge_xacro_cmds(prev_cmds=cmds,
+ new_cmds=override_cmds)
+
+ self._urdf_path=generate_urdf(robot_name=namespace,
+ xacro_path=self._urdf_xacro_path,
+ dump_path=self._temp_path,
+ xrdf_cmds=cmds)
+
+ @abstractmethod
+ def _xrdf_cmds(self):
+
+        # to be implemented by the child class (
+ # for an example have a look at utils/centauro_xrdf_gen.py)
+
+ pass
+
+ def _xrdf_cmds_override(self):
+
+        # to be overridden by the child class
+
+ to_be_overridden = ["dummy_cmd:=true"]
+
+ return to_be_overridden
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/augmpc_cluster_server.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/augmpc_cluster_server.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea5cf7226db1ae834b7bb599c33b426022fcc25d
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/augmpc_cluster_server.py
@@ -0,0 +1,43 @@
+from mpc_hive.cluster_server.control_cluster_server import ControlClusterServer
+from typing import List
+from EigenIPC.PyEigenIPC import VLevel
+
+class AugMpcClusterServer(ControlClusterServer):
+
+ def __init__(self,
+ robot_name: str,
+ cluster_size: int,
+ cluster_dt: float,
+ control_dt: float,
+ jnt_names: List[str],
+ n_contacts: int,
+ contact_linknames: List[str] = None,
+ verbose: bool = False,
+ vlevel: VLevel = VLevel.V1,
+ debug: bool = False,
+ use_gpu: bool = True,
+ force_reconnection: bool = True,
+ timeout_ms: int = 60000,
+ enable_height_sensor: bool = False,
+ height_grid_size: int = None,
+ height_grid_resolution: float = None):
+
+ self.robot_name = robot_name
+
+ super().__init__(
+ namespace=self.robot_name,
+ cluster_size=cluster_size,
+ cluster_dt=cluster_dt,
+ control_dt=control_dt,
+ jnt_names=jnt_names,
+ n_contacts = n_contacts,
+ contact_linknames = contact_linknames,
+ verbose=verbose,
+ vlevel=vlevel,
+ debug=debug,
+ use_gpu=use_gpu,
+ force_reconnection=force_reconnection,
+ timeout_ms=timeout_ms,
+ enable_height_sensor=enable_height_sensor,
+ height_grid_size=height_grid_size,
+ height_grid_resolution=height_grid_resolution)
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/__init__.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/config/rhc_horizon_config.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/config/rhc_horizon_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db15baee95a0dcc257d9c1ccb788a7aabdb6e456
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/config/rhc_horizon_config.yaml
@@ -0,0 +1,200 @@
+# A dummy example of a rhc controller configuration built on top of horizon
+# and iLQR
+
+solver:
+ type: ilqr
+ ilqr.tol: 0.01
+ ilqr.constr_viol_tol: 0.01
+ ilqr.suppress_all_output: 'yes'
+ ilqr.codegen_enabled: true
+# ilqr.codegen_workdir: /tmp/tyhio
+ ilqr.enable_gn: true
+ ilqr.hxx_reg_base: 0.0
+ ilqr.n_threads: 0
+ print_time: 0
+
+constraints:
+ - ball_1_contact
+ - ball_2_contact
+ - ball_3_contact
+ - ball_4_contact
+ - z_ball_1
+ - z_ball_2
+ - z_ball_3
+ - z_ball_4
+
+costs:
+ - joint_regularization
+ - joint_posture
+ - base_position
+ - base_orientation
+
+.define:
+ - &w_base_pos 10
+ - &w_base_ori 1
+ - &w_ball_z 1
+ # - &w_postural 0.0001
+ - &w_jnt_v_reg 0.01
+ - &w_jnt_a_reg 0.001
+ - &w_jnt_f_reg 0.0001
+ - &wheel_radius 0.124
+
+base_position:
+ type: Cartesian
+ distal_link: base_link
+ indices: [0, 1, 2]
+ nodes: ${N}
+ weight: *w_base_pos
+
+base_orientation:
+ type: Cartesian
+ distal_link: base_link
+ indices: [3, 4, 5]
+ nodes: ${N}
+ weight: *w_base_ori
+
+# ===============================
+
+rolling_contact_1:
+ type: Rolling
+ frame: wheel_1
+ radius: *wheel_radius
+
+rolling_contact_2:
+ type: Rolling
+ frame: wheel_2
+ radius: *wheel_radius
+
+rolling_contact_3:
+ type: Rolling
+ frame: wheel_3
+ radius: *wheel_radius
+
+rolling_contact_4:
+ type: Rolling
+ frame: wheel_4
+ radius: *wheel_radius
+
+# contact_1:
+# type: Cartesian
+# distal_link: ball_1
+# indices: [0, 1, 2]
+# cartesian_type: velocity
+
+# contact_2:
+# type: Cartesian
+# distal_link: ball_2
+# indices: [0, 1, 2]
+# cartesian_type: velocity
+
+# contact_3:
+# type: Cartesian
+# distal_link: ball_3
+# indices: [0, 1, 2]
+# cartesian_type: velocity
+
+# contact_4:
+# type: Cartesian
+# distal_link: ball_4
+# indices: [0, 1, 2]
+# cartesian_type: velocity
+
+# ==================================
+
+interaction_wheel_1:
+ type: VertexForce
+ frame: ball_1
+ fn_min: 10.0
+ enable_fc: true
+ friction_coeff: 0.5
+ vertex_frames:
+ - wheel_1
+
+interaction_wheel_2:
+ type: VertexForce
+ frame: ball_2
+ fn_min: 10.0
+ enable_fc: true
+ friction_coeff: 0.5
+ vertex_frames:
+ - wheel_2
+
+interaction_wheel_3:
+ type: VertexForce
+ frame: ball_3
+ fn_min: 10.0
+ enable_fc: true
+ friction_coeff: 0.5
+ vertex_frames:
+ - wheel_3
+
+interaction_wheel_4:
+ type: VertexForce
+ frame: ball_4
+ fn_min: 10.0
+ enable_fc: true
+ friction_coeff: 0.5
+ vertex_frames:
+ - wheel_4
+
+ball_1_contact:
+ type: Contact
+ subtask: [interaction_wheel_1, rolling_contact_1]
+
+ball_2_contact:
+ type: Contact
+ subtask: [interaction_wheel_2, rolling_contact_2]
+
+ball_3_contact:
+ type: Contact
+ subtask: [interaction_wheel_3, rolling_contact_3]
+
+ball_4_contact:
+ type: Contact
+ subtask: [interaction_wheel_4, rolling_contact_4]
+
+# joint_posture:
+# type: Postural
+# weight: *w_postural
+# indices: [0, 1, 2,
+# 4, 5, 6,
+# 8, 9, 10,
+# 12, 13, 14]
+# nodes: ${range(N-8, N)}
+
+# TODO: wrong, as the order COUNTS. If I add the contacts after the joint regularization, they won't get considered.
+joint_regularization:
+ type: Regularization
+ nodes: all # maybe not on first nodes??
+ weight:
+ velocity: *w_jnt_v_reg
+ acceleration: *w_jnt_a_reg
+ force: *w_jnt_f_reg
+
+z_ball_1:
+ type: Cartesian
+ distal_link: ball_1
+ indices: [2]
+ cartesian_type: position
+ weight: *w_ball_z
+
+z_ball_2:
+ type: Cartesian
+ distal_link: ball_2
+ indices: [2]
+ cartesian_type: position
+ weight: *w_ball_z
+
+z_ball_3:
+ type: Cartesian
+ distal_link: ball_3
+ indices: [2]
+ cartesian_type: position
+ weight: *w_ball_z
+
+z_ball_4:
+ type: Cartesian
+ distal_link: ball_4
+ indices: [2]
+ cartesian_type: position
+ weight: *w_ball_z
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/gait_manager.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/gait_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..448a35a2251bef0c51e573d6b32d93fcb2e7d9ce
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/gait_manager.py
@@ -0,0 +1,566 @@
+import numpy as np
+
+from aug_mpc.controllers.rhc.horizon_based.horizon_imports import *
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import Journal, LogType
+
+from typing import Dict
+
+class GaitManager:
+
+ def __init__(self,
+ task_interface: TaskInterface,
+ phase_manager: pymanager.PhaseManager,
+ injection_node: int = None,
+ keep_yaw_vert: bool = False,
+ yaw_vertical_weight: float = None,
+ vertical_landing: bool = False,
+ landing_vert_weight: float = None,
+ phase_force_reg: float = None,
+ flight_duration: int = 15,
+ post_flight_stance: int = 3,
+ step_height: float = 0.1,
+ dh: float = 0.0,
+ custom_opts: Dict = {}):
+
+ self._custom_opts=custom_opts
+
+ self._is_open_loop=False
+ if "is_open_loop" in self._custom_opts:
+ self._is_open_loop=self._custom_opts["is_open_loop"]
+
+ self.task_interface = task_interface
+ self._n_nodes_prb=self.task_interface.prb.getNNodes()
+
+ self._phase_manager = phase_manager
+ self._model=self.task_interface.model
+ self._q0=self._model.q0
+ self._kin_dyn=self.task_interface.model.kd
+
+ # phase weights and regs
+ self._keep_yaw_vert=keep_yaw_vert
+ self._yaw_vertical_weight=yaw_vertical_weight
+ self._vertical_landing=vertical_landing
+ self._landing_vert_weight=landing_vert_weight
+ self._phase_force_reg=phase_force_reg
+ self._total_weight = np.atleast_2d(np.array([0, 0, self._kin_dyn.mass() * 9.81])).T
+
+ self._f_reg_ref={}
+
+ # flight parameters
+ self._post_flight_stance=post_flight_stance
+ self._flight_info_now=None
+ self._flight_duration_max=self._n_nodes_prb-(injection_node+1)
+ self._flight_duration_min=3
+ self._flight_duration_default=flight_duration
+ # apex bounds/defaults
+ self._step_height_default=step_height
+ self._step_height_min=0.0
+ self._step_height_max=0.5
+ # end height bounds/defaults
+ self._dh_default=dh
+ self._dh_min=0.0
+ self._dh_max=0.5
+ # landing dx, dy bounds/defaults
+ self._land_dx_default=0.0
+ self._land_dx_min=-0.5
+ self._land_dx_max=0.5
+ self._land_dy_default=0.0
+ self._land_dy_min=-0.5
+ self._land_dy_max=0.5
+
+ # timeline data
+ self._contact_timelines = dict()
+ self.timeline_names=[]
+
+ self._flight_phases = {}
+ self._touchdown_phases = {}
+ self._contact_phases = {}
+ self._fk_contacts = {}
+ self._fkd_contacts = {}
+ self._f_reg_ref = {}
+
+ # reference traj
+ self._tg = trajectoryGenerator.TrajectoryGenerator()
+ self._traj_der= [None, 0, 0]
+ self._traj_second_der=[None, 0, 0]
+ self._third_traj_der=[None, None, 0]
+
+ self._ref_trjs = {}
+ self._ref_vtrjs = {}
+
+ if injection_node is None:
+ self._injection_node = round(self.task_interface.prb.getNNodes()/2.0)
+ else:
+ self._injection_node = injection_node
+
+ self._init_contact_timelines()
+
+ self._reset_contact_timelines()
+
+    def _init_contact_timelines(self):
+        """Create one timeline per contact with stance/flight (and optional touchdown)
+        phases, per-vertex force regularization residuals, and pos/vel reference
+        trajectory buffers, then allocate the flight-info array.
+        """
+        short_stance_duration=1
+        flight_phase_short_duration=1
+
+        self.n_contacts=len(self._model.cmap.keys())
+        self._dt=float(self.task_interface.prb.getDt())
+
+        # contact name -> slot index inside the flight-info buffer
+        self._name_to_idx_map={}
+
+        j=0
+        for contact in self._model.cmap.keys():
+
+            # FK helpers (position and frame velocity) for this contact frame
+            self._fk_contacts[contact]=self._kin_dyn.fk(contact)
+            self._fkd_contacts[contact]=self._kin_dyn.frameVelocity(contact, self._model.kd_frame)
+            self.timeline_names.append(contact)
+            self._contact_timelines[contact]=self._phase_manager.createTimeline(f'{contact}_timeline')
+            # stances
+            self._contact_phases[contact] = self._contact_timelines[contact].createPhase(short_stance_duration,
+                f'stance_{contact}_short')
+
+            # the stance phase activates the contact task (must exist in the config)
+            if self.task_interface.getTask(f'{contact}') is not None:
+                self._contact_phases[contact].addItem(self.task_interface.getTask(f'{contact}'))
+            else:
+                Journal.log(self.__class__.__name__,
+                    "_init_contact_timelines",
+                    f"contact task {contact} not found",
+                    LogType.EXCEP,
+                    throw_when_excep=True)
+            # one force-regularization residual per vertex force of this contact,
+            # tracking a parameter reference (assigned via set_f_reg)
+            i=0
+            self._f_reg_ref[contact]=[]
+            for force in self._model.cmap[contact]:
+                f_ref=self.task_interface.prb.createParameter(name=f"{contact}_force_reg_f{i}_ref",
+                    dim=3)
+                force_reg=self.task_interface.prb.createResidual(f'{contact}_force_reg_f{i}', self._phase_force_reg * (force - f_ref),
+                    nodes=[])
+                self._f_reg_ref[contact].append(f_ref)
+                # NOTE(review): called once per vertex while _f_reg_ref is still
+                # growing; the final call (all refs present) sets the lasting values.
+                self.set_f_reg(contact_name=contact, scale=4)
+                self._contact_phases[contact].addCost(force_reg, nodes=list(range(0, short_stance_duration)))
+                i+=1
+
+            # flights
+            self._flight_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration,
+                f'flight_{contact}_short')
+
+            # sanity checks (z trajectory)
+            # NOTE(review): these flags are instance attributes overwritten on
+            # every loop iteration — after init they reflect only the LAST
+            # contact; assumes all contacts share the same task layout (confirm).
+            self._zpos_task_found=True
+            self._zvel_task_found=True
+            self._xypos_task_found=True
+            self._xyvel_task_found=True
+            if self.task_interface.getTask(f'z_{contact}') is None:
+                self._zpos_task_found=False
+            if self.task_interface.getTask(f'vz_{contact}') is None:
+                self._zvel_task_found=False
+            if self.task_interface.getTask(f'xy_{contact}') is None:
+                self._xypos_task_found=False
+            if self.task_interface.getTask(f'vxy_{contact}') is None:
+                self._xyvel_task_found=False
+            if (not self._zpos_task_found) and (not self._zvel_task_found):
+                Journal.log(self.__class__.__name__,
+                    "_init_contact_timelines",
+                    f"neither pos or vel task for contacts were found! Aborting.",
+                    LogType.EXCEP,
+                    throw_when_excep=True)
+            if (not self._zpos_task_found) and self._is_open_loop:
+                Journal.log(self.__class__.__name__,
+                    "_init_contact_timelines",
+                    f"Running in open loop, but no contact pos task found. Aborting.",
+                    LogType.EXCEP,
+                    throw_when_excep=True)
+            # NOTE(review): z pos is checked together with xy VEL (and z vel with
+            # xy POS) — presumably to forbid mixing position and velocity modes
+            # across axes; confirm this pairing is intended.
+            if self._zpos_task_found and self._xyvel_task_found:
+                Journal.log(self.__class__.__name__,
+                    "_init_contact_timelines",
+                    f"Both pos and vel task for contact {contact} found! This is not allowed, aborting.",
+                    LogType.EXCEP,
+                    throw_when_excep=True)
+            if self._zvel_task_found and self._xypos_task_found:
+                Journal.log(self.__class__.__name__,
+                    "_init_contact_timelines",
+                    f"Both pos and vel task for contact {contact} found! This is not allowed, aborting.",
+                    LogType.EXCEP,
+                    throw_when_excep=True)
+            if (not self._xypos_task_found) and (not self._xyvel_task_found):
+                Journal.log(self.__class__.__name__,
+                    "_init_contact_timelines",
+                    f"neither pos or vel task for contact {contact} xy were found! Will proceed without xy landing constraints.",
+                    LogType.WARN)
+            # if (not self._zvel_task_found) and (not self._is_open_loop):
+            #     Journal.log(self.__class__.__name__,
+            #         "_init_contact_timelines",
+            #         f"Running in closed loop, but contact vel task not found. Aborting",
+            #         LogType.EXCEP,
+            #         throw_when_excep=True)
+
+            self._ref_trjs[contact]=None
+            self._ref_vtrjs[contact]=None
+            self._touchdown_phases[contact]=None
+
+            if self._zpos_task_found: # we use pos trajectory
+                # 7 x n_nodes buffer; row 2 (z) is the swing height reference
+                self._ref_trjs[contact]=np.zeros(shape=[7, self.task_interface.prb.getNNodes()])
+                init_z_foot = self._fk_contacts[contact](q=self._q0)['ee_pos'].elements()[2]
+                if self._is_open_loop:
+                    self._ref_trjs[contact][2, :] = np.atleast_2d(init_z_foot)
+                else:
+                    self._ref_trjs[contact][2, :] = 0.0 # place foot at ground level initially ()
+
+                # z
+                self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'z_{contact}'),
+                    self._ref_trjs[contact][2, 0:1],
+                    nodes=list(range(0, flight_phase_short_duration)))
+
+                if self._xypos_task_found: # xy, we add a landing phase of unit duration to enforce landing pos costs
+
+                    self._touchdown_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration,
+                        f'touchdown_{contact}_short')
+
+                    self._touchdown_phases[contact].addItemReference(self.task_interface.getTask(f'xy_{contact}'),
+                        self._ref_trjs[contact][0:2, 0:1],
+                        nodes=list(range(0, short_stance_duration)))
+
+            else: # foot traj in velocity
+                # ref vel traj
+                self._ref_vtrjs[contact]=np.zeros(shape=[7, self.task_interface.prb.getNNodes()]) # allocate traj
+                # of max length eual to number of nodes
+                self._ref_vtrjs[contact][2, :] = np.atleast_2d(0)
+
+                # z
+                self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'vz_{contact}'),
+                    self._ref_vtrjs[contact][2, 0:1],
+                    nodes=list(range(0, flight_phase_short_duration)))
+
+                if self._xyvel_task_found: # xy (when in vel the xy vel is set on the whole flight phase)
+                    self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'vxy_{contact}'),
+                        self._ref_vtrjs[contact][0:2, 0:1],
+                        nodes=list(range(0, flight_phase_short_duration)))
+
+                if self._vertical_landing: # add touchdown phase for vertical landing
+                    self._touchdown_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration,
+                        f'touchdown_{contact}_short')
+
+            if self._vertical_landing and self._touchdown_phases[contact] is not None:
+                # penalize horizontal foot velocity at touchdown
+                v_xy=self._fkd_contacts[contact](q=self._model.q, qdot=self._model.v)['ee_vel_linear'][0:2]
+                vertical_landing=self.task_interface.prb.createResidual(f'{contact}_only_vert_v',
+                    self._landing_vert_weight * v_xy,
+                    nodes=[])
+                self._touchdown_phases[contact].addCost(vertical_landing, nodes=list(range(0, short_stance_duration)))
+
+            if self._keep_yaw_vert:
+                # keep ankle vertical over the whole horizon (can be useful with wheeled robots)
+                # NOTE(review): cost_ori is created but the addCost call below is
+                # commented out, so this residual is never scheduled — confirm.
+                c_ori = self._model.kd.fk(contact)(q=self._model.q)['ee_rot'][2, :]
+                cost_ori = self.task_interface.prb.createResidual(f'{contact}_ori', self._yaw_vertical_weight * (c_ori.T - np.array([0, 0, 1])))
+                # flight_phase.addCost(cost_ori, nodes=list(range(0, flight_duration+post_landing_stance)))
+
+            self._name_to_idx_map[contact]=j
+
+            j+=1
+
+        # current pos [c0, c1, ....], current length, nominal length, nom. apex, no. landing height, landing dx, landing dy (local world aligned)
+        self._flight_info_now=np.zeros(shape=(7*self.n_contacts))
+
+        self.update()
+
+    def _reset_contact_timelines(self):
+        """Restore default flight-info values for every contact and refill each
+        timeline with short stance phases until the horizon is full."""
+        for contact in self._model.cmap.keys():
+
+            idx=self._name_to_idx_map[contact]
+            # we follow same order as in shm for more efficient writing
+            # buffer layout (chunks of n_contacts):
+            # [pos | remaining dur | nominal dur | apex dh | end dh | land dx | land dy]
+            self._flight_info_now[idx]= -1.0 # pos [nodes]
+            self._flight_info_now[idx+1*self.n_contacts]= -1.0 # duration (remaining) [nodes]
+            self._flight_info_now[idx+2*self.n_contacts]=self._flight_duration_default # [nodes]
+            self._flight_info_now[idx+3*self.n_contacts]=self._step_height_default
+            self._flight_info_now[idx+4*self.n_contacts]=self._dh_default
+            self._flight_info_now[idx+5*self.n_contacts]=self._land_dx_default
+            self._flight_info_now[idx+6*self.n_contacts]=self._land_dy_default
+            # fill timeline with stances
+            contact_timeline=self._contact_timelines[contact]
+            contact_timeline.clear() # remove phases
+            short_stance_phase = contact_timeline.getRegisteredPhase(f'stance_{contact}_short')
+            while contact_timeline.getEmptyNodes() > 0:
+                contact_timeline.addPhase(short_stance_phase)
+
+        self.update()
+
+    def reset(self):
+        """Reset the task interface and re-initialize all contact timelines."""
+        # self.phase_manager.clear()
+        self.task_interface.reset()
+        self._reset_contact_timelines()
+
+    def set_f_reg(self,
+            contact_name,
+            scale: float = 4.0):
+        """Assign the force-regularization reference for one contact: the robot
+        weight split evenly over the contact's vertex forces and divided by scale.
+        """
+        f_refs=self._f_reg_ref[contact_name]
+        for force in f_refs:
+            # same reference for every vertex force of this contact
+            ref=self._total_weight/(scale*len(f_refs))
+            force.assign(ref)
+
+    # --- accessors for the per-contact flight info buffer ---
+    # self._flight_info_now layout (chunks of n_contacts):
+    # [pos | remaining dur | nominal dur | apex dh | end dh | land dx | land dy]
+
+    def set_flight_duration(self, contact_name, val: float):
+        # nominal flight duration [nodes]
+        self._flight_info_now[self._name_to_idx_map[contact_name]+2*self.n_contacts]=val
+
+    def get_flight_duration(self, contact_name):
+        return self._flight_info_now[self._name_to_idx_map[contact_name]+2*self.n_contacts]
+
+    def set_step_apexdh(self, contact_name, val: float):
+        # step apex height [m]
+        self._flight_info_now[self._name_to_idx_map[contact_name]+3*self.n_contacts]=val
+
+    def get_step_apexdh(self, contact_name):
+        return self._flight_info_now[self._name_to_idx_map[contact_name]+3*self.n_contacts]
+
+    def set_step_enddh(self, contact_name, val: float):
+        # landing height offset w.r.t. liftoff [m]
+        self._flight_info_now[self._name_to_idx_map[contact_name]+4*self.n_contacts]=val
+
+    def get_step_enddh(self, contact_name):
+        return self._flight_info_now[self._name_to_idx_map[contact_name]+4*self.n_contacts]
+
+    def get_step_landing_dx(self, contact_name):
+        return self._flight_info_now[self._name_to_idx_map[contact_name]+5*self.n_contacts]
+
+    def set_step_landing_dx(self, contact_name, val: float):
+        # landing x offset (horizontal frame) [m]
+        self._flight_info_now[self._name_to_idx_map[contact_name]+5*self.n_contacts]=val
+
+    def get_step_landing_dy(self, contact_name):
+        return self._flight_info_now[self._name_to_idx_map[contact_name]+6*self.n_contacts]
+
+    def set_step_landing_dy(self, contact_name, val: float):
+        # landing y offset (horizontal frame) [m]
+        self._flight_info_now[self._name_to_idx_map[contact_name]+6*self.n_contacts]=val
+
+    def add_stand(self, contact_name):
+        """Append one short stance phase at the end of the horizon, if room is left."""
+        # always add stand at the end of the horizon
+        timeline = self._contact_timelines[contact_name]
+        if timeline.getEmptyNodes() > 0:
+            timeline.addPhase(timeline.getRegisteredPhase(f'stance_{contact_name}_short'))
+
+ def add_flight(self, contact_name,
+ robot_q: np.ndarray):
+
+ timeline = self._contact_timelines[contact_name]
+
+ flights_on_horizon=self._contact_timelines[contact_name].getPhaseIdx(self._flight_phases[contact_name])
+
+ last_flight_idx=self._injection_node-1 # default to make things work
+ if not len(flights_on_horizon)==0: # some flight phases are there
+ last_flight_idx=flights_on_horizon[-1]+self._post_flight_stance
+
+ if last_flight_idx1:
+ Journal.log(self.__class__.__name__,
+ "add_flight",
+ f"Got flight duration {flight_duration_req} < 1!",
+ LogType.WARN,
+ throw_when_excep=True)
+
+ # process requests to ensure flight params are valid
+ # duration
+ if flight_duration_reqself._flight_duration_max:
+ flight_duration_req=self._flight_duration_max
+ # apex height
+ if flight_apex_reqself._step_height_max:
+ flight_apex_req=self._step_height_max
+ # landing height
+ if flight_enddh_reqself._dh_max:
+ flight_enddh_req=self._dh_max
+ # landing dx
+ if flight_land_dx_reqself._land_dx_max:
+ flight_land_dx_req=self._land_dx_max
+ # landing dy
+ if flight_land_dy_reqself._land_dy_max:
+ flight_land_dy_req=self._land_dy_max
+
+ land_dx_w = flight_land_dx_req
+ land_dy_w = flight_land_dy_req
+ if self._xypos_task_found or self._xyvel_task_found:
+ # landing dx/dy are specified in horizontal frame; rotate into world aligned frame
+ q_base = robot_q[3:7]
+ if q_base.ndim == 1:
+ q_base = q_base.reshape(-1, 1)
+ q_w = q_base[3, 0]
+ q_x = q_base[0, 0]
+ q_y = q_base[1, 0]
+ q_z = q_base[2, 0]
+ r11 = 1 - 2 * (q_y * q_y + q_z * q_z)
+ r21 = 2 * (q_x * q_y + q_z * q_w)
+ norm = np.hypot(r11, r21)
+ if norm > 0.0:
+ cos_yaw = r11 / norm
+ sin_yaw = r21 / norm
+ else:
+ cos_yaw = 1.0
+ sin_yaw = 0.
+
+ land_dx_w = flight_land_dx_req * cos_yaw - flight_land_dy_req * sin_yaw
+ land_dy_w = flight_land_dx_req * sin_yaw + flight_land_dy_req * cos_yaw
+
+ if self._ref_vtrjs[contact_name] is not None and \
+ self._ref_trjs[contact_name] is not None: # only allow one mode (pos/velocity traj)
+ Journal.log(self.__class__.__name__,
+ "add_flight",
+ f"Both pos and vel traj for contact {contact_name} are not None! This is not allowed, aborting.",
+ LogType.EXCEP,
+ throw_when_excep=True)
+
+ # inject pos traj if pos mode
+ if self._ref_trjs[contact_name] is not None:
+ # recompute trajectory online (just needed if using pos traj)
+ foot_pos=self._fk_contacts[contact_name](q=robot_q)['ee_pos'].elements()
+ starting_pos=foot_pos[2] # compute foot traj (local world aligned)
+ starting_x_pos=foot_pos[0]
+ starting_y_pos=foot_pos[1]
+ # starting_pos=0.0
+ self._ref_trjs[contact_name][2, 0:flight_duration_req]=np.atleast_2d(self._tg.from_derivatives(
+ flight_duration_req,
+ p_start=starting_pos,
+ p_goal=starting_pos+flight_enddh_req,
+ clearance=flight_apex_req,
+ derivatives=self._traj_der,
+ second_der=self._traj_second_der,
+ third_der=self._third_traj_der)
+ )
+ if self._xypos_task_found: # we use _ref_trjs to write xy pos references
+ self._ref_trjs[contact_name][0, -1]=starting_x_pos+land_dx_w
+ self._ref_trjs[contact_name][1, -1]=starting_y_pos+land_dy_w
+
+ for i in range(flight_duration_req):
+ res, phase_token_flight=timeline.addPhase(self._flight_phases[contact_name],
+ pos=self._injection_node+i,
+ absolute_position=True)
+ phase_token_flight.setItemReference(f'z_{contact_name}',
+ self._ref_trjs[contact_name][:, i])
+
+ if self._touchdown_phases[contact_name] is not None:
+ # add touchdown phase after flight
+ res, phase_token_touchdown=timeline.addPhase(self._touchdown_phases[contact_name],
+ pos=self._injection_node+flight_duration_req,
+ absolute_position=True)
+ if self._xypos_task_found:
+ phase_token_touchdown.setItemReference(f'xy_{contact_name}',
+ self._ref_trjs[contact_name][:, -1])
+
+ # inject vel traj if vel mode
+ if self._ref_vtrjs[contact_name] is not None:
+ self._ref_vtrjs[contact_name][2, 0:flight_duration_req]=np.atleast_2d(self._tg.derivative_of_trajectory(
+ flight_duration_req,
+ p_start=0.0,
+ p_goal=flight_enddh_req,
+ clearance=flight_apex_req,
+ derivatives=self._traj_der,
+ second_der=self._traj_second_der,
+ third_der=self._third_traj_der))
+ if self._xyvel_task_found: # compute vel reference using problem dt and flight length
+ flight_duration_sec=float(flight_duration_req)*self._dt
+ self._ref_vtrjs[contact_name][0, 0:flight_duration_req]=land_dx_w/flight_duration_sec
+ self._ref_vtrjs[contact_name][1, 0:flight_duration_req]=land_dy_w/flight_duration_sec
+
+ for i in range(flight_duration_req):
+ res, phase_token=timeline.addPhase(self._flight_phases[contact_name],
+ pos=self._injection_node+i,
+ absolute_position=True)
+ phase_token.setItemReference(f'vz_{contact_name}',
+ self._ref_vtrjs[contact_name][2:3, i:i+1])
+ if self._touchdown_phases[contact_name] is not None:
+ # add touchdown phase for forcing vertical landing
+ res, phase_token=timeline.addPhase(self._touchdown_phases[contact_name],
+ pos=self._injection_node+flight_duration_req,
+ absolute_position=True)
+
+ if timeline.getEmptyNodes() > 0: # fill empty nodes at the end of the horizon, if any, with stance
+ timeline.addPhase(timeline.getRegisteredPhase(f'stance_{contact_name}_short'))
+
+    def update(self):
+        """Propagate/shift all contact timelines through the phase manager."""
+        self._phase_manager.update()
+
+    def update_flight_info(self, timeline_name):
+        """Refresh flight position/duration info for one contact from its timeline.
+
+        Returns:
+            True when at least one flight phase is on the horizon (info written),
+            False otherwise (buffer left untouched).
+        """
+        # get current position and remaining duration of flight phases over the horizon for a single contact
+
+        # phase indexes over timeline
+        phase_idxs=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name])
+
+        if not len(phase_idxs)==0: # at least one flight phase on horizon -> read info from timeline
+
+            # all active phases on timeline
+            active_phases=self._contact_timelines[timeline_name].getActivePhases()
+
+            phase_idx_start=phase_idxs[0]
+            # active_nodes_start=active_phases[phase_idx_start].getActiveNodes()
+            pos_start=active_phases[phase_idx_start].getPosition()
+            # n_nodes_start=active_phases[phase_idx_start].getNNodes()
+
+            phase_idx_last=phase_idxs[-1] # just get info for last phase on the horizon
+            # active_nodes_last=active_phases[phase_idx_last].getActiveNodes()
+            pos_last=active_phases[phase_idx_last].getPosition()
+            # n_nodes_last=active_phases[phase_idx_last].getNNodes()
+
+            # write to info
+            # NOTE(review): the "duration" written here is pos_last - pos_start,
+            # i.e. the span between first and last flight phase start nodes —
+            # confirm this is intended when multiple separate flights coexist.
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+0*self.n_contacts]=pos_last
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+1*self.n_contacts]=pos_last - pos_start
+
+            return True
+
+        return False
+
+    def get_flight_info(self, timeline_name):
+        """Return (pos, remaining dur, nominal dur, apex dh, end dh, land dx, land dy) for one contact."""
+        return (self._flight_info_now[self._name_to_idx_map[timeline_name]+0*self.n_contacts],
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+1*self.n_contacts],
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+2*self.n_contacts],
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+3*self.n_contacts],
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+4*self.n_contacts],
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+5*self.n_contacts],
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+6*self.n_contacts])
+
+    def get_flight_info_all(self):
+        """Return the raw flight-info buffer (shape (7*n_contacts,))."""
+        return self._flight_info_now
+
+    def set_ref_pos(self,
+            timeline_name:str,
+            ref_height: np.array = None,
+            threshold: float = 0.05):
+        """Drive one contact from an external height reference: inject a flight
+        when ref_height exceeds threshold, otherwise keep standing.
+        """
+        if ref_height is not None:
+            self._ref_trjs[timeline_name][2, :]=ref_height
+            if ref_height>threshold:
+                # FIXME(review): add_flight is defined as add_flight(contact_name,
+                # robot_q) — this keyword call would raise TypeError; confirm the
+                # intended signature before using this method.
+                self.add_flight(timeline_name=timeline_name)
+                this_flight_token_idx=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name])[-1]
+                active_phases=self._contact_timelines[timeline_name].getActivePhases()
+                active_phases[this_flight_token_idx].setItemReference(f'z_{timeline_name}',
+                    self._ref_trjs[timeline_name])
+            else:
+                # FIXME(review): add_stand's parameter is named contact_name, so
+                # the timeline_name keyword would raise TypeError — confirm.
+                self.add_stand(timeline_name=timeline_name)
+
+    def set_force_feedback(self,
+            timeline_name: str,
+            force_norm: float):
+        """Incomplete stub: apparently meant to react to a measured contact force norm.
+
+        NOTE(review): currently has no effect — force_norm is unused, the trailing
+        bare `first_flight` expression does nothing, and contact_tokens queries
+        getPhaseIdx on a phase object (self._contact_phases[...]) rather than on
+        the timeline; left untouched pending clarification.
+        """
+        flight_tokens=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name])
+        contact_tokens=self._contact_phases[timeline_name].getPhaseIdx(self._contact_phases[timeline_name])
+        if not len(flight_tokens)==0:
+            first_flight=flight_tokens[0]
+            first_flight
+
+    def check_horizon_full(self,
+            timeline_name):
+        """Log an exception (throwing) if the given timeline still has empty nodes."""
+        timeline = self._contact_timelines[timeline_name]
+        if timeline.getEmptyNodes() > 0:
+            error = f"Empty nodes detected over the horizon for timeline {timeline}! Make sure to fill the whole horizon with valid phases!!"
+            Journal.log(self.__class__.__name__,
+                "check_horizon_full",
+                error,
+                LogType.EXCEP,
+                throw_when_excep = True)
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports.py
new file mode 100644
index 0000000000000000000000000000000000000000..991636dd18679c11050f97d3c47dd7df2d97eb5e
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports.py
@@ -0,0 +1,18 @@
+# robot modeling and automatic differentiation
+import casadi_kin_dyn.py3casadi_kin_dyn as casadi_kin_dyn
+import casadi as cs
+
+# horizon stuff
+import horizon.utils.kin_dyn as kd
+from horizon.problem import Problem
+from horizon.rhc.model_description import FullModelInverseDynamics
+from horizon.rhc.taskInterface import TaskInterface
+from horizon.rhc.tasks.interactionTask import VertexContact
+from horizon.rhc.tasks.contactTask import ContactTask
+from horizon.utils import trajectoryGenerator, utils
+
+# phase managing
+import phase_manager.pymanager as pymanager
+import phase_manager.pyphase as pyphase
+import phase_manager.pytimeline as pytimeline
+
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports_glob.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports_glob.py
new file mode 100644
index 0000000000000000000000000000000000000000..679e7d460ab545b0bd2f741904b5ac8af893f897
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports_glob.py
@@ -0,0 +1,28 @@
+"""
+Dynamically import all necessary Horizon and related dependencies.
+This function is intended to be used within the import_child_lib method
+of a class, enabling the parent process to load all required libraries.
+"""
+def import_horizon_global():
+    """Import all Horizon / phase-manager dependencies into module globals.
+
+    Intended to be called inside a child process (e.g. from an import_child_lib
+    hook) so the names below become available at module scope after spawn/fork,
+    instead of being imported at file load time in the parent.
+    """
+    # Global imports to make modules accessible in child processes
+    global casadi_kin_dyn, cs, kd, Problem, FullModelInverseDynamics
+    global TaskInterface, VertexContact, ContactTask, trajectoryGenerator, utils
+    global pymanager, pyphase, pytimeline
+
+    # robot modeling and automatic differentiation
+    import casadi_kin_dyn.py3casadi_kin_dyn as casadi_kin_dyn
+    import casadi as cs
+
+    # horizon stuff
+    import horizon.utils.kin_dyn as kd
+    from horizon.problem import Problem
+    from horizon.rhc.model_description import FullModelInverseDynamics
+    from horizon.rhc.taskInterface import TaskInterface
+    from horizon.rhc.tasks.interactionTask import VertexContact
+    from horizon.rhc.tasks.contactTask import ContactTask
+    from horizon.utils import trajectoryGenerator, utils
+
+    # phase managing
+    import phase_manager.pymanager as pymanager
+    import phase_manager.pyphase as pyphase
+    import phase_manager.pytimeline as pytimeline
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cec4e610e97e3100c1468ee4e828fe2e64a28c4
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc.py
@@ -0,0 +1,1324 @@
+from mpc_hive.controllers.rhc import RHController
+
+from aug_mpc.controllers.rhc.horizon_based.horizon_imports import *
+
+from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc_refs import HybridQuadRhcRefs
+from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import Journal, LogType
+
+import numpy as np
+
+import os
+
+import time
+
+from typing import Dict, List
+
+class HybridQuadRhc(RHController):
+
+    def __init__(self,
+            srdf_path: str,
+            urdf_path: str,
+            config_path: str,
+            robot_name: str, # used for shared memory namespaces
+            codegen_dir: str,
+            n_nodes:float = 25,
+            injection_node:int = 10,
+            dt: float = 0.02,
+            max_solver_iter = 1, # defaults to rt-iteration
+            open_loop: bool = True,
+            close_loop_all: bool = False,
+            dtype = np.float32,
+            verbose = False,
+            debug = False,
+            refs_in_hor_frame = True,
+            timeout_ms: int = 60000,
+            custom_opts: Dict = {}):
+        """Hybrid quadruped receding-horizon controller built on Horizon.
+
+        Args:
+            srdf_path/urdf_path: robot description files (the urdf is read here).
+            config_path: Horizon YAML config used to build the problem.
+            robot_name: namespace used for shared memory.
+            codegen_dir: directory for solver code generation (created if missing).
+            n_nodes: number of horizon nodes.
+            injection_node: node where gait phases are injected.
+            dt: horizon discretization step [s].
+            max_solver_iter: max solver iterations (1 -> real-time iteration).
+            open_loop/close_loop_all: loop-closure flags; see the option
+                post-processing below for how they constrain custom_opts.
+            dtype: numpy dtype for internal buffers.
+            refs_in_hor_frame: whether refs are expressed in the horizontal frame.
+            timeout_ms: timeout forwarded to the base controller.
+            custom_opts: overrides merged over the default option dict below.
+        """
+        self._refs_in_hor_frame = refs_in_hor_frame
+
+        self._injection_node = injection_node
+
+        self._open_loop = open_loop
+        self._close_loop_all = close_loop_all
+
+        self._codegen_dir = codegen_dir
+        if not os.path.exists(self._codegen_dir):
+            os.makedirs(self._codegen_dir)
+        # else:
+        #     # Directory already exists, delete it and recreate
+        #     shutil.rmtree(self._codegen_dir)
+        #     os.makedirs(self._codegen_dir)
+
+        self.step_counter = 0
+        self.sol_counter = 0
+
+        self.max_solver_iter = max_solver_iter
+
+        # profiling timers (updated elsewhere during the control loop)
+        self._timer_start = time.perf_counter()
+        self._prb_update_time = time.perf_counter()
+        self._phase_shift_time = time.perf_counter()
+        self._task_ref_update_time = time.perf_counter()
+        self._rti_time = time.perf_counter()
+
+        self.robot_name = robot_name
+
+        self.config_path = config_path
+
+        self.urdf_path = urdf_path
+        # read urdf and srdf files
+        with open(self.urdf_path, 'r') as file:
+            self.urdf = file.read()
+        # neutral floating-base pose [x y z qx qy qz qw]
+        self._base_init = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0])
+
+        self._c_timelines = dict()
+        self._f_reg_timelines = dict()
+
+        # default options; overridden by custom_opts right below
+        self._custom_opts={"replace_continuous_joints": False,
+            "use_force_feedback": False,
+            "lin_a_feedback": False,
+            "is_open_loop": self._open_loop, # fully open (just for db)
+            "fully_closed": False, # closed loop with full feedback (just for db)
+            "closed_partial": False, # closed loop with partial feedback
+            "adaptive_is": True, # closed loop with adaptation
+            "estimate_v_root": False, # when adaptive_is or closed_partial, estimate vbase
+            "alpha_from_outside": False, # alpha set ext. from shared memory
+            "alpha_half": 1.0,
+            "only_vel_wheels": True, # whether wheels (if present) are just vel controlled
+            "use_jnt_v_feedback": False
+            }
+
+        self._custom_opts.update(custom_opts)
+
+        self._alpha_half=self._custom_opts["alpha_half"]
+
+        # enforce mutual exclusivity between the loop-closure modes
+        # (priority: fully_closed > closed_partial > adaptive_is)
+        if self._open_loop:
+            self._custom_opts["fully_closed"]=False
+            self._custom_opts["adaptive_is"]=False
+            self._custom_opts["closed_partial"]=False
+        else:
+            self._custom_opts["is_open_loop"]=False
+            if self._custom_opts["fully_closed"]:
+                self._custom_opts["adaptive_is"]=False
+                self._custom_opts["closed_partial"]=False
+                self._custom_opts["lin_a_feedback"]=False
+            if self._custom_opts["closed_partial"]:
+                self._custom_opts["adaptive_is"]=False
+                self._custom_opts["fully_closed"]=False
+                self._custom_opts["lin_a_feedback"]=False
+            if self._custom_opts["adaptive_is"]:
+                self._custom_opts["closed_partial"]=False
+                self._custom_opts["fully_closed"]=False
+
+        super().__init__(srdf_path=srdf_path,
+            n_nodes=n_nodes,
+            dt=dt,
+            namespace=self.robot_name,
+            dtype=dtype,
+            verbose=verbose,
+            debug=debug,
+            timeout_ms=timeout_ms)
+
+        self._rhc_fpaths.append(self.config_path)
+
+    def _config_override(self):
+        """Hook for subclasses to adjust config (e.g. self.config_path) before problem init; no-op here."""
+        pass
+
+    def _post_problem_init(self):
+        """Allocate solver diagnostics buffers and feedback-blending coefficients
+        once the horizon problem has been built."""
+        self.rhc_costs={}
+        self.rhc_constr={}
+
+        self._fail_idx_scale=0.0
+        # sliding window (one horizon long) for the explosion/failure index
+        self._expl_idx_window_size=int(1*self._n_nodes)
+        self._explosion_idx_buffer=np.zeros((1,self._expl_idx_window_size))
+        self._expl_idx_counter=0
+        self._expl_idx_buffer_counter=0
+
+        # node used as the "predicted" state (last node of the horizon)
+        self._pred_node_idx=self._n_nodes-1
+
+        self._nq=self.nq()
+        self._nq_jnts=self._nq-7 # assuming floating base
+        self._nv=self.nv()
+        self._nv_jnts=self._nv-6
+
+        # per-component blending coefficients between measured and predicted state
+        self._alphas_q_root=np.zeros((7, 1), dtype=self._dtype)
+        self._alphas_q_jnts=np.zeros((self._nq_jnts, 1), dtype=self._dtype)
+        self._alphas_twist_root=np.zeros((6, 1), dtype=self._dtype)
+        self._alphas_v_jnts=np.zeros((self._nv_jnts, 1), dtype=self._dtype)
+        self._alphas_a=np.zeros((self._nv, 1), dtype=self._dtype)
+        self._alphas_q_root[:, :]=1.0 # default to all open
+        self._alphas_q_jnts[:, :]=1.0
+        self._alphas_twist_root[:, :]=1.0
+        self._alphas_v_jnts[:, :]=1.0
+        self._alphas_a[:, :]=1.0
+
+ def _init_problem(self,
+ fixed_jnt_patterns: List[str] = None,
+ wheels_patterns: List[str] = None,
+ foot_linkname: str = None,
+ flight_duration: int = 10,
+ post_flight_stance: int = 3,
+ step_height: float = 0.12,
+ keep_yaw_vert: bool = False,
+ yaw_vertical_weight: float = 2.0,
+ vertical_landing: bool = False,
+ vertical_land_weight: float = 1.0,
+ phase_force_reg: float = 1e-2,
+ vel_bounds_weight: float = 1.0):
+
+ self._fixed_jnt_patterns=fixed_jnt_patterns
+
+ self._config_override()
+
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f" Will use horizon config file at {self.config_path}",
+ LogType.INFO,
+ throw_when_excep=True)
+
+ self._vel_bounds_weight=vel_bounds_weight
+ self._phase_force_reg=phase_force_reg
+ self._yaw_vertical_weight=yaw_vertical_weight
+ self._vertical_land_weight=vertical_land_weight
+ # overrides parent
+ self._prb = Problem(self._n_intervals,
+ receding=True,
+ casadi_type=cs.SX)
+ self._prb.setDt(self._dt)
+
+ if "replace_continuous_joints" in self._custom_opts:
+ # continous joints are parametrized in So2
+ if self._custom_opts["replace_continuous_joints"]:
+ self.urdf = self.urdf.replace('continuous', 'revolute')
+ else:
+ self.urdf = self.urdf.replace('continuous', 'revolute')
+
+ self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf) # used for getting joint names
+ self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names())
+
+ self._init_robot_homer()
+
+ # handle fixed joints
+ fixed_joint_map={}
+ if self._fixed_jnt_patterns is not None:
+ for jnt_name in self._get_robot_jnt_names():
+ for fixed_jnt_pattern in self._fixed_jnt_patterns:
+ if fixed_jnt_pattern in jnt_name:
+ fixed_joint_map.update({f"{jnt_name}":
+ self._homer.get_homing_val(jnt_name=jnt_name)})
+ break # do not search for other pattern matches
+
+ if not len(fixed_joint_map)==0: # we need to recreate kin dyn and homers
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f"Will fix following joints: \n{str(fixed_joint_map)}",
+ LogType.INFO,
+ throw_when_excep=True)
+ # with the fixed joint map
+ self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf,fixed_joints=fixed_joint_map)
+ # assign again controlled joints names
+ self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names())
+ # updated robot homer for controlled joints
+ self._init_robot_homer()
+
+ # handle continuous joints (need to change homing and retrieve
+ # cont jnts indexes) and homing
+ self._continuous_joints=self._get_continuous_jnt_names()
+ # reduced
+ self._continuous_joints_idxs=[]
+ self._continuous_joints_idxs_cos=[]
+ self._continuous_joints_idxs_sin=[]
+ self._continuous_joints_idxs_red=[]
+ self._rev_joints_idxs=[]
+ self._rev_joints_idxs_red=[]
+ # qfull
+ self._continuous_joints_idxs_qfull=[]
+ self._continuous_joints_idxs_cos_qfull=[]
+ self._continuous_joints_idxs_sin_qfull=[]
+ self._continuous_joints_idxs_red_qfull=[]
+ self._rev_joints_idxs_qfull=[]
+ self._rev_joints_idxs_red_qfull=[]
+ jnt_homing=[""]*(len(self._homer.get_homing())+len(self._continuous_joints))
+ jnt_names=self._get_robot_jnt_names()
+ for i in range(len(jnt_names)):
+ jnt=jnt_names[i]
+ index=self._get_jnt_id(jnt)# accounting for floating joint
+ homing_idx=index-7 # homing is only for actuated joints
+ homing_value=self._homer.get_homing_val(jnt)
+ if jnt in self._continuous_joints:
+ jnt_homing[homing_idx]=np.cos(homing_value).item()
+ jnt_homing[homing_idx+1]=np.sin(homing_value).item()
+ # just actuated joints
+ self._continuous_joints_idxs.append(homing_idx) # cos
+ self._continuous_joints_idxs.append(homing_idx+1) # sin
+ self._continuous_joints_idxs_cos.append(homing_idx)
+ self._continuous_joints_idxs_sin.append(homing_idx+1)
+ self._continuous_joints_idxs_red.append(i)
+ # q full
+ self._continuous_joints_idxs_qfull.append(index) # cos
+ self._continuous_joints_idxs_qfull.append(index+1) # sin
+ self._continuous_joints_idxs_cos_qfull.append(index)
+ self._continuous_joints_idxs_sin_qfull.append(index+1)
+ self._continuous_joints_idxs_red_qfull.append(i+7)
+ else:
+ jnt_homing[homing_idx]=homing_value
+ # just actuated joints
+ self._rev_joints_idxs.append(homing_idx)
+ self._rev_joints_idxs_red.append(i)
+ # q full
+ self._rev_joints_idxs_qfull.append(index)
+ self._rev_joints_idxs_red_qfull.append(i+7)
+
+ self._jnts_q_reduced=None
+ if not len(self._continuous_joints)==0:
+ cont_joints=", ".join(self._continuous_joints)
+
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f"The following continuous joints were found: \n{cont_joints}",
+ LogType.INFO,
+ throw_when_excep=True)
+ # preallocating data
+ self._jnts_q_reduced=np.zeros((1,self.nv()-6),dtype=self._dtype)
+ self._jnts_q_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype)
+ self._full_q_reduced=np.zeros((7+len(jnt_names), self._n_nodes),dtype=self._dtype)
+ self._jnts_q_delta_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype)
+ else:
+ self._custom_opts["replace_continuous_joints"]=True
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f"No continuous joints were found.",
+ LogType.INFO,
+ throw_when_excep=True)
+
+ # retrieve wheels indexes (not considering continuous joints,
+ # ok just for v, eff, etc..)
+ self._wheel_patterns=wheels_patterns
+ self._wheels_idxs_v=self._get_wheels_jnt_v_idxs(wheel_patterns=self._wheel_patterns)
+ self._f0 = [0, 0, self._kin_dyn.mass()/4*9.81]
+
+ # we can create an init for the base
+ init = self._base_init.tolist() + jnt_homing
+
+ if foot_linkname is not None:
+ FK = self._kin_dyn.fk(foot_linkname) # just to get robot reference height
+ ground_level = FK(q=init)['ee_pos']
+ self._base_init[2] = -ground_level[2] # override init
+
+ self._model = FullModelInverseDynamics(problem=self._prb,
+ kd=self._kin_dyn,
+ q_init=self._homer.get_homing_map(),
+ base_init=self._base_init)
+
+ self._ti = TaskInterface(prb=self._prb,
+ model=self._model,
+ max_solver_iter=self.max_solver_iter,
+ debug = self._debug,
+ verbose = self._verbose,
+ codegen_workdir = self._codegen_dir)
+ self._ti.setTaskFromYaml(self.config_path)
+
+ # setting initial base pos ref if exists
+ base_pos = self._ti.getTask('base_height')
+ if base_pos is not None:
+ base_pos.setRef(np.atleast_2d(self._base_init).T)
+
+ self._pm = pymanager.PhaseManager(self._n_nodes, debug=False) # intervals or nodes?????
+
+ self._gm = GaitManager(self._ti,
+ self._pm,
+ self._injection_node,
+ keep_yaw_vert=keep_yaw_vert,
+ yaw_vertical_weight=self._yaw_vertical_weight,
+ vertical_landing=vertical_landing,
+ landing_vert_weight=self._vertical_land_weight,
+ phase_force_reg=self._phase_force_reg,
+ custom_opts=self._custom_opts,
+ flight_duration=flight_duration,
+ post_flight_stance=post_flight_stance,
+ step_height=step_height,
+ dh=0.0)
+
+ self._ti.model.q.setBounds(self._ti.model.q0, self._ti.model.q0, nodes=0)
+ self._ti.model.v.setBounds(self._ti.model.v0, self._ti.model.v0, nodes=0)
+ self._ti.model.q.setInitialGuess(self._ti.model.q0)
+ self._ti.model.v.setInitialGuess(self._ti.model.v0)
+ for _, cforces in self._ti.model.cmap.items():
+ n_contact_f=len(cforces)
+ for c in cforces:
+ c.setInitialGuess(np.array(self._f0)/n_contact_f)
+
+ vel_lims = self._model.kd.velocityLimits()
+ import horizon.utils as utils
+ self._prb.createResidual('vel_lb_barrier', self._vel_bounds_weight*utils.utils.barrier(vel_lims[7:] - self._model.v[7:]))
+ self._prb.createResidual('vel_ub_barrier', self._vel_bounds_weight*utils.utils.barrier1(-1 * vel_lims[7:] - self._model.v[7:]))
+
+ self._meas_lin_a_par=None
+ # if self._custom_opts["lin_a_feedback"]:
+ # # acceleration feedback on first node
+ # self._meas_lin_a_par=self._prb.createParameter(name="lin_a_feedback",
+ # dim=3, nodes=0)
+ # base_lin_a_prb=self._prb.getInput().getVars()[0:3]
+ # self._prb.createConstraint('lin_acceleration_feedback', base_lin_a_prb - self._meas_lin_a_par,
+ # nodes=[0])
+
+ # if not self._open_loop:
+ # # we create a residual cost to be used as an attractor to the measured state on the first node
+ # # hard constraints injecting meas. states are pure EVIL!
+ # prb_state=self._prb.getState()
+ # full_state=prb_state.getVars()
+ # state_dim=prb_state.getBounds()[0].shape[0]
+ # meas_state=self._prb.createParameter(name="measured_state",
+ # dim=state_dim, nodes=0)
+ # self._prb.createResidual('meas_state_attractor', meas_state_attractor_weight * (full_state - meas_state),
+ # nodes=[0])
+
+ self._ti.finalize()
+ self._ti.bootstrap()
+
+ self._ti.init_inv_dyn_for_res() # we initialize some objects for sol. postprocessing purposes
+ self._ti.load_initial_guess()
+
+ self.n_dofs = self._get_ndofs() # after loading the URDF and creating the controller we
+ # know n_dofs -> we assign it (by default = None)
+
+ self.n_contacts = len(self._model.cmap.keys())
+
+ # remove variables bounds (before they were necessary to have a good
+ # quality bootstrap solution)
+ self._q_inf=np.zeros((self.nq(), 1))
+ self._q_inf[:, :]=np.inf
+ self._v_inf=np.zeros((self.nv(), 1))
+ self._v_inf[:, :]=np.inf
+ self._ti.model.q.setBounds(-self._q_inf, self._q_inf, nodes=0)
+ self._ti.model.v.setBounds(-self._v_inf, self._v_inf, nodes=0)
+
+ # self.horizon_anal = analyzer.ProblemAnalyzer(self._prb)
+
+ def get_file_paths(self):
+ # can be overriden by child
+ paths = super().get_file_paths()
+ return paths
+
+ def _get_quat_remap(self):
+ # overrides parent
+ return [1, 2, 3, 0] # mapping from robot quat. to Horizon's quaternion convention
+
    def _zmp(self, model):
        """Build a CasADi function evaluating the zero-moment point (ZMP).

        The ZMP is the CoM xy-position offset by the force-weighted mean of the
        contact-to-CoM xy offsets, where each contact is weighted by the
        vertical (z) component of its force.

        Parameters:
            model: Horizon model exposing nq, nv, kd (kinodynamics object) and
                fmap (ordered map: contact frame name -> force variable).

        Returns:
            cs.Function 'zmp' mapping (q, v, a, f_0, ..., f_{n-1}) -> 2x1 zmp,
            with force inputs ordered as in model.fmap.
        """
        num = cs.SX([0, 0])
        den = cs.SX([0])
        pos_contact = dict()
        force_val = dict()

        # symbolic state/input placeholders
        q = cs.SX.sym('q', model.nq)
        v = cs.SX.sym('v', model.nv)
        a = cs.SX.sym('a', model.nv)

        com = model.kd.centerOfMass()(q=q, v=v, a=a)['com']

        # assumed ground normal: only vertical force components contribute
        n = cs.SX([0, 0, 1])
        for c in model.fmap.keys():
            pos_contact[c] = model.kd.fk(c)(q=q)['ee_pos']
            force_val[c] = cs.SX.sym('force_val', 3)
            num += (pos_contact[c][0:2] - com[0:2]) * cs.dot(force_val[c], n)
            den += cs.dot(force_val[c], n)

        zmp = com[0:2] + (num / den)
        # input ordering matters: q, v, a, then forces in fmap order
        input_list = []
        input_list.append(q)
        input_list.append(v)
        input_list.append(a)

        for elem in force_val.values():
            input_list.append(elem)

        f = cs.Function('zmp', input_list, [zmp])
        return f
+
+ def _add_zmp(self):
+
+ input_zmp = []
+
+ input_zmp.append(self._model.q)
+ input_zmp.append(self._model.v)
+ input_zmp.append(self._model.a)
+
+ for f_var in self._model.fmap.values():
+ input_zmp.append(f_var)
+
+ c_mean = cs.SX([0, 0, 0])
+ for c_name, f_var in self._model.fmap.items():
+ fk_c_pos = self._kin_dyn.fk(c_name)(q=self._model.q)['ee_pos']
+ c_mean += fk_c_pos
+
+ c_mean /= len(self._model.cmap.keys())
+
+ zmp_nominal_weight = 10.
+ zmp_fun = self._zmp(self._model)(*input_zmp)
+
+ if 'wheel_joint_1' in self._model.kd.joint_names():
+ zmp_residual = self._prb.createIntermediateResidual('zmp', zmp_nominal_weight * (zmp_fun[0:2] - c_mean[0:2]))
+
+ def _quaternion_multiply(self,
+ q1, q2):
+ x1, y1, z1, w1 = q1
+ x2, y2, z2, w2 = q2
+ w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
+ x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
+ y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
+ z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
+ return np.array([x, y, z, w])
+
+ def _get_continuous_jnt_names(self):
+ import xml.etree.ElementTree as ET
+ root = ET.fromstring(self.urdf)
+ continuous_joints = []
+ for joint in root.findall('joint'):
+ joint_type = joint.get('type')
+ if joint_type == 'continuous':
+ joint_name = joint.get('name')
+ continuous_joints.append(joint_name)
+ return continuous_joints
+
+ def _get_wheels_jnt_v_idxs(self, wheel_patterns: List[str]):
+ jnt_names=self._get_robot_jnt_names()
+ wheels_idxs=[]
+ for i in range(len(jnt_names)):
+ jnt_name=jnt_names[i]
+ for wheel_pattern in wheel_patterns:
+ if wheel_pattern in jnt_name:
+ wheels_idxs.append(i)
+ break
+ return wheels_idxs
+
+ def _get_jnt_id(self, jnt_name):
+ return self._kin_dyn.joint_iq(jnt_name)
+
+ def _init_rhc_task_cmds(self):
+
+ rhc_refs = HybridQuadRhcRefs(gait_manager=self._gm,
+ robot_index_shm=self.controller_index,
+ robot_index_view=0, # when using optimize_mem the view if always of shape 1x...
+ namespace=self.namespace,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ use_force_feedback=self._custom_opts["use_force_feedback"],
+ optimize_mem=True)
+
+ rhc_refs.run()
+
+ rhc_refs.rob_refs.set_jnts_remapping(jnts_remapping=self._to_controller)
+ rhc_refs.rob_refs.set_q_remapping(q_remapping=self._get_quat_remap())
+
+ rhc_refs.set_default_refs(p_ref=np.atleast_2d(self._base_init)[:, 0:3],
+ q_ref=np.atleast_2d(self._base_init)[:, 3:7])
+
+ return rhc_refs
+
+ def get_vertex_fnames_from_ti(self):
+ tasks=self._ti.task_list
+ contact_f_names=[]
+ for task in tasks:
+ if isinstance(task, ContactTask):
+ interaction_task=task.dynamics_tasks[0]
+ contact_f_names.append(interaction_task.vertex_frames[0])
+ return contact_f_names
+
+ def _get_contact_names(self):
+ # should get contact names from vertex frames
+ # list(self._ti.model.cmap.keys())
+ return self.get_vertex_fnames_from_ti()
+
+ def _get_robot_jnt_names(self):
+
+ joints_names = self._kin_dyn.joint_names()
+ to_be_removed = ["universe",
+ "reference",
+ "world",
+ "floating",
+ "floating_base"]
+ for name in to_be_removed:
+ if name in joints_names:
+ joints_names.remove(name)
+
+ return joints_names
+
+ def _get_ndofs(self):
+ return len(self._model.joint_names)
+
+ def nq(self):
+ return self._kin_dyn.nq()
+
+ def nv(self):
+ return self._kin_dyn.nv()
+
+ def _get_robot_mass(self):
+
+ return self._kin_dyn.mass()
+
+ def _get_root_full_q_from_sol(self, node_idx=1):
+
+ root_q_full=self._ti.solution['q'][0:7, node_idx].reshape(1, 7).astype(self._dtype)
+
+ np.nan_to_num(root_q_full, nan=1e3, posinf=1e3, neginf=-1e3, copy=False)
+ np.clip(a=root_q_full, a_min=-1e3, a_max=1e3, out=root_q_full)
+
+ return root_q_full
+
+ def _get_full_q_from_sol(self, node_idx=1):
+
+ return self._ti.solution['q'][:, node_idx].reshape(1, -1).astype(self._dtype)
+
+ def _get_root_twist_from_sol(self, node_idx=1):
+ # provided in world frame
+ twist_base_local=self._get_v_from_sol()[0:6, node_idx].reshape(1, 6)
+ # if world_aligned:
+ # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4]
+ # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix()
+ # twist_base_local[:, 0:3] = r_base_rhc @ twist_base_local[0, 0:3]
+ # twist_base_local[:, 3:6] = r_base_rhc @ twist_base_local[0, 3:6]
+ return twist_base_local
+
+ def _get_root_a_from_sol(self, node_idx=0):
+ # provided in world frame
+ a_base_local=self._get_a_from_sol()[0:6, node_idx].reshape(1, 6)
+ # if world_aligned:
+ # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4]
+ # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix()
+ # a_base_local[:, 0:3] = r_base_rhc @ a_base_local[0, 0:3]
+ # a_base_local[:, 3:6] = r_base_rhc @ v[0, 3:6]
+ return a_base_local
+
    def _get_jnt_q_from_sol(self, node_idx=0,
            reduce: bool = True,
            clamp: bool = True):
        """Joint positions at the given horizon node, shaped (1, n).

        When continuous joints are parametrized in So2 (cos/sin pairs) and
        `reduce` is True, each pair is collapsed back to an angle via arctan2
        so the result matches the robot-side joint ordering. `clamp` only
        affects the non-reduced path (wrap with fmod(., 2*pi)); the reduced
        path always wraps the revolute joints.

        NOTE: the reduced path returns self._jnts_q_reduced, a preallocated
        buffer overwritten at every call — do not keep references to it.
        """
        full_jnts_q=self._ti.solution['q'][7:, node_idx:node_idx+1].reshape(1,-1).astype(self._dtype)

        # sanitize NaN/inf in place before using the values
        np.nan_to_num(full_jnts_q, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) # in place
        np.clip(a=full_jnts_q, a_max=1e3, a_min=-1e3, out=full_jnts_q) # in place

        if self._custom_opts["replace_continuous_joints"] or (not reduce):
            if clamp:
                return np.fmod(full_jnts_q, 2*np.pi)
            else:
                return full_jnts_q
        else:
            # one (cos, sin) row per continuous joint
            cos_sin=full_jnts_q[:,self._continuous_joints_idxs].reshape(-1,2)
            # copy rev joint vals
            self._jnts_q_reduced[:, self._rev_joints_idxs_red]=np.fmod(full_jnts_q[:, self._rev_joints_idxs], 2*np.pi).reshape(1, -1)
            # and continuous (cos/sin -> angle)
            self._jnts_q_reduced[:, self._continuous_joints_idxs_red]=np.arctan2(cos_sin[:, 1], cos_sin[:, 0]).reshape(1,-1)
            return self._jnts_q_reduced
+
+ def _get_jnt_v_from_sol(self, node_idx=1):
+
+ jnt_v_sol=self._get_v_from_sol()[6:, node_idx].reshape(1,
+ self._nv_jnts)
+ np.nan_to_num(jnt_v_sol, nan=1e5, posinf=1e5, neginf=-1e5, copy=False) # in place
+ # np.clip(a=jnt_v_sol, a_max=1e5, a_min=-1e5, out=jnt_v_sol) # in place
+
+ return jnt_v_sol
+
+ def _get_jnt_a_from_sol(self, node_idx=1):
+
+ return self._get_a_from_sol()[6:, node_idx].reshape(1,
+ self._nv_jnts)
+
+ def _get_jnt_eff_from_sol(self, node_idx=0):
+
+ efforts_on_node = self._ti.eval_efforts_on_node(node_idx=node_idx)
+
+ # if self._custom_opts["only_vel_wheels"]:
+
+ jnt_efforts=efforts_on_node[6:, 0]
+
+ if self._custom_opts["only_vel_wheels"] and self._wheels_idxs_v:
+ jnt_efforts[self._wheels_idxs_v]=0.0
+
+ return jnt_efforts.reshape(1,
+ self._nv_jnts).astype(self._dtype)
+
+ def _get_rhc_cost(self):
+
+ return self._ti.solution["opt_cost"]
+
+ def _get_rhc_constr_viol(self):
+
+ return self._ti.solution["residual_norm"]
+
+ def _get_rhc_nodes_cost(self):
+
+ cost = self._ti.solver_rti.getCostValOnNodes()
+ return cost.reshape((1, -1))
+
+ def _get_rhc_nodes_constr_viol(self):
+
+ constr_viol = self._ti.solver_rti.getConstrValOnNodes()
+ return constr_viol.reshape((1, -1))
+
+ def _get_rhc_niter_to_sol(self):
+
+ return self._ti.solution["n_iter2sol"]
+
    def _set_ig_bootstrap(self,
        q_state: np.ndarray = None,
        v_state: np.ndarray = None):
        """Build and assign the initial guess for a (re)bootstrap solve.

        The state guess repeats q_state over all nodes (broadcast over columns)
        with zero velocities; the input guess uses zero accelerations plus the
        nominal force self._f0 split evenly across each contact's vertex forces.

        NOTE(review): v_state is currently unused (velocities are explicitly
        zeroed), and q_state is dereferenced unconditionally despite the None
        default — confirm callers always pass it.

        Returns (xig, uig): the state and input initial-guess matrices.
        """
        xig = self._ti.solution['x_opt'].copy()
        uig = self._ti.solution['u_opt'].copy()

        # Normalize and keep quaternion in the same hemisphere as the previous
        # solution to avoid artificial 180-deg jumps in the bootstrap warm start.
        q_state_boot = q_state.copy()
        q_prev = xig[3:7, 0]
        q_now = q_state_boot[3:7, 0]

        q_now_norm = np.linalg.norm(q_now)
        if q_now_norm > 1e-9:
            q_state_boot[3:7, :] /= q_now_norm
        else:
            # degenerate measurement: fall back to the identity quaternion
            q_state_boot[3:7, :] = np.array([[0.0], [0.0], [0.0], [1.0]], dtype=self._dtype)

        q_prev_norm = np.linalg.norm(q_prev)
        if q_prev_norm > 1e-9:
            q_prev = q_prev / q_prev_norm

        q_now = q_state_boot[3:7, 0]
        # q and -q encode the same rotation: pick the sign closest to q_prev
        if np.dot(q_prev, q_now) < 0.0:
            q_state_boot[3:7, :] *= -1.0

        xig[0:self._nq, :] = q_state_boot
        xig[self._nq:self._nq + self._nv, :] = 0.0 # 0 velocity on first nodes
        uig[0:self._nv, :]=0.0 # 0 acceleration

        # assigning ig
        self._prb.getState().setInitialGuess(xig)
        self._prb.getInput().setInitialGuess(uig)
        # self._prb.getVariables("a").setInitialGuess(np.zeros((self._nv, 1), dtype=self._dtype))
        for _, cforces in self._ti.model.cmap.items():
            n_contact_f = len(cforces)
            if n_contact_f == 0:
                continue
            f_guess = np.array(self._f0, dtype=self._dtype) / n_contact_f
            for c in cforces:
                c.setInitialGuess(f_guess)

        # print("initial guesses")
        # print(self._nq)
        # print(self._nv)
        # print("q")
        # qig=self._ti.model.q.getInitialGuess()
        # print(qig.shape)
        # print(qig)
        # print("v")
        # print(self._ti.model.v.getInitialGuess())
        # print("a")
        # print(self._ti.model.a.getInitialGuess())
        # for _, cforces in self._ti.model.cmap.items():
        #     for c in cforces:
        #         print("force")
        #         print(c.getInitialGuess())

        return xig, uig
+
+ def _set_ig(self):
+
+ shift_num = -1 # shift data by one node
+
+ x_opt = self._ti.solution['x_opt']
+ u_opt = self._ti.solution['u_opt']
+
+ # building ig for state
+ xig = np.roll(x_opt,
+ shift_num, axis=1) # rolling state sol.
+ for i in range(abs(shift_num)):
+ # state on last node is copied to the elements
+ # which are "lost" during the shift operation
+ xig[:, -1 - i] = x_opt[:, -1]
+ # building ig for inputs
+ uig = np.roll(u_opt,
+ shift_num, axis=1) # rolling state sol.
+ for i in range(abs(shift_num)):
+ # state on last node is copied to the elements
+ # which are "lost" during the shift operation
+ uig[:, -1 - i] = u_opt[:, -1]
+
+ # assigning ig
+ self._prb.getState().setInitialGuess(xig)
+ self._prb.getInput().setInitialGuess(uig)
+
+ return xig, uig
+
+ def _update_open_loop(self,
+ bootstrap: bool = False):
+
+ q_state, v_state, a_state=self._set_is_open()
+
+ if not bootstrap:
+ self._set_ig()
+ else:
+ self._set_ig_bootstrap(q_state=q_state, v_state=v_state)
+
+ # robot_state=xig[:, 0]
+ # # open loop update:
+ # self._prb.setInitialState(x0=robot_state) # (xig has been shifted, so node 0
+ # # is node 1 in the last opt solution)
+
+ return q_state, v_state, a_state
+
+ def _update_closed_loop(self,
+ bootstrap: bool = False):
+
+ # set initial state
+ q_state=None
+ v_state=None
+ a_state=None
+ if self._custom_opts["adaptive_is"]:
+ # adaptive closed loop
+ q_state, v_state, a_state=self._set_is_adaptive()
+ elif self._custom_opts["fully_closed"]:
+ q_state, v_state, a_state=self._set_is_full()
+ elif self._custom_opts["closed_partial"]:
+ q_state, v_state, a_state=self._set_is_partial()
+ else:
+ Journal.log(self.__class__.__name__,
+ "_update_closed_loop",
+ "Neither adaptive_is, fully_closed, or closed_partial.",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ q_state, v_state, a_state=self._set_is()
+
+ # set initial guess for controller
+ if not bootstrap:
+ self._set_ig()
+ else:
+ self._set_ig_bootstrap(q_state=q_state, v_state=v_state)
+
+ return q_state, v_state, a_state
+
    def _set_is_open(self):
        """Open-loop initial condition: pin the node-0 state bounds to the
        previous solution's node-1 values (no measurements used).

        Returns (q_state, v_state, None): the state used as feedback.
        """
        # overriding states with rhc data
        q_full_root=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1)
        q_jnts=self._get_jnt_q_from_sol(node_idx=1, reduce=False).reshape(-1, 1)

        twist_root=self._get_root_twist_from_sol(node_idx=1).reshape(-1, 1)
        v_jnts=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_q_full_rhc=q[0:7] # full base pose (pos + quat)
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_twist_rhc=vel[0:6] # base twist (lin + ang)
        jnts_v_rhc=vel[6:] # jnts v

        self.rhc_refs.set_alpha(alpha=1.0) # fully open

        # pin node-0 state (equal lower/upper bounds)
        root_q_full_rhc.setBounds(lb=q_full_root,
            ub=q_full_root, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts,
            ub=q_jnts, nodes=0)
        root_twist_rhc.setBounds(lb=twist_root,
            ub=twist_root, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts,
            ub=v_jnts, nodes=0)

        # return state used for feedback
        q_state=np.concatenate((q_full_root, q_jnts),
            axis=0)
        v_state=np.concatenate((twist_root, v_jnts),
            axis=0)

        return (q_state, v_state, None)
+
    def _set_is_full(self):
        """Fully closed-loop initial condition: pin the whole node-0 state
        (base pose, joint positions, base twist, joint velocities — and base
        linear acceleration if enabled) to measurements from shared memory.

        Returns (q_state, v_state, a_state): the measured state used as
        feedback.
        """
        # measurements
        q_full_root = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1)

        if (not len(self._continuous_joints)==0): # expand measured continuous joints to So2 (cos/sin)
            self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:]
            self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos
            self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin
            q_jnts=self._jnts_q_expanded.reshape(-1,1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_full_q_rhc=q[0:7] # full base pose (pos + quat)
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_v_rhc=vel[0:3] # lin v.
        root_omega_rhc=vel[3:6] # omega
        jnts_v_rhc=vel[6:] # jnts v
        acc=self._prb.getVariables("a")
        lin_a_prb=acc[0:3] # lin acc

        self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive

        # pin node-0 state to measurements (equal lower/upper bounds)
        root_full_q_rhc.setBounds(lb=q_full_root,
            ub=q_full_root, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts,
            ub=q_jnts, nodes=0)
        root_v_rhc.setBounds(lb=v_root,
            ub=v_root, nodes=0)
        root_omega_rhc.setBounds(lb=omega,
            ub=omega, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts,
            ub=v_jnts, nodes=0)
        if self._custom_opts["lin_a_feedback"]:
            # feed the measured base linear acceleration on the first node
            lin_a_prb.setBounds(lb=a_root[0:3, :],
                ub=a_root[0:3, :],
                nodes=0)

        # return state used for feedback
        q_state=np.concatenate((q_full_root, q_jnts),
            axis=0)
        v_state=np.concatenate((v_root, omega, v_jnts),
            axis=0)
        a_state=np.concatenate((a_root, a_jnts),
            axis=0)

        return (q_state, v_state, a_state)
+
    def _set_is_partial(self):
        """Partially closed-loop initial condition: base position is always
        taken open-loop from the previous solution; base linear velocity and
        joint velocities come from either measurements or the solution,
        depending on custom options; the rest is measured.

        Returns (q_state, v_state, a_state): the state used as feedback.
        """
        # measurements
        p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1)
        q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1)

        if (not len(self._continuous_joints)==0): # expand measured continuous joints to So2 (cos/sin)
            self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:]
            self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos
            self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin
            q_jnts=self._jnts_q_expanded.reshape(-1,1)

        # overriding states with rhc data (-> all overridden states are open loop)
        root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1)
        root_p_from_rhc=root_q_full_from_rhc[0:3, :]
        p_root[:, :]=root_p_from_rhc # position is always open loop

        if not self._custom_opts["estimate_v_root"]:
            v_root[:, :]=self._get_root_twist_from_sol(node_idx=1)[:, 0:3].reshape(-1, 1)
        # override v jnts with the ones from controller
        if not self._custom_opts["use_jnt_v_feedback"]:
            v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1)
        # v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1)
        # root_twist_from_rhc=self._get_root_twist_from_sol(node_idx=1)
        # root_v_from_rhc=root_twist_from_rhc[:, 0:3].reshape(-1, 1)
        # root_omega_from_rhc=root_twist_from_rhc[:, 3:6].reshape(-1, 1)
        # jnt_q_from_rhc=self._get_jnt_q_from_sol(node_idx=1,reduce=False,clamp=False).reshape(-1, 1)
        # jnt_v_from_rhc=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_p_rhc=q[0:3] # root p
        root_q_rhc=q[3:7] # root orientation
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_v_rhc=vel[0:3] # lin v.
        root_omega_rhc=vel[3:6] # omega
        jnts_v_rhc=vel[6:] # jnts v
        acc=self._prb.getVariables("a")
        lin_a_prb=acc[0:3] # lin acc

        self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive

        # pin node-0 state (equal lower/upper bounds)
        root_p_rhc.setBounds(lb=p_root,
            ub=p_root, nodes=0)
        root_q_rhc.setBounds(lb=q_root,
            ub=q_root, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts,
            ub=q_jnts, nodes=0)

        if self._custom_opts["estimate_v_root"]:
            root_v_rhc.setBounds(lb=-self._v_inf[0:3],
                ub=self._v_inf[0:3], nodes=0) # leaving lin v of the base free (estimated from constraints)
        else: # get it from controller
            root_v_rhc.setBounds(lb=v_root,
                ub=v_root, nodes=0)
        root_omega_rhc.setBounds(lb=omega,
            ub=omega, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts,
            ub=v_jnts, nodes=0)
        if self._custom_opts["lin_a_feedback"]:
            # feed the measured base linear acceleration on the first node
            lin_a_prb.setBounds(lb=a_root[0:3, :],
                ub=a_root[0:3, :],
                nodes=0)

        # return state used for feedback
        q_state=np.concatenate((p_root, q_root, q_jnts),
            axis=0)
        v_state=np.concatenate((v_root, omega, v_jnts),
            axis=0)
        a_state=np.concatenate((a_root, a_jnts),
            axis=0)

        return (q_state, v_state, a_state)
+
    def _set_is_adaptive(self):
        """Adaptive closed-loop initial condition: blend measurements with the
        controller's own prediction using a factor alpha in [0, 1].

        alpha = 0 -> fully closed loop (pure measurements); alpha = 1 -> fully
        open loop. alpha either comes from outside (shared refs) or is
        auto-tuned via a tanh of the max prediction defect. Base position and
        continuous-joint positions are always open loop.

        Returns (q_state, v_state, a_state): the (measured) state used as
        feedback.
        """
        # measurements
        p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1)
        q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_p_rhc=q[0:3] # root p
        root_q_rhc=q[3:7] # root orientation
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_v_rhc=vel[0:3] # lin v.
        root_omega_rhc=vel[3:6] # omega
        jnts_v_rhc=vel[6:] # jnts v
        acc=self._prb.getVariables("a")
        lin_a_prb=acc[0:3] # lin acc

        # getting prediction defects (predicted - measured deltas)
        root_q_delta=self.rhc_pred_delta.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        jnt_q_delta=self.rhc_pred_delta.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        jnt_v_delta=self.rhc_pred_delta.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root_delta = self.rhc_pred_delta.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega_root_delta = self.rhc_pred_delta.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root_delta = self.rhc_pred_delta.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        # close state on known quantities, estimate some (e.g. lin vel) and
        # go open loop if things start to explode
        alpha_now=1.0
        delta=0.0
        if self._custom_opts["alpha_from_outside"]:
            alpha_now=self.rhc_refs.get_alpha()
        else: # "autotuned" alpha
            if self._custom_opts["estimate_v_root"]: # compute delta based on jnt v (since we use meas.)
                delta=np.max(np.abs(jnt_v_delta))
            else:
                delta=np.max(np.abs(omega_root_delta))
            # fail_idx=self._get_failure_index()
            # fail_idx=self._get_explosion_idx()/self._fail_idx_thresh
            # smooth 0->1 transition centered at delta == self._alpha_half
            alpha_now=(np.tanh(2*self._alpha_half*(delta-self._alpha_half))+1)/2.0

        bound_relaxation=-np.log(1-alpha_now+1e-16) # [~0, ~inf] if alpha_now [0, 1]
        self.rhc_refs.set_alpha(alpha=alpha_now) # also writes on shared mem for db
        self.rhc_refs.set_bound_relax(bound_relax=bound_relaxation) # also writes on shared mem for db

        self._alphas_q_root[:]=alpha_now # for now single alpha for everything
        self._alphas_q_jnts[:]=alpha_now
        self._alphas_twist_root[:]=alpha_now
        self._alphas_v_jnts[:]=alpha_now
        self._alphas_a[:]=alpha_now
        if not self._custom_opts["estimate_v_root"]:
            self._alphas_twist_root[0:3]=1.0 # open
            self._alphas_v_jnts[:]=1.0 # open

        # position is always open loop
        root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1)
        root_p_from_rhc=root_q_full_from_rhc[0:3, :]
        p_root[:, :]=root_p_from_rhc

        # expanding measured q if continuous joints are present
        if (not len(self._continuous_joints)==0): # expand measured continuous joints to So2 (cos/sin)
            self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:]
            self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos
            self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin

            # continuous joint position is always open loop, but we need a delta vector of matching dimension
            q_jnts_from_rhc=self._get_jnt_q_from_sol(node_idx=1).reshape(-1, 1)

            self._jnts_q_delta_expanded[self._rev_joints_idxs, :]=jnt_q_delta[self._rev_joints_idxs_red ,:]

            self._jnts_q_delta_expanded[self._continuous_joints_idxs_cos, :]=\
                np.cos(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \
                np.cos(q_jnts[self._continuous_joints_idxs_red, :])
            self._jnts_q_delta_expanded[self._continuous_joints_idxs_sin, :]=\
                np.sin(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \
                np.sin(q_jnts[self._continuous_joints_idxs_red, :])

            q_jnts=self._jnts_q_expanded.reshape(-1,1) # overwriting with expanded jnts
            jnt_q_delta=self._jnts_q_delta_expanded.reshape(-1, 1) # overwriting with expanded jnts

            self._alphas_q_jnts[self._continuous_joints_idxs_cos, :]=1.0 # open loop
            self._alphas_q_jnts[self._continuous_joints_idxs_sin, :]=1.0 # open loop

            # self._alphas_v_jnts[self._continuous_joints_idxs_red, :]=0.0 # open loop

        # pin node-0 state to measurement + alpha * defect (equal lb/ub)
        root_p_rhc.setBounds(lb=p_root,
            ub=p_root, nodes=0)
        root_q_rhc.setBounds(lb=q_root+self._alphas_q_root[3:7]*root_q_delta,
            ub=q_root+self._alphas_q_root[3:7]*root_q_delta, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts+self._alphas_q_jnts*jnt_q_delta,
            ub=q_jnts+self._alphas_q_jnts*jnt_q_delta, nodes=0)
        if self._custom_opts["estimate_v_root"]:
            root_v_rhc.setBounds(lb=-self._v_inf[0:3],
                ub=self._v_inf[0:3], nodes=0)
        else:
            root_v_rhc.setBounds(lb=v_root+self._alphas_twist_root[0:3, :]*v_root_delta,
                ub=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, nodes=0)
        root_omega_rhc.setBounds(lb=omega+self._alphas_twist_root[3:6, :]*omega_root_delta,
            ub=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts+self._alphas_v_jnts*jnt_v_delta,
            ub=v_jnts+self._alphas_v_jnts*jnt_v_delta, nodes=0)
        if self._custom_opts["lin_a_feedback"]:
            lin_a_prb.setBounds(lb=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :],
                ub=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :],
                nodes=0)

        # return state used for feedback
        q_state=np.concatenate((p_root, q_root, q_jnts),
            axis=0)
        v_state=np.concatenate((v_root, omega, v_jnts),
            axis=0)
        a_state=np.concatenate((a_root, a_jnts),
            axis=0)

        return (q_state, v_state, a_state)
+
+ def _solve(self):
+
+ if self._debug:
+ return self._db_solve(bootstrap=False)
+ else:
+ return self._min_solve(bootstrap=False)
+
+ def _bootstrap(self):
+
+ if self._debug:
+ return self._db_solve(bootstrap=True)
+ else:
+ return self._min_solve(bootstrap=True)
+
    def _min_solve(self, bootstrap: bool = False):
        """Minimal (no-debug) controller update.

        Updates the node-0 state and warm start (open or closed loop), shifts
        the phase manager, steps the references, then runs one RTI step (or a
        full bootstrap solve).

        Returns True on success (no RHC failure detected), False otherwise;
        any exception raised by the solver is deliberately swallowed and
        reported as a failure.
        """
        robot_qstate=None
        robot_vstate=None
        robot_astate=None
        if self._open_loop:
            robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and
            # initial conditions using data from the solution itself
        else:
            robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and
            # initial conditions using robot measurements

        self._pm.shift() # shifts phases of one dt
        if self._refs_in_hor_frame:
            # q_base=self.robot_state.root_state.get(data_type="q",
            #     robot_idxs=self.controller_index).reshape(-1, 1)
            # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1)
            # using internal base pose from rhc. in case of closed loop, it will be the meas state
            force_norm=None
            if self._custom_opts["use_force_feedback"]:
                # per-contact force norms from measured contact wrenches
                contact_forces=self.robot_state.contact_wrenches.get(data_type="f",
                    robot_idxs=self.controller_index_np,
                    contact_name=None).reshape(self.n_contacts,3)
                force_norm=np.linalg.norm(contact_forces, axis=1)
            self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate,
                force_norm=force_norm)
        else:
            self.rhc_refs.step()

        try:
            if not bootstrap:
                converged = self._ti.rti() # RTI step
            else:
                converged = self._ti.bootstrap() # full solve (to convergence)
            self.sol_counter = self.sol_counter + 1
            return not self._check_rhc_failure()
        except Exception as e: # fail in case of exceptions (deliberate best-effort)
            return False
+
+ def _db_solve(self, bootstrap: bool = False):
+
+ self._timer_start = time.perf_counter()
+
+ robot_qstate=None
+ robot_vstate=None
+ robot_astate=None
+ if self._open_loop:
+ robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and
+ # initial conditions using data from the solution itself
+ else:
+ robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and
+ # initial conditions using robot measurements
+
+ self._prb_update_time = time.perf_counter()
+ self._pm.shift() # shifts phases of one dt
+ self._phase_shift_time = time.perf_counter()
+
+ if self._refs_in_hor_frame:
+ # q_base=self.robot_state.root_state.get(data_type="q",
+ # robot_idxs=self.controller_index).reshape(-1, 1)
+ # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1)
+ # using internal base pose from rhc. in case of closed loop, it will be the meas state
+ force_norm=None
+ if self._custom_opts["use_force_feedback"]:
+ contact_forces=self.robot_state.contact_wrenches.get(data_type="f",
+ robot_idxs=self.controller_index_np,
+ contact_name=None).reshape(self.n_contacts,3)
+ force_norm=np.linalg.norm(contact_forces, axis=1)
+ self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate,
+ force_norm=force_norm)
+ else:
+ self.rhc_refs.step()
+
+ self._task_ref_update_time = time.perf_counter()
+
+ try:
+ if not bootstrap:
+ converged = self._ti.rti() # RTI step
+ else:
+ converged = self._ti.bootstrap() # full solve bootstrap
+ self._rti_time = time.perf_counter()
+ self.sol_counter = self.sol_counter + 1
+ self._update_db_data()
+ return not self._check_rhc_failure()
+ except Exception as e: # fail in case of exceptions
+ if self._verbose:
+ solve_mode = "RTI" if not bootstrap else "Bootstrap"
+ exception = f"{solve_mode}() for controller {self.controller_index} failed" + \
+ f" with exception {type(e).__name__}"
+ Journal.log(self.__class__.__name__,
+ "solve",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = False)
+ self._update_db_data()
+ return False
+
    def _get_fail_idx(self):
        """Return a running average of the "explosion index" over a circular buffer.

        The latest index is written at the current write-head column; the head
        is reset to 0 every ``self._expl_idx_window_size`` samples (tracked via
        the global sample counter). The average is taken over the WHOLE buffer,
        so right after a reset it reflects whatever the buffer was filled with.

        NOTE(review): assumes the buffer has exactly ``_expl_idx_window_size``
        columns so the head never runs past the end — confirm where the buffer
        is allocated.
        """
        self._explosion_idx_buffer[:, self._expl_idx_buffer_counter]=self._get_explosion_idx()
        self._expl_idx_buffer_counter+=1
        self._expl_idx_counter+=1
        if self._expl_idx_counter%self._expl_idx_window_size==0:
            self._expl_idx_buffer_counter=0 # restart from 0

        running_avrg=np.mean(self._explosion_idx_buffer).item()

        return running_avrg
+
+ def _get_explosion_idx(self):
+ explosion_index = self._get_rhc_constr_viol() + self._get_rhc_cost()*self._fail_idx_scale
+ return explosion_index
+
+ def _update_db_data(self):
+
+ self._profiling_data_dict["problem_update_dt"] = self._prb_update_time - self._timer_start
+ self._profiling_data_dict["phases_shift_dt"] = self._phase_shift_time - self._prb_update_time
+ self._profiling_data_dict["task_ref_update"] = self._task_ref_update_time - self._phase_shift_time
+ self._profiling_data_dict["rti_solve_dt"] = self._rti_time - self._task_ref_update_time
+ self.rhc_costs.update(self._ti.solver_rti.getCostsValues())
+ self.rhc_constr.update(self._ti.solver_rti.getConstraintsValues())
+
+ def _reset(self):
+
+ # reset task interface (ig, solvers, etc..) +
+ # phase manager and sets bootstap as solution
+ self._gm.reset()
+ self._explosion_idx_buffer[:, :]=self._get_explosion_idx() # reset with data from reset solution
+ self._expl_idx_counter=0.0
+ self._expl_idx_buffer_counter=0
+
+ def _get_cost_info(self):
+
+ cost_dict = self._ti.solver_rti.getCostsValues()
+ cost_names = list(cost_dict.keys())
+ cost_dims = [1] * len(cost_names) # costs are always scalar
+ return cost_names, cost_dims
+
+ def _get_constr_info(self):
+
+ constr_dict = self._ti.solver_rti.getConstraintsValues()
+ constr_names = list(constr_dict.keys())
+ constr_dims = [-1] * len(constr_names)
+ i = 0
+ for constr in constr_dict:
+ constr_val = constr_dict[constr]
+ constr_shape = constr_val.shape
+ constr_dims[i] = constr_shape[0]
+ i+=1
+ return constr_names, constr_dims
+
    def _get_q_from_sol(self):
        """Configuration trajectory from the last solution, cast to controller dtype.

        If continuous joints were replaced in the model, the solver's q can be
        returned as-is. Otherwise each continuous joint is represented in the
        solver's q by a (cos, sin) pair, which is collapsed back to an angle
        via arctan2 into the pre-allocated reduced buffer.

        NOTE(review): assumes cos/sin entries of continuous joints are strictly
        interleaved (cos at even, sin at odd positions of
        ``_continuous_joints_idxs_qfull``) — confirm against the index setup.
        Returns the shared ``_full_q_reduced`` buffer (mutated in place).
        """
        full_q=self._ti.solution['q'].astype(self._dtype)
        if self._custom_opts["replace_continuous_joints"]:
            return full_q
        else:
            cont_jnts=full_q[self._continuous_joints_idxs_qfull, :]
            cos=cont_jnts[::2, :]
            sin=cont_jnts[1::2, :]
            # copy root
            self._full_q_reduced[0:7, :]=full_q[0:7, :]
            # copy rev joint vals
            self._full_q_reduced[self._rev_joints_idxs_red_qfull, :]=full_q[self._rev_joints_idxs_qfull, :]
            # and continuous
            angle=np.arctan2(sin, cos)
            self._full_q_reduced[self._continuous_joints_idxs_red_qfull, :]=angle
            return self._full_q_reduced
+
+ def _get_v_from_sol(self):
+ return self._ti.solution['v'].astype(self._dtype)
+
+ def _get_a_from_sol(self):
+ return self._ti.solution['a'].astype(self._dtype)
+
+ def _get_a_dot_from_sol(self):
+ return None
+
+ def _get_f_from_sol(self):
+ # to be overridden by child class
+ contact_names =self._get_contacts() # we use controller-side names
+ try:
+ data=[]
+ for key in contact_names:
+ contact_f=self._ti.solution["f_" + key].astype(self._dtype)
+ np.nan_to_num(contact_f, nan=1e6, posinf=1e6, neginf=-1e6, copy=False)
+ np.clip(a=contact_f, a_max=1e6, a_min=-1e6, out=contact_f)
+ data.append(contact_f)
+ return np.concatenate(data, axis=0)
+ except:
+ return None
+
+ def _get_f_dot_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_eff_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_cost_from_sol(self,
+ cost_name: str):
+ return self.rhc_costs[cost_name]
+
+ def _get_constr_from_sol(self,
+ constr_name: str):
+ return self.rhc_constr[constr_name]
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc_refs.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc_refs.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c751aa8a6e8d32c028f94d4224123863866a933
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc_refs.py
@@ -0,0 +1,381 @@
+from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager
+from aug_mpc.controllers.rhc.horizon_based.utils.math_utils import hor2w_frame
+
+from mpc_hive.utilities.shared_data.rhc_data import RhcRefs
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import LogType
+from EigenIPC.PyEigenIPC import Journal
+
+from typing import Union
+
+import numpy as np
+
+class HybridQuadRhcRefs(RhcRefs):
+
    def __init__(self,
                 gait_manager: GaitManager,
                 robot_index_shm: int,
                 robot_index_view: int,
                 namespace: str, # namespace used for shared mem
                 verbose: bool = True,
                 vlevel: VLevel = VLevel.V2,
                 safe: bool = True,
                 use_force_feedback: bool = False,
                 optimize_mem: bool = False):
        """Rhc-references handler bridging shared memory and the horizon tasks.

        Connects as a client to the shared rhc references and mirrors them
        into the task interface owned by the provided gait manager.

        Args:
            gait_manager: GaitManager owning the horizon task interface.
            robot_index_shm: row of this robot in the full shared-memory views.
            robot_index_view: row in the local (possibly memory-optimized) view.
            namespace: namespace used for shared mem.
            verbose: enable verbose logging.
            vlevel: shared-memory verbosity level.
            safe: forwarded to the shared-memory client.
            use_force_feedback: if True, step() expects contact-force norms.
            optimize_mem: if True, mirror only this controller's row locally.
        """
        self.robot_index = robot_index_shm
        self.robot_index_view = robot_index_view
        # 0-d numpy view index, used for numpy-mirror row addressing
        self.robot_index_np_view = np.array(self.robot_index_view)

        self._step_idx = 0
        self._print_frequency = 100

        self._verbose = verbose

        self._use_force_feedback=use_force_feedback

        if optimize_mem:
            super().__init__(
                is_server=False,
                with_gpu_mirror=False,
                namespace=namespace,
                safe=safe,
                verbose=verbose,
                vlevel=vlevel,
                optimize_mem=optimize_mem,
                n_robots=1, # we just need the row corresponding to this controller
                n_jnts=None, # got from server
                n_contacts=None # got from server
                )
        else:
            super().__init__(
                is_server=False,
                with_gpu_mirror=False,
                namespace=namespace,
                safe=safe,
                verbose=verbose,
                vlevel=vlevel)

        # validate only after super().__init__ so logging is available
        if not isinstance(gait_manager, GaitManager):
            exception = f"Provided gait_manager argument should be of GaitManager type!"
            Journal.log(self.__class__.__name__,
                "__init__",
                exception,
                LogType.EXCEP,
                throw_when_excep = True)

        self.gait_manager = gait_manager

        self.timeline_names = self.gait_manager.timeline_names

        # task interfaces from horizon for setting commands to rhc
        self._get_tasks()

        # default base refs used by reset(); quaternion defaults to identity
        self._p_ref_default=np.zeros((1, 3))
        self._q_ref_default=np.zeros((1, 4))
        self._q_ref_default[0, 0]=1
+
+ def _get_tasks(self):
+ # can be overridden by child
+ # cartesian tasks are in LOCAL_WORLD_ALIGNED (frame centered at distal link, oriented as WORLD)
+ self.base_lin_velxy = self.gait_manager.task_interface.getTask('base_lin_velxy')
+ self.base_lin_velz = self.gait_manager.task_interface.getTask('base_lin_velz')
+ self.base_omega = self.gait_manager.task_interface.getTask('base_omega')
+ self.base_height = self.gait_manager.task_interface.getTask('base_height')
+
+ def run(self):
+
+ super().run()
+ if not (self.robot_index < self.rob_refs.n_robots()):
+ exception = f"Provided \(0-based\) robot index {self.robot_index} exceeds number of " + \
+ " available robots {self.rob_refs.n_robots()}."
+ Journal.log(self.__class__.__name__,
+ "run",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ contact_names = list(self.gait_manager.task_interface.model.cmap.keys())
+ if not (self.n_contacts() == len(contact_names)):
+ exception = f"N of contacts within problem {len(contact_names)} does not match n of contacts {self.n_contacts()}"
+ Journal.log(self.__class__.__name__,
+ "run",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ # set some defaults from gait manager
+ for i in range(self.n_contacts()):
+ self.flight_settings_req.set(data=self.gait_manager.get_flight_duration(contact_name=contact_names[i]),
+ data_type="len_remain",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i)
+ self.flight_settings_req.set(data=self.gait_manager.get_step_apexdh(contact_name=contact_names[i]),
+ data_type="apex_dpos",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i)
+ self.flight_settings_req.set(data=self.gait_manager.get_step_enddh(contact_name=contact_names[i]),
+ data_type="end_dpos",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i)
+
+ self.flight_settings_req.synch_retry(row_index=self.robot_index,
+ col_index=0,
+ row_index_view=self.robot_index_view,
+ n_rows=1,
+ n_cols=self.flight_settings_req.n_cols,
+ read=False)
+
    def step(self, qstate: np.ndarray = None,
            vstate: np.ndarray = None,
            force_norm: np.ndarray = None):
        """Advance references by one control step.

        Pulls the latest refs/flags/flight settings from shared memory,
        updates the contact phase timelines and pushes the base refs to
        the horizon tasks.

        Args:
            qstate: full configuration used for phase insertion and whose
                [3:7] rows are read as the base quaternion.
            vstate: currently unread here (kept for interface symmetry).
            force_norm: per-contact force norms (currently unused; the
                force-feedback path below is commented out).

        Raises (via Journal): if called while the client is not running.
        """
        if self.is_running():

            # updates robot refs from shared mem
            self.rob_refs.synch_from_shared_mem(robot_idx=self.robot_index, robot_idx_view=self.robot_index_view)
            self.phase_id.synch_all(read=True, retry=True,
                            row_index=self.robot_index,
                            row_index_view=self.robot_index_view)
            self.contact_flags.synch_all(read=True, retry=True,
                            row_index=self.robot_index,
                            row_index_view=self.robot_index_view)
            self.flight_settings_req.synch_all(read=True, retry=True,
                            row_index=self.robot_index,
                            row_index_view=self.robot_index_view)
            self._set_contact_phases(q_full=qstate)

            # updated internal references with latest available ones
            q_base=qstate[3:7,0:1] # quaternion
            self._apply_refs_to_tasks(q_base=q_base)

            # if self._use_force_feedback:
            #     self._set_force_feedback(force_norm=force_norm)

            self._step_idx +=1

        else:
            exception = f"{self.__class__.__name__} is not running"
            Journal.log(self.__class__.__name__,
                "step",
                exception,
                LogType.EXCEP,
                throw_when_excep = True)
+
+ def _set_contact_phases(self,
+ q_full: np.ndarray):
+
+ # phase_id = self.phase_id.read_retry(row_index=self.robot_index,
+ # col_index=0)[0]
+
+ contact_flags_refs = self.contact_flags.get_numpy_mirror()[self.robot_index_np_view, :]
+ target_n_limbs_in_contact=np.sum(contact_flags_refs).item()
+ if target_n_limbs_in_contact==0:
+ target_n_limbs_in_contact=4
+
+ is_contact = contact_flags_refs.flatten().tolist()
+ n_contacts=len(is_contact)
+
+ for i in range(n_contacts): # loop through contact timelines
+ timeline_name = self.timeline_names[i]
+
+ self.gait_manager.set_f_reg(contact_name=timeline_name,
+ scale=target_n_limbs_in_contact)
+
+ if is_contact[i]==False: # release contact
+
+ # flight parameters requests are set only when inserting a flight phase
+ len_req_now=int(self.flight_settings_req.get(data_type="len_remain",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i).item())
+ apex_req_now=self.flight_settings_req.get(data_type="apex_dpos",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i).item()
+ end_req_now=self.flight_settings_req.get(data_type="end_dpos",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i).item()
+ landing_dx_req_now=self.flight_settings_req.get(data_type="land_dx",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i).item()
+ landing_dy_req_now=self.flight_settings_req.get(data_type="land_dy",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i).item()
+
+ # set flight phase properties depending on last value on shared memory
+ self.gait_manager.set_flight_duration(contact_name=timeline_name,
+ val=len_req_now)
+ self.gait_manager.set_step_apexdh(contact_name=timeline_name,
+ val=apex_req_now)
+ self.gait_manager.set_step_enddh(contact_name=timeline_name,
+ val=end_req_now)
+ self.gait_manager.set_step_landing_dx(contact_name=timeline_name,
+ val=landing_dx_req_now)
+ self.gait_manager.set_step_landing_dy(contact_name=timeline_name,
+ val=landing_dy_req_now)
+ # insert flight phase over the horizon
+ self.gait_manager.add_flight(contact_name=timeline_name,
+ robot_q=q_full)
+
+ else: # contact phase
+ self.gait_manager.add_stand(contact_name=timeline_name)
+
+ at_least_one_flight=self.gait_manager.update_flight_info(timeline_name)
+ # flight_info=self.gait_manager.get_flight_info(timeline_name)
+
+ self.gait_manager.check_horizon_full(timeline_name=timeline_name)
+
+ # write flight info to shared mem for all contacts in one shot (we follow same order as in flight_info shm)
+ all_flight_info=self.gait_manager.get_flight_info_all()
+ flight_info_shared=self.flight_info.get_numpy_mirror()
+ flight_info_shared[self.robot_index_np_view, :]=all_flight_info
+ self.flight_info.synch_retry(row_index=self.robot_index,
+ col_index=0,
+ row_index_view=self.robot_index_np_view,
+ n_rows=1, n_cols=self.flight_info.n_cols,
+ read=False)
+
+ self.gait_manager.update()
+
+ def _apply_refs_to_tasks(self, q_base = None):
+ # overrides parent
+ if q_base is not None: # rhc refs are assumed to be specified in the so called "horizontal"
+ # frame, i.e. a vertical frame, with the x axis aligned with the projection of the base x axis
+ # onto the plane
+ root_pose = self.rob_refs.root_state.get(data_type = "q_full",
+ robot_idxs=self.robot_index_np_view).reshape(-1, 1) # this should also be
+ # rotated into the horizontal frame (however, for now only the z componet is used, so it's ok)
+
+ root_twist_ref = self.rob_refs.root_state.get(data_type="twist",
+ robot_idxs=self.robot_index_np_view).reshape(-1, 1)
+
+ root_twist_ref_h = root_twist_ref.copy()
+
+ hor2w_frame(root_twist_ref, q_base, root_twist_ref_h) # horizon works in local world aligned frame
+
+ if self.base_lin_velxy is not None:
+ self.base_lin_velxy.setRef(root_twist_ref_h[0:2, :])
+ if self.base_omega is not None:
+ self.base_omega.setRef(root_twist_ref_h[3:, :])
+ if self.base_lin_velz is not None:
+ self.base_lin_velz.setRef(root_twist_ref_h[2:3, :])
+ if self.base_height is not None:
+ self.base_height.setRef(root_pose)
+ else:
+ root_pose = self.rob_refs.root_state.get(data_type = "q_full",
+ robot_idxs=self.robot_index_np_view).reshape(-1, 1)
+ root_twist_ref = self.rob_refs.root_state.get(data_type="twist",
+ robot_idxs=self.robot_index_np_view).reshape(-1, 1)
+
+ if self.base_lin_velxy is not None:
+ self.base_lin_velxy.setRef(root_twist_ref[0:2, :])
+ if self.base_omega is not None:
+ self.base_omega.setRef(root_twist_ref[3:, :])
+ if self.base_lin_velz is not None:
+ self.base_lin_velz.setRef(root_twist_ref[2:3, :])
+ if self.base_height is not None:
+ self.base_height.setRef(root_pose)
+
+ # def _set_force_feedback(self,
+ # force_norm: np.ndarray = None):
+
+ # is_contact=force_norm>1.0
+
+ # for i in range(len(is_contact)):
+ # timeline_name = self._timeline_names[i]
+ # self.gait_manager.set_force_feedback(timeline_name=timeline_name,
+ # force_norm=force_norm[i])
+
+ # if not is_contact[i]:
+
+
+ def set_default_refs(self,
+ p_ref: np.ndarray,
+ q_ref: np.ndarray):
+
+ self._p_ref_default[:, :]=p_ref
+ self._q_ref_default[:, :]=q_ref
+
+ def set_alpha(self, alpha:float):
+ # set provided value
+ alpha_shared=self.alpha.get_numpy_mirror()
+ alpha_shared[self.robot_index_np_view, :] = alpha
+ self.alpha.synch_retry(row_index=self.robot_index, col_index=0,
+ row_index_view=self.robot_index_view,
+ n_rows=1, n_cols=self.alpha.n_cols,
+ read=False)
+
+ def get_alpha(self):
+ self.alpha.synch_retry(row_index=self.robot_index, col_index=0,
+ row_index_view=self.robot_index_view,
+ n_rows=1, n_cols=self.alpha.n_cols,
+ read=True)
+ alpha=self.alpha.get_numpy_mirror()[self.robot_index_np_view, :].item()
+ return alpha
+
+ def set_bound_relax(self, bound_relax:float):
+ # set provided value
+ bound_rel_shared=self.bound_rel.get_numpy_mirror()
+ bound_rel_shared[self.robot_index_np_view, :] = bound_relax
+ self.bound_rel.synch_retry(row_index=self.robot_index, col_index=0, n_rows=1,
+ row_index_view=self.robot_index_view,
+ n_cols=self.alpha.n_cols,
+ read=False)
+
    def reset(self):
        """Reset this robot's references to their defaults and publish them.

        Restores: all contacts engaged, custom phase id (-1), zeroed contact
        positions / flight info / alpha, and the default base pose with zero
        twist; then pushes every touched view back to shared memory.

        Raises (via Journal): if called before run().
        """
        if self.is_running():

            # resets shared mem
            contact_flags_current = self.contact_flags.get_numpy_mirror()
            phase_id_current = self.phase_id.get_numpy_mirror()
            contact_flags_current[self.robot_index_np_view, :] = np.full((1, self.n_contacts()), dtype=np.bool_, fill_value=True)
            phase_id_current[self.robot_index_np_view, :] = -1 # defaults to custom phase id

            contact_pos_current=self.rob_refs.contact_pos.get_numpy_mirror()
            contact_pos_current[self.robot_index_np_view, :] = 0.0

            flight_info_current=self.flight_info.get_numpy_mirror()
            flight_info_current[self.robot_index_np_view, :] = 0.0

            alpha=self.alpha.get_numpy_mirror()
            alpha[self.robot_index_np_view, :] = 0.0

            # defaults set via set_default_refs() (identity quaternion initially)
            self.rob_refs.root_state.set(data_type="p", data=self._p_ref_default, robot_idxs=self.robot_index_np_view)
            self.rob_refs.root_state.set(data_type="q", data=self._q_ref_default, robot_idxs=self.robot_index_np_view)
            self.rob_refs.root_state.set(data_type="twist", data=np.zeros((1, 6)), robot_idxs=self.robot_index_np_view)

            self.contact_flags.synch_retry(row_index=self.robot_index, col_index=0,
                    row_index_view=self.robot_index_view,
                    n_rows=1, n_cols=self.contact_flags.n_cols,
                    read=False)
            self.phase_id.synch_retry(row_index=self.robot_index, col_index=0,
                    row_index_view=self.robot_index_view,
                    n_rows=1, n_cols=self.phase_id.n_cols,
                    read=False)
            self.rob_refs.root_state.synch_retry(row_index=self.robot_index, col_index=0,
                    row_index_view=self.robot_index_view,
                    n_rows=1, n_cols=self.rob_refs.root_state.n_cols,
                    read=False)

            self.rob_refs.contact_pos.synch_retry(row_index=self.robot_index, col_index=0,
                    row_index_view=self.robot_index_view,
                    n_rows=1, n_cols=self.rob_refs.contact_pos.n_cols,
                    read=False)

            self.flight_info.synch_retry(row_index=self.robot_index,
                    col_index=0,
                    row_index_view=self.robot_index_view,
                    n_rows=1, n_cols=self.flight_info.n_cols,
                    read=False)

            # should also empty the timeline for stepping phases
            self._step_idx = 0

        else:
            exception = f"Cannot call reset() since run() was not called!"
            Journal.log(self.__class__.__name__,
                "reset",
                exception,
                LogType.EXCEP,
                throw_when_excep = True)
+
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/utils/__init__.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/utils/math_utils.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/utils/math_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d4173d5b51ee8c644a54008ca0a40f1b34d1841
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/utils/math_utils.py
@@ -0,0 +1,165 @@
+
+import numpy as np
+
def w2hor_frame(t_w: np.ndarray,
        q_b: np.ndarray,
        t_out: np.ndarray):
    """
    Transforms a twist vector expressed in WORLD frame to
    an "horizontal" frame (z aligned as world, x aligned as the projection
    of the x-axis of the base frame described by q_b). This is useful for specifying locomotion
    references in a "game"-like fashion.
    t_out will hold the result.

    Shapes: t_w/t_out are (6, n); q_b is (4, n) with (i, j, k, w) row order.
    """
    qi, qj, qk, qw = q_b[0, :], q_b[1, :], q_b[2, :], q_b[3, :]

    # first column of the base rotation matrix (base x-axis in world coords)
    r11 = 1 - 2 * (qj * qj + qk * qk)
    r21 = 2 * (qi * qj + qk * qw)

    # normalize its ground-plane projection -> cos/sin of the base yaw
    norm = np.sqrt(r11 * r11 + r21 * r21)
    cos_yaw = r11 / norm
    sin_yaw = r21 / norm

    # rotate linear and angular x/y components by -yaw; z stays unchanged
    for off in (0, 3):
        t_out[off, :] = cos_yaw * t_w[off, :] + sin_yaw * t_w[off + 1, :]
        t_out[off + 1, :] = -sin_yaw * t_w[off, :] + cos_yaw * t_w[off + 1, :]
        t_out[off + 2, :] = t_w[off + 2, :]
+
def hor2w_frame(t_h: np.ndarray,
        q_b: np.ndarray,
        t_out: np.ndarray):
    """
    Transforms a twist vector expressed in the "horizontal" frame to WORLD.
    t_out will hold the result (inverse of w2hor_frame).

    Shapes: t_h/t_out are (6, n); q_b is (4, n) with (i, j, k, w) row order.
    """
    qi, qj, qk, qw = q_b[0, :], q_b[1, :], q_b[2, :], q_b[3, :]

    # first column of the base rotation matrix (base x-axis in world coords)
    r11 = 1 - 2 * (qj * qj + qk * qk)
    r21 = 2 * (qi * qj + qk * qw)

    # normalize its ground-plane projection -> cos/sin of the base yaw
    norm = np.sqrt(r11 * r11 + r21 * r21)
    cos_yaw = r11 / norm
    sin_yaw = r21 / norm

    # rotate linear and angular x/y components by +yaw; z stays unchanged
    for off in (0, 3):
        t_out[off, :] = cos_yaw * t_h[off, :] - sin_yaw * t_h[off + 1, :]
        t_out[off + 1, :] = sin_yaw * t_h[off, :] + cos_yaw * t_h[off + 1, :]
        t_out[off + 2, :] = t_h[off + 2, :]
+
def base2world_frame(t_b: np.ndarray,
        q_b: np.ndarray,
        t_out: np.ndarray):
    """
    Transforms a twist expressed in the base frame to the WORLD frame,
    using the quaternion describing the base orientation w.r.t. world.
    The result is written into t_out.

    Shapes: t_b/t_out are (6, n); q_b is (4, n) with (i, j, k, w) row order.
    """
    qi, qj, qk, qw = q_b[0, :], q_b[1, :], q_b[2, :], q_b[3, :]

    # rows of the base->world rotation matrix from the quaternion
    row0 = (1 - 2 * (qj * qj + qk * qk), 2 * (qi * qj - qk * qw), 2 * (qi * qk + qj * qw))
    row1 = (2 * (qi * qj + qk * qw), 1 - 2 * (qi * qi + qk * qk), 2 * (qj * qk - qi * qw))
    row2 = (2 * (qi * qk - qj * qw), 2 * (qj * qk + qi * qw), 1 - 2 * (qi * qi + qj * qj))

    # apply R to the linear (rows 0:3) and angular (rows 3:6) parts
    for off in (0, 3):
        vx, vy, vz = t_b[off, :], t_b[off + 1, :], t_b[off + 2, :]
        t_out[off, :] = row0[0] * vx + row0[1] * vy + row0[2] * vz
        t_out[off + 1, :] = row1[0] * vx + row1[1] * vy + row1[2] * vz
        t_out[off + 2, :] = row2[0] * vx + row2[1] * vy + row2[2] * vz
+
def world2base_frame(t_w: np.ndarray,
        q_b: np.ndarray,
        t_out: np.ndarray):
    """
    Transforms a twist expressed in the WORLD frame to the base frame,
    using the quaternion describing the base orientation w.r.t. world
    (i.e. applies the transpose of the base->world rotation).
    The result is written into t_out.

    Shapes: t_w/t_out are (6, n); q_b is (4, n) with (i, j, k, w) row order.
    """
    qi, qj, qk, qw = q_b[0, :], q_b[1, :], q_b[2, :], q_b[3, :]

    # rows of the base->world rotation matrix from the quaternion
    row0 = (1 - 2 * (qj * qj + qk * qk), 2 * (qi * qj - qk * qw), 2 * (qi * qk + qj * qw))
    row1 = (2 * (qi * qj + qk * qw), 1 - 2 * (qi * qi + qk * qk), 2 * (qj * qk - qi * qw))
    row2 = (2 * (qi * qk - qj * qw), 2 * (qj * qk + qi * qw), 1 - 2 * (qi * qi + qj * qj))

    # apply R^T to the linear (rows 0:3) and angular (rows 3:6) parts
    for off in (0, 3):
        vx, vy, vz = t_w[off, :], t_w[off + 1, :], t_w[off + 2, :]
        t_out[off, :] = row0[0] * vx + row1[0] * vy + row2[0] * vz
        t_out[off + 1, :] = row0[1] * vx + row1[1] * vy + row2[1] * vz
        t_out[off + 2, :] = row0[2] * vx + row1[2] * vy + row2[2] * vz
+
+if __name__ == "__main__":
+
+ n_envs = 5000
+ t_b = np.random.rand(6, n_envs)
+
+ q_b = np.random.rand(4, n_envs)
+ q_b_norm = q_b / np.linalg.norm(q_b, axis=0, keepdims=True)
+
+ t_w = np.zeros_like(t_b) # To hold horizontal frame velocities
+ t_b_recovered = np.zeros_like(t_b) # To hold recovered world frame velocities
+ base2world_frame(t_b, q_b_norm, t_w)
+ world2base_frame(t_w, q_b_norm, t_b_recovered)
+ assert np.allclose(t_b, t_b_recovered, atol=1e-6), "Test failed: t_w_recovered does not match t_b"
+ print("Forward test passed: t_b_recovered matches t_b")
+
+ t_b2 = np.zeros_like(t_b) # To hold horizontal frame velocities
+ t_w_recovered = np.zeros_like(t_b)
+ world2base_frame(t_b, q_b_norm, t_b2)
+ base2world_frame(t_b2, q_b_norm, t_w_recovered)
+ assert np.allclose(t_b, t_w_recovered, atol=1e-6), "Test failed: t_w_recovered does not match t_b"
+ print("Backward test passed: t_w_recovered matches t_w")
+
+ # test transf. world-horizontal frame
+ v_h = np.zeros_like(t_b) # To hold horizontal frame velocities
+ v_recovered = np.zeros_like(t_b)
+ w2hor_frame(t_b, q_b_norm, v_h)
+ hor2w_frame(v_h, q_b_norm, v_recovered)
+ assert np.allclose(t_b, v_recovered, atol=1e-6), "Test failed: v_recovered does not match t_b"
+ print("horizontal forward frame test passed: matches ")
+
+ t_w = np.zeros_like(t_b) # To hold horizontal frame velocities
+ v_h_recovered = np.zeros_like(t_b)
+ hor2w_frame(t_b, q_b_norm, t_w)
+ w2hor_frame(t_w, q_b_norm, v_h_recovered)
+ assert np.allclose(t_b, v_h_recovered, atol=1e-6), "Test failed: v_h_recovered does not match t_b"
+ print("horizontal backward frame test passed: matches ")
+
+
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/hybrid_quad_client.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/hybrid_quad_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..757dc3aba72a500bbbf6c8fa21145f4ea63a2337
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/hybrid_quad_client.py
@@ -0,0 +1,95 @@
+from aug_mpc.controllers.rhc.augmpc_cluster_client import AugMpcClusterClient
+
+from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc import HybridQuadRhc
+from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds_horizon
+from aug_mpc.utils.sys_utils import PathsGetter
+
+from typing import List, Dict
+
+class HybridQuadrupedClusterClient(AugMpcClusterClient):
+
    def _import_aux_libs(self):
        """Import auxiliary libraries needed by the controllers.

        Extends the parent hook; Horizon and its dependencies are imported
        lazily here as globals — presumably so the import cost is paid in the
        process that actually runs the controllers (TODO confirm).
        """
        super()._import_aux_libs()
        # Import Horizon and related dependencies as global libs
        from aug_mpc.controllers.rhc.horizon_based.horizon_imports_glob import import_horizon_global
        import_horizon_global()
+
+ def __init__(self,
+ namespace: str,
+ urdf_xacro_path: str,
+ srdf_xacro_path: str,
+ cluster_size: int,
+ set_affinity: bool = False,
+ use_mp_fork: bool = False,
+ isolated_cores_only: bool = False,
+ core_ids_override_list: List[int] = None,
+ verbose: bool = False,
+ debug: bool = False,
+ open_loop: bool = True,
+ base_dump_dir: str = "/tmp",
+ timeout_ms: int = 60000,
+ codegen_override: str = None,
+ custom_opts: Dict = {}):
+
+ self._open_loop = open_loop
+
+ self._paths = PathsGetter()
+
+ self._codegen_dir_name = namespace
+
+ self._timeout_ms = timeout_ms
+
+ super().__init__(namespace = namespace,
+ urdf_xacro_path = urdf_xacro_path,
+ srdf_xacro_path = srdf_xacro_path,
+ cluster_size=cluster_size,
+ set_affinity = set_affinity,
+ use_mp_fork = use_mp_fork,
+ isolated_cores_only = isolated_cores_only,
+ core_ids_override_list = core_ids_override_list,
+ verbose = verbose,
+ debug = debug,
+ base_dump_dir=base_dump_dir,
+ codegen_override=codegen_override,
+ custom_opts=custom_opts)
+
+ self._n_nodes = 31 if not ("n_nodes" in self._custom_opts) else self._custom_opts["n_nodes"]
+ self._dt = 0.05 if not ("cluster_dt" in self._custom_opts) else self._custom_opts["cluster_dt"]
+
+ def _xrdf_cmds(self):
+ parts = self._urdf_path.split('/')
+ urdf_descr_root_path = '/'.join(parts[:-2])
+ cmds = get_xrdf_cmds_horizon(urdf_descr_root_path=urdf_descr_root_path)
+ return cmds
+
+ def _process_codegen_dir(self,idx:int):
+
+ codegen_dir = self.codegen_dir() + f"/{self._codegen_dir_name}Rhc{idx}"
+ codegen_dir_ovveride = self.codegen_dir_override()
+ if not (codegen_dir_ovveride=="" or \
+ codegen_dir_ovveride=="none" or \
+ codegen_dir_ovveride=="None" or \
+ (codegen_dir_ovveride is None)): # if overrde was provided
+ codegen_dir = f"{codegen_dir_ovveride}{idx}"# override
+
+ return codegen_dir
+
+ def _generate_controller(self,
+ idx: int):
+
+ codegen_dir=self._process_codegen_dir(idx=idx)
+
+ controller = HybridQuadRhc(
+ urdf_path=self._urdf_path,
+ srdf_path=self._srdf_path,
+ config_path = self._paths.CONFIGPATH,
+ robot_name=self._namespace,
+ codegen_dir=codegen_dir,
+ n_nodes=self._n_nodes,
+ dt=self._dt,
+ max_solver_iter = 1, # rti
+ open_loop = self._open_loop,
+ verbose = self._verbose,
+ debug = self._debug)
+
+ return controller
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rhc.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rhc.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9410261d4dca35164fbe8abe5ac12b166b3483d
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rhc.py
@@ -0,0 +1,1262 @@
+# Copyright (C) 2023 Andrea Patrizi (AndrePatri, andreapatrizi1b6e6@gmail.com)
+#
+# This file is part of MPCHive and distributed under the General Public License version 2 license.
+#
+# MPCHive is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# MPCHive is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with MPCHive. If not, see .
+#
+from abc import ABC, abstractmethod
+# from perf_sleep.pyperfsleep import PerfSleep
+# from mpc_hive.utilities.cpu_utils.core_utils import get_memory_usage
+
+import time
+
+import numpy as np
+
+from mpc_hive.utilities.shared_data.rhc_data import RobotState
+from mpc_hive.utilities.shared_data.rhc_data import RhcCmds, RhcPred, RhcPredDelta
+from mpc_hive.utilities.shared_data.rhc_data import RhcStatus
+from mpc_hive.utilities.shared_data.rhc_data import RhcInternal
+from mpc_hive.utilities.shared_data.cluster_profiling import RhcProfiling
+from mpc_hive.utilities.remote_triggering import RemoteTriggererClnt
+
+from mpc_hive.utilities.homing import RobotHomer
+
+from mpc_hive.utilities.math_utils import world2base_frame
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import Journal, LogType
+from EigenIPC.PyEigenIPCExt.wrappers.shared_data_view import SharedTWrapper
+from EigenIPC.PyEigenIPC import dtype
+
+from typing import List
+# from typing import TypeVar, Union
+
+import signal
+import os
+import inspect
+
+class RHController(ABC):
+
+ def __init__(self,
+ srdf_path: str,
+ n_nodes: int,
+ dt: float,
+ namespace: str, # shared mem namespace
+ dtype = np.float32,
+ verbose = False,
+ debug = False,
+ timeout_ms: int = 60000,
+ allow_less_jnts: bool = True):
+
+ signal.signal(signal.SIGINT, self._handle_sigint)
+
+ self._allow_less_jnts = allow_less_jnts # whether to allow less joints in rhc controller than the ones on the robot
+ # (e.g. some joints might not be desirable for control purposes)
+
+ self.namespace = namespace
+ self._dtype = dtype
+ self._verbose = verbose
+ self._debug = debug
+
+ # if not self._debug:
+ np.seterr(over='ignore') # ignoring overflows
+
+ self._n_nodes = n_nodes
+ self._dt = dt
+ self._n_intervals = self._n_nodes - 1
+ self._t_horizon = self._n_intervals * dt
+ self._set_rhc_pred_idx() # prection is by default written on last node
+ self._set_rhc_cmds_idx() # default to idx 2 (i.e. cmds to get to third node)
+ self.controller_index = None # will be assigned upon registration to a cluster
+ self.controller_index_np = None
+ self._robot_mass=1.0
+
+ self.srdf_path = srdf_path # using for parsing robot homing
+
+ self._registered = False
+ self._closed = False
+
+ self._allow_triggering_when_failed = True
+
+ self._profiling_data_dict = {}
+ self._profiling_data_dict["full_solve_dt"] = np.nan
+ self._profiling_data_dict["rti_solve_dt"] = np.nan
+ self._profiling_data_dict["problem_update_dt"] = np.nan
+ self._profiling_data_dict["phases_shift_dt"] = np.nan
+ self._profiling_data_dict["task_ref_update"] = np.nan
+
+ self.n_dofs = None
+ self.n_contacts = None
+
+ # shared mem
+ self.robot_state = None
+ self.rhc_status = None
+ self.rhc_internal = None
+ self.cluster_stats = None
+ self.robot_cmds = None
+ self.robot_pred = None
+ self.rhc_pred_delta = None
+ self.rhc_refs = None
+ self._remote_triggerer = None
+ self._remote_triggerer_timeout = timeout_ms # [ms]
+
+ # remote termination
+ self._remote_term = None
+ self._term_req_received = False
+
+ # jnt names
+ self._env_side_jnt_names = []
+ self._controller_side_jnt_names = []
+ self._got_jnt_names_from_controllers = False
+
+ # data maps
+ self._to_controller = []
+ self._quat_remap = [0, 1, 2, 3] # defaults to no remap (to be overridden)
+
+ self._got_contact_names = False
+
+ self._received_trigger = False # used for proper termination
+
+ self._n_resets = 0
+ self._n_fails = 0
+ self._fail_idx_thresh = 5e3
+ self._contact_f_thresh = 1e5
+
+ self._failed = False
+
+ self._start_time = time.perf_counter() # used for profiling when in debug mode
+
+ self._homer = None # robot homing manager
+ self._homer_env = None # used for setting homing to jnts not contained in rhc prb
+
+ self._class_name_base = f"{self.__class__.__name__}"
+ self._class_name = self._class_name_base # will append controller index upon registration
+
+ self._contact_force_base_loc_aux=np.zeros((1,3),dtype=self._dtype)
+ self._norm_grav_vector_w=np.zeros((1,3),dtype=self._dtype)
+ self._norm_grav_vector_w[:, 2]=-1.0
+ self._norm_grav_vector_base_loc=np.zeros((1,3),dtype=self._dtype)
+
+ self._init() # initialize controller
+
+ if not hasattr(self, '_rhc_fpaths'):
+ self._rhc_fpaths = []
+ self._rhc_fpaths.append(os.path.abspath(__file__))
+
+ def __init_subclass__(cls, **kwargs):
+ super().__init_subclass__(**kwargs)
+ # Get the file path of the class being initialized and append it to the list
+ if not hasattr(cls, '_rhc_fpaths'):
+ cls._rhc_fpaths = []
+ child_class_file_path = os.path.abspath(inspect.getfile(cls))
+ cls._rhc_fpaths.append(child_class_file_path)
+
+ def this_paths(self):
+ return self._rhc_fpaths
+
+ def __del__(self):
+ self._close()
+
+ def _handle_sigint(self, signum, frame):
+ if self._verbose:
+ Journal.log(self._class_name,
+ "_handle_sigint",
+ "SIGINT received",
+ LogType.WARN)
+ self._term_req_received = True
+
+ def _set_rhc_pred_idx(self):
+ # default to last node
+ self._pred_node_idx=self._n_nodes-1
+
+ def _set_rhc_cmds_idx(self):
+ # use index 2 by default to compensate for
+ # the inevitable action delay
+ # (get_state, trigger sol -> +dt -> apply sol )
+ # if we apply action from second node (idenx 1)
+ # that action will already be one dt old. We assume we were already
+ # applying the right action to get to the state of node idx 1 and get the
+ # cmds for reaching the state at idx 1
+ self._rhc_cmds_node_idx=2
+
+ def _close(self):
+ if not self._closed:
+ self._unregister_from_cluster()
+ if self.robot_cmds is not None:
+ self.robot_cmds.close()
+ if self.robot_pred is not None:
+ self.robot_pred.close()
+ if self.rhc_pred_delta is not None:
+ self.rhc_pred_delta.close()
+ if self.robot_state is not None:
+ self.robot_state.close()
+ if self.rhc_status is not None:
+ self.rhc_status.close()
+ if self.rhc_internal is not None:
+ self.rhc_internal.close()
+ if self.cluster_stats is not None:
+ self.cluster_stats.close()
+ if self._remote_triggerer is not None:
+ self._remote_triggerer.close()
+ if self._remote_term is not None:
+ self._remote_term.close()
+ self._closed = True
+
+ def close(self):
+ self._close()
+
+ def get_file_paths(self):
+ # can be overriden by child
+ base_paths = []
+ base_paths.append(self._this_path)
+ return base_paths
+
+ def init_rhc_task_cmds(self):
+
+ self.rhc_refs = self._init_rhc_task_cmds()
+ self.rhc_refs.reset()
+
+ def _init_states(self):
+
+ quat_remap = self._get_quat_remap()
+ self.robot_state = RobotState(namespace=self.namespace,
+ is_server=False,
+ q_remapping=quat_remap, # remapping from environment to controller
+ with_gpu_mirror=False,
+ with_torch_view=False,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ optimize_mem=True,
+ n_robots=1, # we just need the row corresponding to this controller
+ n_jnts=None, # got from server
+ n_contacts=None # got from server
+ )
+ self.robot_state.run()
+ self.robot_cmds = RhcCmds(namespace=self.namespace,
+ is_server=False,
+ q_remapping=quat_remap, # remapping from environment to controller
+ with_gpu_mirror=False,
+ with_torch_view=False,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ optimize_mem=True,
+ n_robots=1, # we just need the row corresponding to this controller
+ n_jnts=None, # got from server
+ n_contacts=None # got from server
+ )
+ self.robot_cmds.run()
+ self.robot_pred = RhcPred(namespace=self.namespace,
+ is_server=False,
+ q_remapping=quat_remap, # remapping from environment to controller
+ with_gpu_mirror=False,
+ with_torch_view=False,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ optimize_mem=True,
+ n_robots=1, # we just need the row corresponding to this controller
+ n_jnts=None, # got from server
+ n_contacts=None # got from server
+ )
+ self.robot_pred.run()
+ self.rhc_pred_delta = RhcPredDelta(namespace=self.namespace,
+ is_server=False,
+ q_remapping=quat_remap, # remapping from environment to controller
+ with_gpu_mirror=False,
+ with_torch_view=False,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ optimize_mem=True,
+ n_robots=1, # we just need the row corresponding to this controller
+ n_jnts=None, # got from server
+ n_contacts=None # got from server
+ )
+ self.rhc_pred_delta.run()
+
+ def _rhc(self, rti: bool = True):
+ if self._debug:
+ self._rhc_db(rti=rti)
+ else:
+ self._rhc_min(rti=rti)
+
+ def _rhc_db(self, rti: bool = True):
+ # rhc with debug data
+ self._start_time = time.perf_counter()
+
+ self.robot_state.synch_from_shared_mem(robot_idx=self.controller_index, robot_idx_view=self.controller_index_np) # updates robot state with
+ # latest data on shared mem
+
+ self._compute_pred_delta()
+
+ if not self.failed():
+ # we can solve only if not in failure state
+ if rti:
+ self._failed = not self._solve() # solve actual TO with RTI
+ else:
+ self._failed = not self._bootstrap() # full bootstrap solve
+ if (self._failed):
+ # perform failure procedure
+ self._on_failure()
+ else:
+ if not self._allow_triggering_when_failed:
+ Journal.log(self._class_name,
+ "solve",
+ f"Received solution req, but controller is in failure state. " + \
+ " You should have reset() the controller!",
+ LogType.EXCEP,
+ throw_when_excep = True)
+ else:
+ if self._verbose:
+ Journal.log(self._class_name,
+ "solve",
+ f"Received solution req, but controller is in failure state. No solution will be performed. " + \
+ " Use the reset() method to continue solving!",
+ LogType.WARN)
+
+ self._write_cmds_from_sol() # we update update the views of the cmds
+ # from the latest solution
+
+ # in debug, rhc internal state is streamed over
+ # shared mem.
+ self._update_rhc_internal()
+ self._profiling_data_dict["full_solve_dt"] = time.perf_counter() - self._start_time
+ self._update_profiling_data() # updates all profiling data
+ if self._verbose:
+ Journal.log(self._class_name,
+ "solve",
+ f"RHC full solve loop execution time -> " + str(self._profiling_data_dict["full_solve_dt"]),
+ LogType.INFO,
+ throw_when_excep = True)
+
+ def _rhc_min(self, rti: bool = True):
+
+ self.robot_state.synch_from_shared_mem(robot_idx=self.controller_index, robot_idx_view=self.controller_index_np) # updates robot state with
+ # latest data on shared mem
+
+ self._compute_pred_delta()
+
+ if not self.failed():
+ # we can solve only if not in failure state
+ if rti:
+ self._failed = not self._solve() # solve actual TO with RTI
+ else:
+ self._failed = not self._bootstrap() # full bootstrap solve
+ if (self._failed):
+ # perform failure procedure
+ self._on_failure()
+ else:
+ if not self._allow_triggering_when_failed:
+ Journal.log(self._class_name,
+ "solve",
+ f"Received solution req, but controller is in failure state. " + \
+ " You should have reset() the controller!",
+ LogType.EXCEP,
+ throw_when_excep = True)
+ else:
+ if self._verbose:
+ Journal.log(self._class_name,
+ "solve",
+ f"Received solution req, but controller is in failure state. No solution will be performed. " + \
+ " Use the reset() method to continue solving!",
+ LogType.WARN)
+
+ self._write_cmds_from_sol() # we update the views of the cmds
+ # from the latest solution even if failed
+
+ def solve_once(self):
+ # run a single iteration of the solve loop (used for pooling)
+ if self._term_req_received:
+ return False
+
+ if not self._remote_triggerer.wait(self._remote_triggerer_timeout):
+ Journal.log(self._class_name,
+ f"solve",
+ "Didn't receive any remote trigger req within timeout!",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ return False
+
+ self._received_trigger = True
+
+ if self.rhc_status.resets.read_retry(row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)[0]:
+ self.reset() # rhc is reset
+
+ if self.rhc_status.trigger.read_retry(row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)[0]:
+ rti_solve = self.rhc_status.rti_solve.read_retry(row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)[0]
+ self._rhc(rti=rti_solve) # run solution with requested solve mode
+ self.rhc_status.trigger.write_retry(False,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0) # allow next solution trigger
+
+ self._remote_triggerer.ack() # send ack signal to server
+ self._received_trigger = False
+
+ self._term_req_received = self._term_req_received or self._remote_term.read_retry(row_index=0,
+ col_index=0,
+ row_index_view=0)[0]
+
+ if self._term_req_received:
+ self.close()
+ return False
+
+ return True
+
+ def solve(self):
+
+ # run the solution loop and wait for trigger signals
+ # using cond. variables (efficient)
+ while True:
+ if not self.solve_once():
+ break
+
+ self.close() # is not stricly necessary
+
+ def reset(self):
+
+ if not self._closed:
+ self.reset_rhc_data()
+ self._failed = False # allow triggering
+ self._n_resets += 1
+ self.rhc_status.fails.write_retry(False,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.rhc_status.resets.write_retry(False,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+
+ def _create_jnt_maps(self):
+
+ # retrieve env-side joint names from shared mem
+ self._env_side_jnt_names = self.robot_state.jnt_names()
+ self._check_jnt_names_compatibility() # will raise exception if not self._allow_less_jnts
+ if not self._got_jnt_names_from_controllers:
+ exception = f"Cannot run the solve(). assign_env_side_jnt_names() was not called!"
+ Journal.log(self._class_name,
+ "_create_jnt_maps",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ # robot homer guarantees that _controller_side_jnt_names is at least contained in self._env_side_jnt_names (or equal)
+ self._to_controller = [self._env_side_jnt_names.index(element) for element in self._controller_side_jnt_names]
+ # set joint remappings for shared data from env data to controller
+ # all shared data is by convention specified according to the env (jnts are ordered that way)
+ # the remapping is used so that when data is returned, its a remapped view from env to controller,
+ # so that data can be assigned direclty from the rhc controller without any issues
+ self.robot_state.set_jnts_remapping(jnts_remapping=self._to_controller)
+ self.robot_cmds.set_jnts_remapping(jnts_remapping=self._to_controller)
+ self.robot_pred.set_jnts_remapping(jnts_remapping=self._to_controller)
+ self.rhc_pred_delta.set_jnts_remapping(jnts_remapping=self._to_controller)
+
+ return True
+
+ def reset_rhc_data(self):
+
+ self._reset() # custom reset (e.g. it should set the current solution to some
+ # default solution, like a bootstrap)
+
+ self.rhc_refs.reset() # reset rhc refs to default (has to be called after _reset)
+
+ self._write_cmds_from_sol() # use latest solution (e.g. from bootstrap if called before running
+ # the first solve) as default state
+
+ def failed(self):
+ return self._failed
+
+ def robot_mass(self):
+ return self._robot_mass
+
+ def _assign_cntrl_index(self, reg_state: np.ndarray):
+ state = reg_state.flatten() # ensure 1D tensor
+ free_spots = np.nonzero(~state.flatten())[0]
+ return free_spots[0].item() # just return the first free spot
+
+ def _register_to_cluster(self):
+
+ self.rhc_status = RhcStatus(is_server=False,
+ namespace=self.namespace,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ with_torch_view=False,
+ with_gpu_mirror=False,
+ optimize_mem=True,
+ cluster_size=1, # we just need the row corresponding to this controller
+ n_contacts=None, # we get this from server
+ n_nodes=None # we get this from server
+ )
+ self.rhc_status.run() # rhc status (reg. flags, failure, tot cost, tot cnstrl viol, etc...)
+
+ # acquire semaphores since we have to perform non-atomic operations
+ # on the whole memory views
+ self.rhc_status.registration.data_sem_acquire()
+ self.rhc_status.controllers_counter.data_sem_acquire()
+ self.rhc_status.controllers_counter.synch_all(retry = True,
+ read = True)
+
+ available_spots = self.rhc_status.cluster_size
+ # from here on all pre registration ops can be done
+
+ # incrementing cluster controllers counter
+ controllers_counter = self.rhc_status.controllers_counter.get_numpy_mirror()
+ if controllers_counter[0, 0] + 1 > available_spots: # no space left -> return
+ self.rhc_status.controllers_counter.data_sem_release()
+ self.rhc_status.registration.data_sem_release()
+ exception = "Cannot register to cluster. No space left " + \
+ f"({controllers_counter[0, 0]} controllers already registered)"
+ Journal.log(self._class_name,
+ "_register_to_cluster",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ # now we can register
+
+ # increment controllers counter
+ controllers_counter += 1
+ self.controller_index = controllers_counter.item() -1
+
+ # actually register to cluster
+ self.rhc_status.controllers_counter.synch_all(retry = True,
+ read = False) # writes to shared mem (just one for all controllers (i.e n_rows = 1))
+
+ # read current registration state
+ self.rhc_status.registration.synch_all(retry = True,
+ read = True,
+ row_index=self.controller_index,
+ row_index_view=0)
+ registrations = self.rhc_status.registration.get_numpy_mirror()
+ # self.controller_index = self._assign_cntrl_index(registrations)
+
+
+ self._class_name_base = self._class_name_base+str(self.controller_index)
+ # self.controller_index_np = np.array(self.controller_index)
+ self.controller_index_np = np.array(0) # given that we use optimize_mem, the shared mem copy has shape 1 x n_cols (we can write and read at [0, :])
+
+ registrations[self.controller_index_np, 0] = True
+ self.rhc_status.registration.synch_all(retry = True,
+ read = False,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+
+ # now all heavy stuff that would otherwise make the registration slow
+ self._remote_term = SharedTWrapper(namespace=self.namespace,
+ basename="RemoteTermination",
+ is_server=False,
+ verbose = self._verbose,
+ vlevel = VLevel.V2,
+ with_gpu_mirror=False,
+ with_torch_view=False,
+ dtype=dtype.Bool)
+ self._remote_term.run()
+
+ # other initializations
+
+ self.cluster_stats = RhcProfiling(is_server=False,
+ name=self.namespace,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ safe=True,
+ optimize_mem=True,
+ cluster_size=1 # we just need the row corresponding to this controller
+ ) # profiling data
+ self.cluster_stats.run()
+ self.cluster_stats.synch_info()
+
+ self._create_jnt_maps()
+ self.init_rhc_task_cmds() # initializes rhc interface to external commands (defined by child class)
+ self._consinstency_checks() # sanity checks
+
+ if self._homer is None:
+ self._init_robot_homer() # call this in case it wasn't called by child
+
+ self._robot_mass = self._get_robot_mass() # uses child class implemented method
+ self._contact_f_scale = self._get_robot_mass() * 9.81
+
+ # writing some static info about this controller
+ # self.rhc_status.rhc_static_info.synch_all(retry = True,
+ # read = True,
+ # row_index=self.controller_index,
+ # col_index=0) # first read current static info from other controllers (not necessary if optimize_mem==True)
+ self.rhc_status.rhc_static_info.set(data=np.array(self._dt),
+ data_type="dts",
+ rhc_idxs=self.controller_index_np,
+ gpu=False)
+ self.rhc_status.rhc_static_info.set(data=np.array(self._t_horizon),
+ data_type="horizons",
+ rhc_idxs=self.controller_index_np,
+ gpu=False)
+ self.rhc_status.rhc_static_info.set(data=np.array(self._n_nodes),
+ data_type="nnodes",
+ rhc_idxs=self.controller_index_np,
+ gpu=False)
+ # writing some static rhc info which is only available after problem init
+ self.rhc_status.rhc_static_info.set(data=np.array(len(self._get_contacts())),
+ data_type="ncontacts",
+ rhc_idxs=self.controller_index_np,
+ gpu=False)
+ self.rhc_status.rhc_static_info.set(data=np.array(self.robot_mass()),
+ data_type="robot_mass",
+ rhc_idxs=self.controller_index_np,
+ gpu=False)
+ self.rhc_status.rhc_static_info.set(data=np.array(self._pred_node_idx),
+ data_type="pred_node_idx",
+ rhc_idxs=self.controller_index_np,
+ gpu=False)
+
+ self.rhc_status.rhc_static_info.synch_retry(row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.rhc_status.rhc_static_info.n_cols,
+ read=False)
+
+ # we set homings also for joints which are not in the rhc homing map
+ # since this is usually required on server side
+
+ homing_full = self._homer_env.get_homing().reshape(1,
+ self.robot_cmds.n_jnts())
+
+ null_action = np.zeros((1, self.robot_cmds.n_jnts()),
+ dtype=self._dtype)
+
+ self.robot_cmds.jnts_state.set(data=homing_full, data_type="q",
+ robot_idxs=self.controller_index_np,
+ no_remap=True)
+ self.robot_cmds.jnts_state.set(data=null_action, data_type="v",
+ robot_idxs=self.controller_index_np,
+ no_remap=True)
+ self.robot_cmds.jnts_state.set(data=null_action, data_type="eff",
+ robot_idxs=self.controller_index_np,
+ no_remap=True)
+
+ # write all joints (including homing for env-only ones)
+ self.robot_cmds.jnts_state.synch_retry(row_index=self.controller_index, col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols,
+ read=False) # only write data corresponding to this controller
+
+ self.reset() # reset controller
+ self._n_resets=0
+
+ # for last we create the trigger client
+ self._remote_triggerer = RemoteTriggererClnt(namespace=self.namespace,
+ verbose=self._verbose,
+ vlevel=VLevel.V2) # remote triggering
+ self._remote_triggerer.run()
+
+ if self._debug:
+ # internal solution is published on shared mem
+ # we assume the user has made available the cost
+ # and constraint data at this point (e.g. through
+ # the solution of a bootstrap)
+ cost_data = self._get_cost_info()
+ constr_data = self._get_constr_info()
+ config = RhcInternal.Config(is_server=True,
+ enable_q= True,
+ enable_v=True,
+ enable_a=True,
+ enable_a_dot=False,
+ enable_f=True,
+ enable_f_dot=False,
+ enable_eff=False,
+ cost_names=cost_data[0],
+ cost_dims=cost_data[1],
+ constr_names=constr_data[0],
+ constr_dims=constr_data[1],
+ )
+ self.rhc_internal = RhcInternal(config=config,
+ namespace=self.namespace,
+ rhc_index = self.controller_index,
+ n_contacts=self.n_contacts,
+ n_jnts=self.n_dofs,
+ jnt_names=self._controller_side_jnt_names,
+ n_nodes=self._n_nodes,
+ verbose = self._verbose,
+ vlevel=VLevel.V2,
+ force_reconnection=True,
+ safe=True)
+ self.rhc_internal.run()
+
+ self._class_name = self._class_name + f"-{self.controller_index}"
+
+ Journal.log(self._class_name,
+ "_register_to_cluster",
+ f"controller registered",
+ LogType.STAT,
+ throw_when_excep = True)
+
+ # we can now release everything so that other controllers can register
+ self.rhc_status.controllers_counter.data_sem_release()
+ self.rhc_status.registration.data_sem_release()
+
+ self._registered = True
+
+ def _unregister_from_cluster(self):
+
+ if self._received_trigger:
+ # received interrupt during solution -->
+ # send ack signal to server anyway
+ self._remote_triggerer.ack()
+ if self._registered:
+ # acquire semaphores since we have to perform operations
+ # on the whole memory views
+ self.rhc_status.registration.data_sem_acquire()
+ self.rhc_status.controllers_counter.data_sem_acquire()
+ self.rhc_status.registration.write_retry(False,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self._deactivate()
+ # decrementing controllers counter
+ self.rhc_status.controllers_counter.synch_all(retry = True,
+ read = True)
+ controllers_counter = self.rhc_status.controllers_counter.get_numpy_mirror()
+ controllers_counter -= 1
+ self.rhc_status.controllers_counter.synch_all(retry = True,
+ read = False)
+ Journal.log(self._class_name,
+ "_unregister_from_cluster",
+ "Done",
+ LogType.STAT,
+ throw_when_excep = True)
+ # we can now release everything
+ self.rhc_status.registration.data_sem_release()
+ self.rhc_status.controllers_counter.data_sem_release()
+ self._registered = False
+
+ def _get_quat_remap(self):
+ # to be overridden by child class if necessary
+ return [0, 1, 2, 3]
+
+ def _consinstency_checks(self):
+
+ # check controller dt
+ server_side_cluster_dt = self.cluster_stats.get_info(info_name="cluster_dt")
+ if not (abs(server_side_cluster_dt - self._dt) < 1e-4):
+ exception = f"Trying to initialize a controller with control dt {self._dt}, which" + \
+ f"does not match the cluster control dt {server_side_cluster_dt}"
+ Journal.log(self._class_name,
+ "_consinstency_checks",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ # check contact names
+
+ server_side_contact_names = set(self.robot_state.contact_names())
+ control_side_contact_names = set(self._get_contacts())
+
+ if (not server_side_contact_names == control_side_contact_names) and self._verbose:
+ warn = f"Controller-side contact names do not match server-side names!" + \
+ f"\nServer: {self.robot_state.contact_names()}\n Controller: {self._get_contacts()}"
+ Journal.log(self._class_name,
+ "_consinstency_checks",
+ warn,
+ LogType.WARN,
+ throw_when_excep = True)
+ if not len(self.robot_state.contact_names()) == len(self._get_contacts()):
+ # at least, we need the n of contacts to match!
+ exception = f"Controller-side n contacts {self._get_contacts()} do not match " + \
+ f"server-side n contacts {len(self.robot_state.contact_names())}!"
+ Journal.log(self._class_name,
+ "_consinstency_checks",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ def _init(self):
+
+ stat = f"Trying to initialize RHC controller " + \
+ f"with dt: {self._dt} s, t_horizon: {self._t_horizon} s, n_intervals: {self._n_intervals}"
+ Journal.log(self._class_name,
+ "_init",
+ stat,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ self._init_states() # initializes shared mem. states
+
+ self._init_problem() # we call the child's initialization method for the actual problem
+ self._post_problem_init()
+
+ self._register_to_cluster() # registers the controller to the cluster
+
+ Journal.log(self._class_name,
+ "_init",
+ f"RHC controller initialized with cluster index {self.controller_index} on process {os.getpid()}",
+ LogType.STAT,
+ throw_when_excep = True)
+
+ def _deactivate(self):
+ # signal controller deactivation over shared mem
+ self.rhc_status.activation_state.write_retry(False,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ # also set cmds to homing for safety
+ # self.reset_rhc_data()
+
+ def _on_failure(self):
+
+ self.rhc_status.fails.write_retry(True,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self._deactivate()
+ self._n_fails += 1
+ self.rhc_status.controllers_fail_counter.write_retry(self._n_fails,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+
+ def _init_robot_homer(self):
+ self._homer = RobotHomer(srdf_path=self.srdf_path,
+ jnt_names=self._controller_side_jnt_names,
+ verbose=self._verbose)
+
+ self._homer_env = RobotHomer(srdf_path=self.srdf_path,
+ jnt_names=self.robot_state.jnt_names(),
+ verbose=self._verbose)
+
+ def _update_profiling_data(self):
+
+ # updated debug data on shared memory
+ # with the latest info available
+ self.cluster_stats.solve_loop_dt.write_retry(self._profiling_data_dict["full_solve_dt"],
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.cluster_stats.rti_sol_time.write_retry(self._profiling_data_dict["rti_solve_dt"],
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.cluster_stats.prb_update_dt.write_retry(self._profiling_data_dict["problem_update_dt"],
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.cluster_stats.phase_shift_dt.write_retry(self._profiling_data_dict["phases_shift_dt"],
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.cluster_stats.task_ref_update_dt.write_retry(self._profiling_data_dict["task_ref_update"],
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+
+ def _write_cmds_from_sol(self):
+
+ # gets data from the solution and updates the view on the shared data
+
+ # cmds for robot
+ # jnts
+ self.robot_cmds.jnts_state.set(data=self._get_jnt_q_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="q", robot_idxs=self.controller_index_np)
+ self.robot_cmds.jnts_state.set(data=self._get_jnt_v_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="v", robot_idxs=self.controller_index_np)
+ self.robot_cmds.jnts_state.set(data=self._get_jnt_a_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="a", robot_idxs=self.controller_index_np)
+ self.robot_cmds.jnts_state.set(data=self._get_jnt_eff_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="eff", robot_idxs=self.controller_index_np)
+ # root
+ self.robot_cmds.root_state.set(data=self._get_root_full_q_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="q_full", robot_idxs=self.controller_index_np)
+ self.robot_cmds.root_state.set(data=self._get_root_twist_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="twist", robot_idxs=self.controller_index_np)
+ self.robot_cmds.root_state.set(data=self._get_root_a_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="a_full", robot_idxs=self.controller_index_np)
+ self.robot_cmds.root_state.set(data=self._get_norm_grav_vector_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="gn", robot_idxs=self.controller_index_np)
+ f_contact = self._get_f_from_sol()
+ if f_contact is not None:
+ contact_names = self.robot_state.contact_names()
+ node_idx_f_estimate=self._rhc_cmds_node_idx-1 # we always write the force to reach the desired state (prev node)
+ rhc_q_estimate=self._get_root_full_q_from_sol(node_idx=node_idx_f_estimate)[:, 3:7]
+ for i in range(len(contact_names)):
+ contact = contact_names[i]
+ contact_idx = i*3
+ contact_force_rhc_world=f_contact[contact_idx:(contact_idx+3), node_idx_f_estimate:node_idx_f_estimate+1].T
+ world2base_frame(v_w=contact_force_rhc_world,
+ q_b=rhc_q_estimate,
+ v_out=self._contact_force_base_loc_aux,
+ is_q_wijk=False # horizon q is ijkw
+ )
+ self.robot_cmds.contact_wrenches.set(data=self._contact_force_base_loc_aux,
+ data_type="f",
+ robot_idxs=self.controller_index_np,
+ contact_name=contact)
+
+ # prediction data from MPC horizon
+ self.robot_pred.jnts_state.set(data=self._get_jnt_q_from_sol(node_idx=self._pred_node_idx), data_type="q", robot_idxs=self.controller_index_np)
+ self.robot_pred.jnts_state.set(data=self._get_jnt_v_from_sol(node_idx=self._pred_node_idx), data_type="v", robot_idxs=self.controller_index_np)
+ self.robot_pred.jnts_state.set(data=self._get_jnt_a_from_sol(node_idx=self._pred_node_idx-1), data_type="a", robot_idxs=self.controller_index_np)
+ self.robot_pred.jnts_state.set(data=self._get_jnt_eff_from_sol(node_idx=self._pred_node_idx-1), data_type="eff", robot_idxs=self.controller_index_np)
+ self.robot_pred.root_state.set(data=self._get_root_full_q_from_sol(node_idx=self._pred_node_idx), data_type="q_full", robot_idxs=self.controller_index_np)
+ self.robot_pred.root_state.set(data=self._get_root_twist_from_sol(node_idx=self._pred_node_idx), data_type="twist", robot_idxs=self.controller_index_np)
+ self.robot_pred.root_state.set(data=self._get_root_a_from_sol(node_idx=self._pred_node_idx-1), data_type="a_full", robot_idxs=self.controller_index_np)
+ self.robot_pred.root_state.set(data=self._get_norm_grav_vector_from_sol(node_idx=self._pred_node_idx-1), data_type="gn", robot_idxs=self.controller_index_np)
+
+ # write robot commands
+ self.robot_cmds.jnts_state.synch_retry(row_index=self.controller_index, col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols,
+ read=False) # jnt state
+ self.robot_cmds.root_state.synch_retry(row_index=self.controller_index, col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.robot_cmds.root_state.n_cols,
+ read=False) # root state, in case it was written
+ self.robot_cmds.contact_wrenches.synch_retry(row_index=self.controller_index, col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.robot_cmds.contact_wrenches.n_cols,
+ read=False) # contact state
+
+ # write robot pred
+ self.robot_pred.jnts_state.synch_retry(row_index=self.controller_index, col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols,
+ read=False)
+ self.robot_pred.root_state.synch_retry(row_index=self.controller_index, col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.robot_cmds.root_state.n_cols,
+ read=False)
+
+ # we also fill other data (cost, constr. violation, etc..)
+ self.rhc_status.rhc_cost.write_retry(self._get_rhc_cost(),
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.rhc_status.rhc_constr_viol.write_retry(self._get_rhc_constr_viol(),
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.rhc_status.rhc_n_iter.write_retry(self._get_rhc_niter_to_sol(),
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.rhc_status.rhc_nodes_cost.write_retry(data=self._get_rhc_nodes_cost(),
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.rhc_status.rhc_nodes_constr_viol.write_retry(data=self._get_rhc_nodes_constr_viol(),
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.rhc_status.rhc_fail_idx.write_retry(self._get_failure_index(),
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0) # write idx on shared mem
+
+ def _compute_pred_delta(self):
+
+ # measurements
+ q_full_root_meas = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np)
+ twist_root_meas = self.robot_state.root_state.get(data_type="twist", robot_idxs=self.controller_index_np)
+ a_root_meas = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np)
+ g_vec_root_meas = self.robot_state.root_state.get(data_type="gn", robot_idxs=self.controller_index_np)
+
+ q_jnts_meas = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np)
+ v_jnts_meas = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np)
+ a_jnts_meas = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np)
+ eff_jnts_meas = self.robot_state.jnts_state.get(data_type="eff", robot_idxs=self.controller_index_np)
+
+ # prediction from rhc
+ delta_root_q_full=self._get_root_full_q_from_sol(node_idx=1)-q_full_root_meas
+ delta_root_twist=self._get_root_twist_from_sol(node_idx=1)-twist_root_meas
+ delta_root_a=self._get_root_a_from_sol(node_idx=0)-a_root_meas
+ delta_g_vec=self._get_norm_grav_vector_from_sol(node_idx=0)-g_vec_root_meas
+
+ delta_jnts_q=self._get_jnt_q_from_sol(node_idx=1)-q_jnts_meas
+ delta_jnts_v=self._get_jnt_v_from_sol(node_idx=1)-v_jnts_meas
+ delta_jnts_a=self._get_jnt_a_from_sol(node_idx=0)-a_jnts_meas
+ delta_jnts_eff=self._get_jnt_eff_from_sol(node_idx=0)-eff_jnts_meas
+
+ # writing pred. errors
+ self.rhc_pred_delta.root_state.set(data=delta_root_q_full, data_type="q_full", robot_idxs=self.controller_index_np)
+ self.rhc_pred_delta.root_state.set(data=delta_root_twist,data_type="twist", robot_idxs=self.controller_index_np)
+ self.rhc_pred_delta.root_state.set(data=delta_root_a,data_type="a_full", robot_idxs=self.controller_index_np)
+ self.rhc_pred_delta.root_state.set(data=delta_g_vec, data_type="gn", robot_idxs=self.controller_index_np)
+
+ self.rhc_pred_delta.jnts_state.set(data=delta_jnts_q,data_type="q", robot_idxs=self.controller_index_np)
+ self.rhc_pred_delta.jnts_state.set(data=delta_jnts_v,data_type="v", robot_idxs=self.controller_index_np)
+ self.rhc_pred_delta.jnts_state.set(data=delta_jnts_a,data_type="a", robot_idxs=self.controller_index_np)
+ self.rhc_pred_delta.jnts_state.set(data=delta_jnts_eff, data_type="eff", robot_idxs=self.controller_index_np)
+
+ # write on shared memory
+ self.rhc_pred_delta.jnts_state.synch_retry(row_index=self.controller_index,
+ col_index=0,
+ n_rows=1,
+ row_index_view=0,
+ n_cols=self.robot_cmds.jnts_state.n_cols,
+ read=False) # jnt state
+ self.rhc_pred_delta.root_state.synch_retry(row_index=self.controller_index,
+ col_index=0,
+ n_rows=1,
+ row_index_view=0,
+ n_cols=self.robot_cmds.root_state.n_cols,
+ read=False) # root state
+
+ def _assign_controller_side_jnt_names(self,
+ jnt_names: List[str]):
+
+ self._controller_side_jnt_names = jnt_names
+ self._got_jnt_names_from_controllers = True
+
+ def _check_jnt_names_compatibility(self):
+
+ set_rhc = set(self._controller_side_jnt_names)
+ set_env = set(self._env_side_jnt_names)
+
+ if not set_rhc == set_env:
+ rhc_is_missing=set_env-set_rhc
+ env_is_missing=set_rhc-set_env
+
+ msg_type=LogType.WARN
+ message=""
+ if not len(rhc_is_missing)==0: # allowed
+ message = "\nSome env-side joint names are missing on rhc client-side!\n" + \
+ "RHC-SERVER-SIDE-> \n" + \
+ " ".join(self._env_side_jnt_names) + "\n" +\
+ "RHC-CLIENT-SIDE -> \n" + \
+ " ".join(self._controller_side_jnt_names) + "\n" \
+ "\MISSING -> \n" + \
+ " ".join(list(rhc_is_missing)) + "\n"
+ if not self._allow_less_jnts: # raise exception
+ msg_type=LogType.EXCEP
+
+ if not len(env_is_missing)==0: # not allowed
+ message = "\nSome rhc-side joint names are missing on rhc server-side!\n" + \
+ "RHC-SERVER-SIDE-> \n" + \
+ " ".join(self._env_side_jnt_names) + \
+ "RHC-CLIENT-SIDE -> \n" + \
+ " ".join(self._controller_side_jnt_names) + "\n" \
+ "\nmissing -> \n" + \
+ " ".join(list(env_is_missing))
+ msg_type=LogType.EXCEP
+
+ if msg_type==LogType.WARN and not self._verbose:
+ return
+
+ Journal.log(self._class_name,
+ "_check_jnt_names_compatibility",
+ message,
+ msg_type,
+ throw_when_excep = True)
+
+ def _get_cost_info(self):
+ # to be overridden by child class
+ return None, None
+
+ def _get_constr_info(self):
+ # to be overridden by child class
+ return None, None
+
+ def _get_fail_idx(self):
+ # to be overriden by parent
+ return 0.0
+
+ def _get_failure_index(self):
+ fail_idx=self._get_fail_idx()/self._fail_idx_thresh
+ if fail_idx>1.0:
+ fail_idx=1.0
+ return fail_idx
+
+ def _check_rhc_failure(self):
+ # we use fail idx viol to detect failures
+ idx = self._get_failure_index()
+ return idx>=1.0
+
+ def _update_rhc_internal(self):
+ # data which is not enabled in the config is not actually
+ # written so overhead is minimal for non-enabled data
+ self.rhc_internal.write_q(data= self._get_q_from_sol(),
+ retry=True)
+ self.rhc_internal.write_v(data= self._get_v_from_sol(),
+ retry=True)
+ self.rhc_internal.write_a(data= self._get_a_from_sol(),
+ retry=True)
+ self.rhc_internal.write_a_dot(data= self._get_a_dot_from_sol(),
+ retry=True)
+ self.rhc_internal.write_f(data= self._get_f_from_sol(),
+ retry=True)
+ self.rhc_internal.write_f_dot(data= self._get_f_dot_from_sol(),
+ retry=True)
+ self.rhc_internal.write_eff(data= self._get_eff_from_sol(),
+ retry=True)
+ for cost_idx in range(self.rhc_internal.config.n_costs):
+ # iterate over all costs and update all values
+ cost_name = self.rhc_internal.config.cost_names[cost_idx]
+ self.rhc_internal.write_cost(data= self._get_cost_from_sol(cost_name = cost_name),
+ cost_name = cost_name,
+ retry=True)
+ for constr_idx in range(self.rhc_internal.config.n_constr):
+ # iterate over all constraints and update all values
+ constr_name = self.rhc_internal.config.constr_names[constr_idx]
+ self.rhc_internal.write_constr(data= self._get_constr_from_sol(constr_name=constr_name),
+ constr_name = constr_name,
+ retry=True)
+
+ def _get_contacts(self):
+ contact_names = self._get_contact_names()
+ self._got_contact_names = True
+ return contact_names
+
+ def _get_q_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_v_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_a_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_a_dot_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_f_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_f_dot_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_eff_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_cost_from_sol(self,
+ cost_name: str):
+ # to be overridden by child class
+ return None
+
+ def _get_constr_from_sol(self,
+ constr_name: str):
+ # to be overridden by child class
+ return None
+
+ @abstractmethod
+ def _reset(self):
+ pass
+
+ @abstractmethod
+ def _init_rhc_task_cmds(self):
+ pass
+
+ @abstractmethod
+ def _get_robot_jnt_names(self):
+ pass
+
+ @abstractmethod
+ def _get_contact_names(self):
+ pass
+
+ @abstractmethod
+ def _get_jnt_q_from_sol(self, node_idx=1) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_jnt_v_from_sol(self, node_idx=1) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_jnt_a_from_sol(self, node_idx=0) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_jnt_eff_from_sol(self, node_idx=0) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_root_full_q_from_sol(self, node_idx=1) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_full_q_from_sol(self, node_idx=1) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_root_twist_from_sol(self, node_idx=1) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_root_a_from_sol(self, node_idx=0) -> np.ndarray:
+ pass
+
+ def _get_norm_grav_vector_from_sol(self, node_idx=1) -> np.ndarray:
+ rhc_q=self._get_root_full_q_from_sol(node_idx=node_idx)[:, 3:7]
+ world2base_frame(v_w=self._norm_grav_vector_w,q_b=rhc_q,v_out=self._norm_grav_vector_base_loc,
+ is_q_wijk=False)
+ return self._norm_grav_vector_base_loc
+
+ def _get_rhc_cost(self):
+ # to be overridden
+ return np.nan
+
+ def _get_rhc_constr_viol(self):
+ # to be overridden
+ return np.nan
+
+ def _get_rhc_nodes_cost(self):
+ # to be overridden
+ return np.zeros((1,self.rhc_status.n_nodes), dtype=self._dtype)
+
+ def _get_rhc_nodes_constr_viol(self):
+ # to be overridden
+ return np.zeros((1,self.rhc_status.n_nodes), dtype=self._dtype)
+
+ def _get_rhc_niter_to_sol(self) -> np.ndarray:
+ # to be overridden
+ return np.nan
+
+ @abstractmethod
+ def _update_open_loop(self):
+ # updates rhc controller
+ # using the internal state
+ pass
+
+ @abstractmethod
+ def _update_closed_loop(self):
+ # uses meas. from robot
+ pass
+
+ @abstractmethod
+ def _solve(self) -> bool:
+ pass
+
+ @abstractmethod
+ def _bootstrap(self) -> bool:
+ pass
+
+ @abstractmethod
+ def _get_ndofs(self):
+ pass
+
+ abstractmethod
+ def _get_robot_mass(self):
+ pass
+
+ @abstractmethod
+ def _init_problem(self):
+ # initialized horizon's TO problem
+ pass
+
+ @abstractmethod
+ def _post_problem_init(self):
+ pass
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/metadata.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9285c7f180c366e71c4a03bf77f55e6528f437ff
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/metadata.yaml
@@ -0,0 +1,68 @@
+rosbag2_bagfile_information:
+ version: 5
+ storage_identifier: sqlite3
+ duration:
+ nanoseconds: 89879882813
+ starting_time:
+ nanoseconds_since_epoch: 140136718
+ message_count: 133027
+ topics_with_message_count:
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_robot_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19003
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_robot_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19003
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_rhc_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19003
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_rhc_contacts
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19003
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_hl_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19004
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_rhc_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19005
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_rhc_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19005
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_HandShake
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 1
+ compression_format: zstd
+ compression_mode: FILE
+ relative_file_paths:
+ - rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3.zstd
+ files:
+ - path: rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3
+ starting_time:
+ nanoseconds_since_epoch: 140136718
+ duration:
+ nanoseconds: 89879882813
+ message_count: 133027
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3 b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3
new file mode 100644
index 0000000000000000000000000000000000000000..662676248fff7fdfc237f952b0a7397e0618e60f
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8528347c6927e6eadcfbea4365ba2255a3ed3782b9d903dc4431d8b8e185c82c
+size 29442048
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3.zstd b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3.zstd
new file mode 100644
index 0000000000000000000000000000000000000000..6a0ad1a9f315490e12adfbf17ee01ea33647662e
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3.zstd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ef90075d478fee30b0d8a14d46983d1c4402b5b7e763eb036eeaa25b7d30d20
+size 2011910
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/sac.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/sac.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd40a13287ccf6ccda694c218d114ede8d4b9d4b
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/sac.py
@@ -0,0 +1,680 @@
+import torch
+import torch.nn as nn
+from torch.distributions.normal import Normal
+import math
+
+from aug_mpc.utils.nn.normalization_utils import RunningNormalizer
+from aug_mpc.utils.nn.layer_utils import llayer_init
+
+from typing import List
+
+from EigenIPC.PyEigenIPC import LogType
+from EigenIPC.PyEigenIPC import Journal
+from EigenIPC.PyEigenIPC import VLevel
+
class SACAgent(nn.Module):
    """Soft Actor-Critic agent: actor + twin Q critics (with targets) + obs preprocessing.

    Observation preprocessing optionally applies a static rescaling (using the
    provided obs bounds) and/or a running normalizer. Critics are only built
    when training (or when load_qf is requested during eval).

    Fixes vs. original:
    - __init__ now initializes the obs normalizer BEFORE building the nets, so
      that torch_compile=True no longer hits a missing `obs_running_norm`
      attribute inside _build_nets().
    - load_state_dict uses re.search (re.match is anchored at the start and
      never matched keys like "obs_running_norm.<param>").
    """

    def __init__(self,
            obs_dim: int,
            actions_dim: int,
            obs_ub: List[float] = None,
            obs_lb: List[float] = None,
            actions_ub: List[float] = None,
            actions_lb: List[float] = None,
            rescale_obs: bool = False,
            norm_obs: bool = True,
            use_action_rescale_for_critic: bool = True,
            device: str = "cuda",
            dtype = torch.float32,
            is_eval: bool = False,
            load_qf: bool = False,
            epsilon: float = 1e-8,
            debug: bool = False,
            compression_ratio: float = -1.0, # > 0; if [0, 1] compression, >1 "expansion"
            layer_width_actor: int = 256,
            n_hidden_layers_actor: int = 2,
            layer_width_critic: int = 512,
            n_hidden_layers_critic: int = 4,
            torch_compile: bool = False,
            add_weight_norm: bool = False,
            add_layer_norm: bool = False,
            add_batch_norm: bool = False):

        super().__init__()

        self._use_torch_compile = torch_compile

        self._layer_width_actor = layer_width_actor
        self._layer_width_critic = layer_width_critic
        self._n_hidden_layers_actor = n_hidden_layers_actor
        self._n_hidden_layers_critic = n_hidden_layers_critic

        self._obs_dim = obs_dim
        self._actions_dim = actions_dim
        self._actions_ub = actions_ub
        self._actions_lb = actions_lb

        self._add_weight_norm = add_weight_norm
        self._add_layer_norm = add_layer_norm
        self._add_batch_norm = add_batch_norm

        self._is_eval = is_eval
        self._load_qf = load_qf

        self._epsilon = epsilon

        if compression_ratio > 0.0:
            # derive layer widths from input dims instead of the explicit widths
            self._layer_width_actor = int(compression_ratio*obs_dim)
            self._layer_width_critic = int(compression_ratio*(obs_dim+actions_dim))

        self._normalize_obs = norm_obs
        self._rescale_obs = rescale_obs
        if self._rescale_obs and self._normalize_obs:
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Both running normalization and obs rescaling is enabled. Was this intentional?",
                LogType.WARN,
                throw_when_excep = True)

        self._use_action_rescale_for_critic = use_action_rescale_for_critic

        self._rescaling_epsi = 1e-9 # avoids div-by-zero for degenerate (ub == lb) dims

        self._debug = debug

        self._torch_device = device
        self._torch_dtype = dtype

        # obs scale and bias (defaults to the [-1, 1] box when bounds are omitted)
        if obs_ub is None:
            obs_ub = [1] * obs_dim
        if obs_lb is None:
            obs_lb = [-1] * obs_dim
        if (len(obs_ub) != obs_dim):
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Observations ub list length should be equal to {obs_dim}, but got {len(obs_ub)}",
                LogType.EXCEP,
                throw_when_excep = True)
        if (len(obs_lb) != obs_dim):
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Observations lb list length should be equal to {obs_dim}, but got {len(obs_lb)}",
                LogType.EXCEP,
                throw_when_excep = True)

        self._obs_ub = torch.tensor(obs_ub, dtype=self._torch_dtype,
                device=self._torch_device)
        self._obs_lb = torch.tensor(obs_lb, dtype=self._torch_dtype,
                device=self._torch_device)
        obs_scale = torch.full((obs_dim, ),
                fill_value=0.0,
                dtype=self._torch_dtype,
                device=self._torch_device)
        obs_scale[:] = (self._obs_ub-self._obs_lb)/2.0
        self.register_buffer(
            "obs_scale", obs_scale
        )
        obs_bias = torch.full((obs_dim, ),
                fill_value=0.0,
                dtype=self._torch_dtype,
                device=self._torch_device)
        obs_bias[:] = (self._obs_ub+self._obs_lb)/2.0
        self.register_buffer(
            "obs_bias", obs_bias)

        # FIX: normalizer must exist before _build_nets(), which may torch.compile it
        self._init_obs_norm()

        self._build_nets()

        msg=f"Created SAC agent with actor [{self._layer_width_actor}, {self._n_hidden_layers_actor}]\
            and critic [{self._layer_width_critic}, {self._n_hidden_layers_critic}] sizes.\
            \n Running obs normalizer: {type(self.obs_running_norm)} \
            \n Batch normalization: {self._add_batch_norm} \
            \n Layer normalization: {self._add_layer_norm} \
            \n Weight normalization: {self._add_weight_norm} \
            \n Critic input actions are descaled: {self._use_action_rescale_for_critic}"
        Journal.log(self.__class__.__name__,
            "__init__",
            msg,
            LogType.INFO)

    def _init_obs_norm(self):
        """Create the running obs normalizer (or None when normalization is disabled)."""
        self.obs_running_norm = None
        if self._normalize_obs:
            self.obs_running_norm = RunningNormalizer((self._obs_dim,),
                epsilon=self._epsilon,
                device=self._torch_device, dtype=self._torch_dtype,
                freeze_stats=True, # always start with freezed stats
                debug=self._debug)
            self.obs_running_norm.type(self._torch_dtype) # ensuring correct dtype for whole module

    def _build_nets(self):
        """(Re)build actor and, when training (or load_qf), the twin critics and their targets."""
        if self._add_weight_norm:
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Will use weight normalization reparametrization\n",
                LogType.INFO)

        self.actor = None
        self.qf1 = None
        self.qf2 = None
        self.qf1_target = None
        self.qf2_target = None

        self.actor = Actor(obs_dim=self._obs_dim,
            actions_dim=self._actions_dim,
            actions_ub=self._actions_ub,
            actions_lb=self._actions_lb,
            device=self._torch_device,
            dtype=self._torch_dtype,
            layer_width=self._layer_width_actor,
            n_hidden_layers=self._n_hidden_layers_actor,
            add_weight_norm=self._add_weight_norm,
            add_layer_norm=self._add_layer_norm,
            add_batch_norm=self._add_batch_norm,
            )

        if (not self._is_eval) or self._load_qf: # just needed for training or during eval
            # for debug, if enabled
            self.qf1 = CriticQ(obs_dim=self._obs_dim,
                actions_dim=self._actions_dim,
                device=self._torch_device,
                dtype=self._torch_dtype,
                layer_width=self._layer_width_critic,
                n_hidden_layers=self._n_hidden_layers_critic,
                add_weight_norm=self._add_weight_norm,
                add_layer_norm=self._add_layer_norm,
                add_batch_norm=self._add_batch_norm)
            self.qf1_target = CriticQ(obs_dim=self._obs_dim,
                actions_dim=self._actions_dim,
                device=self._torch_device,
                dtype=self._torch_dtype,
                layer_width=self._layer_width_critic,
                n_hidden_layers=self._n_hidden_layers_critic,
                add_weight_norm=self._add_weight_norm,
                add_layer_norm=self._add_layer_norm,
                add_batch_norm=self._add_batch_norm)

            self.qf2 = CriticQ(obs_dim=self._obs_dim,
                actions_dim=self._actions_dim,
                device=self._torch_device,
                dtype=self._torch_dtype,
                layer_width=self._layer_width_critic,
                n_hidden_layers=self._n_hidden_layers_critic,
                add_weight_norm=self._add_weight_norm,
                add_layer_norm=self._add_layer_norm,
                add_batch_norm=self._add_batch_norm)
            self.qf2_target = CriticQ(obs_dim=self._obs_dim,
                actions_dim=self._actions_dim,
                device=self._torch_device,
                dtype=self._torch_dtype,
                layer_width=self._layer_width_critic,
                n_hidden_layers=self._n_hidden_layers_critic,
                add_weight_norm=self._add_weight_norm,
                add_layer_norm=self._add_layer_norm,
                add_batch_norm=self._add_batch_norm)

            # targets start as exact copies of the online critics
            self.qf1_target.load_state_dict(self.qf1.state_dict())
            self.qf2_target.load_state_dict(self.qf2.state_dict())

        if self._use_torch_compile:
            # FIX: obs_running_norm is None when norm_obs=False (and did not even
            # exist yet in the original call order) — only compile it when present
            if getattr(self, "obs_running_norm", None) is not None:
                self.obs_running_norm = torch.compile(self.obs_running_norm)
            self.actor = torch.compile(self.actor)
            if (not self._is_eval) or self._load_qf:
                self.qf1 = torch.compile(self.qf1)
                self.qf2 = torch.compile(self.qf2)
                self.qf1_target = torch.compile(self.qf1_target)
                self.qf2_target = torch.compile(self.qf2_target)

    def reset(self, reset_stats: bool = False):
        """Re-create all networks from scratch.

        We should just reinitialize the parameters, but for easiness the
        networks are recreated wholesale.

        Args:
            reset_stats: when True, the running obs-normalization statistics
                are also reinitialized (done before rebuilding the nets so a
                torch.compile-enabled agent compiles the fresh normalizer).
        """
        # force deallocation of objects
        import gc
        del self.actor
        del self.qf1
        del self.qf2
        del self.qf1_target
        del self.qf2_target
        gc.collect()

        if reset_stats: # we also reinitialize obs norm
            self._init_obs_norm()

        self._build_nets()

    def layer_width_actor(self):
        """Width of the actor's hidden layers."""
        return self._layer_width_actor

    def n_hidden_layers_actor(self):
        """Number of hidden layers in the actor."""
        return self._n_hidden_layers_actor

    def layer_width_critic(self):
        """Width of the critics' hidden layers."""
        return self._layer_width_critic

    def n_hidden_layers_critic(self):
        """Number of hidden layers in the critics."""
        return self._n_hidden_layers_critic

    def get_impl_path(self):
        """Absolute path of this implementation file (useful for experiment bookkeeping)."""
        import os
        return os.path.abspath(__file__)

    def update_obs_bnorm(self, x):
        """Manually fold a batch of observations into the (otherwise frozen) normalizer stats."""
        self.obs_running_norm.unfreeze()
        self.obs_running_norm.manual_stat_update(x)
        self.obs_running_norm.freeze()

    def _obs_scaling_layer(self, x):
        """Statically rescale obs using the configured bounds (bias then scale)."""
        x = (x-self.obs_bias)
        x = x/(self.obs_scale+self._rescaling_epsi)
        return x

    def _preprocess_obs(self, x):
        """Apply (optional) static rescaling, then (optional) running normalization."""
        if self._rescale_obs:
            x = self._obs_scaling_layer(x)
        if self.obs_running_norm is not None:
            x = self.obs_running_norm(x)
        return x

    def _preprocess_actions(self, a):
        """Optionally map env-scale actions back to the actor's canonical [-1, 1] range."""
        if self._use_action_rescale_for_critic:
            a = self.actor.remove_scaling(a=a) # rescale to be in range [-1, 1]
        return a

    def get_action(self, x):
        """Sample an action (plus log-prob and mean, per Actor.get_action) for obs x."""
        x = self._preprocess_obs(x)
        return self.actor.get_action(x)

    def get_qf1_val(self, x, a):
        """Q1(s, a) with preprocessed obs and actions."""
        x = self._preprocess_obs(x)
        a = self._preprocess_actions(a)
        return self.qf1(x, a)

    def get_qf2_val(self, x, a):
        """Q2(s, a) with preprocessed obs and actions."""
        x = self._preprocess_obs(x)
        a = self._preprocess_actions(a)
        return self.qf2(x, a)

    def get_qf1t_val(self, x, a):
        """Target-Q1(s, a) with preprocessed obs and actions."""
        x = self._preprocess_obs(x)
        a = self._preprocess_actions(a)
        return self.qf1_target(x, a)

    def get_qf2t_val(self, x, a):
        """Target-Q2(s, a) with preprocessed obs and actions."""
        x = self._preprocess_obs(x)
        a = self._preprocess_actions(a)
        return self.qf2_target(x, a)

    def load_state_dict(self, param_dict):
        """Non-strict load: raise on missing keys, warn on unexpected ones.

        Also raises when unexpected keys belong to a running normalizer, which
        indicates a norm_obs mismatch between checkpoint and current config.
        """
        missing, unexpected = super().load_state_dict(param_dict,
                        strict=False)
        if not len(missing) == 0:
            Journal.log(self.__class__.__name__,
                "load_state_dict",
                f"These parameters are missing from the provided state dictionary: {str(missing)}\n",
                LogType.EXCEP,
                throw_when_excep = True)
        if not len(unexpected) == 0:
            Journal.log(self.__class__.__name__,
                "load_state_dict",
                f"These parameters present in the provided state dictionary are not needed: {str(unexpected)}\n",
                LogType.WARN)

        # sanity check on running normalizer
        import re
        running_norm_pattern = r"running_norm"
        error = f"Found some keys in model state dictionary associated with a running normalizer. Are you running the agent with norm_obs=True?\n"
        # FIX: re.search, not re.match — keys look like "obs_running_norm.<param>",
        # so an anchored match never fired
        if any(re.search(running_norm_pattern, key) for key in unexpected):
            Journal.log(self.__class__.__name__,
                "load_state_dict",
                error,
                LogType.EXCEP,
                throw_when_excep=True)
+
class CriticQ(nn.Module):
    """Q-value network: maps a concatenated (obs, action) vector to a scalar Q estimate.

    Architecture: one fully-connected input layer of the same width as the
    input, n_hidden_layers hidden layers of layer_width, LeakyReLU activations
    throughout, and a single-output head initialized with small uniform weights
    and a slightly negative bias.
    """

    def __init__(self,
            obs_dim: int,
            actions_dim: int,
            device: str = "cuda",
            dtype = torch.float32,
            layer_width: int = 512,
            n_hidden_layers: int = 4,
            add_weight_norm: bool = False,
            add_layer_norm: bool = False,
            add_batch_norm: bool = False):

        super().__init__()

        self._lrelu_slope = 0.01

        self._torch_device = device
        self._torch_dtype = dtype

        self._obs_dim = obs_dim
        self._actions_dim = actions_dim
        self._q_net_dim = self._obs_dim + self._actions_dim

        # first layer is fully connected with the same width as the input
        self._first_hidden_layer_width = self._q_net_dim

        init_type = "kaiming_uniform" # maintains the variance of activations throughout the network
        nonlinearity = "leaky_relu" # suited for kaiming

        def activation():
            return nn.LeakyReLU(negative_slope=self._lrelu_slope)

        def dense(in_features, out_features):
            # shared builder for the input/hidden linear layers
            return llayer_init(
                layer=nn.Linear(in_features, out_features),
                init_type=init_type,
                nonlinearity=nonlinearity,
                a_leaky_relu=self._lrelu_slope,
                device=self._torch_device,
                dtype=self._torch_dtype,
                add_weight_norm=add_weight_norm,
                add_layer_norm=add_layer_norm,
                add_batch_norm=add_batch_norm,
                uniform_biases=False, # constant bias init
                bias_const=0.0
            )

        # input layer
        modules = dense(self._q_net_dim, self._first_hidden_layer_width)
        modules.append(activation())

        # hidden layers
        modules += dense(self._first_hidden_layer_width, layer_width)
        modules.append(activation())
        for _ in range(n_hidden_layers - 1):
            modules += dense(layer_width, layer_width)
            modules.append(activation())

        # output head: small uniform weights, slightly negative constant bias
        # to discourage initial Q overestimation
        modules += llayer_init(
            layer=nn.Linear(layer_width, 1),
            init_type="uniform",
            uniform_biases=False, # constant biases
            bias_const=-0.1, # negative to prevent overestimation
            scale_weight=1e-2,
            device=self._torch_device,
            dtype=self._torch_dtype,
            add_weight_norm=False,
            add_layer_norm=False,
            add_batch_norm=False
        )

        # assemble the full sequential network
        self._q_net = nn.Sequential(*modules)
        self._q_net.to(self._torch_device).type(self._torch_dtype)

        print("Critic architecture")
        print(self._q_net)

    def get_n_params(self):
        """Total number of parameters in the critic."""
        return sum(p.numel() for p in self.parameters())

    def forward(self, x, a):
        """Evaluate Q(s, a): concatenate obs and action along the feature dim and run the MLP."""
        sa = torch.cat([x, a], dim=1)
        return self._q_net(sa)
+
class Actor(nn.Module):
    """Squashed-Gaussian policy network (SAC-style actor).

    A shared fully-connected feature extractor feeds two output heads:
    one for the action mean and one for the (bounded) log standard
    deviation. Sampled actions are squashed with tanh and affinely
    rescaled into [actions_lb, actions_ub].
    """

    def __init__(self,
            obs_dim: int,
            actions_dim: int,
            actions_ub: List[float] = None,
            actions_lb: List[float] = None,
            device: str = "cuda",
            dtype = torch.float32,
            layer_width: int = 256,
            n_hidden_layers: int = 2,
            add_weight_norm: bool = False,
            add_layer_norm: bool = False,
            add_batch_norm: bool = False):
        # obs_dim/actions_dim: sizes of observation and action vectors.
        # actions_ub/actions_lb: per-action bounds; default to [-1, 1] each.
        # layer_width/n_hidden_layers: hidden feature-extractor geometry.
        # add_*_norm: optional normalization layers added by llayer_init.

        super().__init__()

        self._lrelu_slope=0.01

        self._torch_device = device
        self._torch_dtype = dtype

        self._obs_dim = obs_dim
        self._actions_dim = actions_dim

        self._first_hidden_layer_width=self._obs_dim # first layer fully connected and of same dim as the observation

        # Action scale and bias (affine map from tanh output in [-1, 1] to [lb, ub])
        if actions_ub is None:
            actions_ub = [1] * actions_dim
        if actions_lb is None:
            actions_lb = [-1] * actions_dim
        if (len(actions_ub) != actions_dim):
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Actions ub list length should be equal to {actions_dim}, but got {len(actions_ub)}",
                LogType.EXCEP,
                throw_when_excep = True)
        if (len(actions_lb) != actions_dim):
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Actions lb list length should be equal to {actions_dim}, but got {len(actions_lb)}",
                LogType.EXCEP,
                throw_when_excep = True)

        self._actions_ub = torch.tensor(actions_ub, dtype=self._torch_dtype,
                            device=self._torch_device)
        self._actions_lb = torch.tensor(actions_lb, dtype=self._torch_dtype,
                            device=self._torch_device)
        # registered as buffers so they move with the module and are saved
        # in its state dict, but are not trained
        action_scale = torch.full((actions_dim, ),
                        fill_value=0.0,
                        dtype=self._torch_dtype,
                        device=self._torch_device)
        action_scale[:] = (self._actions_ub-self._actions_lb)/2.0
        self.register_buffer(
            "action_scale", action_scale
        )
        actions_bias = torch.full((actions_dim, ),
                        fill_value=0.0,
                        dtype=self._torch_dtype,
                        device=self._torch_device)
        actions_bias[:] = (self._actions_ub+self._actions_lb)/2.0
        self.register_buffer(
            "action_bias", actions_bias)

        # Network configuration: log-std is squashed into this range in forward()
        self.LOG_STD_MAX = 2
        self.LOG_STD_MIN = -5

        # Input layer followed by hidden layers
        layers=llayer_init(nn.Linear(self._obs_dim, self._first_hidden_layer_width),
            init_type="kaiming_uniform",
            nonlinearity="leaky_relu",
            a_leaky_relu=self._lrelu_slope,
            device=self._torch_device,
            dtype=self._torch_dtype,
            add_weight_norm=add_weight_norm,
            add_layer_norm=add_layer_norm,
            add_batch_norm=add_batch_norm,
            uniform_biases=False, # constant bias init
            bias_const=0.0
            )
        layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)])

        # Hidden layers
        layers.extend(
            llayer_init(nn.Linear(self._first_hidden_layer_width, layer_width),
                init_type="kaiming_uniform",
                nonlinearity="leaky_relu",
                a_leaky_relu=self._lrelu_slope,
                device=self._torch_device,
                dtype=self._torch_dtype,
                add_weight_norm=add_weight_norm,
                add_layer_norm=add_layer_norm,
                add_batch_norm=add_batch_norm,
                uniform_biases=False, # constant bias init
                bias_const=0.0)
            )
        layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)])

        for _ in range(n_hidden_layers - 1):
            layers.extend(
                llayer_init(nn.Linear(layer_width, layer_width),
                    init_type="kaiming_uniform",
                    nonlinearity="leaky_relu",
                    a_leaky_relu=self._lrelu_slope,
                    device=self._torch_device,
                    dtype=self._torch_dtype,
                    add_weight_norm=add_weight_norm,
                    add_layer_norm=add_layer_norm,
                    add_batch_norm=add_batch_norm,
                    uniform_biases=False, # constant bias init
                    bias_const=0.0)
                )
            layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)])

        # Sequential layers for the feature extractor
        self._fc12 = nn.Sequential(*layers)

        # Mean and log_std output heads (small uniform init for near-zero initial outputs)
        out_fc_mean=llayer_init(nn.Linear(layer_width, self._actions_dim),
            init_type="uniform",
            uniform_biases=False, # constant bias init
            bias_const=0.0,
            scale_weight=1e-3, # scaling (output layer)
            scale_bias=1.0,
            device=self._torch_device,
            dtype=self._torch_dtype,
            add_weight_norm=False,
            add_layer_norm=False,
            add_batch_norm=False
            )
        self.fc_mean = nn.Sequential(*out_fc_mean)
        out_fc_logstd= llayer_init(nn.Linear(layer_width, self._actions_dim),
            init_type="uniform",
            uniform_biases=False,
            bias_const=math.log(0.5), # bias so that initial std is roughly 0.5
            scale_weight=1e-3, # scaling (output layer)
            scale_bias=1.0,
            device=self._torch_device,
            dtype=self._torch_dtype,
            add_weight_norm=False,
            add_layer_norm=False,
            add_batch_norm=False,
            )
        self.fc_logstd = nn.Sequential(*out_fc_logstd)

        # Move all components to the specified device and dtype
        self._fc12.to(device=self._torch_device, dtype=self._torch_dtype)
        self.fc_mean.to(device=self._torch_device, dtype=self._torch_dtype)
        self.fc_logstd.to(device=self._torch_device, dtype=self._torch_dtype)

        print("Actor architecture")
        print(self._fc12)
        print(self.fc_mean)
        print(self.fc_logstd)

    def get_n_params(self):
        """Return the total number of parameters in the actor."""
        return sum(p.numel() for p in self.parameters())

    def forward(self, x):
        """Compute the pre-squash Gaussian parameters for observations ``x``.

        Returns (mean, log_std), with log_std smoothly clamped into
        [LOG_STD_MIN, LOG_STD_MAX] via tanh rescaling.
        """
        x = self._fc12(x)
        mean = self.fc_mean(x)
        log_std = self.fc_logstd(x)
        log_std = torch.tanh(log_std)
        log_std = self.LOG_STD_MIN + 0.5 * (self.LOG_STD_MAX - self.LOG_STD_MIN) * (log_std + 1) # From SpinUp / Denis Yarats
        return mean, log_std

    def get_action(self, x):
        """Sample a squashed, rescaled action for observations ``x``.

        Returns:
            action: tanh-squashed sample rescaled to the action bounds.
            (log_prob_sum, log_prob_vec): summed and per-dimension log
                probabilities with the tanh/scale change-of-variables
                correction applied.
            mean: deterministic (squashed, rescaled) action mean.
        """
        mean, log_std = self(x)
        std = log_std.exp()
        normal = torch.distributions.Normal(mean, std)
        x_t = normal.rsample() # Reparameterization trick (for SAC we need the action
        # to be differentiable since we use Q nets. Using sample() would break the
        # comp. graph and not allow gradients to flow)
        y_t = torch.tanh(x_t)
        action = y_t * self.action_scale + self.action_bias
        log_prob_vec = normal.log_prob(x_t) # per-dimension log prob before tanh
        log_prob_vec = log_prob_vec - torch.log(self.action_scale * (1 - y_t.pow(2)) + 1e-6) # tanh Jacobian + scaling
        log_prob_sum = log_prob_vec.sum(1, keepdim=True)
        mean = torch.tanh(mean) * self.action_scale + self.action_bias
        return action, (log_prob_sum, log_prob_vec), mean

    def remove_scaling(self, a):
        """Map an action from the env's bounds back to the tanh range [-1, 1]."""
        return (a - self.action_bias)/self.action_scale
+
+if __name__ == "__main__":
+ device = "cpu" # or "cpu"
+ import time
+ obs_dim = 273
+ agent = SACAgent(
+ obs_dim=obs_dim,
+ actions_dim=10,
+ actions_lb=None,
+ actions_ub=None,
+ obs_lb=None,
+ obs_ub=None,
+ rescale_obs=False,
+ norm_obs=True,
+ use_action_rescale_for_critic=True,
+ is_eval=True,
+ compression_ratio=0.6,
+ layer_width_actor=128,
+ layer_width_critic=256,
+ n_hidden_layers_actor=3,
+ n_hidden_layers_critic=3,
+ device=device,
+ dtype=torch.float32,
+ add_weight_norm=True,
+ add_layer_norm=False,
+ add_batch_norm=False
+ )
+
+ n_samples = 10000
+ random_obs = torch.rand((1, obs_dim), dtype=torch.float32, device=device)
+
+ if device == "cuda":
+ torch.cuda.synchronize()
+ start = time.time()
+
+ for i in range(n_samples):
+ actions, _, mean = agent.get_action(x=random_obs)
+ actions = actions.detach()
+ actions[:, :] = mean.detach()
+
+ if device == "cuda":
+ torch.cuda.synchronize()
+ end = time.time()
+
+ avrg_eval_time = (end - start) / n_samples
+ print(f"Average policy evaluation time on {device}: {avrg_eval_time:.6f} s")
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/sactor_critic_algo.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/sactor_critic_algo.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a578cd8bbb4404b39ac47ebd3f178728bbef08f
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/sactor_critic_algo.py
@@ -0,0 +1,2397 @@
+from aug_mpc.agents.sactor_critic.sac import SACAgent
+from aug_mpc.agents.dummies.dummy import DummyAgent
+
+from aug_mpc.utils.shared_data.algo_infos import SharedRLAlgorithmInfo, QfVal, QfTrgt
+from aug_mpc.utils.shared_data.training_env import SubReturns, TotReturns
+from aug_mpc.utils.model_bundle import write_bundle_manifest
+from aug_mpc.utils.nn.rnd import RNDFull
+
+import torch
+import torch.optim as optim
+import torch.nn as nn
+
+import random
+import math
+from typing import Dict
+
+import os
+import shutil
+
+import time
+
+import wandb
+import h5py
+import numpy as np
+
+from EigenIPC.PyEigenIPC import LogType
+from EigenIPC.PyEigenIPC import Journal
+from EigenIPC.PyEigenIPC import VLevel
+
+from abc import ABC, abstractmethod
+
+class SActorCriticAlgoBase(ABC):
+
+ # base class for actor-critic RL algorithms
+
def __init__(self,
             env,
             debug = False,
             remote_db = False,
             seed: int = 1):
    """Store construction options and derive env-dependent parameters.

    Args:
        env: vectorized training environment handle.
        debug: enable debug data collection/logging.
        remote_db: mirror debug data to a remote dashboard (wandb).
        seed: RNG seed recorded for the run.
    """
    # core handles
    self._env = env
    self._seed = seed
    self._debug = debug
    self._remote_db = remote_db

    # mode flags (refined later in setup())
    self._eval = False
    self._det_eval = True
    self._full_env_db = False

    # populated during setup()
    self._agent = None
    self._writer = None
    self._run_name = None
    self._drop_dir = None
    self._dbinfo_drop_fname = None
    self._model_path = None

    # debug-data dictionaries
    self._policy_update_db_data_dict = {}
    self._custom_env_data_db_dict = {}
    self._rnd_db_data_dict = {}
    self._hyperparameters = {}
    self._wandb_d = {}

    # read static parameters from the environment
    self._get_params_from_env()

    self._torch_device = torch.device("cpu")  # defaults to cpu

    self._setup_done = False
    self._verbose = False
    self._is_done = False
    self._shared_algo_data = None

    self._this_child_path = None
    self._this_basepath = os.path.abspath(__file__)
+
def __del__(self):
    # best-effort cleanup on garbage collection; done() is expected to be
    # idempotent since it may also be called explicitly
    self.done()
+
def _get_params_from_env(self):
    """Cache the environment's static parameters on this object.

    Also initializes the default env-selection masks: every env is a
    debug env, no env is an exploration env, and demo envs come from the
    environment itself.
    """
    env = self._env
    self._env_name = env.name()
    self._episodic_reward_metrics = env.ep_rewards_metrics()
    self._use_gpu = env.using_gpu()
    self._dtype = env.dtype()
    self._env_opts = env.env_opts()
    self._num_envs = env.n_envs()
    self._obs_dim = env.obs_dim()
    self._actions_dim = env.actions_dim()
    self._episode_timeout_lb, self._episode_timeout_ub = env.episode_timeout_bounds()
    self._task_rand_timeout_lb, self._task_rand_timeout_ub = env.task_rand_timeout_bounds()
    self._env_n_action_reps = env.n_action_reps()

    # split action space into continuous/discrete index sets
    self._is_continuous_actions_bool = env.is_action_continuous()
    self._is_continuous_actions = torch.where(self._is_continuous_actions_bool)[0]
    self._is_discrete_actions_bool = env.is_action_discrete()
    self._is_discrete_actions = torch.where(self._is_discrete_actions_bool)[0]

    # debug-env selection: default to all envs
    self._db_env_selector = torch.arange(0, self._num_envs, dtype=torch.int)
    self._db_env_selector_bool = torch.ones((self._num_envs, ), dtype=torch.bool, device="cpu")
    # exploration envs: default to none
    self._expl_env_selector = None
    self._expl_env_selector_bool = torch.zeros((self._num_envs, ), dtype=torch.bool, device="cpu")
    self._pert_counter = 0.0
    # demo envs
    self._demo_stop_thresh = None  # performance metric above which demo envs are deactivated
    # (can be overridden through the provided options)
    self._demo_env_selector = env.demo_env_idxs()
    self._demo_env_selector_bool = env.demo_env_idxs(get_bool=True)
+
def learn(self):
    """Run one outer training step.

    Collects ``_collection_freq`` vectorized transitions, optionally
    refreshes batch-norm statistics, performs ``_update_freq`` policy
    updates, optionally computes validation losses, logs via
    ``_post_step`` and finally handles periodic agent resets.

    Returns False when transition collection signals termination,
    otherwise ``not self.is_done()``.
    """
    if not self._setup_done:
        self._should_have_called_setup()

    self._start_time = time.perf_counter()

    # experience collection
    with torch.no_grad(): # don't need grad computation here
        for i in range(self._collection_freq):
            if not self._collect_transition():
                return False
            self._vec_transition_counter+=1

    self._collection_t = time.perf_counter()

    # refresh batch-norm running stats every _bnorm_vecfreq vec steps
    if self._vec_transition_counter % self._bnorm_vecfreq == 0:
        with torch.no_grad(): # don't need grad computation here
            self._update_batch_norm(bsize=self._bnorm_bsize)

    # policy update
    self._policy_update_t_start = time.perf_counter()
    for i in range(self._update_freq):
        self._update_policy()
        self._update_counter+=1

    self._policy_update_t = time.perf_counter()

    with torch.no_grad():
        if self._validate and (self._vec_transition_counter % self._validation_db_vecstep_freq == 0):
            # validation
            self._update_validation_losses()
            self._validation_t = time.perf_counter()
        self._post_step()

    if self._use_period_resets:
        # periodic policy resets (to counteract primacy bias)
        if not self._adaptive_resets:
            # resets are active only inside the configured vec-step window
            self._periodic_resets_on=(self._vec_transition_counter >= self._reset_vecstep_start) and \
                (self._vec_transition_counter < self._reset_vecstep_end)

            if self._periodic_resets_on and \
                (self._vec_transition_counter-self._reset_vecstep_start) % self._periodic_resets_vecfreq == 0:
                self._reset_agent()
        else: # trigger reset based on overfit metric
            if self._overfit_idx > self._overfit_idx_thresh:
                self._reset_agent()

    return not self.is_done()
+
def eval(self):
    """Run one evaluation step: collect a single vectorized transition.

    Returns False when collection signals termination, otherwise
    ``not self.is_done()``.
    """
    if not self._setup_done:
        self._should_have_called_setup()

    self._start_time = time.perf_counter()

    collected = self._collect_eval_transition()
    if not collected:
        return False
    self._vec_transition_counter += 1

    self._collection_t = time.perf_counter()
    self._post_step()

    return not self.is_done()
+
@abstractmethod
def _collect_transition(self)->bool:
    """Collect one vectorized training transition.

    Must return False to make learn() stop, True otherwise."""
    pass
+
@abstractmethod
def _collect_eval_transition(self)->bool:
    """Collect one vectorized evaluation transition.

    Must return False to make eval() stop, True otherwise."""
    pass
+
@abstractmethod
def _update_policy(self):
    """Perform one policy/critic update step (called _update_freq times per learn())."""
    pass
+
@abstractmethod
def _update_validation_losses(self):
    """Compute validation losses on the held-out validation buffer."""
    pass
+
def _update_overfit_idx(self, loss, val_loss):
    """Update the overfitting index with an exponential moving average.

    The instantaneous index is the relative gap between validation and
    training loss; it is blended into the running value with weight
    ``_overfit_idx_alpha``.
    """
    instantaneous = (val_loss - loss) / loss
    alpha = self._overfit_idx_alpha
    self._overfit_idx = alpha * instantaneous + (1 - alpha) * self._overfit_idx
+
def setup(self,
        run_name: str,
        ns: str,
        custom_args: Dict = {},
        verbose: bool = False,
        drop_dir_name: str = None,
        eval: bool = False,
        resume: bool = False,
        model_path: str = None,
        n_eval_timesteps: int = None,
        comment: str = "",
        dump_checkpoints: bool = False,
        norm_obs: bool = True,
        rescale_obs: bool = False):
    """Configure the algorithm before learn()/eval() can be called.

    Builds the agent (or a dummy one when actions are overridden),
    optionally loads a pretrained/checkpointed model, initializes
    parameters, buffers, optimizers, RND exploration nets, debug data
    and remote logging, and resets the environment.

    Args:
        run_name: human-readable run identifier.
        ns: namespace, only used for shared-memory resources.
        custom_args: option overrides merged into the hyperparameters.
        verbose: enable verbose logging.
        drop_dir_name: base directory where run artifacts are dumped.
        eval: run in evaluation mode (mutually exclusive with resume).
        resume: resume training from a checkpoint (requires model_path).
        model_path: path of the model to load (eval/resume).
        n_eval_timesteps: total timesteps when eval is True (required then).
        comment: free-text note attached to the remote-db run.
        dump_checkpoints: periodically dump model checkpoints.
        norm_obs: enable running observation normalization in the agent.
        rescale_obs: enable observation rescaling in the agent.
    """
    # NOTE(review): custom_args uses a mutable default ({}); it is only read
    # here, but a None default would avoid the shared-mutable-default pitfall.

    tot_tsteps=int(100e6)
    if "tot_tsteps" in custom_args:
        tot_tsteps=custom_args["tot_tsteps"]

    self._verbose = verbose

    self._ns=ns # only used for shared mem stuff

    self._dump_checkpoints = dump_checkpoints

    self._init_algo_shared_data(static_params=self._hyperparameters) # can only handle dicts with
    # numeric values

    if "full_env_db" in custom_args:
        self._full_env_db=custom_args["full_env_db"]

    self._eval = eval
    self._resume=resume
    if self._eval and self._resume:
        Journal.log(self.__class__.__name__,
            "setup",
            f"Cannot set both eval and resume to true. Exiting.",
            LogType.EXCEP,
            throw_when_excep = True)

    self._load_qf=False
    if self._eval:
        if "load_qf" in custom_args:
            self._load_qf=custom_args["load_qf"]
    if self._resume:
        self._load_qf=True # must load qf when resuming
        self._eval=False
    try:
        self._det_eval=custom_args["det_eval"]
    except:
        # keep the default deterministic-eval setting when not provided
        pass

    self._override_agent_actions=False
    if "override_agent_actions" in custom_args:
        self._override_agent_actions=custom_args["override_agent_actions"]

    if self._override_agent_actions: # force evaluation mode
        Journal.log(self.__class__.__name__,
            "setup",
            "will force evaluation mode since override_agent_actions was set to true",
            LogType.INFO,
            throw_when_excep = True)
        self._eval=True
        self._validate=False
        self._load_qf=False
        self._det_eval=False
        self._resume=False

    # unique run id: timestamp + run name
    self._run_name = run_name
    from datetime import datetime
    self._time_id = datetime.now().strftime('d%Y_%m_%d_h%H_m%M_s%S')
    self._unique_id = self._time_id + "-" + self._run_name

    self._hyperparameters["unique_run_id"]=self._unique_id
    self._hyperparameters.update(custom_args)

    self._torch_device = torch.device("cuda" if torch.cuda.is_available() and self._use_gpu else "cpu")

    # network geometry: from hyperparameters when provided, else defaults
    try:
        layer_width_actor=self._hyperparameters["actor_lwidth"]
        layer_width_critic=self._hyperparameters["critic_lwidth"]
        n_hidden_layers_actor=self._hyperparameters["actor_n_hlayers"]
        n_hidden_layers_critic=self._hyperparameters["critic_n_hlayers"]
    except:
        layer_width_actor=256
        layer_width_critic=512
        n_hidden_layers_actor=2
        n_hidden_layers_critic=4
        pass

    use_torch_compile=False
    add_weight_norm=False
    add_layer_norm=False
    add_batch_norm=False
    compression_ratio=-1.0
    if "use_torch_compile" in self._hyperparameters and \
        self._hyperparameters["use_torch_compile"]:
        use_torch_compile=True
    if "add_weight_norm" in self._hyperparameters and \
        self._hyperparameters["add_weight_norm"]:
        add_weight_norm=True
    if "add_layer_norm" in self._hyperparameters and \
        self._hyperparameters["add_layer_norm"]:
        add_layer_norm=True
    if "add_batch_norm" in self._hyperparameters and \
        self._hyperparameters["add_batch_norm"]:
        add_batch_norm=True
    if "compression_ratio" in self._hyperparameters:
        compression_ratio=self._hyperparameters["compression_ratio"]

    act_rescale_critic=False
    if "act_rescale_critic" in self._hyperparameters:
        act_rescale_critic=self._hyperparameters["act_rescale_critic"]
    if not self._override_agent_actions:
        self._agent = SACAgent(obs_dim=self._env.obs_dim(),
            obs_ub=self._env.get_obs_ub().flatten().tolist(),
            obs_lb=self._env.get_obs_lb().flatten().tolist(),
            actions_dim=self._env.actions_dim(),
            actions_ub=None, # agent will assume actions are properly normalized in [-1, 1] by the env
            actions_lb=None,
            rescale_obs=rescale_obs,
            norm_obs=norm_obs,
            use_action_rescale_for_critic=act_rescale_critic,
            compression_ratio=compression_ratio,
            device=self._torch_device,
            dtype=self._dtype,
            is_eval=self._eval,
            load_qf=self._load_qf,
            debug=self._debug,
            layer_width_actor=layer_width_actor,
            layer_width_critic=layer_width_critic,
            n_hidden_layers_actor=n_hidden_layers_actor,
            n_hidden_layers_critic=n_hidden_layers_critic,
            torch_compile=use_torch_compile,
            add_weight_norm=add_weight_norm,
            add_layer_norm=add_layer_norm,
            add_batch_norm=add_batch_norm)
    else: # we use a fake agent
        self._agent = DummyAgent(obs_dim=self._env.obs_dim(),
            actions_dim=self._env.actions_dim(),
            actions_ub=None,
            actions_lb=None,
            device=self._torch_device,
            dtype=self._dtype,
            debug=self._debug)

    # logging actual widths and layers in case they were overridden inside agent init
    self._hyperparameters["actor_lwidth_actual"]=self._agent.layer_width_actor()
    self._hyperparameters["actor_n_hlayers_actual"]=self._agent.n_hidden_layers_actor()
    self._hyperparameters["critic_lwidth_actual"]=self._agent.layer_width_critic()
    self._hyperparameters["critic_n_hlayers_actual"]=self._agent.n_hidden_layers_critic()

    # load model if necessary
    if self._eval and (not self._override_agent_actions): # load pretrained model
        if model_path is None:
            msg = f"No model path provided in eval mode! Was this intentional? \
                No jnt remapping will be available and a randomly init agent will be used."
            Journal.log(self.__class__.__name__,
                "setup",
                msg,
                LogType.WARN,
                throw_when_excep = True)
        if n_eval_timesteps is None:
            Journal.log(self.__class__.__name__,
                "setup",
                f"When eval is True, n_eval_timesteps should be provided!!",
                LogType.EXCEP,
                throw_when_excep = True)
        # everything is ok
        self._model_path = model_path
        if self._model_path is not None:
            self._load_model(self._model_path)

        # overwrite init params
        self._init_params(tot_tsteps=n_eval_timesteps,
            custom_args=custom_args)
    else:
        if self._resume:
            if model_path is None:
                msg = f"No model path provided in resume mode! Please provide a valid checkpoint path."
                Journal.log(self.__class__.__name__,
                    "setup",
                    msg,
                    LogType.EXCEP,
                    throw_when_excep = True)
            self._model_path = model_path
            if self._model_path is not None:
                self._load_model(self._model_path) # load model from checkpoint (including q functions and running normalizers)
        self._init_params(tot_tsteps=tot_tsteps,
            custom_args=custom_args)

    # adding additional db info
    self._hyperparameters["obs_names"]=self._env.obs_names()
    self._hyperparameters["action_names"]=self._env.action_names()
    self._hyperparameters["sub_reward_names"]=self._env.sub_rew_names()
    self._hyperparameters["sub_trunc_names"]=self._env.sub_trunc_names()
    self._hyperparameters["sub_term_names"]=self._env.sub_term_names()

    self._allow_expl_during_eval=False
    if "allow_expl_during_eval" in self._hyperparameters:
        self._allow_expl_during_eval=self._hyperparameters["allow_expl_during_eval"]

    # reset environment
    self._env.reset()
    if self._eval:
        self._env.switch_random_reset(on=False)

    # log the freshly initialized policy's entropy for sanity checking
    if self._debug and (not self._override_agent_actions):
        with torch.no_grad():
            init_obs = self._env.get_obs(clone=True)
            _, init_log_pi, _ = self._agent.get_action(init_obs)
            init_policy_entropy = (-init_log_pi[0]).mean().item()
            init_policy_entropy_per_action = init_policy_entropy / float(self._actions_dim)
            Journal.log(self.__class__.__name__,
                "setup",
                f"Initial policy entropy per action: {init_policy_entropy_per_action:.4f})",
                LogType.INFO,
                throw_when_excep = True)

    # create dump directory + copy important files for debug
    self._init_drop_dir(drop_dir_name)
    self._hyperparameters["drop_dir"]=self._drop_dir

    # add env options to hyperparameters
    self._hyperparameters.update(self._env_opts)

    if not self._eval:
        # training-only machinery: optimizers, buffers, alpha autotuning, RND
        self._init_agent_optimizers()

        self._init_replay_buffers() # only needed when training
        if self._validate:
            self._init_validation_buffers()

        if self._autotune:
            self._init_alpha_autotuning()

        if self._use_rnd:
            # random network distillation for exploration bonuses
            self._rnd_net = RNDFull(input_dim=self._rnd_indim, output_dim=self._rnd_outdim,
                layer_width=self._rnd_lwidth, n_hidden_layers=self._rnd_hlayers,
                device=self._torch_device,
                dtype=self._dtype,
                normalize=norm_obs # normalize if also used for SAC agent
                )
            self._rnd_optimizer = torch.optim.Adam(self._rnd_net.rnd_predictor_net.parameters(),
                lr=self._rnd_lr)

            # preallocated input/bonus buffers (avoid per-step allocations)
            self._rnd_input = torch.full(size=(self._batch_size, self._rnd_net.input_dim()),
                fill_value=0.0,
                dtype=self._dtype,
                device=self._torch_device,
                requires_grad=False)
            self._rnd_bnorm_input = torch.full(size=(self._bnorm_bsize, self._rnd_net.input_dim()),
                fill_value=0.0,
                dtype=self._dtype,
                device=self._torch_device,
                requires_grad=False)

            self._proc_exp_bonus_all = torch.full(size=(self._batch_size, 1),
                fill_value=0.0,
                dtype=self._dtype,
                device=self._torch_device,
                requires_grad=False)
            self._raw_exp_bonus_all = torch.full(size=(self._batch_size, 1),
                fill_value=0.0,
                dtype=self._dtype,
                device=self._torch_device,
                requires_grad=False)
            # if self._autotune_rnd_scale:
            #     self._reward_normalizer=RunningNormalizer((1,), epsilon=1e-8,
            #         device=self._torch_device, dtype=self._dtype,
            #         freeze_stats=False,
            #         debug=self._debug)

    self._init_dbdata()

    if (self._debug):
        if self._remote_db:
            # remote experiment tracking (wandb)
            job_type = "evaluation" if self._eval else "training"
            project="IBRIDO-ablations"
            wandb.init(
                project=project,
                group=self._run_name,
                name=self._unique_id,
                id=self._unique_id,
                job_type=job_type,
                # tags=None,
                notes=comment,
                resume="never", # do not allow runs with the same unique id
                mode="online", # "online", "offline" or "disabled"
                entity=None,
                sync_tensorboard=True,
                config=self._hyperparameters,
                monitor_gym=True,
                save_code=True,
                dir=self._drop_dir
            )
            wandb.watch((self._agent), log="all", log_freq=1000, log_graph=False)

    if "demo_stop_thresh" in self._hyperparameters:
        self._demo_stop_thresh=self._hyperparameters["demo_stop_thresh"]

    actions = self._env.get_actions()
    # buffers for sampling random actions (preallocated for efficiency)
    self._random_uniform = torch.full_like(actions, fill_value=0.0)
    self._random_normal = torch.full_like(self._random_uniform,fill_value=0.0)

    # shared-memory channel from which externally-provided actions are read
    self._actions_override=None
    if self._override_agent_actions:
        from aug_mpc.utils.shared_data.training_env import Actions
        self._actions_override = Actions(namespace=ns+"_override",
            n_envs=self._num_envs,
            action_dim=actions.shape[1],
            action_names=self._env.action_names(),
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=VLevel.V2,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)
        self._actions_override.run()

    self._start_time_tot = time.perf_counter()

    self._start_time = time.perf_counter()

    # buffer bookkeeping
    self._replay_bf_full = False
    self._validation_bf_full = False
    self._bpos=0
    self._bpos_val=0

    self._is_done = False
    self._setup_done = True
+
def is_done(self):
    """Whether the run has been terminated."""
    return self._is_done
+
def model_path(self):
    """Path of the model loaded for eval/resume, or None."""
    return self._model_path
+
+ def _init_params(self,
+ tot_tsteps: int,
+ custom_args: Dict = {}):
+
+ self._collection_freq=1
+ self._update_freq=4
+
+ self._replay_buffer_size_vec=10*self._task_rand_timeout_ub # cover at least a number of eps
+ self._replay_buffer_size = self._replay_buffer_size_vec*self._num_envs
+ if self._replay_buffer_size_vec < 0: # in case env did not properly define _task_rand_timeout_ub
+ self._replay_buffer_size = int(1e6)
+ self._replay_buffer_size_vec = self._replay_buffer_size//self._num_envs
+ self._replay_buffer_size=self._replay_buffer_size_vec*self._num_envs
+
+ self._batch_size = 16394
+
+ new_transitions_per_batch=self._collection_freq*self._num_envs/self._replay_buffer_size # assumes uniform sampling
+ self._utd_ratio=self._update_freq/(new_transitions_per_batch*self._batch_size)
+
+ self._lr_policy = 1e-3
+ self._lr_q = 5e-4
+
+ self._discount_factor = 0.99
+ if "discount_factor" in custom_args:
+ self._discount_factor=custom_args["discount_factor"]
+
+ self._smoothing_coeff = 0.01
+
+ self._policy_freq = 2
+ self._trgt_net_freq = 1
+ self._rnd_freq = 1
+
+ # exploration
+
+ # entropy regularization (separate "discrete" and "continuous" actions)
+ self._entropy_metric_high = 0.5
+ self._entropy_metric_low = 0.0
+
+ # self._entropy_disc_start = -0.05
+ # self._entropy_disc_end = -0.5
+
+ # self._entropy_cont_start = -0.05
+ # self._entropy_cont_end = -2.0
+
+ self._entropy_disc_start = -0.3
+ self._entropy_disc_end = -0.3
+
+ self._entropy_cont_start = -0.5
+ self._entropy_cont_end = -0.5
+
+ # enable/disable entropy annealing (default: enabled)
+ self._anneal_entropy = False
+
+ self._trgt_avrg_entropy_per_action_disc = self._entropy_disc_start
+ self._trgt_avrg_entropy_per_action_cont = self._entropy_cont_start
+
+ self._disc_idxs = self._is_discrete_actions.clone().to(torch.long)
+ self._cont_idxs = self._is_continuous_actions.clone().to(torch.long)
+
+ self._target_entropy_disc = float(self._disc_idxs.numel()) * float(self._trgt_avrg_entropy_per_action_disc)
+ self._target_entropy_cont = float(self._cont_idxs.numel()) * float(self._trgt_avrg_entropy_per_action_cont)
+ self._target_entropy = self._target_entropy_disc + self._target_entropy_cont
+ self._trgt_avrg_entropy_per_action = self._target_entropy / float(max(self._actions_dim, 1))
+ self._hyperparameters["anneal_entropy"] = self._anneal_entropy
+
+ self._autotune = True
+ self._alpha_disc = 0.2 # initial values
+ self._alpha_cont = 0.2
+ self._alpha = 0.5*(self._alpha_disc + self._alpha_cont)
+ self._log_alpha_disc = math.log(self._alpha_disc)
+ self._log_alpha_cont = math.log(self._alpha_cont)
+ self._a_optimizer_disc = None
+ self._a_optimizer_cont = None
+
+ # random expl ens
+ self._expl_envs_perc=0.0 # [0, 1]
+ if "expl_envs_perc" in custom_args:
+ self._expl_envs_perc=custom_args["expl_envs_perc"]
+ self._n_expl_envs = int(self._num_envs*self._expl_envs_perc) # n of random envs on which noisy actions will be applied
+ self._noise_freq_vec = 100 # substeps
+ self._noise_duration_vec = 5 # should be less than _noise_freq
+ # correct with env substepping
+ self._noise_freq_vec=self._noise_freq_vec//self._env_n_action_reps
+ self._noise_duration_vec=self._noise_duration_vec//self._env_n_action_reps
+
+ self._continuous_act_expl_noise_std=0.3 # wrt actions scale
+ self._discrete_act_expl_noise_std=1.2 # setting it a bit > 1 helps in ensuring discr. actions range is explored
+
+ # rnd
+ self._use_rnd=True
+ self._rnd_net=None
+ self._rnd_optimizer = None
+ self._rnd_lr = 1e-3
+ if "use_rnd" in custom_args and (not self._eval):
+ self._use_rnd=custom_args["use_rnd"]
+ self._rnd_weight=1.0
+ self._alpha_rnd=0.0
+ self._novelty_scaler=None
+ if self._use_rnd:
+ from adarl.utils.NoveltyScaler import NoveltyScaler
+
+ self._novelty_scaler=NoveltyScaler(th_device=self._torch_device,
+ bonus_weight=self._rnd_weight,
+ avg_alpha=self._alpha_rnd)
+
+ self._rnd_lwidth=512
+ self._rnd_hlayers=3
+ self._rnd_outdim=16
+ self._rnd_indim=self._obs_dim+self._actions_dim
+
+ # batch normalization
+ self._bnorm_bsize = 4096
+ self._bnorm_vecfreq_nom = 5 # wrt vec steps
+ # make sure _bnorm_vecfreq is a multiple of _collection_freq
+ self._bnorm_vecfreq = (self._bnorm_vecfreq_nom//self._collection_freq)*self._collection_freq
+ if self._bnorm_vecfreq == 0:
+ self._bnorm_vecfreq=self._collection_freq
+ self._reward_normalizer=None
+
+ self._total_timesteps = int(tot_tsteps)
+ # self._total_timesteps = self._total_timesteps//self._env_n_action_reps # correct with n of action reps
+ self._total_timesteps_vec = self._total_timesteps // self._num_envs
+ self._total_steps = self._total_timesteps_vec//self._collection_freq
+ self._total_timesteps_vec = self._total_steps*self._collection_freq # correct to be a multiple of self._total_steps
+ self._total_timesteps = self._total_timesteps_vec*self._num_envs # actual n transitions
+
+ # self._warmstart_timesteps = int(5e3)
+ warmstart_length_single_env=min(self._episode_timeout_lb, self._episode_timeout_ub,
+ self._task_rand_timeout_lb, self._task_rand_timeout_ub)
+ self._warmstart_timesteps=warmstart_length_single_env*self._num_envs
+ if self._warmstart_timesteps < self._batch_size: # ensure we collect sufficient experience before
+ # starting training
+ self._warmstart_timesteps=4*self._batch_size
+ self._warmstart_vectimesteps = self._warmstart_timesteps//self._num_envs
+ # ensuring multiple of collection_freq
+ self._warmstart_timesteps = self._num_envs*self._warmstart_vectimesteps # actual
+
+ # period nets resets (for tackling the primacy bias)
+ self._use_period_resets=False
+ if "use_period_resets" in custom_args:
+ self._use_period_resets=custom_args["use_period_resets"]
+ self._adaptive_resets=True # trigger reset based on overfit metric
+ self._just_one_reset=False
+ self._periodic_resets_freq=int(4e6)
+ self._periodic_resets_start=int(1.5e6)
+ self._periodic_resets_end=int(0.8*self._total_timesteps)
+
+ self._periodic_resets_vecfreq=self._periodic_resets_freq//self._num_envs
+ self._periodic_resets_vecfreq = (self._periodic_resets_vecfreq//self._collection_freq)*self._collection_freq
+ self._reset_vecstep_start=self._periodic_resets_start//self._num_envs
+ self._reset_vecstep_end=self._periodic_resets_end//self._num_envs
+
+ if self._just_one_reset:
+ # we set the end as the fist reset + a fraction of the reset frequency (this way only one reset will happen)
+ self._reset_vecstep_end=int(self._reset_vecstep_start+0.8*self._periodic_resets_vecfreq)
+
+ self._periodic_resets_on=False
+
+ # debug
+ self._m_checkpoint_freq_nom = 1e6 # n totoal timesteps after which a checkpoint model is dumped
+ self._m_checkpoint_freq= self._m_checkpoint_freq_nom//self._num_envs
+
+ # expl envs
+ if self._n_expl_envs>0 and ((self._num_envs-self._n_expl_envs)>0): # log data only from envs which are not altered (e.g. by exploration noise)
+ # computing expl env selector
+ self._expl_env_selector = torch.randperm(self._num_envs, device="cpu")[:self._n_expl_envs]
+ self._expl_env_selector_bool[self._expl_env_selector]=True
+
+ # demo envs
+ if self._demo_env_selector_bool is None:
+ self._db_env_selector_bool[:]=~self._expl_env_selector_bool
+ else: # we log db data separately for env which are neither for demo nor for random exploration
+ self._demo_env_selector_bool=self._demo_env_selector_bool.cpu()
+ self._demo_env_selector=self._demo_env_selector.cpu()
+ self._db_env_selector_bool[:]=torch.logical_and(~self._expl_env_selector_bool, ~self._demo_env_selector_bool)
+
+ self._n_expl_envs = self._expl_env_selector_bool.count_nonzero()
+ self._num_db_envs = self._db_env_selector_bool.count_nonzero()
+
+ if not self._num_db_envs>0:
+ Journal.log(self.__class__.__name__,
+ "_init_params",
+ "No indipendent db env can be computed (check your demo and expl settings)! Will use all envs.",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ self._num_db_envs=self._num_envs
+ self._db_env_selector_bool[:]=True
+ self._db_env_selector=torch.nonzero(self._db_env_selector_bool).flatten()
+
+ self._transition_noise_freq=float(self._noise_duration_vec)/float(self._noise_freq_vec)
+ self._env_noise_freq=float(self._n_expl_envs)/float(self._num_envs)
+ self._noise_buff_freq=self._transition_noise_freq*self._env_noise_freq
+
+ self._db_vecstep_frequency = 32 # log db data every n (vectorized) SUB timesteps
+ self._db_vecstep_frequency=round(self._db_vecstep_frequency/self._env_n_action_reps) # correcting with actions reps
+ # correct db vecstep frequency to ensure it's a multiple of self._collection_freq
+ self._db_vecstep_frequency=(self._db_vecstep_frequency//self._collection_freq)*self._collection_freq
+ if self._db_vecstep_frequency == 0:
+ self._db_vecstep_frequency=self._collection_freq
+
+ self._env_db_checkpoints_vecfreq=150*self._db_vecstep_frequency # detailed db data from envs
+
+ self._validate=True
+ self._validation_collection_vecfreq=50 # add vec transitions to val buffer with some vec freq
+ self._validation_ratio=1.0/self._validation_collection_vecfreq # [0, 1], 0.1 10% size of training buffer
+ self._validation_buffer_size_vec = int(self._replay_buffer_size*self._validation_ratio)//self._num_envs
+ self._validation_buffer_size = self._validation_buffer_size_vec*self._num_envs
+ self._validation_batch_size = int(self._batch_size*self._validation_ratio)
+ self._validation_db_vecstep_freq=self._db_vecstep_frequency
+ if self._eval: # no need for validation transitions during evaluation
+ self._validate=False
+ self._overfit_idx=0.0
+ self._overfit_idx_alpha=0.03 # exponential MA
+ self._overfit_idx_thresh=2.0
+
+ self._n_policy_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq #TD3 delayed update
+ self._n_qf_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq # qf updated at each vec timesteps
+ self._n_tqf_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq//self._trgt_net_freq
+
+ self._exp_to_policy_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_policy_updates_to_be_done)
+ self._exp_to_qf_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_qf_updates_to_be_done)
+ self._exp_to_qft_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_tqf_updates_to_be_done)
+
+ self._db_data_size = round(self._total_timesteps_vec/self._db_vecstep_frequency)+self._db_vecstep_frequency
+
+ # write them to hyperparam dictionary for debugging
+ self._hyperparameters["n_envs"] = self._num_envs
+ self._hyperparameters["obs_dim"] = self._obs_dim
+ self._hyperparameters["actions_dim"] = self._actions_dim
+
+ self._hyperparameters["seed"] = self._seed
+ self._hyperparameters["using_gpu"] = self._use_gpu
+ self._hyperparameters["total_timesteps_vec"] = self._total_timesteps_vec
+
+ self._hyperparameters["collection_freq"]=self._collection_freq
+ self._hyperparameters["update_freq"]=self._update_freq
+ self._hyperparameters["total_steps"]=self._total_steps
+
+ self._hyperparameters["utd_ratio"] = self._utd_ratio
+
+ self._hyperparameters["n_policy_updates_when_done"] = self._n_policy_updates_to_be_done
+ self._hyperparameters["n_qf_updates_when_done"] = self._n_qf_updates_to_be_done
+ self._hyperparameters["n_tqf_updates_when_done"] = self._n_tqf_updates_to_be_done
+ self._hyperparameters["experience_to_policy_grad_steps_ratio"] = self._exp_to_policy_grad_ratio
+ self._hyperparameters["experience_to_quality_fun_grad_steps_ratio"] = self._exp_to_qf_grad_ratio
+ self._hyperparameters["experience_to_trgt_quality_fun_grad_steps_ratio"] = self._exp_to_qft_grad_ratio
+
+ self._hyperparameters["episodes timeout lb"] = self._episode_timeout_lb
+ self._hyperparameters["episodes timeout ub"] = self._episode_timeout_ub
+ self._hyperparameters["task rand timeout lb"] = self._task_rand_timeout_lb
+ self._hyperparameters["task rand timeout ub"] = self._task_rand_timeout_ub
+
+ self._hyperparameters["warmstart_timesteps"] = self._warmstart_timesteps
+ self._hyperparameters["warmstart_vectimesteps"] = self._warmstart_vectimesteps
+ self._hyperparameters["replay_buffer_size"] = self._replay_buffer_size
+ self._hyperparameters["batch_size"] = self._batch_size
+ self._hyperparameters["total_timesteps"] = self._total_timesteps
+ self._hyperparameters["lr_policy"] = self._lr_policy
+ self._hyperparameters["lr_q"] = self._lr_q
+ self._hyperparameters["discount_factor"] = self._discount_factor
+ self._hyperparameters["smoothing_coeff"] = self._smoothing_coeff
+ self._hyperparameters["policy_freq"] = self._policy_freq
+ self._hyperparameters["trgt_net_freq"] = self._trgt_net_freq
+ self._hyperparameters["autotune"] = self._autotune
+ self._hyperparameters["target_entropy"] = self._target_entropy
+ self._hyperparameters["target_entropy_disc"] = self._target_entropy_disc
+ self._hyperparameters["target_entropy_cont"] = self._target_entropy_cont
+ self._hyperparameters["disc_entropy_idxs"] = self._disc_idxs.tolist()
+ self._hyperparameters["cont_entropy_idxs"] = self._cont_idxs.tolist()
+ self._hyperparameters["log_alpha_disc"] = None if self._log_alpha_disc is None else self._log_alpha_disc
+ self._hyperparameters["log_alpha_cont"] = None if self._log_alpha_cont is None else self._log_alpha_cont
+ self._hyperparameters["alpha"] = self._alpha
+ self._hyperparameters["alpha_disc"] = self._alpha_disc
+ self._hyperparameters["alpha_cont"] = self._alpha_cont
+ self._hyperparameters["m_checkpoint_freq"] = self._m_checkpoint_freq
+ self._hyperparameters["db_vecstep_frequency"] = self._db_vecstep_frequency
+ self._hyperparameters["m_checkpoint_freq"] = self._m_checkpoint_freq
+
+ self._hyperparameters["use_period_resets"]= self._use_period_resets
+ self._hyperparameters["just_one_reset"]= self._just_one_reset
+ self._hyperparameters["period_resets_vecfreq"]= self._periodic_resets_vecfreq
+ self._hyperparameters["period_resets_vecstart"]= self._reset_vecstep_start
+ self._hyperparameters["period_resets_vecend"]= self._reset_vecstep_end
+ self._hyperparameters["period_resets_freq"]= self._periodic_resets_freq
+ self._hyperparameters["period_resets_start"]= self._periodic_resets_start
+ self._hyperparameters["period_resets_end"]= self._periodic_resets_end
+
+ self._hyperparameters["use_rnd"] = self._use_rnd
+ self._hyperparameters["rnd_lwidth"] = self._rnd_lwidth
+ self._hyperparameters["rnd_hlayers"] = self._rnd_hlayers
+ self._hyperparameters["rnd_outdim"] = self._rnd_outdim
+ self._hyperparameters["rnd_indim"] = self._rnd_indim
+
+ self._hyperparameters["n_db_envs"] = self._num_db_envs
+ self._hyperparameters["n_expl_envs"] = self._n_expl_envs
+ self._hyperparameters["noise_freq"] = self._noise_freq_vec
+ self._hyperparameters["noise_duration_vec"] = self._noise_duration_vec
+ self._hyperparameters["noise_buff_freq"] = self._noise_buff_freq
+ self._hyperparameters["continuous_act_expl_noise_std"] = self._continuous_act_expl_noise_std
+ self._hyperparameters["discrete_act_expl_noise_std"] = self._discrete_act_expl_noise_std
+
+ self._hyperparameters["n_demo_envs"] = self._env.n_demo_envs()
+
+ self._hyperparameters["bnorm_bsize"] = self._bnorm_bsize
+ self._hyperparameters["bnorm_vecfreq"] = self._bnorm_vecfreq
+
+ self._hyperparameters["validate"] = self._validate
+ self._hyperparameters["validation_ratio"] = self._validation_ratio
+ self._hyperparameters["validation_buffer_size_vec"] = self._validation_buffer_size_vec
+ self._hyperparameters["validation_buffer_size"] = self._validation_buffer_size
+ self._hyperparameters["validation_batch_size"] = self._validation_batch_size
+ self._hyperparameters["validation_collection_vecfreq"] = self._validation_collection_vecfreq
+
+ # small debug log
+ info = f"\nUsing \n" + \
+ f"total (vectorized) timesteps to be simulated {self._total_timesteps_vec}\n" + \
+ f"total timesteps to be simulated {self._total_timesteps}\n" + \
+ f"warmstart timesteps {self._warmstart_timesteps}\n" + \
+ f"training replay buffer size {self._replay_buffer_size}\n" + \
+ f"training replay buffer vec size {self._replay_buffer_size_vec}\n" + \
+ f"training batch size {self._batch_size}\n" + \
+ f"validation enabled {self._validate}\n" + \
+ f"validation buffer size {self._validation_buffer_size}\n" + \
+ f"validation buffer vec size {self._validation_buffer_size_vec}\n" + \
+ f"validation collection freq {self._validation_collection_vecfreq}\n" + \
+ f"validation update freq {self._validation_db_vecstep_freq}\n" + \
+ f"validation batch size {self._validation_batch_size}\n" + \
+ f"policy update freq {self._policy_freq}\n" + \
+ f"target networks freq {self._trgt_net_freq}\n" + \
+ f"episode timeout max steps {self._episode_timeout_ub}\n" + \
+ f"episode timeout min steps {self._episode_timeout_lb}\n" + \
+ f"task rand. max n steps {self._task_rand_timeout_ub}\n" + \
+ f"task rand. min n steps {self._task_rand_timeout_lb}\n" + \
+ f"number of action reps {self._env_n_action_reps}\n" + \
+ f"total policy updates to be performed: {self._n_policy_updates_to_be_done}\n" + \
+ f"total q fun updates to be performed: {self._n_qf_updates_to_be_done}\n" + \
+ f"total trgt q fun updates to be performed: {self._n_tqf_updates_to_be_done}\n" + \
+ f"experience to policy grad ratio: {self._exp_to_policy_grad_ratio}\n" + \
+ f"experience to q fun grad ratio: {self._exp_to_qf_grad_ratio}\n" + \
+ f"experience to trgt q fun grad ratio: {self._exp_to_qft_grad_ratio}\n" + \
+ f"amount of noisy transitions in replay buffer: {self._noise_buff_freq*100}% \n"
+ db_env_idxs=", ".join(map(str, self._db_env_selector.tolist()))
+ n_db_envs_str=f"db envs {self._num_db_envs}/{self._num_envs} \n"
+ info=info + n_db_envs_str + "Debug env. indexes are [" + db_env_idxs+"]\n"
+ if self._env.demo_env_idxs() is not None:
+ demo_idxs_str=", ".join(map(str, self._env.demo_env_idxs().tolist()))
+ n_demo_envs_str=f"demo envs {self._env.n_demo_envs()}/{self._num_envs} \n"
+ info=info + n_demo_envs_str + "Demo env. indexes are [" + demo_idxs_str+"]\n"
+ if self._expl_env_selector is not None:
+ random_expl_idxs=", ".join(map(str, self._expl_env_selector.tolist()))
+ n_expl_envs_str=f"expl envs {self._n_expl_envs}/{self._num_envs} \n"
+ info=info + n_expl_envs_str + "Random exploration env. indexes are [" + random_expl_idxs+"]\n"
+
+ Journal.log(self.__class__.__name__,
+ "_init_params",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+
+ # init counters
+ self._step_counter = 0
+ self._vec_transition_counter = 0
+ self._update_counter = 0
+ self._log_it_counter = 0
+
+ def _init_dbdata(self):
+
+ # initalize some debug data
+ self._collection_dt = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+ self._collection_t = -1.0
+
+ self._env_step_fps = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+ self._env_step_rt_factor = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ self._batch_norm_update_dt = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ self._policy_update_t_start = -1.0
+ self._policy_update_t = -1.0
+ self._policy_update_dt = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+ self._policy_update_fps = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ self._validation_t = -1.0
+ self._validation_dt = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ self._n_of_played_episodes = torch.full((self._db_data_size, 1),
+ dtype=torch.int32, fill_value=0, device="cpu")
+ self._n_timesteps_done = torch.full((self._db_data_size, 1),
+ dtype=torch.int32, fill_value=0, device="cpu")
+ self._n_policy_updates = torch.full((self._db_data_size, 1),
+ dtype=torch.int32, fill_value=0, device="cpu")
+ self._n_qfun_updates = torch.full((self._db_data_size, 1),
+ dtype=torch.int32, fill_value=0, device="cpu")
+ self._n_tqfun_updates = torch.full((self._db_data_size, 1),
+ dtype=torch.int32, fill_value=0, device="cpu")
+ self._elapsed_min = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ self._ep_tsteps_env_distribution = torch.full((self._db_data_size, self._num_db_envs, 1),
+ dtype=torch.int32, fill_value=-1, device="cpu")
+
+ self._reward_names = self._episodic_reward_metrics.reward_names()
+ self._reward_names_str = "[" + ', '.join(self._reward_names) + "]"
+ self._n_rewards = self._episodic_reward_metrics.n_rewards()
+
+ # db environments
+ self._tot_rew_max = torch.full((self._db_data_size, self._num_db_envs, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._tot_rew_avrg = torch.full((self._db_data_size, self._num_db_envs, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._tot_rew_min = torch.full((self._db_data_size, self._num_db_envs, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._tot_rew_max_over_envs = torch.full((self._db_data_size, 1, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._tot_rew_min_over_envs = torch.full((self._db_data_size, 1, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._tot_rew_avrg_over_envs = torch.full((self._db_data_size, 1, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._tot_rew_std_over_envs = torch.full((self._db_data_size, 1, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._sub_rew_max = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_avrg = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_min = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_max_over_envs = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_min_over_envs = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_avrg_over_envs = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_std_over_envs = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ # custom data from env # (log data just from db envs for simplicity)
+ self._custom_env_data = {}
+ db_data_names = list(self._env.custom_db_data.keys())
+ for dbdatan in db_data_names: # loop thorugh custom data
+
+ self._custom_env_data[dbdatan] = {}
+
+ max = self._env.custom_db_data[dbdatan].get_max(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1)
+ avrg = self._env.custom_db_data[dbdatan].get_avrg(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1)
+ min = self._env.custom_db_data[dbdatan].get_min(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1)
+ max_over_envs = self._env.custom_db_data[dbdatan].get_max_over_envs(env_selector=self._db_env_selector).reshape(1, -1)
+ min_over_envs = self._env.custom_db_data[dbdatan].get_min_over_envs(env_selector=self._db_env_selector).reshape(1, -1)
+ avrg_over_envs = self._env.custom_db_data[dbdatan].get_avrg_over_envs(env_selector=self._db_env_selector).reshape(1, -1)
+ std_over_envs = self._env.custom_db_data[dbdatan].get_std_over_envs(env_selector=self._db_env_selector).reshape(1, -1)
+
+ self._custom_env_data[dbdatan]["max"] =torch.full((self._db_data_size,
+ max.shape[0],
+ max.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._custom_env_data[dbdatan]["avrg"] =torch.full((self._db_data_size,
+ avrg.shape[0],
+ avrg.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._custom_env_data[dbdatan]["min"] =torch.full((self._db_data_size,
+ min.shape[0],
+ min.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._custom_env_data[dbdatan]["max_over_envs"] =torch.full((self._db_data_size,
+ max_over_envs.shape[0],
+ max_over_envs.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._custom_env_data[dbdatan]["min_over_envs"] =torch.full((self._db_data_size,
+ min_over_envs.shape[0],
+ min_over_envs.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._custom_env_data[dbdatan]["avrg_over_envs"] =torch.full((self._db_data_size,
+ avrg_over_envs.shape[0],
+ avrg_over_envs.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._custom_env_data[dbdatan]["std_over_envs"] =torch.full((self._db_data_size,
+ std_over_envs.shape[0],
+ std_over_envs.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ # exploration envs
+ if self._n_expl_envs > 0:
+ self._ep_tsteps_expl_env_distribution = torch.full((self._db_data_size, self._n_expl_envs, 1),
+ dtype=torch.int32, fill_value=-1, device="cpu")
+
+ # also log sub rewards metrics for exploration envs
+ self._sub_rew_max_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_avrg_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_min_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_max_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_min_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_avrg_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_std_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ # demo environments
+ self._demo_envs_active = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._demo_perf_metric = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ if self._env.demo_env_idxs() is not None:
+ n_demo_envs=self._env.demo_env_idxs().shape[0]
+
+ self._ep_tsteps_demo_env_distribution = torch.full((self._db_data_size, n_demo_envs, 1),
+ dtype=torch.int32, fill_value=-1, device="cpu")
+
+ # also log sub rewards metrics for exploration envs
+ self._sub_rew_max_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_avrg_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_min_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_max_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_min_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_avrg_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_std_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ # algorithm-specific db info
+ self._qf1_vals_mean = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf2_vals_mean = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._min_qft_vals_mean = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf1_vals_std = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf2_vals_std = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._min_qft_vals_std = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf1_vals_max = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf1_vals_min = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf2_vals_max = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf2_vals_min = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._qf1_loss = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf2_loss = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._actor_loss= torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alpha_loss = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alpha_loss_disc = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alpha_loss_cont = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ if self._validate: # add db data for validation losses
+ self._overfit_index = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf1_loss_validation = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf2_loss_validation = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._actor_loss_validation= torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alpha_loss_validation = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alpha_loss_disc_validation = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alpha_loss_cont_validation = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._alphas = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alphas_disc = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alphas_cont = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._policy_entropy_mean=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_std=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_max=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_min=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_disc_mean=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_disc_std=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_disc_max=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_disc_min=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_cont_mean=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_cont_std=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_cont_max=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_cont_min=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._running_mean_obs=None
+ self._running_std_obs=None
+ if self._agent.obs_running_norm is not None and not self._eval:
+ # some db data for the agent
+ self._running_mean_obs = torch.full((self._db_data_size, self._env.obs_dim()),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+ self._running_std_obs = torch.full((self._db_data_size, self._env.obs_dim()),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ # RND
+ self._rnd_loss=None
+ if self._use_rnd:
+ self._rnd_loss = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._expl_bonus_raw_avrg = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._expl_bonus_raw_std = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ # self._expl_bonus_raw_min = torch.full((self._db_data_size, 1),
+ # dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ # self._expl_bonus_raw_max = torch.full((self._db_data_size, 1),
+ # dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._expl_bonus_proc_avrg = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._expl_bonus_proc_std = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ # self._expl_bonus_proc_min = torch.full((self._db_data_size, 1),
+ # dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ # self._expl_bonus_proc_max = torch.full((self._db_data_size, 1),
+ # dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._n_rnd_updates = torch.full((self._db_data_size, 1),
+ dtype=torch.int32, fill_value=0, device="cpu")
+ self._running_mean_rnd_input = None
+ self._running_std_rnd_input = None
+ if self._rnd_net.obs_running_norm is not None:
+ self._running_mean_rnd_input = torch.full((self._db_data_size, self._rnd_net.input_dim()),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+ self._running_std_rnd_input = torch.full((self._db_data_size, self._rnd_net.input_dim()),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ def _init_agent_optimizers(self):
+ self._qf_optimizer = optim.Adam(list(self._agent.qf1.parameters()) + list(self._agent.qf2.parameters()),
+ lr=self._lr_q)
+ self._actor_optimizer = optim.Adam(list(self._agent.actor.parameters()),
+ lr=self._lr_policy)
+
+ def _init_alpha_autotuning(self):
+ self._log_alpha_disc = torch.full((1,), fill_value=math.log(self._alpha_disc), requires_grad=True, device=self._torch_device)
+ self._log_alpha_cont = torch.full((1,), fill_value=math.log(self._alpha_cont), requires_grad=True, device=self._torch_device)
+ self._alpha_disc = self._log_alpha_disc.exp().item()
+ self._alpha_cont = self._log_alpha_cont.exp().item()
+ self._alpha = 0.5*(self._alpha_disc + self._alpha_cont)
+ self._a_optimizer_disc = optim.Adam([self._log_alpha_disc], lr=self._lr_q)
+ self._a_optimizer_cont = optim.Adam([self._log_alpha_cont], lr=self._lr_q)
+
+ def _init_replay_buffers(self):
+
+ self._bpos = 0
+
+ self._obs = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._obs_dim),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._actions = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._actions_dim),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._rewards = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, 1),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._next_obs = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._obs_dim),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._next_terminal = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, 1),
+ fill_value=False,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+
+ def _init_validation_buffers(self):
+
+ self._bpos_val = 0
+
+ self._obs_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._obs_dim),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._actions_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._actions_dim),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._rewards_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, 1),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._next_obs_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._obs_dim),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._next_terminal_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, 1),
+ fill_value=False,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+
+ def _save_model(self,
+ is_checkpoint: bool = False):
+
+ path = self._model_path
+ if is_checkpoint: # use iteration as id
+ path = path + "_checkpoint" + str(self._log_it_counter)
+ info = f"Saving model to {path}"
+ Journal.log(self.__class__.__name__,
+ "_save_model",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+ agent_state_dict=self._agent.state_dict()
+ if not self._eval: # training
+ # we log the joints which were observed during training
+ observed_joints=self._env.get_observed_joints()
+ if observed_joints is not None:
+ agent_state_dict["observed_jnts"]=self._env.get_observed_joints()
+
+ torch.save(agent_state_dict, path) # saves whole agent state
+ # torch.save(self._agent.parameters(), path) # only save agent parameters
+ info = f"Done."
+ Journal.log(self.__class__.__name__,
+ "_save_model",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+
+ def _dump_env_checkpoints(self):
+
+ path = self._env_db_checkpoints_fname+str(self._log_it_counter)
+
+ if path is not None:
+ info = f"Saving env db checkpoint data to {path}"
+ Journal.log(self.__class__.__name__,
+ "_dump_env_checkpoints",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+
+ with h5py.File(path+".hdf5", 'w') as hf:
+
+ for key, value in self._hyperparameters.items():
+ if value is None:
+ value = "None"
+ hf.attrs[key] = value
+
+ # full training envs
+ sub_rew_full=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._db_env_selector)
+ tot_rew_full=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._db_env_selector)
+
+ if self._n_expl_envs > 0:
+ sub_rew_full_expl=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._expl_env_selector)
+ tot_rew_full_expl=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._expl_env_selector)
+ if self._env.n_demo_envs() > 0:
+ sub_rew_full_demo=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._demo_env_selector)
+ tot_rew_full_demo=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._demo_env_selector)
+
+ ep_vec_freq=self._episodic_reward_metrics.ep_vec_freq() # assuming all db data was collected with the same ep_vec_freq
+
+ hf.attrs['sub_reward_names'] = self._reward_names
+ hf.attrs['log_iteration'] = self._log_it_counter
+ hf.attrs['n_timesteps_done'] = self._n_timesteps_done[self._log_it_counter]
+ hf.attrs['n_policy_updates'] = self._n_policy_updates[self._log_it_counter]
+ hf.attrs['elapsed_min'] = self._elapsed_min[self._log_it_counter]
+ hf.attrs['ep_vec_freq'] = ep_vec_freq
+ hf.attrs['n_expl_envs'] = self._n_expl_envs
+ hf.attrs['n_demo_envs'] = self._env.n_demo_envs()
+
+ # first dump custom db data names
+ db_data_names = list(self._env.custom_db_data.keys())
+ for db_dname in db_data_names:
+ episodic_data_names = self._env.custom_db_data[db_dname].data_names()
+ var_name = db_dname
+ hf.attrs[var_name+"_data_names"] = episodic_data_names
+
+ for ep_idx in range(ep_vec_freq): # create separate datasets for each episode
+ ep_prefix=f'ep_{ep_idx}_'
+
+ # rewards
+ hf.create_dataset(ep_prefix+'sub_rew',
+ data=sub_rew_full[ep_idx, :, :, :])
+ hf.create_dataset(ep_prefix+'tot_rew',
+ data=tot_rew_full[ep_idx, :, :, :])
+ if self._n_expl_envs > 0:
+ hf.create_dataset(ep_prefix+'sub_rew_expl',
+ data=sub_rew_full_expl[ep_idx, :, :, :])
+ hf.create_dataset(ep_prefix+'tot_rew_expl',
+ data=tot_rew_full_expl[ep_idx, :, :, :])
+ hf.create_dataset('expl_env_selector', data=self._expl_env_selector.cpu().numpy())
+ if self._env.n_demo_envs() > 0:
+ hf.create_dataset(ep_prefix+'sub_rew_demo',
+ data=sub_rew_full_demo)
+ hf.create_dataset(ep_prefix+'tot_rew_demo',
+ data=tot_rew_full_demo[ep_idx, :, :, :])
+ hf.create_dataset('demo_env_idxs', data=self._env.demo_env_idxs().cpu().numpy())
+
+ # dump all custom env data
+ db_data_names = list(self._env.custom_db_data.keys())
+ for db_dname in db_data_names:
+ episodic_data=self._env.custom_db_data[db_dname]
+ var_name = db_dname
+ hf.create_dataset(ep_prefix+var_name,
+ data=episodic_data.get_full_episodic_data(env_selector=self._db_env_selector)[ep_idx, :, :, :])
+ if self._n_expl_envs > 0:
+ hf.create_dataset(ep_prefix+var_name+"_expl",
+ data=episodic_data.get_full_episodic_data(env_selector=self._expl_env_selector)[ep_idx, :, :, :])
+ if self._env.n_demo_envs() > 0:
+ hf.create_dataset(ep_prefix+var_name+"_demo",
+ data=episodic_data.get_full_episodic_data(env_selector=self._demo_env_selector)[ep_idx, :, :, :])
+
+ Journal.log(self.__class__.__name__,
+ "_dump_env_checkpoints",
+ "done.",
+ LogType.INFO,
+ throw_when_excep = True)
+
+ def done(self):
+
+ if not self._is_done:
+
+ if not self._eval:
+ self._save_model()
+
+ self._dump_dbinfo_to_file()
+
+ if self._full_env_db:
+ self._dump_env_checkpoints()
+
+ if self._shared_algo_data is not None:
+ self._shared_algo_data.write(dyn_info_name=["is_done"],
+ val=[1.0])
+ self._shared_algo_data.close() # close shared memory
+
+ self._env.close()
+
+ self._is_done = True
+
+ def _dump_dbinfo_to_file(self):
+
+ info = f"Dumping debug info at {self._dbinfo_drop_fname}"
+ Journal.log(self.__class__.__name__,
+ "_dump_dbinfo_to_file",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+
+ with h5py.File(self._dbinfo_drop_fname+".hdf5", 'w') as hf:
+ n_valid = int(max(0, min(self._log_it_counter, self._db_data_size)))
+
+ def _slice_valid(arr):
+ if isinstance(arr, torch.Tensor):
+ return arr[:n_valid]
+ if isinstance(arr, np.ndarray):
+ return arr[:n_valid]
+ return arr
+
+ def _ds(name, arr):
+ data = _slice_valid(arr)
+ hf.create_dataset(name, data=data.numpy() if isinstance(data, torch.Tensor) else data)
+
+ # hf.create_dataset('numpy_data', data=numpy_data)
+ # Write dictionaries to HDF5 as attributes
+ for key, value in self._hyperparameters.items():
+ if value is None:
+ value = "None"
+ hf.attrs[key] = value
+
+ # rewards
+ hf.create_dataset('sub_reward_names', data=self._reward_names,
+ dtype='S40')
+ _ds('sub_rew_max', self._sub_rew_max)
+ _ds('sub_rew_avrg', self._sub_rew_avrg)
+ _ds('sub_rew_min', self._sub_rew_min)
+ _ds('sub_rew_max_over_envs', self._sub_rew_max_over_envs)
+ _ds('sub_rew_min_over_envs', self._sub_rew_min_over_envs)
+ _ds('sub_rew_avrg_over_envs', self._sub_rew_avrg_over_envs)
+ _ds('sub_rew_std_over_envs', self._sub_rew_std_over_envs)
+
+ _ds('tot_rew_max', self._tot_rew_max)
+ _ds('tot_rew_avrg', self._tot_rew_avrg)
+ _ds('tot_rew_min', self._tot_rew_min)
+ _ds('tot_rew_max_over_envs', self._tot_rew_max_over_envs)
+ _ds('tot_rew_min_over_envs', self._tot_rew_min_over_envs)
+ _ds('tot_rew_avrg_over_envs', self._tot_rew_avrg_over_envs)
+ _ds('tot_rew_std_over_envs', self._tot_rew_std_over_envs)
+
+ _ds('ep_tsteps_env_distr', self._ep_tsteps_env_distribution)
+
+ if self._n_expl_envs > 0:
+ # expl envs
+ _ds('sub_rew_max_expl', self._sub_rew_max_expl)
+ _ds('sub_rew_avrg_expl', self._sub_rew_avrg_expl)
+ _ds('sub_rew_min_expl', self._sub_rew_min_expl)
+ _ds('sub_rew_max_over_envs_expl', self._sub_rew_max_over_envs_expl)
+ _ds('sub_rew_min_over_envs_expl', self._sub_rew_min_over_envs_expl)
+ _ds('sub_rew_avrg_over_envs_expl', self._sub_rew_avrg_over_envs_expl)
+ _ds('sub_rew_std_over_envs_expl', self._sub_rew_std_over_envs_expl)
+
+ _ds('ep_timesteps_expl_env_distr', self._ep_tsteps_expl_env_distribution)
+
+ hf.create_dataset('expl_env_selector', data=self._expl_env_selector.numpy())
+
+ _ds('demo_envs_active', self._demo_envs_active)
+ _ds('demo_perf_metric', self._demo_perf_metric)
+
+ if self._env.n_demo_envs() > 0:
+ # demo envs
+ _ds('sub_rew_max_demo', self._sub_rew_max_demo)
+ _ds('sub_rew_avrg_demo', self._sub_rew_avrg_demo)
+ _ds('sub_rew_min_demo', self._sub_rew_min_demo)
+ _ds('sub_rew_max_over_envs_demo', self._sub_rew_max_over_envs_demo)
+ _ds('sub_rew_min_over_envs_demo', self._sub_rew_min_over_envs_demo)
+ _ds('sub_rew_avrg_over_envs_demo', self._sub_rew_avrg_over_envs_demo)
+ _ds('sub_rew_std_over_envs_demo', self._sub_rew_std_over_envs_demo)
+
+ _ds('ep_timesteps_demo_env_distr', self._ep_tsteps_demo_env_distribution)
+
+ hf.create_dataset('demo_env_idxs', data=self._env.demo_env_idxs().numpy())
+
+ # profiling data
+ _ds('env_step_fps', self._env_step_fps)
+ _ds('env_step_rt_factor', self._env_step_rt_factor)
+ _ds('collection_dt', self._collection_dt)
+ _ds('batch_norm_update_dt', self._batch_norm_update_dt)
+ _ds('policy_update_dt', self._policy_update_dt)
+ _ds('policy_update_fps', self._policy_update_fps)
+ _ds('validation_dt', self._validation_dt)
+
+ _ds('n_of_played_episodes', self._n_of_played_episodes)
+ _ds('n_timesteps_done', self._n_timesteps_done)
+ _ds('n_policy_updates', self._n_policy_updates)
+ _ds('n_qfun_updates', self._n_qfun_updates)
+ _ds('n_tqfun_updates', self._n_tqfun_updates)
+
+ _ds('elapsed_min', self._elapsed_min)
+
+ # algo data
+ _ds('qf1_vals_mean', self._qf1_vals_mean)
+ _ds('qf2_vals_mean', self._qf2_vals_mean)
+ _ds('qf1_vals_std', self._qf1_vals_std)
+ _ds('qf2_vals_std', self._qf2_vals_std)
+ _ds('qf1_vals_max', self._qf1_vals_max)
+ _ds('qf1_vals_min', self._qf1_vals_min)
+ _ds('qf2_vals_max', self._qf2_vals_max)
+ _ds('qf2_vals_min', self._qf1_vals_min)
+
+ _ds('min_qft_vals_mean', self._min_qft_vals_mean)
+ _ds('min_qft_vals_std', self._min_qft_vals_std)
+
+ _ds('qf1_loss', self._qf1_loss)
+ _ds('qf2_loss', self._qf2_loss)
+ _ds('actor_loss', self._actor_loss)
+ _ds('alpha_loss', self._alpha_loss)
+ _ds('alpha_loss_disc', self._alpha_loss_disc)
+ _ds('alpha_loss_cont', self._alpha_loss_cont)
+ if self._validate:
+ _ds('qf1_loss_validation', self._qf1_loss_validation)
+ _ds('qf2_loss_validation', self._qf2_loss_validation)
+ _ds('actor_loss_validation', self._actor_loss_validation)
+ _ds('alpha_loss_validation', self._alpha_loss_validation)
+ _ds('alpha_loss_disc_validation', self._alpha_loss_disc_validation)
+ _ds('alpha_loss_cont_validation', self._alpha_loss_cont_validation)
+ _ds('overfit_index', self._overfit_index)
+
+ _ds('alphas', self._alphas)
+ _ds('alphas_disc', self._alphas_disc)
+ _ds('alphas_cont', self._alphas_cont)
+
+ _ds('policy_entropy_mean', self._policy_entropy_mean)
+ _ds('policy_entropy_std', self._policy_entropy_std)
+ _ds('policy_entropy_max', self._policy_entropy_max)
+ _ds('policy_entropy_min', self._policy_entropy_min)
+ _ds('policy_entropy_disc_mean', self._policy_entropy_disc_mean)
+ _ds('policy_entropy_disc_std', self._policy_entropy_disc_std)
+ _ds('policy_entropy_disc_max', self._policy_entropy_disc_max)
+ _ds('policy_entropy_disc_min', self._policy_entropy_disc_min)
+ _ds('policy_entropy_cont_mean', self._policy_entropy_cont_mean)
+ _ds('policy_entropy_cont_std', self._policy_entropy_cont_std)
+ _ds('policy_entropy_cont_max', self._policy_entropy_cont_max)
+ _ds('policy_entropy_cont_min', self._policy_entropy_cont_min)
+ hf.create_dataset('target_entropy', data=self._target_entropy)
+ hf.create_dataset('target_entropy_disc', data=self._target_entropy_disc)
+ hf.create_dataset('target_entropy_cont', data=self._target_entropy_cont)
+
+ if self._use_rnd:
+ _ds('n_rnd_updates', self._n_rnd_updates)
+ _ds('expl_bonus_raw_avrg', self._expl_bonus_raw_avrg)
+ _ds('expl_bonus_raw_std', self._expl_bonus_raw_std)
+ _ds('expl_bonus_proc_avrg', self._expl_bonus_proc_avrg)
+ _ds('expl_bonus_proc_std', self._expl_bonus_proc_std)
+
+ if self._rnd_net.obs_running_norm is not None:
+ if self._running_mean_rnd_input is not None:
+ _ds('running_mean_rnd_input', self._running_mean_rnd_input)
+ if self._running_std_rnd_input is not None:
+ _ds('running_std_rnd_input', self._running_std_rnd_input)
+
+ # dump all custom env data
+ db_data_names = list(self._env.custom_db_data.keys())
+ for db_dname in db_data_names:
+ data=self._custom_env_data[db_dname]
+ subnames = list(data.keys())
+ for subname in subnames:
+ var_name = db_dname + "_" + subname
+ _ds(var_name, data[subname])
+
+ # other data
+ if self._agent.obs_running_norm is not None:
+ if self._running_mean_obs is not None:
+ _ds('running_mean_obs', self._running_mean_obs)
+ if self._running_std_obs is not None:
+ _ds('running_std_obs', self._running_std_obs)
+
+ info = f"done."
+ Journal.log(self.__class__.__name__,
+ "_dump_dbinfo_to_file",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+
+ def _load_model(self,
+ model_path: str):
+
+ info = f"Loading model at {model_path}"
+
+ Journal.log(self.__class__.__name__,
+ "_load_model",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+ model_dict=torch.load(model_path,
+ map_location=self._torch_device,
+ weights_only=False)
+
+ observed_joints=self._env.get_observed_joints()
+ if not ("observed_jnts" in model_dict):
+ Journal.log(self.__class__.__name__,
+ "_load_model",
+ "No observed joints key found in loaded model dictionary! Let's hope joints are ordered in the same way.",
+ LogType.WARN)
+ else:
+ required_joints=model_dict["observed_jnts"]
+ self._check_observed_joints(observed_joints,required_joints)
+
+ self._agent.load_state_dict(model_dict)
+
+ if self._eval:
+ self._switch_training_mode(False)
+
+ def _check_observed_joints(self,
+ observed_joints,
+ required_joints):
+
+ observed=set(observed_joints)
+ required=set(required_joints)
+
+ all_required_joints_avail = required.issubset(observed)
+ if not all_required_joints_avail:
+ missing=[item for item in required if item not in observed]
+ missing_str=', '.join(missing)
+ Journal.log(self.__class__.__name__,
+ "_check_observed_joints",
+ f"not all required joints are available. Missing {missing_str}",
+ LogType.EXCEP,
+ throw_when_excep = True)
+ exceeding=observed-required
+ if not len(exceeding)==0:
+ # do not support having more joints than the required
+ exc_jnts=" ".join(list(exceeding))
+ Journal.log(self.__class__.__name__,
+ "_check_observed_joints",
+ f"more than the required joints found in the observed joint: {exc_jnts}",
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ # here we are sure that required and observed sets match
+ self._to_agent_jnt_remap=None
+ if not required_joints==observed_joints:
+ Journal.log(self.__class__.__name__,
+ "_check_observed_joints",
+ f"required jnt obs from agent have different ordering from observed ones. Will compute a remapping.",
+ LogType.WARN,
+ throw_when_excep = True)
+ self._to_agent_jnt_remap = [observed_joints.index(element) for element in required_joints]
+
+ self._env.set_jnts_remapping(remapping= self._to_agent_jnt_remap)
+
    def drop_dir(self):
        # Return the root directory where this run drops all of its output
        # (model, db info, copied sources, env db checkpoints).
        return self._drop_dir
+
+ def _init_drop_dir(self,
+ drop_dir_name: str = None):
+
+ # main drop directory
+ if drop_dir_name is None:
+ # drop to current directory
+ self._drop_dir = "./" + f"{self.__class__.__name__}/" + self._run_name + "/" + self._unique_id
+ else:
+ self._drop_dir = drop_dir_name + "/" + f"{self.__class__.__name__}/" + self._run_name + "/" + self._unique_id
+ os.makedirs(self._drop_dir)
+
+ self._env_db_checkpoints_dropdir=None
+ self._env_db_checkpoints_fname=None
+ if self._full_env_db>0:
+ self._env_db_checkpoints_dropdir=self._drop_dir+"/env_db_checkpoints"
+ self._env_db_checkpoints_fname = self._env_db_checkpoints_dropdir + \
+ "/" + self._unique_id + "_env_db_checkpoint"
+ os.makedirs(self._env_db_checkpoints_dropdir)
+ # model
+ if not self._eval or (self._model_path is None):
+ self._model_path = self._drop_dir + "/" + self._unique_id + "_model"
+ else: # we copy the model under evaluation to the drop dir
+ shutil.copy(self._model_path, self._drop_dir)
+
+ # debug info
+ self._dbinfo_drop_fname = self._drop_dir + "/" + self._unique_id + "db_info" # extension added later
+
+ # other auxiliary db files
+ aux_drop_dir = self._drop_dir + "/other"
+ os.makedirs(aux_drop_dir)
+ filepaths = self._env.get_file_paths() # envs implementation
+ filepaths.append(self._this_basepath) # algorithm implementation
+ filepaths.append(self._this_child_path)
+ filepaths.append(self._agent.get_impl_path()) # agent implementation
+ for file in filepaths:
+ shutil.copy(file, self._drop_dir)
+ aux_dirs = self._env.get_aux_dir()
+ for aux_dir in aux_dirs:
+ shutil.copytree(aux_dir, aux_drop_dir, dirs_exist_ok=True)
+
+ write_bundle_manifest(
+ bundle_dir=self._drop_dir,
+ checkpoint_file=os.path.basename(self._model_path),
+ )
+
    def _get_performance_metric(self):
        # Hook meant to be overridden by subclasses: returns a scalar performance
        # metric (used in _post_step, e.g. to decide demo-env deactivation).
        # Base implementation returns a neutral 0.0.
        return 0.0
+
+ def _post_step(self):
+
+ self._collection_dt[self._log_it_counter] += \
+ (self._collection_t-self._start_time)
+ self._batch_norm_update_dt[self._log_it_counter] += \
+ (self._policy_update_t_start-self._collection_t)
+ self._policy_update_dt[self._log_it_counter] += \
+ (self._policy_update_t - self._policy_update_t_start)
+ if self._validate:
+ self._validation_dt[self._log_it_counter] += \
+ (self._validation_t - self._policy_update_t)
+
+ self._step_counter+=1 # counts algo steps
+
+ self._demo_envs_active[self._log_it_counter]=self._env.demo_active()
+ self._demo_perf_metric[self._log_it_counter]=self._get_performance_metric()
+ if self._env.n_demo_envs() > 0 and (self._demo_stop_thresh is not None):
+ # check if deactivation condition applies
+ self._env.switch_demo(active=self._demo_perf_metric[self._log_it_counter] 0:
+ self._ep_tsteps_expl_env_distribution[self._log_it_counter, :]=\
+ self._episodic_reward_metrics.step_counters(env_selector=self._expl_env_selector)*self._env_n_action_reps
+
+ self._sub_rew_max_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max(env_selector=self._expl_env_selector)
+ self._sub_rew_avrg_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg(env_selector=self._expl_env_selector)
+ self._sub_rew_min_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min(env_selector=self._expl_env_selector)
+ self._sub_rew_max_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max_over_envs(env_selector=self._expl_env_selector)
+ self._sub_rew_min_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min_over_envs(env_selector=self._expl_env_selector)
+ self._sub_rew_avrg_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg_over_envs(env_selector=self._expl_env_selector)
+ self._sub_rew_std_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_std_over_envs(env_selector=self._expl_env_selector)
+
+ # demo envs
+ if self._env.n_demo_envs() > 0 and self._env.demo_active():
+ # only log if demo envs are active (db data will remaing to nan if that case)
+ self._ep_tsteps_demo_env_distribution[self._log_it_counter, :]=\
+ self._episodic_reward_metrics.step_counters(env_selector=self._demo_env_selector)*self._env_n_action_reps
+
+ self._sub_rew_max_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max(env_selector=self._demo_env_selector)
+ self._sub_rew_avrg_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg(env_selector=self._demo_env_selector)
+ self._sub_rew_min_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min(env_selector=self._demo_env_selector)
+ self._sub_rew_max_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max_over_envs(env_selector=self._demo_env_selector)
+ self._sub_rew_min_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min_over_envs(env_selector=self._demo_env_selector)
+ self._sub_rew_avrg_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg_over_envs(env_selector=self._demo_env_selector)
+ self._sub_rew_std_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_std_over_envs(env_selector=self._demo_env_selector)
+
+ # other data
+ if self._agent.obs_running_norm is not None:
+ if self._running_mean_obs is not None:
+ self._running_mean_obs[self._log_it_counter, :] = self._agent.obs_running_norm.get_current_mean()
+ if self._running_std_obs is not None:
+ self._running_std_obs[self._log_it_counter, :] = self._agent.obs_running_norm.get_current_std()
+
+ if self._use_rnd:
+ if self._running_mean_rnd_input is not None:
+ self._running_mean_rnd_input[self._log_it_counter, :] = self._rnd_net.obs_running_norm.get_current_mean()
+ if self._running_std_rnd_input is not None:
+ self._running_std_rnd_input[self._log_it_counter, :] = self._rnd_net.obs_running_norm.get_current_std()
+
+ # write some episodic db info on shared mem
+ sub_returns=self._sub_returns.get_torch_mirror(gpu=False)
+ sub_returns[:, :]=self._episodic_reward_metrics.get_sub_rew_avrg()
+ tot_returns=self._tot_returns.get_torch_mirror(gpu=False)
+ tot_returns[:, :]=self._episodic_reward_metrics.get_tot_rew_avrg()
+ self._sub_returns.synch_all(read=False)
+ self._tot_returns.synch_all(read=False)
+
+ self._log_info()
+
+ self._log_it_counter+=1
+
+ if self._dump_checkpoints and \
+ (self._vec_transition_counter % self._m_checkpoint_freq == 0):
+ self._save_model(is_checkpoint=True)
+
+ if self._full_env_db and \
+ (self._vec_transition_counter % self._env_db_checkpoints_vecfreq == 0):
+ self._dump_env_checkpoints()
+
+ if self._vec_transition_counter == self._total_timesteps_vec:
+ self.done()
+
+ def _should_have_called_setup(self):
+
+ exception = f"setup() was not called!"
+
+ Journal.log(self.__class__.__name__,
+ "_should_have_called_setup",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ def _log_info(self):
+
+ if self._debug or self._verbose:
+ elapsed_h = self._elapsed_min[self._log_it_counter].item()/60.0
+ est_remaining_time_h = elapsed_h * 1/(self._vec_transition_counter) * (self._total_timesteps_vec-self._vec_transition_counter)
+ is_done=self._vec_transition_counter==self._total_timesteps_vec
+
+ actual_tsteps_with_updates=-1
+ experience_to_policy_grad_ratio=-1
+ experience_to_qfun_grad_ratio=-1
+ experience_to_tqfun_grad_ratio=-1
+ if not self._eval:
+ actual_tsteps_with_updates=(self._n_timesteps_done[self._log_it_counter].item()-self._warmstart_timesteps)
+ epsi=1e-6 # to avoid div by 0
+ experience_to_policy_grad_ratio=actual_tsteps_with_updates/(self._n_policy_updates[self._log_it_counter].item()-epsi)
+ experience_to_qfun_grad_ratio=actual_tsteps_with_updates/(self._n_qfun_updates[self._log_it_counter].item()-epsi)
+ experience_to_tqfun_grad_ratio=actual_tsteps_with_updates/(self._n_tqfun_updates[self._log_it_counter].item()-epsi)
+
+ if self._debug:
+
+ if self._remote_db:
+ # write general algo debug info to shared memory
+ info_names=self._shared_algo_data.dynamic_info.get()
+ info_data = [
+ self._n_timesteps_done[self._log_it_counter].item(),
+ self._n_policy_updates[self._log_it_counter].item(),
+ experience_to_policy_grad_ratio,
+ elapsed_h,
+ est_remaining_time_h,
+ self._env_step_fps[self._log_it_counter].item(),
+ self._env_step_rt_factor[self._log_it_counter].item(),
+ self._collection_dt[self._log_it_counter].item(),
+ self._policy_update_fps[self._log_it_counter].item(),
+ self._policy_update_dt[self._log_it_counter].item(),
+ is_done,
+ self._n_of_played_episodes[self._log_it_counter].item(),
+ self._batch_norm_update_dt[self._log_it_counter].item(),
+ ]
+ self._shared_algo_data.write(dyn_info_name=info_names,
+ val=info_data)
+
+ # write debug info to remote wandb server
+ db_data_names = list(self._env.custom_db_data.keys())
+ for dbdatan in db_data_names:
+ data = self._custom_env_data[dbdatan]
+ data_names = self._env.custom_db_data[dbdatan].data_names()
+
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_max":
+ wandb.Histogram(data["max"][self._log_it_counter, :, :].numpy())})
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_avrg":
+ wandb.Histogram(data["avrg"][self._log_it_counter, :, :].numpy())})
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_min":
+ wandb.Histogram(data["min"][self._log_it_counter, :, :].numpy())})
+
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_max_over_envs":
+ data["max_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))})
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_min_over_envs":
+ data["min_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))})
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_avrg_over_envs":
+ data["avrg_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))})
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_std_over_envs":
+ data["std_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))})
+
+ self._wandb_d.update({'log_iteration' : self._log_it_counter})
+ self._wandb_d.update(dict(zip(info_names, info_data)))
+
+ # debug environments
+ self._wandb_d.update({'correlation_db/ep_timesteps_env_distr':
+ wandb.Histogram(self._ep_tsteps_env_distribution[self._log_it_counter, :, :].numpy())})
+
+ self._wandb_d.update({'tot_reward/tot_rew_max': wandb.Histogram(self._tot_rew_max[self._log_it_counter, :, :].numpy()),
+ 'tot_reward/tot_rew_avrg': wandb.Histogram(self._tot_rew_avrg[self._log_it_counter, :, :].numpy()),
+ 'tot_reward/tot_rew_min': wandb.Histogram(self._tot_rew_min[self._log_it_counter, :, :].numpy()),
+ 'tot_reward/tot_rew_max_over_envs': self._tot_rew_max_over_envs[self._log_it_counter, :, :].item(),
+ 'tot_reward/tot_rew_min_over_envs': self._tot_rew_min_over_envs[self._log_it_counter, :, :].item(),
+ 'tot_reward/tot_rew_avrg_over_envs': self._tot_rew_avrg_over_envs[self._log_it_counter, :, :].item(),
+ 'tot_reward/tot_rew_std_over_envs': self._tot_rew_std_over_envs[self._log_it_counter, :, :].item(),
+ })
+ # sub rewards from db envs
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_max":
+ wandb.Histogram(self._sub_rew_max.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_min":
+ wandb.Histogram(self._sub_rew_min.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_avrg":
+ wandb.Histogram(self._sub_rew_avrg.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+
+
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_max_over_envs":
+ self._sub_rew_max_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_min_over_envs":
+ self._sub_rew_min_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_avrg_over_envs":
+ self._sub_rew_avrg_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_std_over_envs":
+ self._sub_rew_std_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+
+ # exploration envs
+ if self._n_expl_envs > 0:
+ self._wandb_d.update({'correlation_db/ep_timesteps_expl_env_distr':
+ wandb.Histogram(self._ep_tsteps_expl_env_distribution[self._log_it_counter, :, :].numpy())})
+
+ # sub reward from expl envs
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_max_expl":
+ wandb.Histogram(self._sub_rew_max_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_avrg_expl":
+ wandb.Histogram(self._sub_rew_avrg_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_min_expl":
+ wandb.Histogram(self._sub_rew_min_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_max_over_envs_expl":
+ self._sub_rew_max_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_min_over_envs_expl":
+ self._sub_rew_min_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_avrg_over_envs_expl":
+ self._sub_rew_avrg_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_std_over_envs_expl":
+ self._sub_rew_std_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+
+ # demo envs (only log if active, otherwise we log nans which wandb doesn't like)
+ if self._env.n_demo_envs() > 0:
+ if self._env.demo_active():
+ # log hystograms only if there are no nan in the data
+ self._wandb_d.update({'correlation_db/ep_timesteps_demo_env_distr':
+ wandb.Histogram(self._ep_tsteps_demo_env_distribution[self._log_it_counter, :, :].numpy())})
+
+ # sub reward from expl envs
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_max_demo":
+ wandb.Histogram(self._sub_rew_max_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_avrg_demo":
+ wandb.Histogram(self._sub_rew_avrg_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_min_demo":
+ wandb.Histogram(self._sub_rew_min_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_max_over_envs_demo":
+ self._sub_rew_max_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_min_over_envs_demo":
+ self._sub_rew_min_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_avrg_over_envs_demo":
+ self._sub_rew_avrg_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_std_over_envs_demo":
+ self._sub_rew_std_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+
+ if self._vec_transition_counter > (self._warmstart_vectimesteps-1):
+ # algo info
+ self._policy_update_db_data_dict.update({
+ "sac_q_info/qf1_vals_mean": self._qf1_vals_mean[self._log_it_counter, 0],
+ "sac_q_info/qf2_vals_mean": self._qf2_vals_mean[self._log_it_counter, 0],
+ "sac_q_info/min_qft_vals_mean": self._min_qft_vals_mean[self._log_it_counter, 0],
+ "sac_q_info/qf1_vals_std": self._qf1_vals_std[self._log_it_counter, 0],
+ "sac_q_info/qf2_vals_std": self._qf2_vals_std[self._log_it_counter, 0],
+ "sac_q_info/min_qft_vals_std": self._min_qft_vals_std[self._log_it_counter, 0],
+ "sac_q_info/qf1_vals_max": self._qf1_vals_max[self._log_it_counter, 0],
+ "sac_q_info/qf2_vals_max": self._qf2_vals_max[self._log_it_counter, 0],
+ "sac_q_info/qf1_vals_min": self._qf1_vals_min[self._log_it_counter, 0],
+ "sac_q_info/qf2_vals_min": self._qf2_vals_min[self._log_it_counter, 0],
+
+ "sac_actor_info/policy_entropy_mean": self._policy_entropy_mean[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_std": self._policy_entropy_std[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_max": self._policy_entropy_max[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_min": self._policy_entropy_min[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_disc_mean": self._policy_entropy_disc_mean[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_disc_std": self._policy_entropy_disc_std[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_disc_max": self._policy_entropy_disc_max[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_disc_min": self._policy_entropy_disc_min[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_cont_mean": self._policy_entropy_cont_mean[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_cont_std": self._policy_entropy_cont_std[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_cont_max": self._policy_entropy_cont_max[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_cont_min": self._policy_entropy_cont_min[self._log_it_counter, 0],
+
+ "sac_q_info/qf1_loss": self._qf1_loss[self._log_it_counter, 0],
+ "sac_q_info/qf2_loss": self._qf2_loss[self._log_it_counter, 0],
+ "sac_actor_info/actor_loss": self._actor_loss[self._log_it_counter, 0]})
+ alpha_logs = {
+ "sac_alpha_info/alpha": self._alphas[self._log_it_counter, 0],
+ "sac_alpha_info/alpha_disc": self._alphas_disc[self._log_it_counter, 0],
+ "sac_alpha_info/alpha_cont": self._alphas_cont[self._log_it_counter, 0],
+ "sac_alpha_info/target_entropy": self._target_entropy,
+ "sac_alpha_info/target_entropy_disc": self._target_entropy_disc,
+ "sac_alpha_info/target_entropy_cont": self._target_entropy_cont
+ }
+ if self._autotune:
+ alpha_logs.update({
+ "sac_alpha_info/alpha_loss": self._alpha_loss[self._log_it_counter, 0],
+ "sac_alpha_info/alpha_loss_disc": self._alpha_loss_disc[self._log_it_counter, 0],
+ "sac_alpha_info/alpha_loss_cont": self._alpha_loss_cont[self._log_it_counter, 0],
+ })
+ self._policy_update_db_data_dict.update(alpha_logs)
+
+ if self._validate:
+ self._policy_update_db_data_dict.update({
+ "sac_q_info/qf1_loss_validation": self._qf1_loss_validation[self._log_it_counter, 0],
+ "sac_q_info/qf2_loss_validation": self._qf2_loss_validation[self._log_it_counter, 0],
+ "sac_q_info/overfit_index": self._overfit_index[self._log_it_counter, 0],
+ "sac_actor_info/actor_loss_validation": self._actor_loss_validation[self._log_it_counter, 0]})
+ if self._autotune:
+ self._policy_update_db_data_dict.update({
+ "sac_alpha_info/alpha_loss_validation": self._alpha_loss_validation[self._log_it_counter, 0],
+ "sac_alpha_info/alpha_loss_disc_validation": self._alpha_loss_disc_validation[self._log_it_counter, 0],
+ "sac_alpha_info/alpha_loss_cont_validation": self._alpha_loss_cont_validation[self._log_it_counter, 0]})
+
+ self._wandb_d.update(self._policy_update_db_data_dict)
+
+ if self._use_rnd:
+ self._rnd_db_data_dict.update({
+ "rnd_info/expl_bonus_raw_avrg": self._expl_bonus_raw_avrg[self._log_it_counter, 0],
+ "rnd_info/expl_bonus_raw_std": self._expl_bonus_raw_std[self._log_it_counter, 0],
+ "rnd_info/expl_bonus_proc_avrg": self._expl_bonus_proc_avrg[self._log_it_counter, 0],
+ "rnd_info/expl_bonus_proc_std": self._expl_bonus_proc_std[self._log_it_counter, 0],
+ "rnd_info/rnd_loss": self._rnd_loss[self._log_it_counter, 0],
+ })
+ self._wandb_d.update(self._rnd_db_data_dict)
+
+ if self._rnd_net.obs_running_norm is not None:
+ # adding info on running normalizer if used
+ if self._running_mean_rnd_input is not None:
+ self._wandb_d.update({f"rnd_info/running_mean_rhc_input": self._running_mean_rnd_input[self._log_it_counter, :]})
+ if self._running_std_rnd_input is not None:
+ self._wandb_d.update({f"rnd_info/running_std_rhc_input": self._running_std_rnd_input[self._log_it_counter, :]})
+
+ if self._agent.obs_running_norm is not None:
+ # adding info on running normalizer if used
+ if self._running_mean_obs is not None:
+ self._wandb_d.update({f"running_norm/mean": self._running_mean_obs[self._log_it_counter, :]})
+ if self._running_std_obs is not None:
+ self._wandb_d.update({f"running_norm/std": self._running_std_obs[self._log_it_counter, :]})
+
+ self._wandb_d.update(self._custom_env_data_db_dict)
+
+ wandb.log(self._wandb_d)
+
+ if self._verbose:
+
+ info =f"\nTotal n. timesteps simulated: {self._n_timesteps_done[self._log_it_counter].item()}/{self._total_timesteps}\n" + \
+ f"N. policy updates performed: {self._n_policy_updates[self._log_it_counter].item()}/{self._n_policy_updates_to_be_done}\n" + \
+ f"N. q fun updates performed: {self._n_qfun_updates[self._log_it_counter].item()}/{self._n_qf_updates_to_be_done}\n" + \
+ f"N. trgt q fun updates performed: {self._n_tqfun_updates[self._log_it_counter].item()}/{self._n_tqf_updates_to_be_done}\n" + \
+ f"experience to policy grad ratio: {experience_to_policy_grad_ratio}\n" + \
+ f"experience to q fun grad ratio: {experience_to_qfun_grad_ratio}\n" + \
+ f"experience to trgt q fun grad ratio: {experience_to_tqfun_grad_ratio}\n"+ \
+ f"Warmstart completed: {self._vec_transition_counter > self._warmstart_vectimesteps or self._eval} ; ({self._vec_transition_counter}/{self._warmstart_vectimesteps})\n" +\
+ f"Replay buffer full: {self._replay_bf_full}; current position {self._bpos}/{self._replay_buffer_size_vec}\n" +\
+ f"Validation buffer full: {self._validation_bf_full}; current position {self._bpos_val}/{self._validation_buffer_size_vec}\n" +\
+ f"Elapsed time: {self._elapsed_min[self._log_it_counter].item()/60.0} h\n" + \
+ f"Estimated remaining training time: " + \
+ f"{est_remaining_time_h} h\n" + \
+ f"Total reward episodic data --> \n" + \
+ f"max: {self._tot_rew_max_over_envs[self._log_it_counter, :, :].item()}\n" + \
+ f"avg: {self._tot_rew_avrg_over_envs[self._log_it_counter, :, :].item()}\n" + \
+ f"min: {self._tot_rew_min_over_envs[self._log_it_counter, :, :].item()}\n" + \
+ f"Episodic sub-rewards episodic data --> \nsub rewards names: {self._reward_names_str}\n" + \
+ f"max: {self._sub_rew_max_over_envs[self._log_it_counter, :]}\n" + \
+ f"min: {self._sub_rew_min_over_envs[self._log_it_counter, :]}\n" + \
+ f"avg: {self._sub_rew_avrg_over_envs[self._log_it_counter, :]}\n" + \
+ f"std: {self._sub_rew_std_over_envs[self._log_it_counter, :]}\n" + \
+ f"N. of episodes on which episodic rew stats are computed: {self._n_of_played_episodes[self._log_it_counter].item()}\n" + \
+ f"Current env. step sps: {self._env_step_fps[self._log_it_counter].item()}, time for experience collection {self._collection_dt[self._log_it_counter].item()} s\n" + \
+ f"Current env (sub-stepping) rt factor: {self._env_step_rt_factor[self._log_it_counter].item()}\n" + \
+ f"Current policy update fps: {self._policy_update_fps[self._log_it_counter].item()}, time for policy updates {self._policy_update_dt[self._log_it_counter].item()} s\n" + \
+ f"Time spent updating batch normalizations {self._batch_norm_update_dt[self._log_it_counter].item()} s\n" + \
+ f"Time spent for computing validation {self._validation_dt[self._log_it_counter].item()} s\n" + \
+ f"Demo envs are active: {self._demo_envs_active[self._log_it_counter].item()}. N demo envs if active {self._env.n_demo_envs()}\n" + \
+ f"Performance metric now: {self._demo_perf_metric[self._log_it_counter].item()}\n" + \
+ f"Entropy (disc): current {float(self._policy_entropy_disc_mean[self._log_it_counter, 0]):.4f}/{self._target_entropy_disc:.4f}\n" + \
+ f"Entropy (cont): current {float(self._policy_entropy_cont_mean[self._log_it_counter, 0]):.4f}/{self._target_entropy_cont:.4f}\n"
+ if self._use_rnd:
+ info = info + f"N. rnd updates performed: {self._n_rnd_updates[self._log_it_counter].item()}\n"
+
+ Journal.log(self.__class__.__name__,
+ "_post_step",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+
+ def _add_experience(self,
+ obs: torch.Tensor, actions: torch.Tensor, rewards: torch.Tensor,
+ next_obs: torch.Tensor,
+ next_terminal: torch.Tensor) -> None:
+
+ if self._validate and \
+ (self._vec_transition_counter % self._validation_collection_vecfreq == 0):
+ # fill validation buffer
+
+ self._obs_val[self._bpos_val] = obs
+ self._next_obs_val[self._bpos_val] = next_obs
+ self._actions_val[self._bpos_val] = actions
+ self._rewards_val[self._bpos_val] = rewards
+ self._next_terminal_val[self._bpos_val] = next_terminal
+
+ self._bpos_val += 1
+ if self._bpos_val == self._validation_buffer_size_vec:
+ self._validation_bf_full = True
+ self._bpos_val = 0
+
+ else: # fill normal replay buffer
+ self._obs[self._bpos] = obs
+ self._next_obs[self._bpos] = next_obs
+ self._actions[self._bpos] = actions
+ self._rewards[self._bpos] = rewards
+ self._next_terminal[self._bpos] = next_terminal
+
+ self._bpos += 1
+ if self._bpos == self._replay_buffer_size_vec:
+ self._replay_bf_full = True
+ self._bpos = 0
+
+ def _sample(self, size: int = None):
+
+ if size is None:
+ size=self._batch_size
+
+ batched_obs = self._obs.view((-1, self._env.obs_dim()))
+ batched_next_obs = self._next_obs.view((-1, self._env.obs_dim()))
+ batched_actions = self._actions.view((-1, self._env.actions_dim()))
+ batched_rewards = self._rewards.view(-1)
+ batched_terminal = self._next_terminal.view(-1)
+
+ # sampling from the batched buffer
+ up_to = self._replay_buffer_size if self._replay_bf_full else self._bpos*self._num_envs
+ shuffled_buffer_idxs = torch.randint(0, up_to,
+ (size,))
+
+ sampled_obs = batched_obs[shuffled_buffer_idxs]
+ sampled_next_obs = batched_next_obs[shuffled_buffer_idxs]
+ sampled_actions = batched_actions[shuffled_buffer_idxs]
+ sampled_rewards = batched_rewards[shuffled_buffer_idxs]
+ sampled_terminal = batched_terminal[shuffled_buffer_idxs]
+
+ return sampled_obs, sampled_actions,\
+ sampled_next_obs,\
+ sampled_rewards, \
+ sampled_terminal
+
+ def _sample_validation(self, size: int = None):
+
+ if size is None:
+ size=self._validation_batch_size
+
+ batched_obs = self._obs_val.view((-1, self._env.obs_dim()))
+ batched_next_obs = self._next_obs_val.view((-1, self._env.obs_dim()))
+ batched_actions = self._actions_val.view((-1, self._env.actions_dim()))
+ batched_rewards = self._rewards_val.view(-1)
+ batched_terminal = self._next_terminal_val.view(-1)
+
+ # sampling from the batched buffer
+ up_to = self._validation_buffer_size if self._validation_bf_full else self._bpos_val*self._num_envs
+ shuffled_buffer_idxs = torch.randint(0, up_to,
+ (size,))
+
+ sampled_obs = batched_obs[shuffled_buffer_idxs]
+ sampled_next_obs = batched_next_obs[shuffled_buffer_idxs]
+ sampled_actions = batched_actions[shuffled_buffer_idxs]
+ sampled_rewards = batched_rewards[shuffled_buffer_idxs]
+ sampled_terminal = batched_terminal[shuffled_buffer_idxs]
+
+ return sampled_obs, sampled_actions,\
+ sampled_next_obs,\
+ sampled_rewards, \
+ sampled_terminal
+
+ def _sample_random_actions(self):
+
+ self._random_uniform.uniform_(-1,1)
+ random_actions = self._random_uniform
+
+ return random_actions
+
+ def _perturb_some_actions(self,
+ actions: torch.Tensor):
+
+ if self._is_continuous_actions_bool.any(): # if there are any continuous actions
+ self._perturb_actions(actions,
+ action_idxs=self._is_continuous_actions,
+ env_idxs=self._expl_env_selector.to(actions.device),
+ normal=True, # use normal for continuous
+ scaling=self._continuous_act_expl_noise_std)
+ if self._is_discrete_actions_bool.any(): # actions to be treated as discrete
+ self._perturb_actions(actions,
+ action_idxs=self._is_discrete_actions,
+ env_idxs=self._expl_env_selector.to(actions.device),
+ normal=False, # use uniform distr for discrete
+ scaling=self._discrete_act_expl_noise_std)
+ self._pert_counter+=1
+ if self._pert_counter >= self._noise_duration_vec:
+ self._pert_counter=0
+
+ def _perturb_actions(self,
+ actions: torch.Tensor,
+ action_idxs: torch.Tensor,
+ env_idxs: torch.Tensor,
+ normal: bool = True,
+ scaling: float = 1.0):
+ if normal: # gaussian
+ # not super efficient (in theory random_normal can be made smaller in size)
+ self._random_normal.normal_(mean=0, std=1)
+ noise=self._random_normal
+ else: # uniform
+ self._random_uniform.uniform_(-1,1)
+ noise=self._random_uniform
+
+ env_indices = env_idxs.reshape(-1,1) # Get indices of True environments
+ action_indices = action_idxs.reshape(1,-1) # Get indices of True actions
+ action_indices_flat=action_indices.flatten()
+
+ perturbation=noise[env_indices, action_indices]*scaling
+ perturbed_actions=actions[env_indices, action_indices]+perturbation
+ perturbed_actions.clamp_(-1.0, 1.0) # enforce normalized bounds
+
+ actions[env_indices, action_indices]=\
+ perturbed_actions
+
    def _update_batch_norm(self, bsize: int = None):
        """Update running normalization statistics from replay-buffer samples.

        Draws `bsize` random transitions (defaults to the training batch size)
        and feeds the sampled observations to the agent's observation
        normalizer and, when RND is enabled, the concatenated obs+action batch
        to the RND network's input normalizer.
        """

        if bsize is None:
            bsize=self._batch_size # same used for training

        # sample only from the portion of the buffer written so far
        up_to = self._replay_buffer_size if self._replay_bf_full else self._bpos*self._num_envs
        shuffled_buffer_idxs = torch.randint(0, up_to,
            (bsize,))

        # update obs normalization
        # (we should sample also next obs, but if most of the transitions are not terminal,
        # this is not an issue and is more efficient)
        if (self._agent.obs_running_norm is not None) and \
            (not self._eval):
            batched_obs = self._obs.view((-1, self._obs_dim))
            sampled_obs = batched_obs[shuffled_buffer_idxs]
            self._agent.update_obs_bnorm(x=sampled_obs)

        if self._use_rnd: # update running norm for RND
            batched_obs = self._obs.view((-1, self._obs_dim))
            batched_actions = self._actions.view((-1, self._actions_dim))
            sampled_obs = batched_obs[shuffled_buffer_idxs]
            sampled_actions = batched_actions[shuffled_buffer_idxs]
            # write concatenation directly into the preallocated RND input buffer
            torch.cat(tensors=(sampled_obs, sampled_actions), dim=1,
                out=self._rnd_bnorm_input)
            self._rnd_net.update_input_bnorm(x=self._rnd_bnorm_input)

        # update running norm on rewards also
        # if self._reward_normalizer is not None:
        #     batched_rew = self._rewards.view(-1)
        #     sampled_rew = batched_rew[shuffled_buffer_idxs]
        #     self._reward_normalizer.manual_stat_update(x=sampled_rew)
+
    def _reset_agent(self):
        """Re-initialize the agent and all of its optimizers from scratch.

        Not super efficient, but effective: brand new agent, brand new
        optimizers. Previous optimizer (and log-alpha) objects are explicitly
        deleted and garbage-collected to force deallocation before re-creation.
        """
        self._agent.reset()
        # forcing deallocation of previous optimizers
        import gc
        del self._qf_optimizer
        del self._actor_optimizer
        if self._autotune:
            del self._a_optimizer_disc
            del self._a_optimizer_cont
            del self._log_alpha_disc
            del self._log_alpha_cont
        gc.collect()
        self._init_agent_optimizers()
        if self._autotune: # also reinitialize alpha optimization
            self._init_alpha_autotuning()

        # NOTE(review): logging code reads self._overfit_index, while this
        # resets self._overfit_idx — verify this is not a typo for _overfit_index
        self._overfit_idx=0.0
+
+ def _switch_training_mode(self,
+ train: bool = True):
+
+ self._agent.train(train)
+
    def _init_algo_shared_data(self,
            static_params: Dict):
        """Create and start the shared-memory servers used to publish algorithm state.

        Spawns servers (under namespace `self._ns`) for: general algorithm info,
        Q-function values/targets, per-reward-component episodic returns and
        total episodic returns. All are created with force_reconnection=True so
        stale segments from a previous run are replaced.
        """

        self._shared_algo_data = SharedRLAlgorithmInfo(namespace=self._ns,
            is_server=True,
            static_params=static_params,
            verbose=self._verbose,
            vlevel=VLevel.V2,
            safe=False,
            force_reconnection=True)

        self._shared_algo_data.run()

        # write some initializations
        self._shared_algo_data.write(dyn_info_name=["is_done"],
            val=[0.0])

        # only written to if flags where enabled
        self._qf_vals=QfVal(namespace=self._ns,
            is_server=True,
            n_envs=self._num_envs,
            verbose=self._verbose,
            vlevel=VLevel.V2,
            safe=False,
            force_reconnection=True)
        self._qf_vals.run()
        self._qf_trgt=QfTrgt(namespace=self._ns,
            is_server=True,
            n_envs=self._num_envs,
            verbose=self._verbose,
            vlevel=VLevel.V2,
            safe=False,
            force_reconnection=True)
        self._qf_trgt.run()

        # episodic returns (one channel per sub-reward component)
        reward_names=self._episodic_reward_metrics.data_names()
        self._sub_returns=SubReturns(namespace=self._ns,
            is_server=True,
            n_envs=self._num_envs,
            n_rewards=len(reward_names),
            reward_names=reward_names,
            verbose=self._verbose,
            vlevel=VLevel.V2,
            safe=False,
            force_reconnection=True)
        self._sub_returns.run()

        self._tot_returns=TotReturns(namespace=self._ns,
            is_server=True,
            n_envs=self._num_envs,
            verbose=self._verbose,
            vlevel=VLevel.V2,
            safe=False,
            force_reconnection=True)
        self._tot_returns.run()
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/training_env_base.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/training_env_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1d82cd55b606a8fe783803efcb26726301c1693
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/training_env_base.py
@@ -0,0 +1,2002 @@
+import torch
+import math
+from aug_mpc.utils.math_utils import quaternion_to_angular_velocity, quaternion_difference
+
+from mpc_hive.utilities.shared_data.rhc_data import RobotState
+from mpc_hive.utilities.shared_data.rhc_data import RhcCmds, RhcPred
+from mpc_hive.utilities.shared_data.rhc_data import RhcRefs
+from mpc_hive.utilities.shared_data.rhc_data import RhcStatus
+from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo
+
+from aug_mpc.utils.shared_data.remote_stepping import RemoteStepperSrvr
+from aug_mpc.utils.shared_data.remote_stepping import RemoteResetSrvr
+from aug_mpc.utils.shared_data.remote_stepping import RemoteResetRequest
+
+from aug_mpc.utils.shared_data.agent_refs import AgentRefs
+from aug_mpc.utils.shared_data.training_env import SharedTrainingEnvInfo
+
+from aug_mpc.utils.shared_data.training_env import Observations, NextObservations
+from aug_mpc.utils.shared_data.training_env import TotRewards
+from aug_mpc.utils.shared_data.training_env import SubRewards
+from aug_mpc.utils.shared_data.training_env import Actions
+from aug_mpc.utils.shared_data.training_env import Terminations, SubTerminations
+from aug_mpc.utils.shared_data.training_env import Truncations, SubTruncations
+from aug_mpc.utils.shared_data.training_env import EpisodesCounter,TaskRandCounter,SafetyRandResetsCounter,RandomTruncCounter,SubStepAbsCounter
+
+from aug_mpc.utils.episodic_rewards import EpisodicRewards
+from aug_mpc.utils.episodic_data import EpisodicData
+from aug_mpc.utils.episodic_data import MemBuffer
+from aug_mpc.utils.signal_smoother import ExponentialSignalSmoother
+from aug_mpc.utils.math_utils import check_capsize
+
+from mpc_hive.utilities.math_utils_torch import world2base_frame
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import LogType
+from EigenIPC.PyEigenIPC import Journal
+from EigenIPC.PyEigenIPC import StringTensorClient
+
+from perf_sleep.pyperfsleep import PerfSleep
+
+from abc import abstractmethod, ABC
+
+import os
+from typing import List, Dict
+
+class AugMPCTrainingEnvBase(ABC):
+
+ """Base class for a remote training environment tailored to Learning-based Receding Horizon Control"""
+
    def __init__(self,
            namespace: str,
            obs_dim: int,
            actions_dim: int,
            env_name: str = "",
            verbose: bool = False,
            vlevel: VLevel = VLevel.V1,
            debug: bool = True,
            use_gpu: bool = True,
            dtype: torch.dtype = torch.float32,
            override_agent_refs: bool = False,
            timeout_ms: int = 60000,
            env_opts: Dict = {}):
        """Set up the remote training environment and perform the initial handshake.

        Args:
            namespace: shared-memory namespace shared with the remote sim/RHC side.
            obs_dim: dimensionality of the observation vector.
            actions_dim: dimensionality of the action vector.
            env_name: human-readable environment name.
            verbose, vlevel: logging verbosity settings.
            debug: enables debug data dumping to shared memory.
            use_gpu: if True, tensors live on "cuda", otherwise "cpu".
            dtype: torch dtype used for env tensors.
            override_agent_refs: if True, agent references are externally overridden.
            timeout_ms: ack timeout for remote stepping/reset handshakes.
            env_opts: option dictionary, validated/completed in _process_env_opts.

        NOTE(review): `env_opts: Dict = {}` is a mutable default argument; it is
        only read (copied via update) below, but replacing it with None + an
        in-body default would be safer.
        """

        self._this_path = os.path.abspath(__file__)

        self.custom_db_data = None

        self._random_reset_active=False

        self._action_smoother_continuous=None
        self._action_smoother_discrete=None

        self._closed = False
        self._ready=False

        self._namespace = namespace
        self._with_gpu_mirror = True
        self._safe_shared_mem = False

        self._obs_dim = obs_dim
        self._actions_dim = actions_dim

        self._use_gpu = use_gpu
        if self._use_gpu:
            self._device="cuda"
        else:
            self._device="cpu"

        self._dtype = dtype

        self._verbose = verbose
        self._vlevel = vlevel

        self._is_debug = debug

        self._env_name = env_name

        self._override_agent_refs = override_agent_refs

        self._substep_dt=1.0 # dt [s] between each substep

        # copy caller options, then validate/normalize them
        self._env_opts={}
        self._env_opts.update(env_opts)
        self._process_env_opts()

        # shared-memory clients, attached later in _attach_to_shared_mem()
        self._robot_state = None
        self._rhc_cmds = None
        self._rhc_pred = None
        self._rhc_refs = None
        self._rhc_status = None

        self._remote_stepper = None
        self._remote_resetter = None
        self._remote_reset_req = None

        self._agent_refs = None

        self._n_envs = 0

        # per-env counters (episode timeout, task randomization, safety resets, random truncations)
        self._ep_timeout_counter = None
        self._task_rand_counter = None
        self._rand_safety_reset_counter = None
        self._rand_trunc_counter = None

        self._actions_map={} # to be used to hold info like action idxs
        self._obs_map={}

        # env data tensors, allocated by the _init_* calls below
        self._obs = None
        self._obs_ub = None
        self._obs_lb = None
        self._next_obs = None
        self._actions = None
        self._actual_actions = None
        self._actions_ub = None
        self._actions_lb = None
        self._tot_rewards = None
        self._sub_rewards = None
        self._sub_terminations = None
        self._sub_truncations = None
        self._terminations = None
        self._truncations = None
        self._act_mem_buffer = None

        self._episodic_rewards_metrics = None

        self._timeout = timeout_ms

        self._height_grid_size = None
        self._height_flat_dim = 0

        # order matters: shared memory must be attached before tensors are sized
        self._attach_to_shared_mem()

        self._init_obs(obs_dim)
        self._init_actions(actions_dim)
        self._init_rewards()
        self._init_terminations()
        self._init_truncations()
        self._init_custom_db_data()

        self._demo_setup() # setup for demo envs

        # to ensure maps are properly initialized
        _ = self._get_action_names()
        _ = self._get_obs_names()
        _ = self._get_sub_trunc_names()
        _ = self._get_sub_term_names()

        self._set_substep_rew()
        self._set_substep_obs()

        self._custom_post_init()

        # update actions scale and offset in case it was modified in _custom_post_init
        self._actions_scale = (self._actions_ub - self._actions_lb)/2.0
        self._actions_offset = (self._actions_ub + self._actions_lb)/2.0

        if self._env_opts["use_action_smoothing"]:
            self._init_action_smoothing()

        self._ready=self._init_step(reset_on_init=self._env_opts["reset_on_init"])
+
+ def _add_env_opt(self,
+ opts: Dict,
+ name: str,
+ default):
+
+ if not name in opts:
+ opts[name]=default
+
    def _process_env_opts(self, ):
        """Validate required env options and normalize them for the chosen action repeat.

        Checks presence/type of every mandatory option, merges simulator info
        read from the remote `SharedEnvInfo` segment, and rescales all
        step-count options from substeps to agent steps (dividing by
        `action_repeat`). Flags that are configured with non-positive
        frequencies are disabled.
        """

        self._check_for_env_opts("episode_timeout_lb", int)
        self._check_for_env_opts("episode_timeout_ub", int)
        self._check_for_env_opts("n_steps_task_rand_lb", int)
        self._check_for_env_opts("n_steps_task_rand_ub", int)
        self._check_for_env_opts("use_random_trunc", bool)
        self._check_for_env_opts("random_trunc_freq", int)
        self._check_for_env_opts("random_trunc_freq_delta", int)
        self._check_for_env_opts("use_random_safety_reset", bool)
        self._check_for_env_opts("random_reset_freq", int)

        self._check_for_env_opts("action_repeat", int)

        self._check_for_env_opts("n_preinit_steps", int)

        self._check_for_env_opts("demo_envs_perc", float)

        self._check_for_env_opts("vec_ep_freq_metrics_db", int)

        self._check_for_env_opts("srew_drescaling", bool)

        self._check_for_env_opts("use_action_history", bool)
        self._check_for_env_opts("actions_history_size", int)

        self._check_for_env_opts("use_action_smoothing", bool)
        self._check_for_env_opts("smoothing_horizon_c", float)
        self._check_for_env_opts("smoothing_horizon_d", float)

        self._check_for_env_opts("add_heightmap_obs", bool)

        self._check_for_env_opts("reset_on_init", bool)

        # parse action repeat opt + get some sim information
        if self._env_opts["action_repeat"] <=0:
            self._env_opts["action_repeat"] = 1 # sanitize: at least one substep per step
        self._action_repeat=self._env_opts["action_repeat"]
        # parse remote sim info (published by the simulator side)
        sim_info = {}
        sim_info_shared = SharedEnvInfo(namespace=self._namespace,
            is_server=False,
            safe=False,
            verbose=self._verbose,
            vlevel=self._vlevel)
        sim_info_shared.run()
        sim_info_keys = sim_info_shared.param_keys
        sim_info_data = sim_info_shared.get().flatten()
        for i in range(len(sim_info_keys)):
            sim_info[sim_info_keys[i]] = sim_info_data[i]
        if "substepping_dt" in sim_info_keys:
            self._substep_dt=sim_info["substepping_dt"]
        self._env_opts.update(sim_info)

        self._env_opts["substep_dt"]=self._substep_dt

        self._env_opts["override_agent_refs"]=self._override_agent_refs

        # convert substep counts to agent-step counts
        self._env_opts["episode_timeout_lb"] = round(self._env_opts["episode_timeout_lb"]/self._action_repeat)
        self._env_opts["episode_timeout_ub"] = round(self._env_opts["episode_timeout_ub"]/self._action_repeat)

        self._env_opts["n_steps_task_rand_lb"] = round(self._env_opts["n_steps_task_rand_lb"]/self._action_repeat)
        self._env_opts["n_steps_task_rand_ub"] = round(self._env_opts["n_steps_task_rand_ub"]/self._action_repeat)

        if self._env_opts["random_reset_freq"] <=0:
            self._env_opts["use_random_safety_reset"]=False
            self._env_opts["random_reset_freq"]=-1
        self._random_reset_active=self._env_opts["use_random_safety_reset"]

        self._env_opts["random_trunc_freq"] = round(self._env_opts["random_trunc_freq"]/self._action_repeat)
        self._env_opts["random_trunc_freq_delta"] = round(self._env_opts["random_trunc_freq_delta"]/self._action_repeat)

        if self._env_opts["random_trunc_freq"] <=0:
            self._env_opts["use_random_trunc"]=False
            self._env_opts["random_trunc_freq"]=-1

        # optional flag: dump full env db data
        self._full_db=False
        if "full_env_db" in self._env_opts:
            self._full_db=self._env_opts["full_env_db"]
+
+ def _check_for_env_opts(self,
+ name: str,
+ expected_type):
+ if not (name in self._env_opts):
+ Journal.log(self.__class__.__name__,
+ "_check_for_env_opts",
+ f"Required option {name} missing for env opts!",
+ LogType.EXCEP,
+ throw_when_excep=True)
+ if not isinstance(self._env_opts[name], expected_type):
+ Journal.log(self.__class__.__name__,
+ "_check_for_env_opts",
+ f"Option {name} in env opts is not of type {expected_type} (got {type(self._env_opts[name])})!",
+ LogType.EXCEP,
+ throw_when_excep=True)
+
+ def __del__(self):
+
+ self.close()
+
    def _demo_setup(self):
        """Select which environments act as demonstration envs.

        The number of demo envs is `demo_envs_perc * n_envs` (rounded); their
        indexes are drawn at random. Subclass-specific demo initialization is
        delegated to `_init_demo_envs`. When no demo envs result, the feature
        is left disabled (`_demo_envs_idxs` stays None).
        """

        self._demo_envs_idxs=None
        self._demo_envs_idxs_bool=None
        self._n_demo_envs=round(self._env_opts["demo_envs_perc"]*self._n_envs)
        self._add_demos=False
        if not self._n_demo_envs >0:
            Journal.log(self.__class__.__name__,
                "__init__",
                "will not use demo environments",
                LogType.INFO,
                throw_when_excep=False)
        else:
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Will run with {self._n_demo_envs} demonstration envs.",
                LogType.INFO)
            # random subset of env indexes + equivalent boolean mask
            self._demo_envs_idxs = torch.randperm(self._n_envs, device=self._device)[:self._n_demo_envs]
            self._demo_envs_idxs_bool=torch.full((self._n_envs, ), dtype=torch.bool, device=self._device,
                fill_value=False)
            self._demo_envs_idxs_bool[self._demo_envs_idxs]=True

            self._init_demo_envs() # custom logic

            demo_idxs_str=", ".join(map(str, self._demo_envs_idxs.tolist()))
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Demo env. indexes are [{demo_idxs_str}]",
                LogType.INFO)
+
+ def env_opts(self):
+ return self._env_opts
+
+ def demo_env_idxs(self, get_bool: bool=False):
+ if get_bool:
+ return self._demo_envs_idxs_bool
+ else:
+ return self._demo_envs_idxs
+
+ def _init_demo_envs(self):
+ pass
+
+ def n_demo_envs(self):
+ return self._n_demo_envs
+
+ def demo_active(self):
+ return self._add_demos
+
+ def switch_demo(self, active: bool = False):
+ if self._demo_envs_idxs is not None:
+ self._add_demos=active
+ else:
+ Journal.log(self.__class__.__name__,
+ "switch_demo",
+ f"Cannot switch demostrations on. No demo envs available!",
+ LogType.EXCEP,
+ throw_when_excep=True)
+
+ def _get_this_file_path(self):
+ return self._this_path
+
+ def episode_timeout_bounds(self):
+ return self._env_opts["episode_timeout_lb"], self._env_opts["episode_timeout_ub"]
+
+ def task_rand_timeout_bounds(self):
+ return self._env_opts["n_steps_task_rand_lb"], self._env_opts["n_steps_task_rand_ub"]
+
+ def n_action_reps(self):
+ return self._action_repeat
+
+ def get_file_paths(self):
+ from aug_mpc.utils.sys_utils import PathsGetter
+ path_getter = PathsGetter()
+ base_paths = []
+ base_paths.append(self._get_this_file_path())
+ base_paths.append(path_getter.REMOTENVPATH)
+ for script_path in path_getter.SCRIPTSPATHS:
+ base_paths.append(script_path)
+
+ # rhc files
+ from EigenIPC.PyEigenIPC import StringTensorClient
+ from perf_sleep.pyperfsleep import PerfSleep
+ shared_rhc_shared_files = StringTensorClient(
+ basename="SharedRhcFilesDropDir",
+ name_space=self._namespace,
+ verbose=self._verbose,
+ vlevel=VLevel.V2)
+ shared_rhc_shared_files.run()
+ shared_rhc_files_vals=[""]*shared_rhc_shared_files.length()
+ while not shared_rhc_shared_files.read_vec(shared_rhc_files_vals, 0):
+ nsecs = 1000000000 # 1 sec
+ PerfSleep.thread_sleep(nsecs) # we just keep it alive
+ rhc_list=[]
+ for rhc_files in shared_rhc_files_vals:
+ file_list = rhc_files.split(", ")
+ rhc_list.extend(file_list)
+ rhc_list = list(set(rhc_list)) # removing duplicates
+ base_paths.extend(rhc_list)
+
+ # world interface files
+ get_world_interface_paths = self.get_world_interface_paths()
+ base_paths.extend(get_world_interface_paths)
+ return base_paths
+
    def get_world_interface_paths(self):
        """Read the world-interface file list published on shared memory.

        Blocks (sleeping 1 s between attempts) until the shared string tensor
        can be read, then returns the de-duplicated, order-preserving list of
        file paths (empty entries are skipped). The client is closed after use.
        """
        paths = []
        shared_world_iface_files = StringTensorClient(
            basename="SharedWorldInterfaceFilesDropDir",
            name_space=self._namespace,
            verbose=self._verbose,
            vlevel=VLevel.V2)
        shared_world_iface_files.run()
        world_iface_vals=[""]*shared_world_iface_files.length()
        while not shared_world_iface_files.read_vec(world_iface_vals, 0):
            nsecs = 1000000000 # 1 sec
            PerfSleep.thread_sleep(nsecs) # keep alive while waiting
        shared_world_iface_files.close()
        for files in world_iface_vals:
            if files == "":
                continue
            # entries are comma-separated path lists
            file_list = files.split(", ")
            for f in file_list:
                if f not in paths:
                    paths.append(f)
        return paths
+
+ def get_aux_dir(self):
+ empty_list = []
+ return empty_list
+
    def _init_step(self, reset_on_init: bool = True):
        """Run the initial remote handshake: activate controllers, reset and pre-step.

        Sequence: check/activate RHC controllers, one sim step + (optional)
        full reset, then `n_preinit_steps` dummy step/reset cycles so remote
        data is meaningfully initialized, and finally a local `reset()`.

        Returns:
            bool: True if every remote exchange succeeded.
        """

        self._check_controllers_registered(retry=True)
        self._activate_rhc_controllers()

        # just an auxiliary tensor (same shape/device as the terminations mirror)
        initial_reset_aux = self._terminations.get_torch_mirror(gpu=self._use_gpu).clone()
        initial_reset_aux[:, :] = reset_on_init # we reset all sim envs first
        init_step_ok=True
        init_step_ok=self._remote_sim_step() and init_step_ok
        if not init_step_ok:
            return False
        init_step_ok=self._remote_reset(reset_mask=initial_reset_aux) and init_step_ok
        if not init_step_ok:
            return False

        for i in range(self._env_opts["n_preinit_steps"]): # perform some
            # dummy remote env stepping to make sure to have meaningful
            # initializations (doesn't increment step counter)
            init_step_ok=self._remote_sim_step() and init_step_ok # 1 remote sim. step
            if not init_step_ok:
                return False
            init_step_ok=self._send_remote_reset_req() and init_step_ok # fake reset request
            if not init_step_ok:
                return False

        self.reset()

        return init_step_ok
+
    def _debug(self):
        """Dump the full env state (obs/actions/rewards/term/trunc) to shared memory.

        When running on GPU, all mirrors are first copied GPU->CPU with
        non_blocking transfers followed by a single cuda.synchronize() so the
        dumped data is consistent; the CPU views are then written to shared mem.
        """

        if self._use_gpu:
            # using non_blocking which is not safe when GPU->CPU
            self._obs.synch_mirror(from_gpu=True,non_blocking=True) # copy data from gpu to cpu view
            self._next_obs.synch_mirror(from_gpu=True,non_blocking=True)
            self._actions.synch_mirror(from_gpu=True,non_blocking=True)
            self._truncations.synch_mirror(from_gpu=True,non_blocking=True)
            self._sub_truncations.synch_mirror(from_gpu=True,non_blocking=True)
            self._terminations.synch_mirror(from_gpu=True,non_blocking=True)
            self._sub_terminations.synch_mirror(from_gpu=True,non_blocking=True)
            self._tot_rewards.synch_mirror(from_gpu=True,non_blocking=True)
            self._sub_rewards.synch_mirror(from_gpu=True,non_blocking=True)
            # if we want reliable db data then we should synchronize data streams
            torch.cuda.synchronize()

        # copy CPU view on shared memory
        self._obs.synch_all(read=False, retry=True)
        self._next_obs.synch_all(read=False, retry=True)
        self._actions.synch_all(read=False, retry=True)
        self._tot_rewards.synch_all(read=False, retry=True)
        self._sub_rewards.synch_all(read=False, retry=True)
        self._truncations.synch_all(read=False, retry = True)
        self._sub_truncations.synch_all(read=False, retry = True)
        self._terminations.synch_all(read=False, retry = True)
        self._sub_terminations.synch_all(read=False, retry = True)

        self._debug_agent_refs()
+
+ def _debug_agent_refs(self):
+ if self._use_gpu:
+ if not self._override_agent_refs:
+ self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False)
+ if not self._override_agent_refs:
+ self._agent_refs.rob_refs.root_state.synch_all(read=False, retry = True)
+
+ def _remote_sim_step(self):
+
+ self._remote_stepper.trigger() # triggers simulation + RHC
+ if not self._remote_stepper.wait_ack_from(1, self._timeout):
+ Journal.log(self.__class__.__name__,
+ "_remote_sim_step",
+ "Remote sim. env step ack not received within timeout",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ return False
+ return True
+
    def _remote_reset(self,
            reset_mask: torch.Tensor = None):
        """Send a (possibly empty) remote reset request and refresh local state.

        Args:
            reset_mask: boolean per-env mask; True entries are reset remotely.
                If None, only the "allow stepping" signal is sent and no env
                is reset.

        Returns:
            bool: True if the remote request was acknowledged.
        """

        reset_reqs = self._remote_reset_req.get_torch_mirror()
        if reset_mask is None: # just send the signal to allow stepping, but do not reset any of
            # the remote envs
            reset_reqs[:, :] = False
        else:
            reset_reqs[:, :] = reset_mask # remotely reset envs corresponding to
            # the mask (True--> to be reset)
        self._remote_reset_req.synch_all(read=False, retry=True) # write on shared buffer
        remote_reset_ok = self._send_remote_reset_req() # process remote request

        if reset_mask is not None:
            self._synch_state(gpu=self._use_gpu) # if some env was reset, we use _obs
            # to hold the states, including resets, while _next_obs will always hold the
            # state right after stepping the sim env
            # (could be a bit more efficient, since in theory we only need to read the envs
            # corresponding to the reset_mask)


        return remote_reset_ok
+
+ def _send_remote_reset_req(self):
+
+ self._remote_resetter.trigger()
+ if not self._remote_resetter.wait_ack_from(1, self._timeout): # remote reset completed
+ Journal.log(self.__class__.__name__,
+ "_post_step",
+ "Remote reset did not complete within the prescribed timeout!",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ return False
+ return True
+
    def step(self,
        action):
        """Advance the environment by one agent step (`action_repeat` sim substeps).

        Args:
            action: normalized actions in [-1, 1] (torch tensor, one row per env).

        Scales actions to the physical range, optionally overrides some with
        demonstrations and/or smooths them, applies them to the RHC
        controllers, then runs the substep loop: remote sim step, state synch,
        substep reward/obs accumulation. On the last substep the step-level
        obs and rewards are finalized; sub-rewards are summed (optionally
        rescaled) into the total reward.

        Returns:
            bool: True if every remote exchange in the step succeeded.
        """

        actions_norm = action.detach() # IMPORTANT: assumes actions are already normalized in [-1, 1]

        actions = self._actions.get_torch_mirror(gpu=self._use_gpu) # will hold agent actions (real range)

        # scale normalized actions to physical space before interfacing with controllers
        actions[:, :] = actions_norm*self._actions_scale + self._actions_offset

        self._override_actions_with_demo() # if necessary override some actions with expert demonstrations
        # (getting actions with get_actions will return the modified actions tensor)

        actions.clamp_(self._actions_lb, self._actions_ub) # just to be safe

        if self._act_mem_buffer is not None: # store norm actions in memory buffer
            self._act_mem_buffer.update(new_data=actions_norm)

        if self._env_opts["use_action_smoothing"]:
            self._apply_actions_smoothing() # smooth actions if enabled (the tensor returned by
            # get_actions does not contain smoothing and can be safely employed for experience collection)

        self._apply_actions_to_rhc() # apply last agent actions to rhc controller

        stepping_ok = True
        tot_rewards = self._tot_rewards.get_torch_mirror(gpu=self._use_gpu)
        sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)
        next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu)
        # accumulators must start from zero each agent step
        tot_rewards.zero_()
        sub_rewards.zero_()
        self._substep_rewards.zero_()
        next_obs.zero_() # necessary for substep obs

        for i in range(0, self._action_repeat):

            self._pre_substep() # custom logic @ substep freq

            stepping_ok = stepping_ok and self._check_controllers_registered(retry=False) # does not make sense to run training
            # if we lost some controllers
            stepping_ok = stepping_ok and self._remote_sim_step() # blocking,

            # no sim substepping is allowed to fail
            self._synch_state(gpu=self._use_gpu) # read state from shared mem (done in substeps also,
            # since substeps rewards will need updated substep obs)

            self._custom_post_substp_pre_rew() # custom substepping logic
            self._compute_substep_rewards()
            self._assemble_substep_rewards() # includes rewards clipping
            self._custom_post_substp_post_rew() # custom substepping logic

            # fill substep obs
            self._fill_substep_obs(self._substep_obs)
            self._assemble_substep_obs()
            if not i==(self._action_repeat-1):
                # sends reset signal to complete remote step sequence,
                # but does not reset any remote env
                stepping_ok = stepping_ok and self._remote_reset(reset_mask=None)
            else: # last substep

                self._fill_step_obs(next_obs) # update next obs
                self._clamp_obs(next_obs) # good practice
                obs = self._obs.get_torch_mirror(gpu=self._use_gpu)
                obs[:, :] = next_obs # start from next observation, unless reset (handled in post_step())

                self._compute_step_rewards() # implemented by child

                tot_rewards = self._tot_rewards.get_torch_mirror(gpu=self._use_gpu)
                sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)
                self._clamp_rewards(sub_rewards) # clamp all sub rewards

                tot_rewards[:, :] = torch.sum(sub_rewards, dim=1, keepdim=True)

                scale=1 # scale tot rew by the number of action repeats
                if self._env_opts["srew_drescaling"]: # scale rewards depending on the n of subrewards
                    scale*=sub_rewards.shape[1] # n. dims rescaling
                tot_rewards.mul_(1/scale)

            self._substep_abs_counter.increment() # @ substep freq

            if not stepping_ok:
                return False

        stepping_ok = stepping_ok and self._post_step() # post sub-stepping operations
        # (if action_repeat > 1, then just the db data at the last substep is logged)
        # also, if a reset of an env occurs, obs will hold the reset state

        return stepping_ok
+
+ def _post_step(self):
+
+ # first increment counters
+ self._ep_timeout_counter.increment() # episode timeout
+ self._task_rand_counter.increment() # task randomization
+ if self._rand_trunc_counter is not None: # random truncations (for removing temp. correlations)
+ self._rand_trunc_counter.increment()
+
+ # check truncation and termination conditions
+ self._check_truncations() # defined in child env
+ self._check_terminations()
+ terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu)
+ truncated = self._truncations.get_torch_mirror(gpu=self._use_gpu)
+ ignore_ep_end=None
+ if self._rand_trunc_counter is not None:
+ ignore_ep_end=self._rand_trunc_counter.time_limits_reached()
+ if self._use_gpu:
+ ignore_ep_end=ignore_ep_end.cuda()
+
+ truncated = torch.logical_or(truncated,
+ ignore_ep_end) # add truncation (sub truncations defined in child env
+ # remain untouched)
+
+ episode_finished = torch.logical_or(terminated,
+ truncated)
+ episode_finished_cpu = episode_finished.cpu()
+
+ if self._rand_safety_reset_counter is not None and self._random_reset_active:
+ self._rand_safety_reset_counter.increment(to_be_incremented=episode_finished_cpu.flatten())
+ # truncated[:,:] = torch.logical_or(truncated,
+ # self._rand_safety_reset_counter.time_limits_reached().cuda())
+
+ if self._act_mem_buffer is not None:
+ self._act_mem_buffer.reset(to_be_reset=episode_finished.flatten(),
+ init_data=self._normalize_actions(self.default_action))
+
+ if self._action_smoother_continuous is not None:
+ self._action_smoother_continuous.reset(to_be_reset=episode_finished.flatten(),
+ reset_val=self.default_action[:, self._is_continuous_actions])
+ if self._action_smoother_discrete is not None:
+ self._action_smoother_discrete.reset(to_be_reset=episode_finished.flatten(),
+ reset_val=self.default_action[:, ~self._is_continuous_actions])
+
+ # debug step if required (IMPORTANT: must be before remote reset so that we always db
+ # actual data from the step and not after reset)
+ if self._is_debug:
+ self._debug() # copies db data on shared memory
+ ignore_ep_end_cpu=ignore_ep_end if not self._use_gpu else ignore_ep_end.cpu()
+ self._update_custom_db_data(episode_finished=episode_finished_cpu,
+ ignore_ep_end=ignore_ep_end_cpu # ignore data if random trunc
+ )
+ self._episodic_rewards_metrics.update(rewards = self._sub_rewards.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished_cpu,
+ ignore_ep_end=ignore_ep_end_cpu # ignore data if random trunc
+ )
+
+ # remotely reset envs
+ to_be_reset=self._to_be_reset()
+ to_be_reset_custom=self._custom_reset()
+ if to_be_reset_custom is not None:
+ to_be_reset[:, :] = torch.logical_or(to_be_reset,to_be_reset_custom)
+ rm_reset_ok = self._remote_reset(reset_mask=to_be_reset)
+
+ self._custom_post_step(episode_finished=episode_finished) # any additional logic from child env
+ # here, before actual reset taskes place (at this point the state is the reset one)
+
+ # updating also prev pos and orientation in case some env was reset
+ self._prev_root_p_substep[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)
+ self._prev_root_q_substep[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
+
+ obs = self._obs.get_torch_mirror(gpu=self._use_gpu)
+ self._fill_step_obs(obs)
+ self._clamp_obs(obs)
+
+ # updating prev step quantities
+ self._prev_root_p_step[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)
+ self._prev_root_q_step[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
+
+ # synchronize and reset counters for finished episodes
+ self._ep_timeout_counter.reset(to_be_reset=episode_finished)
+ self._task_rand_counter.reset(to_be_reset=episode_finished)
+ self._substep_abs_counter.reset(to_be_reset=torch.logical_or(terminated,to_be_reset),
+ randomize_offsets=True # otherwise timers across envs would be strongly correlated
+ ) # reset only if resetting environment or if terminal
+
+ if self._rand_trunc_counter is not None:
+ # only reset when safety truncation was is triggered
+ self._rand_trunc_counter.reset(to_be_reset=self._rand_trunc_counter.time_limits_reached(),
+ randomize_limits=True, # we need to randomize otherwise the other counters will synchronize
+ # with the episode counters
+ randomize_offsets=False # always restart at 0
+ )
+ # safety reset counter is only when it reches its reset interval (just to keep
+ # the counter bounded)
+ if self._rand_safety_reset_counter is not None and self._random_reset_active:
+ self._rand_safety_reset_counter.reset(to_be_reset=self._rand_safety_reset_counter.time_limits_reached())
+
+ return rm_reset_ok
+
+ def _to_be_reset(self):
+ # always reset if a termination occurred or if there's a random safety reset
+ # request
+ terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu)
+ to_be_reset=terminated.clone()
+ if (self._rand_safety_reset_counter is not None) and self._random_reset_active:
+ to_be_reset=torch.logical_or(to_be_reset,
+ self._rand_safety_reset_counter.time_limits_reached())
+
+ return to_be_reset
+
+ def _custom_reset(self):
+ # can be overridden by child
+ return None
+
+ def _apply_actions_smoothing(self):
+
+ actions = self._actions.get_torch_mirror(gpu=self._use_gpu)
+ actual_actions=self.get_actual_actions() # will write smoothed actions here
+ if self._action_smoother_continuous is not None:
+ self._action_smoother_continuous.update(new_signal=
+ actions[:, self._is_continuous_actions])
+ actual_actions[:, self._is_continuous_actions]=self._action_smoother_continuous.get()
+ if self._action_smoother_discrete is not None:
+ self._action_smoother_discrete.update(new_signal=
+ actions[:, ~self._is_continuous_actions])
+ actual_actions[:, ~self._is_continuous_actions]=self._action_smoother_discrete.get()
+
+ def _update_custom_db_data(self,
+ episode_finished,
+ ignore_ep_end):
+
+ # update defaults
+ self.custom_db_data["RhcRefsFlag"].update(new_data=self._rhc_refs.contact_flags.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end) # before potentially resetting the flags, get data
+ self.custom_db_data["Actions"].update(new_data=self._actions.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end)
+ self.custom_db_data["Obs"].update(new_data=self._obs.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end)
+
+ self.custom_db_data["SubTerminations"].update(new_data=self._sub_terminations.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end)
+ self.custom_db_data["SubTruncations"].update(new_data=self._sub_truncations.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end)
+
+ self.custom_db_data["Terminations"].update(new_data=self._terminations.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end)
+ self.custom_db_data["Truncations"].update(new_data=self._truncations.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end)
+
+
+ self._get_custom_db_data(episode_finished=episode_finished, ignore_ep_end=ignore_ep_end)
+
+ def reset_custom_db_data(self, keep_track: bool = False):
+ # to be called periodically to reset custom db data stat. collection
+ for custom_db_data in self.custom_db_data.values():
+ custom_db_data.reset(keep_track=keep_track)
+
+ def _assemble_substep_rewards(self):
+ # by default assemble substep rewards by averaging
+ sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)
+
+ # average over substeps depending on scale
+ # sub_rewards[:, self._is_substep_rew] = sub_rewards[:, self._is_substep_rew] + \
+ # self._substep_rewards[:, self._is_substep_rew]/self._action_repeat
+ sub_rewards[:, self._is_substep_rew] += self._substep_rewards[:, self._is_substep_rew]/self._action_repeat
+
+ def _assemble_substep_obs(self):
+ next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu)
+ next_obs[:, self._is_substep_obs] += self._substep_obs[:, self._is_substep_obs]/self._action_repeat
+
+ def randomize_task_refs(self,
+ env_indxs: torch.Tensor = None):
+
+ if self._override_agent_refs:
+ self._override_refs(env_indxs=env_indxs)
+ else:
+ self._randomize_task_refs(env_indxs=env_indxs)
+
    def reset(self):
        """Full environment reset: randomize task refs, zero all shared
        buffers, restart counters/smoothers, re-read the state from shared
        memory and re-initialize both obs and next-obs. The ordering below is
        deliberate (refs before buffers, state synch before obs fill)."""

        self.randomize_task_refs(env_indxs=None) # randomize all refs across envs

        # zero all env-side shared buffers
        self._obs.reset()
        self._actions.reset()
        self._next_obs.reset()
        self._sub_rewards.reset()
        self._tot_rewards.reset()
        self._terminations.reset()
        self._sub_terminations.reset()
        self._truncations.reset()
        self._sub_truncations.reset()

        # restart counters; timeout offsets are randomized to decorrelate envs
        self._ep_timeout_counter.reset(randomize_offsets=True)
        self._task_rand_counter.reset()
        self._task_rand_counter.sync_counters(other_counter=self._ep_timeout_counter)
        if self._rand_safety_reset_counter is not None:
            self._rand_safety_reset_counter.reset()
        self._substep_abs_counter.reset()

        if self._act_mem_buffer is not None:
            self._act_mem_buffer.reset_all(init_data=self._normalize_actions(self.default_action))

        if self._action_smoother_continuous is not None:
            self._action_smoother_continuous.reset(reset_val=self.default_action[:, self._is_continuous_actions])
        if self._action_smoother_discrete is not None:
            self._action_smoother_discrete.reset(reset_val=self.default_action[:, ~self._is_continuous_actions])

        self._synch_state(gpu=self._use_gpu) # read obs from shared mem

        # just calling custom post step to ensure task refs are updated
        terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu)
        truncated = self._truncations.get_torch_mirror(gpu=self._use_gpu)
        episode_finished = torch.logical_or(terminated,
                            truncated)
        self._custom_post_step(episode_finished=episode_finished)

        obs = self._obs.get_torch_mirror(gpu=self._use_gpu)
        next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu)
        self._fill_step_obs(obs) # initialize observations
        self._clamp_obs(obs) # to avoid bad things
        self._fill_step_obs(next_obs) # and next obs
        self._clamp_obs(next_obs)

        self.reset_custom_db_data(keep_track=False)
        self._episodic_rewards_metrics.reset(keep_track=False)

        # initialize prev step/substep pose caches with the freshly-read state
        self._prev_root_p_step[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)
        self._prev_root_q_step[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
        self._prev_root_p_substep[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)
        self._prev_root_q_substep[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
+
+ def is_ready(self):
+ return self._ready
+
    def close(self):
        """Close every shared-memory client/server owned by the env.
        Idempotent: repeated calls after the first are no-ops."""

        if not self._closed:

            # close all shared mem. clients
            self._robot_state.close()
            self._rhc_cmds.close()
            self._rhc_pred.close()
            self._rhc_refs.close()
            self._rhc_status.close()

            self._remote_stepper.close()

            # counters
            self._ep_timeout_counter.close()
            self._task_rand_counter.close()
            if self._rand_safety_reset_counter is not None:
                self._rand_safety_reset_counter.close()

            # closing env.-specific shared data
            self._obs.close()
            self._next_obs.close()
            self._actions.close()
            if self._actual_actions is not None: # only exists when action smoothing is on
                self._actual_actions.close()
            self._sub_rewards.close()
            self._tot_rewards.close()

            self._terminations.close()
            self._sub_terminations.close()
            self._truncations.close()
            self._sub_truncations.close()

            self._closed = True
+
+ def get_obs(self, clone:bool=False):
+ if clone:
+ return self._obs.get_torch_mirror(gpu=self._use_gpu).detach().clone()
+ else:
+ return self._obs.get_torch_mirror(gpu=self._use_gpu).detach()
+
+ def get_next_obs(self, clone:bool=False):
+ if clone:
+ return self._next_obs.get_torch_mirror(gpu=self._use_gpu).detach().clone()
+ else:
+ return self._next_obs.get_torch_mirror(gpu=self._use_gpu).detach()
+
+ def get_actions(self, clone:bool=False, normalized: bool = False):
+ actions = self._actions.get_torch_mirror(gpu=self._use_gpu).detach()
+ if normalized:
+ normalized_actions = self._normalize_actions(actions)
+ return normalized_actions.clone() if clone else normalized_actions
+ return actions.clone() if clone else actions
+
+ def get_actual_actions(self, clone:bool=False, normalized: bool = False):
+ if self._env_opts["use_action_smoothing"]:
+ actions = self._actual_actions.get_torch_mirror(gpu=self._use_gpu).detach()
+ else: # actual action coincides with the one from the agent + possible modif.
+ actions = self.get_actions(clone=False, normalized=False)
+ if normalized:
+ normalized_actions = self._normalize_actions(actions)
+ return normalized_actions.clone() if clone else normalized_actions
+ return actions.clone() if clone else actions
+
+ def _normalize_actions(self, actions: torch.Tensor):
+ scale = torch.where(self._actions_scale == 0.0,
+ torch.ones_like(self._actions_scale),
+ self._actions_scale)
+ normalized = (actions - self._actions_offset)/scale
+ zero_scale_mask = torch.eq(self._actions_scale, 0.0).squeeze(0)
+ if torch.any(zero_scale_mask):
+ normalized[:, zero_scale_mask] = 0.0
+ return normalized
+
+ def get_rewards(self, clone:bool=False):
+ if clone:
+ return self._tot_rewards.get_torch_mirror(gpu=self._use_gpu).detach().clone()
+ else:
+ return self._tot_rewards.get_torch_mirror(gpu=self._use_gpu).detach()
+
+ def get_terminations(self, clone:bool=False):
+ if clone:
+ return self._terminations.get_torch_mirror(gpu=self._use_gpu).detach().clone()
+ else:
+ return self._terminations.get_torch_mirror(gpu=self._use_gpu).detach()
+
+ def get_truncations(self, clone:bool=False):
+ if clone:
+ return self._truncations.get_torch_mirror(gpu=self._use_gpu).detach().clone()
+ else:
+ return self._truncations.get_torch_mirror(gpu=self._use_gpu).detach()
+
+ def obs_dim(self):
+
+ return self._obs_dim
+
+ def actions_dim(self):
+
+ return self._actions_dim
+
+ def ep_rewards_metrics(self):
+
+ return self._episodic_rewards_metrics
+
+ def using_gpu(self):
+
+ return self._use_gpu
+
+ def name(self):
+
+ return self._env_name
+
+ def n_envs(self):
+
+ return self._n_envs
+
+ def dtype(self):
+
+ return self._dtype
+
+ def obs_names(self):
+ return self._get_obs_names()
+
+ def action_names(self):
+ return self._get_action_names()
+
+ def sub_rew_names(self):
+ return self._get_rewards_names()
+
+ def sub_term_names(self):
+ return self._get_sub_term_names()
+
+ def sub_trunc_names(self):
+ return self._get_sub_trunc_names()
+
+ def _get_obs_names(self):
+ # to be overridden by child class
+ return None
+
+ def get_robot_jnt_names(self):
+ return self._robot_state.jnt_names()
+
+ def _get_action_names(self):
+ # to be overridden by child class
+ return None
+
+ def _get_rewards_names(self):
+ # to be overridden by child class
+ return None
+
+ def _get_sub_term_names(self):
+ # to be overridden by child class
+ sub_term_names = []
+ sub_term_names.append("rhc_failure")
+ sub_term_names.append("robot_capsize")
+ sub_term_names.append("rhc_capsize")
+
+ return sub_term_names
+
+ def _get_sub_trunc_names(self):
+ # to be overridden by child class
+ sub_trunc_names = []
+ sub_trunc_names.append("ep_timeout")
+
+ return sub_trunc_names
+
+ def _get_custom_db_data(self, episode_finished):
+ # to be overridden by child class
+ pass
+
+ def set_observed_joints(self):
+ # ny default observe all joints available
+ return self._robot_state.jnt_names()
+
+ def _set_jnts_blacklist_pattern(self):
+ self._jnt_q_blacklist_patterns=[]
+
+ def get_observed_joints(self):
+ return self._observed_jnt_names
+
    def _init_obs(self, obs_dim: int):
        """Allocate observation bounds/scalings and the shared-memory servers
        for current and next observations.

        Args:
            obs_dim: per-env observation dimensionality; must match the length
                of the names returned by _get_obs_names() (checked below).
        """

        device = "cuda" if self._use_gpu else "cpu"

        # symmetric clipping thresholds applied to raw observations
        obs_threshold_default = 1e3
        self._obs_threshold_lb = -obs_threshold_default # used for clipping observations
        self._obs_threshold_ub = obs_threshold_default

        # nominal obs bounds (default [-1, 1]) and derived affine scalings
        self._obs_ub = torch.full((1, obs_dim), dtype=self._dtype, device=device,
            fill_value=1.0)
        self._obs_lb = torch.full((1, obs_dim), dtype=self._dtype, device=device,
            fill_value=-1.0)
        self._obs_scale = (self._obs_ub - self._obs_lb)/2.0
        self._obs_offset = (self._obs_ub + self._obs_lb)/2.0

        # sanity check: declared dim must match the names provided by the child
        if not self._obs_dim==len(self._get_obs_names()):
            error=f"obs dim {self._obs_dim} does not match obs names length {len(self._get_obs_names())}!!"
            Journal.log(self.__class__.__name__,
                "_init_obs",
                error,
                LogType.EXCEP,
                throw_when_excep = True)

        # shared-memory servers holding current and next observations
        self._obs = Observations(namespace=self._namespace,
            n_envs=self._n_envs,
            obs_dim=self._obs_dim,
            obs_names=self._get_obs_names(),
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)

        self._next_obs = NextObservations(namespace=self._namespace,
            n_envs=self._n_envs,
            obs_dim=self._obs_dim,
            obs_names=self._get_obs_names(),
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)

        self._obs.run()
        self._next_obs.run()

        # mask selecting which obs dims are accumulated at substep frequency
        self._is_substep_obs = torch.zeros((self.obs_dim(),), dtype=torch.bool, device=device)
        self._is_substep_obs.fill_(False) # default to all step obs

        # scratch buffer for substep observations (not super memory efficient)
        self._substep_obs=torch.full_like(self._obs.get_torch_mirror(gpu=self._use_gpu), fill_value=0.0)
+
    def _init_actions(self, actions_dim: int):
        """Allocate action bounds/scalings, the shared-memory action server,
        the default/safe action tensors and (optionally) the action history
        buffer.

        Args:
            actions_dim: per-env action dimensionality; must match the length
                of the names returned by _get_action_names() (checked below).
        """

        device = "cuda" if self._use_gpu else "cpu"
        # action scalings to be applied to agent's output (default range [-1, 1])
        self._actions_ub = torch.full((1, actions_dim), dtype=self._dtype, device=device,
            fill_value=1.0)
        self._actions_lb = torch.full((1, actions_dim), dtype=self._dtype, device=device,
            fill_value=-1.0)
        self._actions_scale = (self._actions_ub - self._actions_lb)/2.0
        self._actions_offset = (self._actions_ub + self._actions_lb)/2.0

        # sanity check: declared dim must match the names provided by the child
        if not self._actions_dim==len(self._get_action_names()):
            error=f"action dim {self._actions_dim} does not match action names length {len(self._get_action_names())}!!"
            Journal.log(self.__class__.__name__,
                "_init_actions",
                error,
                LogType.EXCEP,
                throw_when_excep = True)
        self._actions = Actions(namespace=self._namespace,
            n_envs=self._n_envs,
            action_dim=self._actions_dim,
            action_names=self._get_action_names(),
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)

        self._actions.run()

        # zero-filled templates used to (re)initialize actions and buffers
        self.default_action = torch.full_like(input=self.get_actions(),fill_value=0.0)
        self.safe_action = torch.full_like(input=self.get_actions(),fill_value=0.0)

        if self._env_opts["use_action_history"]:
            self._act_mem_buffer=MemBuffer(name="ActionMemBuf",
                data_tensor=self._actions.get_torch_mirror(),
                data_names=self._get_action_names(),
                debug=self._debug,
                horizon=self._env_opts["actions_history_size"],
                dtype=self._dtype,
                use_gpu=self._use_gpu)

        # default to all continuous actions (changes the way noise is added)
        self._is_continuous_actions=torch.full((actions_dim, ),
            dtype=torch.bool, device=device,
            fill_value=True)
+
    def _init_action_smoothing(self):
        """Create exponential smoothers for continuous and discrete action
        dims, plus a shared-memory buffer holding the actually-applied
        (smoothed) actions. update_dt is the agent decision period
        (substep dt times action repeats)."""

        continuous_actions=self.get_actions()[:, self._is_continuous_actions]
        discrete_actions=self.get_actions()[:, ~self._is_continuous_actions]
        self._action_smoother_continuous=ExponentialSignalSmoother(signal=continuous_actions,
            update_dt=self._substep_dt*self._action_repeat, # rate at which actions are decided by agent
            smoothing_horizon=self._env_opts["smoothing_horizon_c"],
            target_smoothing=0.5,
            debug=self._debug,
            dtype=self._dtype,
            use_gpu=self._use_gpu,
            name="ActionSmootherContinuous")
        self._action_smoother_discrete=ExponentialSignalSmoother(signal=discrete_actions,
            update_dt=self._substep_dt*self._action_repeat, # rate at which actions are decided by agent
            smoothing_horizon=self._env_opts["smoothing_horizon_d"],
            target_smoothing=0.5,
            debug=self._debug,
            dtype=self._dtype,
            use_gpu=self._use_gpu,
            name="ActionSmootherDiscrete")

        # we also need somewhere to keep the actual actions after smoothing
        self._actual_actions = Actions(namespace=self._namespace+"_actual",
            n_envs=self._n_envs,
            action_dim=self._actions_dim,
            action_names=self._get_action_names(),
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)
        self._actual_actions.run()
+
    def _init_rewards(self):
        """Allocate reward clipping thresholds, the shared-memory servers for
        sub and total rewards, the substep-reward scratch buffer and the
        episodic reward metrics collector."""

        # symmetric per-sub-reward clipping threshold
        reward_thresh_default = 1.0
        n_sub_rewards = len(self._get_rewards_names())
        device = "cuda" if self._use_gpu else "cpu"
        self._reward_thresh_lb = torch.full((1, n_sub_rewards), dtype=self._dtype, fill_value=-reward_thresh_default, device=device) # used for clipping rewards
        self._reward_thresh_ub = torch.full((1, n_sub_rewards), dtype=self._dtype, fill_value=reward_thresh_default, device=device)

        self._sub_rewards = SubRewards(namespace=self._namespace,
            n_envs=self._n_envs,
            n_rewards=n_sub_rewards,
            reward_names=self._get_rewards_names(),
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)

        self._tot_rewards = TotRewards(namespace=self._namespace,
            n_envs=self._n_envs,
            reward_names=["total_reward"],
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)

        self._sub_rewards.run()
        self._tot_rewards.run()

        self._substep_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu).detach().clone()
        # used to hold substep rewards (not super mem. efficient)
        # mask selecting which sub-rewards are computed at substep frequency
        self._is_substep_rew = torch.zeros((self._substep_rewards.shape[1],),dtype=torch.bool,device=device)
        self._is_substep_rew.fill_(True) # default to all substep rewards

        self._episodic_rewards_metrics = EpisodicRewards(reward_tensor=self._sub_rewards.get_torch_mirror(),
            reward_names=self._get_rewards_names(),
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        self._episodic_rewards_metrics.set_constant_data_scaling(scaling=self._get_reward_scaling())
+
+ def _get_reward_scaling(self):
+ # to be overridden by child (default to no scaling)
+ return 1
+
+ def _max_ep_length(self):
+ #.should be overriden by child
+ return self._env_opts["episode_timeout_ub"]
+
    def _init_custom_db_data(self):
        """Register the default EpisodicData collectors (contact refs, actions,
        observations, sub/step terminations and truncations). Children add
        their own entries via _add_custom_db_data."""

        self.custom_db_data = {}
        # by default always log this contact data
        rhc_latest_contact_ref = self._rhc_refs.contact_flags.get_torch_mirror()
        contact_names = self._rhc_refs.rob_refs.contact_names()
        stepping_data = EpisodicData("RhcRefsFlag", rhc_latest_contact_ref, contact_names,
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        self._add_custom_db_data(db_data=stepping_data)

        # log also action data
        actions = self._actions.get_torch_mirror()
        action_names = self._get_action_names()
        action_data = EpisodicData("Actions", actions, action_names,
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        self._add_custom_db_data(db_data=action_data)

        # and observations
        observations = self._obs.get_torch_mirror()
        observations_names = self._get_obs_names()
        obs_data = EpisodicData("Obs", observations, observations_names,
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        self._add_custom_db_data(db_data=obs_data)

        # log sub-term and sub-truncations data
        t_scaling=1 # 1 so that we log an interpretable data in terms of why the episode finished
        data_scaling = torch.full((self._n_envs, 1),
            fill_value=t_scaling,
            dtype=torch.int32,device="cpu")
        sub_term = self._sub_terminations.get_torch_mirror()
        term = self._terminations.get_torch_mirror()
        sub_termination_names = self.sub_term_names()

        sub_term_data = EpisodicData("SubTerminations", sub_term, sub_termination_names,
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        sub_term_data.set_constant_data_scaling(enable=True,scaling=data_scaling)
        self._add_custom_db_data(db_data=sub_term_data)
        term_data = EpisodicData("Terminations", term, ["terminations"],
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        term_data.set_constant_data_scaling(enable=True,scaling=data_scaling)
        self._add_custom_db_data(db_data=term_data)

        sub_trunc = self._sub_truncations.get_torch_mirror()
        trunc = self._truncations.get_torch_mirror()
        sub_truncations_names = self.sub_trunc_names()
        sub_trunc_data = EpisodicData("SubTruncations", sub_trunc, sub_truncations_names,
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        sub_trunc_data.set_constant_data_scaling(enable=True,scaling=data_scaling)
        self._add_custom_db_data(db_data=sub_trunc_data)
        trunc_data = EpisodicData("Truncations", trunc, ["truncations"],
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        trunc_data.set_constant_data_scaling(enable=True,scaling=data_scaling)
        self._add_custom_db_data(db_data=trunc_data)
+
+ def _add_custom_db_data(self, db_data: EpisodicData):
+ self.custom_db_data[db_data.name()] = db_data
+
    def _init_terminations(self):
        """Allocate the shared-memory servers for step terminations and
        per-condition sub-terminations, plus capsize-detection buffers."""

        # Boolean array indicating whether each environment episode has terminated after
        # the current step. An episode termination could occur based on predefined conditions
        # in the environment, such as reaching a goal or exceeding a time limit.

        self._terminations = Terminations(namespace=self._namespace,
            n_envs=self._n_envs,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=False)
        self._terminations.run()

        sub_t_names = self.sub_term_names()
        self._sub_terminations = SubTerminations(namespace=self._namespace,
            n_envs=self._n_envs,
            n_term=len(sub_t_names),
            term_names=sub_t_names,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=False)
        self._sub_terminations.run()

        # capsize flags for robot and RHC internal state
        device = "cuda" if self._use_gpu else "cpu"
        self._is_capsized=torch.zeros((self._n_envs,1),
            dtype=torch.bool, device=device)
        self._is_rhc_capsized=torch.zeros((self._n_envs,1),
            dtype=torch.bool, device=device)
        self._max_pitch_angle=60.0*math.pi/180.0 # 60 deg capsize threshold, in radians
+
    def _init_truncations(self):
        """Allocate the shared-memory servers for step truncations and
        per-condition sub-truncations."""

        self._truncations = Truncations(namespace=self._namespace,
            n_envs=self._n_envs,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=False)

        self._truncations.run()

        sub_trc_names = self.sub_trunc_names()
        self._sub_truncations = SubTruncations(namespace=self._namespace,
            n_envs=self._n_envs,
            n_trunc=len(sub_trc_names),
            truc_names=sub_trc_names,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=False)
        self._sub_truncations.run()
+
+ def _update_jnt_blacklist(self):
+ device = "cuda" if self._use_gpu else "cpu"
+ all_available_jnts=self.get_observed_joints()
+ blacklist=[]
+ for i in range(len(all_available_jnts)):
+ for pattern in self._jnt_q_blacklist_patterns:
+ if pattern in all_available_jnts[i]:
+ # stop at first pattern match
+ blacklist.append(i)
+ break
+ if not len(blacklist)==0:
+ self._jnt_q_blacklist_idxs=torch.tensor(blacklist, dtype=torch.int, device=device)
+
+ def _attach_to_shared_mem(self):
+
+ # runs shared mem clients for getting observation and setting RHC commands
+
+ # remote stepping data
+ self._remote_stepper = RemoteStepperSrvr(namespace=self._namespace,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ force_reconnection=True)
+ self._remote_stepper.run()
+ self._remote_resetter = RemoteResetSrvr(namespace=self._namespace,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ force_reconnection=True)
+ self._remote_resetter.run()
+ self._remote_reset_req = RemoteResetRequest(namespace=self._namespace,
+ is_server=False,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ safe=True)
+ self._remote_reset_req.run()
+
+ self._jnts_remapping=None
+ self._jnt_q_blacklist_idxs=None
+
+ self._robot_state = RobotState(namespace=self._namespace,
+ is_server=False,
+ safe=self._safe_shared_mem,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ with_gpu_mirror=self._use_gpu,
+ with_torch_view=True,
+ enable_height_sensor=self._env_opts["add_heightmap_obs"])
+
+ self._rhc_cmds = RhcCmds(namespace=self._namespace,
+ is_server=False,
+ safe=self._safe_shared_mem,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ with_gpu_mirror=self._use_gpu,
+ with_torch_view=True)
+
+ self._rhc_pred = RhcPred(namespace=self._namespace,
+ is_server=False,
+ safe=self._safe_shared_mem,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ with_gpu_mirror=self._use_gpu,
+ with_torch_view=True)
+
+ self._rhc_refs = RhcRefs(namespace=self._namespace,
+ is_server=False,
+ safe=self._safe_shared_mem,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ with_gpu_mirror=self._use_gpu,
+ with_torch_view=True)
+
+ self._rhc_status = RhcStatus(namespace=self._namespace,
+ is_server=False,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ with_gpu_mirror=self._use_gpu,
+ with_torch_view=True)
+
+ self._robot_state.run()
+ self._n_envs = self._robot_state.n_robots()
+ self._n_jnts = self._robot_state.n_jnts()
+ self._n_contacts = self._robot_state.n_contacts() # we assume same n contacts for all rhcs for now
+
+ self._rhc_cmds.run()
+ self._rhc_pred.run()
+ self._rhc_refs.run()
+ self._rhc_status.run()
+ # we read rhc info now and just this time, since it's assumed to be static
+ self._check_controllers_registered(retry=True) # blocking
+ # (we need controllers to be connected to read meaningful data)
+
+ self._rhc_status.rhc_static_info.synch_all(read=True,retry=True)
+ if self._use_gpu:
+ self._rhc_status.rhc_static_info.synch_mirror(from_gpu=False,non_blocking=False)
+ rhc_horizons=self._rhc_status.rhc_static_info.get("horizons",gpu=self._use_gpu)
+ rhc_nnodes=self._rhc_status.rhc_static_info.get("nnodes",gpu=self._use_gpu)
+ rhc_dts=self._rhc_status.rhc_static_info.get("dts",gpu=self._use_gpu)
+
+ # height sensor metadata (client side)
+ if self._env_opts["add_heightmap_obs"]:
+ self._height_grid_size = self._robot_state.height_sensor.grid_size
+ self._height_flat_dim = self._robot_state.height_sensor.n_cols
+ rhc_ncontacts=self._rhc_status.rhc_static_info.get("ncontacts",gpu=self._use_gpu)
+ robot_mass=self._rhc_status.rhc_static_info.get("robot_mass",gpu=self._use_gpu)
+ pred_node_idxs_rhc=self._rhc_status.rhc_static_info.get("pred_node_idx",gpu=self._use_gpu)
+
+ self._n_nodes_rhc=torch.round(rhc_nnodes) # we assume nodes are static during an env lifetime
+ self._rhc_horizons=rhc_horizons
+ self._rhc_dts=rhc_dts
+ self._n_contacts_rhc=rhc_ncontacts
+ self._rhc_robot_masses=robot_mass
+ if (self._rhc_robot_masses == 0).any():
+ zero_indices = torch.nonzero(self._rhc_robot_masses == 0, as_tuple=True)
+ print(zero_indices) # This will print the indices of zero elements
+ Journal.log(self.__class__.__name__,
+ "_attach_to_shared_mem",
+ "Found at least one robot with 0 mass from RHC static info!!",
+ LogType.EXCEP,
+ throw_when_excep=True)
+
+ self._rhc_robot_weight=robot_mass*9.81
+ self._pred_node_idxs_rhc=pred_node_idxs_rhc
+ self._pred_horizon_rhc=self._pred_node_idxs_rhc*self._rhc_dts
+
+ # run server for agent commands
+ self._agent_refs = AgentRefs(namespace=self._namespace,
+ is_server=True,
+ n_robots=self._n_envs,
+ n_jnts=self._robot_state.n_jnts(),
+ n_contacts=self._robot_state.n_contacts(),
+ contact_names=self._robot_state.contact_names(),
+ q_remapping=None,
+ with_gpu_mirror=self._use_gpu,
+ force_reconnection=True,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ fill_value=0)
+ self._agent_refs.run()
+ q_init_agent_refs=torch.full_like(self._robot_state.root_state.get(data_type="q", gpu=self._use_gpu),fill_value=0.0)
+ q_init_agent_refs[:, 0]=1.0
+ self._agent_refs.rob_refs.root_state.set(data_type="q", data=q_init_agent_refs,
+ gpu=self._use_gpu)
+ if self._use_gpu:
+ self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=True)
+ self._agent_refs.rob_refs.root_state.synch_all(read=False, retry=True)
+ # episode steps counters (for detecting episode truncations for
+ # time limits)
+ self._ep_timeout_counter = EpisodesCounter(namespace=self._namespace,
+ n_envs=self._n_envs,
+ n_steps_lb=self._env_opts["episode_timeout_lb"],
+ n_steps_ub=self._env_opts["episode_timeout_ub"],
+ randomize_offsets_at_startup=True, # this has to be randomized
+ is_server=True,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ safe=True,
+ force_reconnection=True,
+ with_gpu_mirror=self._use_gpu,
+ debug=self._debug) # handles step counter through episodes and through envs
+ self._ep_timeout_counter.run()
+ self._task_rand_counter = TaskRandCounter(namespace=self._namespace,
+ n_envs=self._n_envs,
+ n_steps_lb=self._env_opts["n_steps_task_rand_lb"],
+ n_steps_ub=self._env_opts["n_steps_task_rand_ub"],
+ randomize_offsets_at_startup=False, # not necessary since it will be synched with the timeout counter
+ is_server=True,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ safe=True,
+ force_reconnection=True,
+ with_gpu_mirror=self._use_gpu,
+ debug=self._debug) # handles step counter through episodes and through envs
+ self._task_rand_counter.run()
+ self._task_rand_counter.sync_counters(other_counter=self._ep_timeout_counter)
+ if self._env_opts["use_random_trunc"]:
+ self._rand_trunc_counter=RandomTruncCounter(namespace=self._namespace,
+ n_envs=self._n_envs,
+ n_steps_lb=self._env_opts["random_trunc_freq"]-self._env_opts["random_trunc_freq_delta"],
+ n_steps_ub=self._env_opts["random_trunc_freq"],
+ randomize_offsets_at_startup=True,
+ is_server=True,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ safe=True,
+ force_reconnection=True,
+ with_gpu_mirror=self._use_gpu,
+ debug=False)
+ self._rand_trunc_counter.run()
+ # self._rand_trunc_counter.sync_counters(other_counter=self._ep_timeout_counter)
+ if self._env_opts["use_random_safety_reset"]:
+ self._rand_safety_reset_counter=SafetyRandResetsCounter(namespace=self._namespace,
+ n_envs=self._n_envs,
+ n_steps_lb=self._env_opts["random_reset_freq"],
+ n_steps_ub=self._env_opts["random_reset_freq"],
+ randomize_offsets_at_startup=True,
+ is_server=True,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ safe=True,
+ force_reconnection=True,
+ with_gpu_mirror=self._use_gpu,
+ debug=False)
+ self._rand_safety_reset_counter.run()
+ # self._rand_safety_reset_counter.sync_counters(other_counter=self._ep_timeout_counter)
+
+ # timer to track abs time in each env (reset logic to be implemented in child)
+ self._substep_abs_counter = SubStepAbsCounter(namespace=self._namespace,
+ n_envs=self._n_envs,
+ n_steps_lb=1e9,
+ n_steps_ub=1e9,
+ randomize_offsets_at_startup=True, # randomizing startup offsets
+ is_server=True,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ safe=True,
+ force_reconnection=True,
+ with_gpu_mirror=self._use_gpu,
+ debug=self._debug)
+ self._substep_abs_counter.run()
+
+ # debug data servers
+ traing_env_param_dict = {}
+ traing_env_param_dict["use_gpu"] = self._use_gpu
+ traing_env_param_dict["debug"] = self._is_debug
+ traing_env_param_dict["n_preinit_steps"] = self._env_opts["n_preinit_steps"]
+ traing_env_param_dict["n_preinit_steps"] = self._n_envs
+
+ self._training_sim_info = SharedTrainingEnvInfo(namespace=self._namespace,
+ is_server=True,
+ training_env_params_dict=traing_env_param_dict,
+ safe=False,
+ force_reconnection=True,
+ verbose=self._verbose,
+ vlevel=self._vlevel)
+ self._training_sim_info.run()
+
+ self._observed_jnt_names=self.set_observed_joints()
+ self._set_jnts_blacklist_pattern()
+ self._update_jnt_blacklist()
+
+ self._prev_root_p_substep=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu).clone()
+ self._prev_root_q_substep=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu).clone()
+ self._prev_root_p_step=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu).clone()
+ self._prev_root_q_step=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu).clone()
+
+ def _activate_rhc_controllers(self):
+ self._rhc_status.activation_state.get_torch_mirror()[:, :] = True
+ self._rhc_status.activation_state.synch_all(read=False, retry=True) # activates all controllers
+
def _synch_state(self,
        gpu: bool = True):
    """Pull the full robot/RHC state from shared memory on the CPU side and,
    if requested, copy every freshly-read buffer to its GPU mirror.

    Args:
        gpu: when True, mirror all CPU-side data to the GPU and synchronize
            the CUDA streams before returning.
    """
    # read from shared memory on CPU
    # robot state
    self._robot_state.root_state.synch_all(read = True, retry = True)
    self._robot_state.jnts_state.synch_all(read = True, retry = True)
    # rhc cmds
    self._rhc_cmds.root_state.synch_all(read = True, retry = True)
    self._rhc_cmds.jnts_state.synch_all(read = True, retry = True)
    self._rhc_cmds.contact_wrenches.synch_all(read = True, retry = True)
    # rhc pred (only the root state prediction is consumed; the rest stays disabled)
    self._rhc_pred.root_state.synch_all(read = True, retry = True)
    # self._rhc_pred.jnts_state.synch_all(read = True, retry = True)
    # self._rhc_pred.contact_wrenches.synch_all(read = True, retry = True)
    # refs for root link and contacts
    self._rhc_refs.rob_refs.root_state.synch_all(read = True, retry = True)
    self._rhc_refs.contact_flags.synch_all(read = True, retry = True)
    self._rhc_refs.flight_info.synch_all(read = True, retry = True)
    self._rhc_refs.flight_settings_req.synch_all(read = True, retry = True)
    self._rhc_refs.rob_refs.contact_pos.synch_all(read = True, retry = True)
    # rhc cost
    self._rhc_status.rhc_cost.synch_all(read = True, retry = True)
    # rhc constr. violations
    self._rhc_status.rhc_constr_viol.synch_all(read = True, retry = True)
    # failure states
    self._rhc_status.fails.synch_all(read = True, retry = True)
    # tot cost and cnstr viol on nodes + step variable
    self._rhc_status.rhc_nodes_cost.synch_all(read = True, retry = True)
    self._rhc_status.rhc_nodes_constr_viol.synch_all(read = True, retry = True)
    self._rhc_status.rhc_fcn.synch_all(read = True, retry = True)
    self._rhc_status.rhc_fail_idx.synch_all(read = True, retry = True)
    if self._env_opts["add_heightmap_obs"]:
        self._robot_state.height_sensor.synch_all(read=True, retry=True)
    if gpu:
        # copies data to "mirror" on GPU --> we can do it non-blocking since
        # in this direction it should be safe
        self._robot_state.root_state.synch_mirror(from_gpu=False,non_blocking=True) # copies shared data on GPU
        self._robot_state.jnts_state.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_cmds.root_state.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_cmds.jnts_state.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_cmds.contact_wrenches.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_pred.root_state.synch_mirror(from_gpu=False,non_blocking=True)
        # self._rhc_pred.jnts_state.synch_mirror(from_gpu=False,non_blocking=True)
        # self._rhc_pred.contact_wrenches.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_refs.contact_flags.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_refs.rob_refs.contact_pos.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_refs.flight_info.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_refs.flight_settings_req.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.rhc_cost.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.rhc_constr_viol.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.fails.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.rhc_nodes_cost.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.rhc_nodes_constr_viol.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.rhc_fcn.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.rhc_fail_idx.synch_mirror(from_gpu=False,non_blocking=True)
        if self._env_opts["add_heightmap_obs"]:
            self._robot_state.height_sensor.synch_mirror(from_gpu=False, non_blocking=True)
        torch.cuda.synchronize() # ensuring that all the streams on the GPU are completed \
        # before the CPU continues execution
+
+ def _override_refs(self,
+ env_indxs: torch.Tensor = None):
+
+ # just used for setting agent refs externally (i.e. from shared mem on CPU)
+ self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem
+ if self._use_gpu:
+ # copies latest refs to GPU
+ self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False)
+
+ def _clamp_obs(self,
+ obs: torch.Tensor):
+ if self._is_debug:
+ self._check_finite(obs, "observations", False)
+ torch.nan_to_num(input=obs, out=obs, nan=self._obs_threshold_ub,
+ posinf=self._obs_threshold_ub,
+ neginf=self._obs_threshold_lb) # prevent nans
+
+ obs.clamp_(self._obs_threshold_lb, self._obs_threshold_ub)
+
+ def _clamp_rewards(self,
+ rewards: torch.Tensor):
+ if self._is_debug:
+ self._check_finite(rewards, "rewards", False)
+ torch.nan_to_num(input=rewards, out=rewards, nan=0.0,
+ posinf=None,
+ neginf=None) # prevent nans
+ rewards.clamp_(self._reward_thresh_lb, self._reward_thresh_ub)
+
def get_actions_lb(self):
    """Lower bounds of the action space."""
    return self._actions_lb

def get_actions_ub(self):
    """Upper bounds of the action space."""
    return self._actions_ub

def get_actions_scale(self):
    """Per-component scaling applied to normalized actions."""
    return self._actions_scale

def get_actions_offset(self):
    """Per-component offset applied to normalized actions."""
    return self._actions_offset

def get_obs_lb(self):
    """Lower bounds of the observation space."""
    return self._obs_lb

def get_obs_ub(self):
    """Upper bounds of the observation space."""
    return self._obs_ub

def get_obs_scale(self):
    """Half-range of the observation bounds (recomputed and cached on each call)."""
    self._obs_scale = 0.5 * (self._obs_ub - self._obs_lb)
    return self._obs_scale

def get_obs_offset(self):
    """Mid-point of the observation bounds (recomputed and cached on each call)."""
    self._obs_offset = 0.5 * (self._obs_ub + self._obs_lb)
    return self._obs_offset
+
def switch_random_reset(self, on: bool = True):
    """Enable (default) or disable the random safety-reset mechanism."""
    self._random_reset_active = on
+
def set_jnts_remapping(self,
        remapping: List = None):
    """Apply a joint-order remapping to all joint-indexed views and refresh
    the observed-joint bookkeeping, including obs names on shared memory.

    Args:
        remapping: list of indices into the robot's joint-name list defining
            the new joint ordering; None clears the stored remapping (in that
            case nothing else is updated).
    """
    self._jnts_remapping=remapping
    if self._jnts_remapping is not None:
        # propagate the remapping to every joint-indexed data view
        self._robot_state.set_jnts_remapping(jnts_remapping=self._jnts_remapping)
        self._rhc_cmds.set_jnts_remapping(jnts_remapping=self._jnts_remapping)
        self._rhc_pred.set_jnts_remapping(jnts_remapping=self._jnts_remapping)
        # we need to also update the list of observed joints to match
        available_joints=self._robot_state.jnt_names()
        # the remapping ordering
        self._observed_jnt_names=[]
        for i in range(len(available_joints)):
            self._observed_jnt_names.append(available_joints[self._jnts_remapping[i]])

        self._update_jnt_blacklist()

        updated_obs_names=self._get_obs_names() # get updated obs names (should use get_observed_joints
        # internally, so that jnt names are updated)

        # also update jnt obs names on shared memory (in-place slice assignment
        # so the shared views keep pointing at the same underlying lists)
        names_old=self._obs.get_obs_names()
        names_old_next=self._next_obs.get_obs_names()
        names_old[:]=updated_obs_names
        names_old_next[:]=updated_obs_names
        self._obs.update_names()
        self._next_obs.update_names()

        # also update the names used by the episodic debug data, if present
        if "Obs" in self.custom_db_data:
            db_obs_names=self.custom_db_data["Obs"].data_names()
            db_obs_names[:]=updated_obs_names
+
+ def _check_finite(self,
+ tensor: torch.Tensor,
+ name: str,
+ throw: bool = False):
+ if not torch.isfinite(tensor).all().item():
+ exception = f"Found nonfinite elements in {name} tensor!!"
+ non_finite_idxs=torch.nonzero(~torch.isfinite(tensor))
+ n_nonf_elems=non_finite_idxs.shape[0]
+
+ if name=="observations":
+ for i in range(n_nonf_elems):
+ db_msg=f"{self.obs_names()[non_finite_idxs[i,1]]} (env. {non_finite_idxs[i,0]}):" + \
+ f" {tensor[non_finite_idxs[i,0],non_finite_idxs[i,1]].item()}"
+ print(db_msg)
+ if name=="rewards":
+ for i in range(n_nonf_elems):
+ db_msg=f"{self.sub_rew_names()[non_finite_idxs[i,1]]} (env. {non_finite_idxs[i,0]}):" + \
+ f" {tensor[non_finite_idxs[i,0],non_finite_idxs[i,1]].item()}"
+ print(db_msg)
+ print(tensor)
+ Journal.log(self.__class__.__name__,
+ "_check_finite",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = throw)
+
def _check_controllers_registered(self,
        retry: bool = False):
    """Check that exactly ``self._n_envs`` RHC controllers are connected.

    Args:
        retry: if True, block until all controllers are connected (polling
            the shared counter every 2 s) and then return True; if False,
            perform a single check and return False on mismatch, logging an
            error without raising.

    Returns:
        bool: True when the expected number of controllers is connected.
    """
    if retry:
        self._rhc_status.controllers_counter.synch_all(read=True, retry=True)
        n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item()
        while not (n_connected_controllers == self._n_envs):
            warn = f"Expected {self._n_envs} controllers to be connected during training, " + \
                f"but got {n_connected_controllers}. Will wait for all to be connected..."
            Journal.log(self.__class__.__name__,
                "_check_controllers_registered",
                warn,
                LogType.WARN,
                throw_when_excep = False)
            nsecs = int(2 * 1000000000)  # 2 s polling period
            PerfSleep.thread_sleep(nsecs)
            self._rhc_status.controllers_counter.synch_all(read=True, retry=True)
            n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item()
        info = f"All {n_connected_controllers} controllers connected!"
        Journal.log(self.__class__.__name__,
            "_check_controllers_registered",
            info,
            LogType.INFO,
            throw_when_excep = False)
        return True
    else:
        self._rhc_status.controllers_counter.synch_all(read=True, retry=True)
        n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item()
        if not (n_connected_controllers == self._n_envs):
            exception = f"Expected {self._n_envs} controllers to be connected during training, " + \
                f"but got {n_connected_controllers}. Aborting..."
            # logged as EXCEP but deliberately not raised (throw_when_excep=False):
            # the caller decides what to do with the False return
            Journal.log(self.__class__.__name__,
                "_check_controllers_registered",
                exception,
                LogType.EXCEP,
                throw_when_excep = False)
            return False
        return True
+
+ def _check_truncations(self):
+
+ self._check_sub_truncations()
+ sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu)
+ truncations = self._truncations.get_torch_mirror(gpu=self._use_gpu)
+ truncations[:, :] = torch.any(sub_truncations,dim=1,keepdim=True)
+
+ def _check_terminations(self):
+
+ self._check_sub_terminations()
+ sub_terminations = self._sub_terminations.get_torch_mirror(gpu=self._use_gpu)
+ terminations = self._terminations.get_torch_mirror(gpu=self._use_gpu)
+ terminations[:, :] = torch.any(sub_terminations,dim=1,keepdim=True)
+
+ def _check_sub_truncations(self):
+ # default behaviour-> to be overriden by child
+ sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu)
+ sub_truncations[:, 0:1]=self._ep_timeout_counter.time_limits_reached()
+
def _check_sub_terminations(self):
    """Default sub-terminations — children may override. Columns:
    0) RHC controller failure, 1) measured-base capsize, 2) MPC-predicted capsize."""
    gpu = self._use_gpu
    subs = self._sub_terminations.get_torch_mirror(gpu=gpu)
    q_meas = self._robot_state.root_state.get(data_type="q", gpu=gpu)
    q_pred = self._rhc_cmds.root_state.get(data_type="q", gpu=gpu)

    # terminate when either the real robot or the MPC prediction is capsized
    check_capsize(quat=q_meas, max_angle=self._max_pitch_angle,
        output_t=self._is_capsized)
    check_capsize(quat=q_pred, max_angle=self._max_pitch_angle,
        output_t=self._is_rhc_capsized)

    subs[:, 0:1] = self._rhc_status.fails.get_torch_mirror(gpu=gpu)
    subs[:, 1:2] = self._is_capsized
    subs[:, 2:3] = self._is_rhc_capsized
+
def is_action_continuous(self):
    # True when the env exposes a continuous action space
    return self._is_continuous_actions

def is_action_discrete(self):
    # NOTE(review): `~` is only a logical NOT if _is_continuous_actions is a
    # torch/numpy boolean; on a plain Python bool it yields -2/-1 (both
    # truthy) — confirm the attribute's type
    return ~self._is_continuous_actions
+
@abstractmethod
def _pre_substep(self):
    """Hook: called before every env substep (child must implement)."""
    pass

@abstractmethod
def _custom_post_step(self, episode_finished):
    """Hook: called after every full env step; receives per-env episode-finished flags."""
    pass

@abstractmethod
def _custom_post_substp_post_rew(self):
    """Hook: called after each substep, after reward computation."""
    pass

@abstractmethod
def _custom_post_substp_pre_rew(self):
    """Hook: called after each substep, before reward computation."""
    pass

@abstractmethod
def _apply_actions_to_rhc(self):
    """Hook: map the agent's actions onto RHC controller references/commands."""
    pass

def _override_actions_with_demo(self):
    # no-op by default; children running demonstration envs may override
    pass

@abstractmethod
def _compute_substep_rewards(self):
    """Hook: compute per-substep reward terms."""
    pass

@abstractmethod
def _set_substep_rew(self):
    """Hook: write the computed substep reward terms to their buffers."""
    pass

@abstractmethod
def _set_substep_obs(self):
    """Hook: write the substep observations to their buffers."""
    pass

@abstractmethod
def _compute_step_rewards(self):
    """Hook: compute per-step (post action-repeat) reward terms."""
    pass

@abstractmethod
def _fill_substep_obs(self,
        obs: torch.Tensor):
    """Hook: fill *obs* with the substep observation values."""
    pass

@abstractmethod
def _fill_step_obs(self,
        obs: torch.Tensor):
    """Hook: fill *obs* with the step observation values."""
    pass

@abstractmethod
def _randomize_task_refs(self,
        env_indxs: torch.Tensor = None):
    """Hook: randomize task references, optionally only for *env_indxs*."""
    pass

def _custom_post_init(self):
    # no-op by default; children may add extra initialization here
    pass
+
def _get_avrg_substep_root_twist(self,
        out: torch.Tensor,
        base_loc: bool = True):
    """Write into *out* the average root twist over the last substep, obtained by
    finite-differencing measured root pose; updates the prev-pose buffers.

    Args:
        out: (n_envs x 6) tensor receiving [v, omega].
        base_loc: if True the twist is expressed in the base frame, otherwise
            in the world frame.
    """
    # to be called at each substep
    robot_p_meas = self._robot_state.root_state.get(data_type="p", gpu=self._use_gpu)
    robot_q_meas = self._robot_state.root_state.get(data_type="q", gpu=self._use_gpu)

    # finite-difference average linear/angular velocity over the substep (world frame)
    root_v_avrg_w = (robot_p_meas - self._prev_root_p_substep) / self._substep_dt
    root_omega_avrg_w = quaternion_to_angular_velocity(
        q_diff=quaternion_difference(self._prev_root_q_substep, robot_q_meas),
        dt=self._substep_dt)
    twist_w = torch.cat((root_v_avrg_w,
        root_omega_avrg_w),
        dim=1)
    if not base_loc:
        self._prev_root_p_substep[:, :] = robot_p_meas
        self._prev_root_q_substep[:, :] = robot_q_meas
        out[:, :] = twist_w
        # BUG FIX: return here — previously execution fell through and
        # world2base_frame overwrote the world-frame result, making
        # base_loc=False behave exactly like base_loc=True
        return
    # rotate using the current (end-of-substep) orientation for consistency with other signals
    world2base_frame(t_w=twist_w, q_b=robot_q_meas, t_out=out)
    self._prev_root_p_substep[:, :] = robot_p_meas
    self._prev_root_q_substep[:, :] = robot_q_meas
+
def _get_avrg_step_root_twist(self,
        out: torch.Tensor,
        base_loc: bool = True):
    """Write into *out* the average root twist over one full env step
    (action_repeat substeps), finite-differenced against the prev-step pose.

    Args:
        out: (n_envs x 6) tensor receiving [v, omega].
        base_loc: if True the twist is expressed in the base frame, otherwise
            in the world frame.

    Note: unlike the substep variant, the prev-step pose buffers are not
    updated here — presumably done by the step loop; confirm against callers.
    """
    # to be called after substeps of actions repeats
    robot_p_meas = self._robot_state.root_state.get(data_type="p", gpu=self._use_gpu)
    robot_q_meas = self._robot_state.root_state.get(data_type="q", gpu=self._use_gpu)

    dt = self._substep_dt * self._action_repeat # accounting for frame skipping
    root_v_avrg_w = (robot_p_meas - self._prev_root_p_step) / dt
    root_omega_avrg_w = quaternion_to_angular_velocity(
        q_diff=quaternion_difference(self._prev_root_q_step, robot_q_meas),
        dt=dt)
    twist_w = torch.cat((root_v_avrg_w,
        root_omega_avrg_w),
        dim=1)
    if not base_loc:
        out[:, :] = twist_w
        # BUG FIX: return here — previously execution fell through and
        # world2base_frame overwrote the world-frame result
        return
    # rotate using the current (end-of-step) orientation for consistency with other signals
    world2base_frame(t_w=twist_w, q_b=robot_q_meas, t_out=out)
+
def _get_avrg_rhc_root_twist(self,
        out: torch.Tensor,
        base_loc: bool = True):
    """Write into *out* the average root twist predicted by the MPC over its
    prediction horizon (difference between predicted and current rhc root pose).

    Args:
        out: (n_envs x 6) tensor receiving [v, omega].
        base_loc: if True the twist is expressed in the rhc base frame (first
            node as reference), otherwise in the world frame.
    """
    rhc_root_p = self._rhc_cmds.root_state.get(data_type="p", gpu=self._use_gpu)
    rhc_root_q = self._rhc_cmds.root_state.get(data_type="q", gpu=self._use_gpu)
    rhc_root_p_pred = self._rhc_pred.root_state.get(data_type="p", gpu=self._use_gpu)
    rhc_root_q_pred = self._rhc_pred.root_state.get(data_type="q", gpu=self._use_gpu)

    rhc_root_v_avrg_rhc_w = (rhc_root_p_pred - rhc_root_p) / self._pred_horizon_rhc
    rhc_root_omega_avrg_rhc_w = quaternion_to_angular_velocity(
        q_diff=quaternion_difference(rhc_root_q, rhc_root_q_pred),
        dt=self._pred_horizon_rhc)

    rhc_pred_avrg_twist_rhc_w = torch.cat((rhc_root_v_avrg_rhc_w,
        rhc_root_omega_avrg_rhc_w),
        dim=1)
    if not base_loc:
        out[:, :] = rhc_pred_avrg_twist_rhc_w
        # BUG FIX: return here — previously execution fell through and
        # world2base_frame overwrote the world-frame result
        return
    # to rhc base frame (using first node as reference)
    world2base_frame(t_w=rhc_pred_avrg_twist_rhc_w, q_b=rhc_root_q, t_out=out)
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/twist_tracking_env.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/twist_tracking_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..adeb8da8fdb11895a2c306052abc80f3b9126c39
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/twist_tracking_env.py
@@ -0,0 +1,1395 @@
+from typing import Dict
+
+import os
+
+import torch
+
+from EigenIPC.PyEigenIPC import VLevel, LogType, Journal
+
+from mpc_hive.utilities.shared_data.rhc_data import RobotState, RhcStatus, RhcRefs
+from mpc_hive.utilities.math_utils_torch import world2base_frame, base2world_frame, w2hor_frame
+
+from aug_mpc.utils.sys_utils import PathsGetter
+from aug_mpc.utils.timers import PeriodicTimer
+from aug_mpc.utils.episodic_data import EpisodicData
+from aug_mpc.utils.signal_smoother import ExponentialSignalSmoother
+from aug_mpc.utils.math_utils import check_capsize
+from aug_mpc.training_envs.training_env_base import AugMPCTrainingEnvBase
+
+class TwistTrackingEnv(AugMPCTrainingEnvBase):
+ """Base AugMPC training env that tracks commanded twists by pushing velocity and contact targets into the RHC controller while handling locomotion rewards/resets."""
+
def __init__(self,
        namespace: str,
        actions_dim: int = 10,
        verbose: bool = False,
        vlevel: VLevel = VLevel.V1,
        use_gpu: bool = True,
        dtype: torch.dtype = torch.float32,
        debug: bool = True,
        override_agent_refs: bool = False,
        timeout_ms: int = 60000,
        env_opts: Dict = None):
    """Configure env options (with defaults), probe shared-memory metadata
    via temporary clients, compute the observation dimension and initialize
    the AugMPC training-env base.

    Args:
        namespace: shared-memory namespace shared with the RHC controllers.
        actions_dim: dimensionality of the agent's action space.
        verbose/vlevel/debug: logging controls forwarded to the base class.
        use_gpu: whether GPU mirrors of shared data are used.
        dtype: torch dtype for internal buffers.
        override_agent_refs: let external processes override agent refs.
        timeout_ms: shared-memory connection timeout.
        env_opts: option overrides; missing keys are filled with defaults.
    """
    # BUG FIX: default used to be a mutable dict ({}); env_opts is mutated
    # below, so the default instance would have been shared across all
    # constructions — use a None sentinel and build a fresh dict instead
    if env_opts is None:
        env_opts = {}

    env_name = "TwistTrackingEnv"

    self._add_env_opt(env_opts, "srew_drescaling",
        False)

    self._add_env_opt(env_opts, "step_thresh", 0.) # when step action < thresh, a step is requested

    # counters settings
    self._add_env_opt(env_opts, "single_task_ref_per_episode",
        True # if True, the task ref is constant over the episode (ie
        # episodes are truncated when task is changed)
        )
    self._add_env_opt(env_opts, "add_angvel_ref_rand", default=True) # randomize also angular vel ref (just z component)

    self._add_env_opt(env_opts, "episode_timeout_lb",
        1024)
    self._add_env_opt(env_opts, "episode_timeout_ub",
        1024)
    self._add_env_opt(env_opts, "n_steps_task_rand_lb",
        512)
    self._add_env_opt(env_opts, "n_steps_task_rand_ub",
        512)
    self._add_env_opt(env_opts, "use_random_safety_reset",
        True)
    self._add_env_opt(env_opts, "random_reset_freq",
        10) # a random reset once every n-episodes (per env)
    self._add_env_opt(env_opts, "use_random_trunc",
        True)
    self._add_env_opt(env_opts, "random_trunc_freq",
        env_opts["episode_timeout_ub"]*5) # to remove temporal correlations between envs
    self._add_env_opt(env_opts, "random_trunc_freq_delta",
        env_opts["episode_timeout_ub"]*2) # to randomize trunc frequency between envs

    if not env_opts["single_task_ref_per_episode"]:
        # rescale the reset frequency by the number of task randomizations per episode
        env_opts["random_reset_freq"]=int(env_opts["random_reset_freq"]/\
            (env_opts["episode_timeout_lb"]/float(env_opts["n_steps_task_rand_lb"])))

    self._add_env_opt(env_opts, "action_repeat", 1) # frame skipping (different agent action every action_repeat
    # env substeps)

    self._add_env_opt(env_opts, "n_preinit_steps", 1) # n steps of the controllers to properly initialize everything

    self._add_env_opt(env_opts, "vec_ep_freq_metrics_db", 1) # n eps over which debug metrics are reported
    self._add_env_opt(env_opts, "demo_envs_perc", 0.0)
    self._add_env_opt(env_opts, "max_cmd_v", 1.0) # maximum cmd v for lin v actions (single component)
    self._add_env_opt(env_opts, "max_cmd_omega", 0.5) # maximum cmd v for omega v actions (single component)

    # action smoothing
    self._add_env_opt(env_opts, "use_action_smoothing", False)
    self._add_env_opt(env_opts, "smoothing_horizon_c", 0.01)
    self._add_env_opt(env_opts, "smoothing_horizon_d", 0.03)

    # whether to smooth vel error signal
    self._add_env_opt(env_opts, "use_track_reward_smoother", False)
    self._add_env_opt(env_opts, "smoothing_horizon_vel_err", 0.08)
    self._add_env_opt(env_opts, "track_rew_smoother", None)

    # rewards
    self._reward_map={}
    self._reward_lb_map={}

    self._add_env_opt(env_opts, "reward_lb_default", -0.5)
    self._add_env_opt(env_opts, "reward_ub_default", 1e6)

    self._add_env_opt(env_opts, "task_error_reward_lb", -0.5)
    self._add_env_opt(env_opts, "CoT_reward_lb", -0.5)
    self._add_env_opt(env_opts, "power_reward_lb", -0.5)
    self._add_env_opt(env_opts, "action_rate_reward_lb", -0.5)
    self._add_env_opt(env_opts, "jnt_vel_reward_lb", -0.5)
    self._add_env_opt(env_opts, "rhc_avrg_vel_reward_lb", -0.5)

    self._add_env_opt(env_opts, "add_power_reward", False)
    self._add_env_opt(env_opts, "add_CoT_reward", True)
    self._add_env_opt(env_opts, "use_CoT_wrt_ref", False)
    self._add_env_opt(env_opts, "add_action_rate_reward", True)
    self._add_env_opt(env_opts, "add_jnt_v_reward", False)

    self._add_env_opt(env_opts, "use_rhc_avrg_vel_tracking", False)

    # task tracking
    self._add_env_opt(env_opts, "use_relative_error", default=False) # use relative vel error (wrt current task norm)
    self._add_env_opt(env_opts, "directional_tracking", default=True) # whether to compute tracking error based on reference direction
    # if env_opts["add_angvel_ref_rand"]:
    #     env_opts["directional_tracking"]=False

    self._add_env_opt(env_opts, "use_L1_norm", default=True) # whether to use L1 norm for the error (otherwise L2)
    self._add_env_opt(env_opts, "use_exp_track_rew", default=True) # whether to use a reward of the form A*e^(B*x),
    # otherwise A*(1-B*x)

    self._add_env_opt(env_opts, "use_fail_idx_weight", default=False)
    self._add_env_opt(env_opts, "task_track_offset_exp", default=1.0)
    self._add_env_opt(env_opts, "task_track_scale_exp", default=5.0)
    self._add_env_opt(env_opts, "task_track_offset", default=1.0)
    self._add_env_opt(env_opts, "task_track_scale", default=1.5)
    self._add_env_opt(env_opts, "task_track_front_weight", default=1.0)
    self._add_env_opt(env_opts, "task_track_lat_weight", default=0.05)
    self._add_env_opt(env_opts, "task_track_vert_weight", default=0.05)
    self._add_env_opt(env_opts, "task_track_omega_z_weight", default=1.0)
    self._add_env_opt(env_opts, "task_track_omega_x_weight", default=0.05)
    self._add_env_opt(env_opts, "task_track_omega_y_weight", default=0.05)
    # if env_opts["add_angvel_ref_rand"]:
    #     env_opts["task_track_omega_x_weight"]=0.0
    #     env_opts["task_track_omega_y_weight"]=0.0
    #     env_opts["task_track_omega_z_weight"]=1.0

    # task pred tracking
    self._add_env_opt(env_opts, "task_pred_track_offset", default=1.0)
    self._add_env_opt(env_opts, "task_pred_track_scale", default=3.0)

    # energy penalties
    self._add_env_opt(env_opts, "CoT_offset", default=0.1)
    self._add_env_opt(env_opts, "CoT_scale", default=0.1)
    self._add_env_opt(env_opts, "power_offset", default=0.1)
    self._add_env_opt(env_opts, "power_scale", default=8e-4)

    # action rate penalty
    self._add_env_opt(env_opts, "action_rate_offset", default=0.1)
    self._add_env_opt(env_opts, "action_rate_scale", default=2.0)
    self._add_env_opt(env_opts, "action_rate_rew_d_weight", default=0.1)
    self._add_env_opt(env_opts, "action_rate_rew_c_weight", default=1.0)

    # jnt vel penalty
    self._add_env_opt(env_opts, "jnt_vel_offset", default=0.1)
    self._add_env_opt(env_opts, "jnt_vel_scale", default=2.0)

    # terminations
    self._add_env_opt(env_opts, "add_term_mpc_capsize", default=False) # add termination based on mpc capsizing prediction

    # observations
    self._add_env_opt(env_opts, "rhc_fail_idx_scale", default=1.0)
    self._add_env_opt(env_opts, "use_action_history", default=True) # whether to add information on past actions to obs
    self._add_env_opt(env_opts, "add_prev_actions_stats_to_obs", default=False) # add actions std, mean + last action over a horizon to obs (if self._use_action_history True)
    self._add_env_opt(env_opts, "actions_history_size", default=3)

    self._add_env_opt(env_opts, "add_mpc_contact_f_to_obs", default=True) # add estimate vertical contact f to obs
    self._add_env_opt(env_opts, "add_fail_idx_to_obs", default=True) # we need to observe mpc failure idx to correlate it with terminations

    self._add_env_opt(env_opts, "use_linvel_from_rhc", default=True) # no lin vel meas available, we use est. from mpc
    self._add_env_opt(env_opts, "add_flight_info", default=True) # add feedback info on pos, remaining duration, length,
    # apex and landing height of flight phases
    self._add_env_opt(env_opts, "add_flight_settings", default=False) # add feedback info on current flight requests for mpc

    self._add_env_opt(env_opts, "use_prob_based_stepping", default=False) # interpret actions as stepping prob (never worked)

    self._add_env_opt(env_opts, "add_rhc_cmds_to_obs", default=True) # add the rhc cmds which are being applied now to the robot

    if "add_periodic_clock_to_obs" not in env_opts:
        # add a sin/cos clock to obs (useful if task is explicitly
        # time-dependent)
        self._add_env_opt(env_opts, "add_periodic_clock_to_obs", default=False)

    self._add_env_opt(env_opts, "add_heightmap_obs", default=False)

    # temporarily creating robot state client to get some data
    robot_state_tmp = RobotState(namespace=namespace,
        is_server=False,
        safe=False,
        verbose=verbose,
        vlevel=vlevel,
        with_gpu_mirror=False,
        with_torch_view=False,
        enable_height_sensor=env_opts["add_heightmap_obs"])
    robot_state_tmp.run()
    rhc_status_tmp = RhcStatus(is_server=False,
        namespace=namespace,
        verbose=verbose,
        vlevel=vlevel,
        with_torch_view=False,
        with_gpu_mirror=False)
    rhc_status_tmp.run()
    rhc_refs_tmp = RhcRefs(namespace=namespace,
        is_server=False,
        safe=False,
        verbose=verbose,
        vlevel=vlevel,
        with_gpu_mirror=False,
        with_torch_view=False)
    rhc_refs_tmp.run()
    n_jnts = robot_state_tmp.n_jnts()
    self._contact_names = robot_state_tmp.contact_names()
    self._n_contacts = len(self._contact_names)
    self._flight_info_size=rhc_refs_tmp.flight_info.n_cols
    self._flight_setting_size=rhc_refs_tmp.flight_settings_req.n_cols
    # height sensor metadata (if present)
    self._height_grid_size = None
    self._height_flat_dim = 0
    if env_opts["add_heightmap_obs"]:
        self._height_grid_size = robot_state_tmp.height_sensor.grid_size
        self._height_flat_dim = robot_state_tmp.height_sensor.n_cols

    robot_state_tmp.close()
    rhc_status_tmp.close()
    rhc_refs_tmp.close()

    # defining obs dimension
    obs_dim=3 # normalized gravity vector in base frame
    obs_dim+=6 # meas twist in base frame
    obs_dim+=2*n_jnts # joint pos + vel
    if env_opts["add_mpc_contact_f_to_obs"]:
        obs_dim+=3*self._n_contacts
    obs_dim+=6 # twist reference in base frame
    if env_opts["add_fail_idx_to_obs"]:
        obs_dim+=1 # rhc controller failure index
    if env_opts["add_term_mpc_capsize"]:
        obs_dim+=3 # gravity vec from mpc
    if env_opts["use_rhc_avrg_vel_tracking"]:
        obs_dim+=6 # mpc avrg twist
    if env_opts["add_flight_info"]: # contact pos, remaining duration, length, apex, landing height, landing dx, dy
        obs_dim+=self._flight_info_size
    if env_opts["add_flight_settings"]:
        obs_dim+=self._flight_setting_size
    if env_opts["add_rhc_cmds_to_obs"]:
        obs_dim+=3*n_jnts
    if env_opts["use_action_history"]:
        if env_opts["add_prev_actions_stats_to_obs"]:
            obs_dim+=3*actions_dim # previous agent actions statistics (mean, std + last action)
        else: # full action history
            obs_dim+=env_opts["actions_history_size"]*actions_dim
    if env_opts["use_action_smoothing"]:
        obs_dim+=actions_dim # it's better to also add the smoothed actions as obs
    if env_opts["add_periodic_clock_to_obs"]:
        obs_dim+=2
    if env_opts["add_heightmap_obs"]:
        obs_dim+=self._height_flat_dim
    # Agent task reference
    self._add_env_opt(env_opts, "use_pof0", default=True) # with some prob, references will be null
    self._add_env_opt(env_opts, "pof0_linvel", default=0.3) # [0, 1] prob of both linvel and omega refs being null(from bernoulli distr)
    self._add_env_opt(env_opts, "pof0_omega", default=0.3) # [0, 1] prob of both linvel and omega refs being null(from bernoulli distr)
    self._add_env_opt(env_opts, "max_linvel_ref", default=0.3) # m/s
    self._add_env_opt(env_opts, "max_angvel_ref", default=0.0) # rad/s
    if env_opts["add_angvel_ref_rand"]:
        env_opts["max_angvel_ref"]=0.4

    # ready to init base class
    self._this_child_path = os.path.abspath(__file__)
    AugMPCTrainingEnvBase.__init__(self,
        namespace=namespace,
        obs_dim=obs_dim,
        actions_dim=actions_dim,
        env_name=env_name,
        verbose=verbose,
        vlevel=vlevel,
        use_gpu=use_gpu,
        dtype=dtype,
        debug=debug,
        override_agent_refs=override_agent_refs,
        timeout_ms=timeout_ms,
        env_opts=env_opts)
+
    def _custom_post_init(self):
        """One-shot environment setup run after base-class construction.

        Allocates reusable tensors (base-frame unit vectors, twist-reference
        bounds), registers episodic debug-data streams, configures reward
        weights/clipping, action and observation bounds, and optional helpers
        (tracking-reward smoother, periodic clock). Assumes the base class has
        already populated `self._env_opts`, `self._agent_refs`, `self._rhc_cmds`
        and the obs/action bound tensors.
        """

        device = "cuda" if self._use_gpu else "cpu"

        self._update_jnt_blacklist() # update blacklist for joints

        # constant base-frame unit vectors (reuse to avoid per-call allocations)
        self._base_x_dir = torch.zeros((self._n_envs, 3), dtype=self._dtype, device=device)
        self._base_x_dir[:, 0] = 1.0
        self._base_y_dir = torch.zeros((self._n_envs, 3), dtype=self._dtype, device=device)
        self._base_y_dir[:, 1] = 1.0

        self._twist_ref_lb = torch.full((1, 6), dtype=self._dtype, device=device,
                            fill_value=-1.5)
        self._twist_ref_ub = torch.full((1, 6), dtype=self._dtype, device=device,
                            fill_value=1.5)

        # task reference parameters (world frame)
        # lin vel (planar only: vertical ref is pinned to zero)
        self._twist_ref_lb[0, 0] = -self._env_opts["max_linvel_ref"]
        self._twist_ref_lb[0, 1] = -self._env_opts["max_linvel_ref"]
        self._twist_ref_lb[0, 2] = 0.0
        self._twist_ref_ub[0, 0] = self._env_opts["max_linvel_ref"]
        self._twist_ref_ub[0, 1] = self._env_opts["max_linvel_ref"]
        self._twist_ref_ub[0, 2] = 0.0
        # angular vel (only yaw rate is randomized; roll/pitch refs stay zero)
        self._twist_ref_lb[0, 3] = 0.0
        self._twist_ref_lb[0, 4] = 0.0
        self._twist_ref_lb[0, 5] = -self._env_opts["max_angvel_ref"]
        self._twist_ref_ub[0, 3] = 0.0
        self._twist_ref_ub[0, 4] = 0.0
        self._twist_ref_ub[0, 5] = self._env_opts["max_angvel_ref"]

        # affine map from normalized [-1, 1] samples to [lb, ub]
        self._twist_ref_offset = (self._twist_ref_ub + self._twist_ref_lb)/2.0
        self._twist_ref_scale = (self._twist_ref_ub - self._twist_ref_lb)/2.0

        # adding some custom db info
        agent_twist_ref = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=False)
        agent_twist_ref_data = EpisodicData("AgentTwistRefs", agent_twist_ref,
                            ["v_x", "v_y", "v_z", "omega_x", "omega_y", "omega_z"],
                            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
                            store_transitions=self._full_db,
                            max_ep_duration=self._max_ep_length())
        rhc_fail_idx = EpisodicData("RhcFailIdx", self._rhc_fail_idx(gpu=False), ["rhc_fail_idx"],
                            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
                            store_transitions=self._full_db,
                            max_ep_duration=self._max_ep_length())

        # one (x, y, z) base-local force column triple per contact
        f_names=[]
        for contact in self._contact_names:
            f_names.append(f"fc_{contact}_x_base_loc")
            f_names.append(f"fc_{contact}_y_base_loc")
            f_names.append(f"fc_{contact}_z_base_loc")
        rhc_contact_f = EpisodicData("RhcContactForces",
                            self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=False),
                            f_names,
                            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
                            store_transitions=self._full_db,
                            max_ep_duration=self._max_ep_length())

        # CoT + drained mech power buffer, filled by _mech_pow/_cost_of_transport
        self._pow_db_data=torch.full(size=(self._n_envs,2),
                            dtype=self._dtype, device="cpu",
                            fill_value=-1.0)
        power_db = EpisodicData("Power",
                            self._pow_db_data,
                            ["CoT", "W"],
                            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
                            store_transitions=self._full_db,
                            max_ep_duration=self._max_ep_length())

        self._track_error_db=torch.full_like(agent_twist_ref, fill_value=0.0)
        # NOTE(review): agent_twist_ref is passed here while updates use
        # self._track_error_db (see _get_custom_db_data); presumably EpisodicData
        # only uses this arg for shape/dtype — confirm, else pass
        # self._track_error_db for consistency with the Power stream above
        task_err_db = EpisodicData("TrackingError",
                            agent_twist_ref,
                            ["e_vx", "e_vy", "e_vz", "e_omegax", "e_omegay", "e_omegaz"],
                            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
                            store_transitions=self._full_db,
                            max_ep_duration=self._max_ep_length())

        self._add_custom_db_data(db_data=agent_twist_ref_data)
        self._add_custom_db_data(db_data=rhc_fail_idx)
        self._add_custom_db_data(db_data=rhc_contact_f)
        self._add_custom_db_data(db_data=power_db)
        self._add_custom_db_data(db_data=task_err_db)

        # rewards: weights for the measured-twist tracking error term
        self._task_err_weights = torch.full((1, 6), dtype=self._dtype, device=device,
                            fill_value=0.0)
        if self._env_opts["directional_tracking"]:
            self._task_err_weights[0, 0] = self._env_opts["task_track_front_weight"] # frontal
            self._task_err_weights[0, 1] = self._env_opts["task_track_lat_weight"] # lateral
            self._task_err_weights[0, 2] = self._env_opts["task_track_vert_weight"] # vertical
            self._task_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"]
            self._task_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"]
            self._task_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"]
        else:
            self._task_err_weights[0, 0] = self._env_opts["task_track_front_weight"]
            self._task_err_weights[0, 1] = self._env_opts["task_track_front_weight"]
            self._task_err_weights[0, 2] = 0.1*self._env_opts["task_track_front_weight"]
            self._task_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"]
            self._task_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"]
            self._task_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"]

        # weights for the MPC-predicted-twist tracking error term
        # (currently identical to the measured-twist weights above)
        self._task_pred_err_weights = torch.full((1, 6), dtype=self._dtype, device=device,
                            fill_value=0.0)
        if self._env_opts["directional_tracking"]:
            self._task_pred_err_weights[0, 0] = self._env_opts["task_track_front_weight"]
            self._task_pred_err_weights[0, 1] = self._env_opts["task_track_lat_weight"]
            self._task_pred_err_weights[0, 2] = self._env_opts["task_track_vert_weight"]
            self._task_pred_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"]
            self._task_pred_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"]
            self._task_pred_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"]
        else:
            self._task_pred_err_weights[0, 0] = self._env_opts["task_track_front_weight"]
            self._task_pred_err_weights[0, 1] = self._env_opts["task_track_front_weight"]
            self._task_pred_err_weights[0, 2] = 0.1*self._env_opts["task_track_front_weight"]
            self._task_pred_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"]
            self._task_pred_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"]
            self._task_pred_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"]

        # per-joint weights for the mech-power penalty (uniform for now)
        self._power_penalty_weights = torch.full((1, self._n_jnts), dtype=self._dtype, device=device,
                            fill_value=1.0)
        self._power_penalty_weights_sum = torch.sum(self._power_penalty_weights).item()
        subr_names=self._get_rewards_names() # initializes

        # reward clipping
        self._reward_thresh_lb[:, :] = self._env_opts["reward_lb_default"]
        self._reward_thresh_ub[:, :]= self._env_opts["reward_ub_default"]

        # per-reward lower-bound overrides from env opts
        for reward_name, env_opt_key in self._reward_lb_map.items():
            if reward_name in self._reward_map:
                self._reward_thresh_lb[:, self._reward_map[reward_name]] = self._env_opts[env_opt_key]

        # obs bounds
        self._obs_threshold_lb = -1e3 # used for clipping observations
        self._obs_threshold_ub = 1e3

        # actions
        if not self._env_opts["use_prob_based_stepping"]:
            # NOTE(review): hard-coded slice [6:10] for contact-flag actions,
            # while the bounds below use self._actions_map["contact_flag_start"]
            # — verify these stay in sync for this robot's action layout
            self._is_continuous_actions[6:10]=False

        v_cmd_max = self._env_opts["max_cmd_v"]
        omega_cmd_max = self._env_opts["max_cmd_omega"]
        self._actions_lb[:, 0:3] = -v_cmd_max
        self._actions_ub[:, 0:3] = v_cmd_max
        self._actions_lb[:, 3:6] = -omega_cmd_max # twist cmds
        self._actions_ub[:, 3:6] = omega_cmd_max
        if "contact_flag_start" in self._actions_map:
            idx=self._actions_map["contact_flag_start"]
            if self._env_opts["use_prob_based_stepping"]:
                self._actions_lb[:, idx:idx+self._n_contacts] = 0.0 # contact flags
                self._actions_ub[:, idx:idx+self._n_contacts] = 1.0
            else:
                self._actions_lb[:, idx:idx+self._n_contacts] = -1.0
                self._actions_ub[:, idx:idx+self._n_contacts] = 1.0

        self.default_action[:, :] = (self._actions_ub+self._actions_lb)/2.0
        # self.default_action[:, ~self._is_continuous_actions] = 1.0
        self.safe_action[:, :] = self.default_action
        if "contact_flag_start" in self._actions_map: # safe actions for contacts is 1 (keep contact)
            idx=self._actions_map["contact_flag_start"]
            self.safe_action[:, idx:idx+self._n_contacts] = 1.0

        # assign obs bounds (useful if not using automatic obs normalization)
        # each pattern is matched as a substring against obs names; first match wins
        obs_names=self._get_obs_names()
        obs_patterns=["gn",
            "linvel",
            "omega",
            "q_jnt",
            "v_jnt",
            "fc",
            "rhc_fail",
            "rhc_cmd_q",
            "rhc_cmd_v",
            "rhc_cmd_eff",
            "flight_pos"
            ]
        obs_ubs=[1.0,
            5*v_cmd_max,
            5*omega_cmd_max,
            2*torch.pi,
            30.0,
            2.0,
            1.0,
            2*torch.pi,
            30.0,
            200.0,
            self._n_nodes_rhc.mean().item()]
        obs_lbs=[-1.0,
            -5*v_cmd_max,
            -5*omega_cmd_max,
            -2*torch.pi,
            -30.0,
            -2.0,
            0.0,
            -2*torch.pi,
            -30.0,
            -200.0,
            0.0]
        obs_bounds = {name: (lb, ub) for name, lb, ub in zip(obs_patterns, obs_lbs, obs_ubs)}

        for i in range(len(obs_names)):
            obs_name=obs_names[i]
            for pattern in obs_patterns:
                if pattern in obs_name:
                    lb=obs_bounds[pattern][0]
                    ub=obs_bounds[pattern][1]
                    self._obs_lb[:, i]=lb
                    self._obs_ub[:, i]=ub
                    break

        # handle action memory buffer in obs
        if self._env_opts["use_action_history"]: # just history stats
            if self._env_opts["add_prev_actions_stats_to_obs"]:
                # NOTE(review): this i is a dead store — the generator
                # expressions below bind their own i
                i=0
                prev_actions_idx = next((i for i, s in enumerate(obs_names) if "_prev_act" in s), None)
                prev_actions_mean_idx=next((i for i, s in enumerate(obs_names) if "_avrg_act" in s), None)
                prev_actions_std_idx=next((i for i, s in enumerate(obs_names) if "_std_act" in s), None)

                # assume actions are always normalized in [-1, 1] by agent
                if prev_actions_idx is not None:
                    self._obs_lb[:, prev_actions_idx:prev_actions_idx+self.actions_dim()]=-1.0
                    self._obs_ub[:, prev_actions_idx:prev_actions_idx+self.actions_dim()]=1.0
                if prev_actions_mean_idx is not None:
                    self._obs_lb[:, prev_actions_mean_idx:prev_actions_mean_idx+self.actions_dim()]=-1.0
                    self._obs_ub[:, prev_actions_mean_idx:prev_actions_mean_idx+self.actions_dim()]=1.0
                if prev_actions_std_idx is not None:
                    self._obs_lb[:, prev_actions_std_idx:prev_actions_std_idx+self.actions_dim()]=0
                    self._obs_ub[:, prev_actions_std_idx:prev_actions_std_idx+self.actions_dim()]=1.0

            else: # full history
                # NOTE(review): dead store, same as above
                i=0
                first_action_mem_buffer_idx = next((i for i, s in enumerate(obs_names) if "_m1_act" in s), None)
                if first_action_mem_buffer_idx is not None:
                    action_idx_start_idx_counter=first_action_mem_buffer_idx
                    for j in range(self._env_opts["actions_history_size"]):
                        self._obs_lb[:, action_idx_start_idx_counter:action_idx_start_idx_counter+self.actions_dim()]=-1.0
                        self._obs_ub[:, action_idx_start_idx_counter:action_idx_start_idx_counter+self.actions_dim()]=1.0
                        action_idx_start_idx_counter+=self.actions_dim()

        # some aux data to avoid allocations at training runtime
        self._rhc_twist_cmd_rhc_world=self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu).detach().clone()
        self._rhc_twist_cmd_rhc_h=self._rhc_twist_cmd_rhc_world.detach().clone()
        self._agent_twist_ref_current_w=self._rhc_twist_cmd_rhc_world.detach().clone()
        self._agent_twist_ref_current_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone()
        self._substep_avrg_root_twist_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone()
        self._step_avrg_root_twist_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone()
        self._root_twist_avrg_rhc_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone()
        self._root_twist_avrg_rhc_base_loc_next=self._rhc_twist_cmd_rhc_world.detach().clone()

        # per-env random thresholds used by probability-based stepping
        self._random_thresh_contacts=torch.rand((self._n_envs,self._n_contacts), device=device)
        # aux data
        self._task_err_scaling = torch.zeros((self._n_envs, 1),dtype=self._dtype,device=device)

        # Bernoulli success probs (1 - pof0) for zeroing twist refs
        self._pof1_b_linvel= torch.full(size=(self._n_envs,1),dtype=self._dtype,device=device,fill_value=1-self._env_opts["pof0_linvel"])
        self._pof1_b_omega = torch.full(size=(self._n_envs,1),dtype=self._dtype,device=device,fill_value=1-self._env_opts["pof0_omega"])
        self._bernoulli_coeffs_linvel = self._pof1_b_linvel.clone()
        self._bernoulli_coeffs_linvel[:, :] = 1.0
        self._bernoulli_coeffs_omega = self._pof1_b_omega.clone()
        self._bernoulli_coeffs_omega[:, :] = 1.0

        # smoothing
        self._track_rew_smoother=None
        if self._env_opts["use_track_reward_smoother"]:
            sub_reward_proxy=self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)[:, 0:1]
            smoothing_dt=self._substep_dt
            if not self._is_substep_rew[self._reward_map["task_error"]]: # assuming first reward is tracking
                smoothing_dt=self._substep_dt*self._action_repeat
            self._track_rew_smoother=ExponentialSignalSmoother(
                name=self.__class__.__name__+"VelErrorSmoother",
                signal=sub_reward_proxy, # same dimension of vel error
                update_dt=smoothing_dt,
                smoothing_horizon=self._env_opts["smoothing_horizon_vel_err"],
                target_smoothing=0.5,
                debug=self._is_debug,
                dtype=self._dtype,
                use_gpu=self._use_gpu)

        # if we need the action rate, we also need the action history
        if self._env_opts["add_action_rate_reward"]:
            if not self._env_opts["use_action_history"]:
                # NOTE(review): this error message looks truncated
                # ("... but " with no continuation) — consider completing it
                Journal.log(self.__class__.__name__,
                    "_custom_post_init",
                    "add_action_rate_reward is True, but ",
                    LogType.EXCEP,
                    throw_when_excep=True)

            history_size=self._env_opts["actions_history_size"]
            if history_size < 2:
                Journal.log(self.__class__.__name__,
                    "_custom_post_init",
                    f"add_action_rate_reward requires actions history ({history_size}) to be >=2!",
                    LogType.EXCEP,
                    throw_when_excep=True)

        # add periodic timer if required
        self._periodic_clock=None
        if self._env_opts["add_periodic_clock_to_obs"]:
            self._add_env_opt(self._env_opts, "clock_period",
                default=int(1.5*self._action_repeat*self.task_rand_timeout_bounds()[1])) # correcting with n substeps
            # (we are using the _substep_abs_counter counter)
            self._periodic_clock=PeriodicTimer(counter=self._substep_abs_counter,
                                period=self._env_opts["clock_period"],
                                dtype=self._dtype,
                                device=self._device)
+
+ def get_file_paths(self):
+ paths=AugMPCTrainingEnvBase.get_file_paths(self)
+ paths.append(self._this_child_path)
+ return paths
+
+ def get_aux_dir(self):
+ aux_dirs = []
+ path_getter = PathsGetter()
+ aux_dirs.append(path_getter.RHCDIR)
+ return aux_dirs
+
+ def _get_reward_scaling(self):
+ if self._env_opts["single_task_ref_per_episode"]:
+ return self._env_opts["n_steps_task_rand_ub"]
+ else:
+ return self._env_opts["episode_timeout_ub"]
+
+ def _max_ep_length(self):
+ if self._env_opts["single_task_ref_per_episode"]:
+ return self._env_opts["n_steps_task_rand_ub"]
+ else:
+ return self._env_opts["episode_timeout_ub"]
+
+ def _check_sub_truncations(self):
+ # overrides parent
+ sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu)
+ sub_truncations[:, 0:1] = self._ep_timeout_counter.time_limits_reached()
+ if self._env_opts["single_task_ref_per_episode"]:
+ sub_truncations[:, 1:2] = self._task_rand_counter.time_limits_reached()
+
+ def _check_sub_terminations(self):
+ # default behaviour-> to be overriden by child
+ sub_terminations = self._sub_terminations.get_torch_mirror(gpu=self._use_gpu)
+
+ # terminate if mpc just failed
+ sub_terminations[:, 0:1] = self._rhc_status.fails.get_torch_mirror(gpu=self._use_gpu)
+
+ # check if robot is capsizing
+ robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
+ check_capsize(quat=robot_q_meas,max_angle=self._max_pitch_angle,
+ output_t=self._is_capsized)
+ sub_terminations[:, 1:2] = self._is_capsized
+
+ if self._env_opts["add_term_mpc_capsize"]:
+ # check if robot is about to capsize accordin to MPC
+ robot_q_pred = self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu)
+ check_capsize(quat=robot_q_pred,max_angle=self._max_pitch_angle,
+ output_t=self._is_rhc_capsized)
+ sub_terminations[:, 2:3] = self._is_rhc_capsized
+
+ def _custom_reset(self):
+ return None
+
+ def reset(self):
+ AugMPCTrainingEnvBase.reset(self)
+
+ def _pre_substep(self):
+ pass
+
+ def _custom_post_step(self,episode_finished):
+ # executed after checking truncations and terminations and remote env reset
+ if self._use_gpu:
+ time_to_rand_or_ep_finished = torch.logical_or(self._task_rand_counter.time_limits_reached().cuda(),episode_finished)
+ self.randomize_task_refs(env_indxs=time_to_rand_or_ep_finished.flatten())
+ else:
+ time_to_rand_or_ep_finished = torch.logical_or(self._task_rand_counter.time_limits_reached(),episode_finished)
+ self.randomize_task_refs(env_indxs=time_to_rand_or_ep_finished.flatten())
+ # task refs are randomized in world frame -> we rotate them in base local
+ # (not super efficient, we should do it just for the finished envs)
+ self._update_loc_twist_refs()
+
+ if self._track_rew_smoother is not None: # reset smoother
+ self._track_rew_smoother.reset_all(to_be_reset=episode_finished.flatten(),
+ value=0.0)
+
+ def _custom_post_substp_pre_rew(self):
+ self._update_loc_twist_refs()
+
+ def _custom_post_substp_post_rew(self):
+ pass
+
+ def _update_loc_twist_refs(self):
+ # get fresh robot orientation
+ if not self._override_agent_refs:
+ robot_q = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
+ # rotate agent ref from world to robot base
+ world2base_frame(t_w=self._agent_twist_ref_current_w, q_b=robot_q,
+ t_out=self._agent_twist_ref_current_base_loc)
+ # write it to agent refs tensors
+ self._agent_refs.rob_refs.root_state.set(data_type="twist", data=self._agent_twist_ref_current_base_loc,
+ gpu=self._use_gpu)
+
+ def _apply_actions_to_rhc(self):
+
+ self._set_rhc_refs()
+
+ self._write_rhc_refs()
+
    def _set_rhc_refs(self):
        """Fill the MPC reference tensors from the latest agent actions.

        Twist actions (components 0:6) are interpreted in the MPC base frame
        and rotated into the MPC horizontal frame before being written;
        contact-flag actions become boolean contact references, either
        probabilistically or via a fixed threshold. The actual shared-memory
        write happens afterwards in _write_rhc_refs.
        """

        action_to_be_applied = self.get_actual_actions() # see _get_action_names() to get
        # the meaning of each component of this tensor

        rhc_latest_twist_cmd = self._rhc_refs.rob_refs.root_state.get(data_type="twist", gpu=self._use_gpu)
        rhc_latest_contact_ref = self._rhc_refs.contact_flags.get_torch_mirror(gpu=self._use_gpu)
        # NOTE(review): rhc_latest_pos_ref is fetched but never used nor written
        # in this method — confirm it is intentional (it is still synched in
        # _write_rhc_refs)
        rhc_latest_pos_ref = self._rhc_refs.rob_refs.contact_pos.get(data_type="p_z", gpu=self._use_gpu)
        rhc_q=self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) # this is always
        # available

        # reference twist for MPC is assumed to always be specified in MPC's
        # horizontal frame, while agent actions are interpreted as in MPC's
        # base frame -> we need to rotate the actions into the horizontal frame
        base2world_frame(t_b=action_to_be_applied[:, 0:6],q_b=rhc_q,t_out=self._rhc_twist_cmd_rhc_world)
        w2hor_frame(t_w=self._rhc_twist_cmd_rhc_world,q_b=rhc_q,t_out=self._rhc_twist_cmd_rhc_h)

        rhc_latest_twist_cmd[:, 0:6] = self._rhc_twist_cmd_rhc_h

        # self._rhc_refs.rob_refs.root_state.set(data_type="p", data=rhc_latest_p_ref,
        #     gpu=self._use_gpu)
        self._rhc_refs.rob_refs.root_state.set(data_type="twist", data=rhc_latest_twist_cmd,
            gpu=self._use_gpu)

        # contact flags
        idx=self._actions_map["contact_flag_start"]
        if self._env_opts["use_prob_based_stepping"]:
            # encode actions as probs
            self._random_thresh_contacts.uniform_() # random values in-place between 0 and 1
            rhc_latest_contact_ref[:, :] = action_to_be_applied[:, idx:idx+self._n_contacts] >= self._random_thresh_contacts # keep contact with
            # probability action_to_be_applied[:, 6:10]
        else: # just use a threshold
            rhc_latest_contact_ref[:, :] = action_to_be_applied[:, idx:idx+self._n_contacts] > self._env_opts["step_thresh"]
        # actually apply actions to controller
+
+ def _write_rhc_refs(self):
+
+ if self._use_gpu:
+ # GPU->CPU --> we cannot use asynchronous data transfer since it's unsafe
+ self._rhc_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False) # write from gpu to cpu mirror
+ self._rhc_refs.contact_flags.synch_mirror(from_gpu=True,non_blocking=False)
+ self._rhc_refs.rob_refs.contact_pos.synch_mirror(from_gpu=True,non_blocking=False)
+
+ self._rhc_refs.rob_refs.root_state.synch_all(read=False, retry=True) # write mirror to shared mem
+ self._rhc_refs.contact_flags.synch_all(read=False, retry=True)
+ self._rhc_refs.rob_refs.contact_pos.synch_all(read=False, retry=True)
+
    def _override_refs(self,
            env_indxs: torch.Tensor = None):
        """Overwrite world-frame agent twist targets from externally-set shared refs.

        Reads the shared agent references from memory (syncing to GPU when
        needed) and copies the linear-velocity target and the yaw-rate target
        into self._agent_twist_ref_current_w.

        env_indxs: unused here — the copy is applied to all envs.
        """

        # runs at every post_step
        self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem
        if self._use_gpu:
            # copies latest refs to GPU
            self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False)

        agent_linvel_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="v",
            gpu=self._use_gpu)

        agent_yaw_omega_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="omega",
            gpu=self._use_gpu)

        # self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] + \
        #     agent_p_ref_current[:, 0:2]
        self._agent_twist_ref_current_w[:, 0:3]=agent_linvel_ref_current # set linvel target

        # only the z (yaw) component of the angular velocity ref is taken
        self._agent_twist_ref_current_w[:, 5:6]=agent_yaw_omega_ref_current[:, 2:3] # set yaw ang. vel target from shared mem
+
    def _fill_substep_obs(self,
            obs: torch.Tensor):
        """Fill the high-rate (per-substep) slice of the observation tensor.

        Writes linear velocity (measured, or the MPC estimate when
        use_linvel_from_rhc is set), measured angular velocity and measured
        joint velocities into `obs` at the column offsets from self._obs_map.
        """

        # measured stuff
        robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu)
        robot_jnt_v_meas = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu)

        if self._env_opts["use_linvel_from_rhc"]:
            # twist estimate from mpc
            robot_twist_rhc_base_loc_next = self._rhc_cmds.root_state.get(data_type="twist",gpu=self._use_gpu)
            obs[:, self._obs_map["linvel_meas"]:(self._obs_map["linvel_meas"]+3)] = robot_twist_rhc_base_loc_next[:, 0:3]
        else:
            obs[:, self._obs_map["linvel_meas"]:(self._obs_map["linvel_meas"]+3)] = robot_twist_meas_base_loc[:, 0:3]
        # angular velocity is always the measured one
        obs[:, self._obs_map["omega_meas"]:(self._obs_map["omega_meas"]+3)] = robot_twist_meas_base_loc[:, 3:6]

        obs[:, self._obs_map["v_jnt"]:(self._obs_map["v_jnt"]+self._n_jnts)] = robot_jnt_v_meas
+
    def _fill_step_obs(self,
            obs: torch.Tensor):
        """Fill the per-step slice of the observation tensor.

        Writes measured state (gravity direction, joint positions), agent twist
        references, and all optional observation groups enabled through
        self._env_opts, at the column offsets recorded in self._obs_map.
        """

        # measured stuff
        robot_gravity_norm_base_loc = self._robot_state.root_state.get(data_type="gn",gpu=self._use_gpu)
        # NOTE(review): robot_twist_meas_base_loc and robot_jnt_v_meas are
        # fetched but not used in this method — confirm they can be dropped
        robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu)
        robot_jnt_q_meas = self._robot_state.jnts_state.get(data_type="q",gpu=self._use_gpu)
        if self._jnt_q_blacklist_idxs is not None: # we don't want to read joint pos from blacklist
            robot_jnt_q_meas[:, self._jnt_q_blacklist_idxs]=0.0
        robot_jnt_v_meas = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu)

        # twist estimate from mpc
        robot_twist_rhc_base_loc_next = self._rhc_cmds.root_state.get(data_type="twist",gpu=self._use_gpu)
        # cmds for jnt imp to be applied next
        robot_jnt_q_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="q",gpu=self._use_gpu)
        robot_jnt_v_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="v",gpu=self._use_gpu)
        robot_jnt_eff_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="eff",gpu=self._use_gpu)

        flight_info_now = self._rhc_refs.flight_info.get(data_type="all",gpu=self._use_gpu)
        flight_settings_now = self._rhc_refs.flight_settings_req.get(data_type="all",gpu=self._use_gpu)

        # refs
        agent_twist_ref = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu)

        obs[:, self._obs_map["gn_base"]:(self._obs_map["gn_base"]+3)] = robot_gravity_norm_base_loc # norm. gravity vector in base frame

        obs[:, self._obs_map["q_jnt"]:(self._obs_map["q_jnt"]+self._n_jnts)] = robot_jnt_q_meas # meas jnt pos
        obs[:, self._obs_map["twist_ref"]:(self._obs_map["twist_ref"]+6)] = agent_twist_ref # high lev agent refs to be tracked

        if self._env_opts["add_mpc_contact_f_to_obs"]:
            n_forces=3*len(self._contact_names)
            obs[:, self._obs_map["contact_f_mpc"]:(self._obs_map["contact_f_mpc"]+n_forces)] = self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=self._use_gpu)
        if self._env_opts["add_fail_idx_to_obs"]:
            obs[:, self._obs_map["rhc_fail_idx"]:(self._obs_map["rhc_fail_idx"]+1)] = self._rhc_fail_idx(gpu=self._use_gpu)
        if self._env_opts["add_term_mpc_capsize"]:
            obs[:, self._obs_map["gn_base_mpc"]:(self._obs_map["gn_base_mpc"]+3)] = self._rhc_cmds.root_state.get(data_type="gn",gpu=self._use_gpu)
        if self._env_opts["use_rhc_avrg_vel_tracking"]:
            self._get_avrg_rhc_root_twist(out=self._root_twist_avrg_rhc_base_loc, base_loc=True)
            obs[:, self._obs_map["avrg_twist_mpc"]:(self._obs_map["avrg_twist_mpc"]+6)] = self._root_twist_avrg_rhc_base_loc
        if self._env_opts["add_flight_info"]:
            obs[:, self._obs_map["flight_info"]:(self._obs_map["flight_info"]+self._flight_info_size)] = flight_info_now
        if self._env_opts["add_flight_settings"]:
            obs[:, self._obs_map["flight_settings_req"]:(self._obs_map["flight_settings_req"]+self._flight_setting_size)] = \
                flight_settings_now

        if self._env_opts["add_rhc_cmds_to_obs"]:
            obs[:, self._obs_map["rhc_cmds_q"]:(self._obs_map["rhc_cmds_q"]+self._n_jnts)] = robot_jnt_q_rhc_applied_next
            obs[:, self._obs_map["rhc_cmds_v"]:(self._obs_map["rhc_cmds_v"]+self._n_jnts)] = robot_jnt_v_rhc_applied_next
            obs[:, self._obs_map["rhc_cmds_eff"]:(self._obs_map["rhc_cmds_eff"]+self._n_jnts)] = robot_jnt_eff_rhc_applied_next
        if self._env_opts["use_action_history"]:
            if self._env_opts["add_prev_actions_stats_to_obs"]: # just add last, std and mean to obs
                obs[:, self._obs_map["action_history_prev"]:(self._obs_map["action_history_prev"]+self.actions_dim())]=self._act_mem_buffer.get(idx=0)
                obs[:, self._obs_map["action_history_avrg"]:(self._obs_map["action_history_avrg"]+self.actions_dim())]=self._act_mem_buffer.mean(clone=False)
                obs[:, self._obs_map["action_history_std"]:(self._obs_map["action_history_std"]+self.actions_dim())]=self._act_mem_buffer.std(clone=False)
            else: # add whole memory buffer to obs
                next_idx=self._obs_map["action_history"]
                for i in range(self._env_opts["actions_history_size"]):
                    obs[:, next_idx:(next_idx+self.actions_dim())]=self._act_mem_buffer.get(idx=i) # get all (n_envs x (obs_dim x horizon))
                    next_idx+=self.actions_dim()

        if self._env_opts["use_action_smoothing"]: # adding smoothed actions
            obs[:, self._obs_map["action_smoothing"]:(self._obs_map["action_smoothing"]+self.actions_dim())]=self.get_actual_actions(normalized=True)
            # NOTE(review): next_idx is only bound in the full-action-history
            # branch above; with action smoothing (or the clock below) enabled
            # but no full history this raises NameError — verify option combos
            next_idx+=self.actions_dim()

        if self._env_opts["add_periodic_clock_to_obs"]:
            obs[:, next_idx:(next_idx+2)]=self._periodic_clock.get()
            next_idx+=2
        if self._env_opts["add_heightmap_obs"]:
            hm = self._robot_state.height_sensor.get(gpu=self._use_gpu)
            obs[:, self._obs_map["heightmap"]:(self._obs_map["heightmap"]+self._height_flat_dim)] = hm
+
    def _get_custom_db_data(self,
            episode_finished,
            ignore_ep_end):
        """Push the latest samples into every custom episodic-db stream.

        episode_finished: bool tensor flagging envs whose episode just ended;
        moved to CPU here because the db buffers live on CPU.
        ignore_ep_end: forwarded as-is to each stream's update.
        """
        episode_finished = episode_finished.cpu()
        self.custom_db_data["AgentTwistRefs"].update(
            new_data=self._agent_refs.rob_refs.root_state.get(data_type="twist", gpu=False),
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)
        self.custom_db_data["RhcFailIdx"].update(new_data=self._rhc_fail_idx(gpu=False),
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)
        self.custom_db_data["RhcContactForces"].update(
            new_data=self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=False),
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)
        # _pow_db_data / _track_error_db are filled by the reward helpers
        # (_mech_pow, _cost_of_transport, _track_err_*wmse)
        self.custom_db_data["Power"].update(
            new_data=self._pow_db_data,
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)
        self.custom_db_data["TrackingError"].update(
            new_data=self._track_error_db,
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)
+
+ # reward functions
+ def _action_rate(self):
+ continuous_actions=self._is_continuous_actions
+ discrete_actions=~self._is_continuous_actions
+ n_c_actions=continuous_actions.sum().item()
+ n_d_actions=discrete_actions.sum().item()
+ actions_prev=self._act_mem_buffer.get(idx=1)
+ actions_now=self._act_mem_buffer.get(idx=0)
+ actions_rate=(actions_now-actions_prev) # actions already normalized
+ actions_rate_c=actions_rate[:, continuous_actions]
+ actions_rate_d=actions_rate[:, discrete_actions]
+
+ actions_rate_sqrd=None # assuming n_c_actions > 0 always
+ actions_rate_sqrd=self._env_opts["action_rate_rew_c_weight"]*torch.sum(actions_rate_c*actions_rate_c, dim=1, keepdim=True)/n_c_actions
+ if discrete_actions.any():
+ actions_rate_sqrd+=self._env_opts["action_rate_rew_d_weight"]*torch.sum(actions_rate_d*actions_rate_d, dim=1, keepdim=True)/n_d_actions
+ return actions_rate_sqrd
+
+ def _mech_pow(self, jnts_vel, jnts_effort, autoscaled: bool = False, drained: bool = True):
+ mech_pow_jnts=(jnts_effort*jnts_vel)*self._power_penalty_weights
+ if drained:
+ mech_pow_jnts.clamp_(0.0,torch.inf) # do not account for regenerative power
+ mech_pow_tot = torch.sum(mech_pow_jnts, dim=1, keepdim=True)
+ self._pow_db_data[:, 1:2]=mech_pow_tot.cpu()
+ if autoscaled:
+ mech_pow_tot=mech_pow_tot/self._power_penalty_weights_sum
+ return mech_pow_tot
+
+ def _cost_of_transport(self, jnts_vel, jnts_effort, v_norm, mass_weight: bool = False):
+ drained_mech_pow=self._mech_pow(jnts_vel=jnts_vel,
+ jnts_effort=jnts_effort,
+ drained=True)
+ CoT=drained_mech_pow/(v_norm+1e-2)
+ if mass_weight:
+ robot_weight=self._rhc_robot_weight
+ CoT=CoT/robot_weight
+
+ # add to db metrics
+ self._pow_db_data[:, 0:1]=CoT.cpu()
+ self._pow_db_data[:, 1:2]=drained_mech_pow.cpu()
+
+ return CoT
+
+ def _jnt_vel_penalty(self, jnts_vel):
+ weighted_jnt_vel = torch.sum(jnts_vel*jnts_vel, dim=1, keepdim=True)/self._n_jnts
+ return weighted_jnt_vel
+
+ def _rhc_fail_idx(self, gpu: bool):
+ rhc_fail_idx = self._rhc_status.rhc_fail_idx.get_torch_mirror(gpu=gpu)
+ return self._env_opts["rhc_fail_idx_scale"]*rhc_fail_idx
+
+ # basic L1 and L2 error functions
+ def _track_err_wmse(self, task_ref, task_meas, scaling, weights):
+ # weighted mean-squared error computation
+ task_error = (task_meas-task_ref)
+ # add to db metrics
+ self._track_error_db[:, :]=torch.abs(task_error)
+ scaled_error=task_error/scaling
+
+ task_wmse = torch.sum(scaled_error*scaled_error*weights, dim=1, keepdim=True)/torch.sum(weights).item()
+ return task_wmse # weighted mean square error (along task dimension)
+
    def _track_err_dir_wmse(self, task_ref, task_meas, scaling, weights):
        """Weighted DIRECTIONAL mean-squared tracking error.

        Decomposes the scaled twist error into components along the commanded
        linear-velocity direction, gravity, and the closing lateral direction
        (and an analogous roll/pitch/vertical triad for the angular part), then
        returns the weighted mean of the squared components. Also logs the raw
        absolute error to the tracking-error db (consistent with
        _track_err_wmse).
        """
        task_error = (task_meas-task_ref)
        # add to db metrics
        self._track_error_db[:, :]=torch.abs(task_error)
        task_error=task_error/scaling

        # projection along commanded direction and gravity, matching paper formulation
        v_ref=task_ref[:, 0:3]
        delta_v=task_error[:, 0:3]

        v_ref_norm=torch.norm(v_ref, dim=1, keepdim=True)
        cmd_dir=v_ref/(v_ref_norm+1e-8)
        # fallback to measured direction if command is (near) zero to avoid degenerate projection
        meas_dir=task_meas[:, 0:3]
        meas_dir=meas_dir/(torch.norm(meas_dir, dim=1, keepdim=True)+1e-8)
        cmd_dir=torch.where((v_ref_norm>1e-6), cmd_dir, meas_dir)

        gravity_dir = self._robot_state.root_state.get(data_type="gn",gpu=self._use_gpu) # normalized gravity in base frame
        gravity_dir = gravity_dir/(torch.norm(gravity_dir, dim=1, keepdim=True)+1e-8)

        forward_error=torch.sum(delta_v*cmd_dir, dim=1, keepdim=True)
        vertical_error=torch.sum(delta_v*gravity_dir, dim=1, keepdim=True)
        # lateral component is what remains after removing forward + vertical parts
        lateral_vec=delta_v - vertical_error*gravity_dir - forward_error*cmd_dir
        lateral_error=torch.norm(lateral_vec, dim=1, keepdim=True)

        # angular directional components: use gravity as vertical, project base x onto the world xy plane for roll, and close the triad with pitch
        base_x = self._base_x_dir
        base_y = self._base_y_dir

        roll_dir = base_x - torch.sum(base_x*gravity_dir, dim=1, keepdim=True)*gravity_dir
        roll_norm = torch.norm(roll_dir, dim=1, keepdim=True)
        roll_dir_alt = base_y - torch.sum(base_y*gravity_dir, dim=1, keepdim=True)*gravity_dir # fallback if base x is almost aligned with gravity
        roll_norm_alt = torch.norm(roll_dir_alt, dim=1, keepdim=True)
        use_alt_roll = roll_norm < 1e-6
        roll_dir = torch.where(use_alt_roll, roll_dir_alt, roll_dir)
        roll_norm = torch.where(use_alt_roll, roll_norm_alt, roll_norm)
        roll_dir = roll_dir/(roll_norm+1e-8)

        # pitch closes the right-handed triad (gravity x roll)
        pitch_dir = torch.cross(gravity_dir, roll_dir, dim=1)
        pitch_dir = pitch_dir/(torch.norm(pitch_dir, dim=1, keepdim=True)+1e-8)

        delta_omega = task_error[:, 3:6]
        omega_roll_error = torch.sum(delta_omega*roll_dir, dim=1, keepdim=True)
        omega_pitch_error = torch.sum(delta_omega*pitch_dir, dim=1, keepdim=True)
        omega_vertical_error = torch.sum(delta_omega*gravity_dir, dim=1, keepdim=True)

        full_error=torch.cat((forward_error, lateral_error, vertical_error, omega_roll_error, omega_pitch_error, omega_vertical_error), dim=1)
        task_wmse_dir = torch.sum(full_error*full_error*weights, dim=1, keepdim=True)/torch.sum(weights).item()
        return task_wmse_dir # weighted mean square error (along task dimension)
+
+ # L2 errors
+ def _tracking_err_rel_wmse(self, task_ref, task_meas, weights, directional: bool = False):
+ ref_norm = task_ref.norm(dim=1,keepdim=True) # norm of the full twist reference
+ self._task_err_scaling[:, :] = ref_norm+1e-2
+ if directional:
+ task_rel_err_wmse=self._track_err_dir_wmse(task_ref=task_ref, task_meas=task_meas,
+ scaling=self._task_err_scaling, weights=weights)
+ else:
+ task_rel_err_wmse=self._track_err_wmse(task_ref=task_ref, task_meas=task_meas,
+ scaling=self._task_err_scaling, weights=weights)
+ return task_rel_err_wmse
+
+ def _tracking_err_wmse(self, task_ref, task_meas, weights, directional: bool = False):
+ self._task_err_scaling[:, :] = 1
+ if directional:
+ task_err_wmse = self._track_err_dir_wmse(task_ref=task_ref,
+ task_meas=task_meas, scaling=self._task_err_scaling, weights=weights)
+ else:
+ task_err_wmse = self._track_err_wmse(task_ref=task_ref,
+ task_meas=task_meas, scaling=self._task_err_scaling, weights=weights)
+ return task_err_wmse
+
+ # L1 errors
+ def _tracking_err_rel_lin(self, task_ref, task_meas, weights, directional):
+ task_rel_err_wmse = self._tracking_err_rel_wmse(task_ref=task_ref,
+ task_meas=task_meas, weights=weights, directional=directional)
+ return task_rel_err_wmse.sqrt()
+
+ def _tracking_err_lin(self, task_ref, task_meas, weights, directional: bool = False):
+ self._task_err_scaling[:, :] = 1
+ task_err_wmse=self._tracking_err_wmse(task_ref=task_ref,
+ task_meas=task_meas, weights=weights, directional=directional)
+ return task_err_wmse.sqrt()
+
+ # reward computation over steps/substeps
+ def _compute_step_rewards(self):
+
+ sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)
+
+ # tracking reward
+ if self._env_opts["use_L1_norm"]: # linear errors
+ task_error_fun = self._tracking_err_lin
+ if self._env_opts["use_relative_error"]:
+ task_error_fun = self._tracking_err_rel_lin
+ else: # quadratic error
+ task_error_fun = self._tracking_err_wmse
+ if self._env_opts["use_relative_error"]:
+ task_error_fun = self._tracking_err_rel_wmse
+
+ agent_task_ref_base_loc = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu) # high level agent refs (hybrid twist)
+ self._get_avrg_step_root_twist(out=self._step_avrg_root_twist_base_loc, base_loc=True)
+ task_error = task_error_fun(task_meas=self._step_avrg_root_twist_base_loc,
+ task_ref=agent_task_ref_base_loc,
+ weights=self._task_err_weights,
+ directional=self._env_opts["directional_tracking"])
+
+ idx=self._reward_map["task_error"]
+ if self._env_opts["use_exp_track_rew"]:
+ sub_rewards[:, idx:(idx+1)] = \
+ self._env_opts["task_track_offset_exp"]*torch.exp(-self._env_opts["task_track_scale_exp"]*task_error)
+ else: # simple linear reward
+ sub_rewards[:, idx:(idx+1)] = \
+ self._env_opts["task_track_offset"]*(1.0-self._env_opts["task_track_scale"]*task_error)
+
+ if self._env_opts["use_fail_idx_weight"]: # add weight based on fail idx
+ fail_idx=self._rhc_fail_idx(gpu=self._use_gpu)
+ sub_rewards[:, idx:(idx+1)]=(1-fail_idx)*sub_rewards[:, idx:(idx+1)]
+ if self._track_rew_smoother is not None: # smooth reward if required
+ self._track_rew_smoother.update(new_signal=sub_rewards[:, 0:1])
+ sub_rewards[:, idx:(idx+1)]=self._track_rew_smoother.get()
+
+ # action rate
+ if self._env_opts["add_action_rate_reward"]:
+ action_rate=self._action_rate()
+ idx=self._reward_map["action_rate"]
+ sub_rewards[:, idx:(idx+1)] = self._env_opts["action_rate_offset"]*(1.0-self._env_opts["action_rate_scale"]*action_rate)
+
+ # mpc vel tracking
+ if self._env_opts["use_rhc_avrg_vel_tracking"]:
+ self._get_avrg_rhc_root_twist(out=self._root_twist_avrg_rhc_base_loc_next,base_loc=True) # get estimated avrg vel
+ # from MPC after stepping
+ task_pred_error=task_error_fun(task_meas=self._root_twist_avrg_rhc_base_loc_next,
+ task_ref=agent_task_ref_base_loc,
+ weights=self._task_pred_err_weights,
+ directional=self._env_opts["directional_tracking"])
+ idx=self._reward_map["rhc_avrg_vel_error"]
+ sub_rewards[:, idx:(idx+1)] = self._env_opts["task_pred_track_offset"]*torch.exp(-self._env_opts["task_pred_track_scale"]*task_pred_error)
+
    def _compute_substep_rewards(self):
        """Compute the reward terms evaluated at substep frequency (energy and
        joint-velocity penalties) and write them into the shared sub-rewards
        buffer. Each term is gated by its env option.

        NOTE(review): when "use_CoT_wrt_ref" is set, v_norm is the norm of the
        FULL 6D reference twist, while the measured branch norms only the
        linear part [:, 0:3] — confirm this asymmetry is intentional.
        """
        sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)

        # joint state is only needed by the energy-based terms
        if self._env_opts["add_CoT_reward"] or self._env_opts["add_power_reward"]:
            jnts_vel = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu)
            jnts_effort = self._robot_state.jnts_state.get(data_type="eff",gpu=self._use_gpu)

        if self._env_opts["add_CoT_reward"]:
            if self._env_opts["use_CoT_wrt_ref"]: # uses v ref norm for computing cot
                agent_task_ref_base_loc = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu)
                v_norm=torch.norm(agent_task_ref_base_loc, dim=1, keepdim=True)
            else: # uses measured velocity
                robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu)
                v_norm=torch.norm(robot_twist_meas_base_loc[:,0:3], dim=1, keepdim=True)
            CoT=self._cost_of_transport(jnts_vel=jnts_vel,jnts_effort=jnts_effort,v_norm=v_norm,
                mass_weight=True
                )
            idx=self._reward_map["CoT"]
            sub_rewards[:, idx:(idx+1)] = self._env_opts["CoT_offset"]*(1-self._env_opts["CoT_scale"]*CoT)
        if self._env_opts["add_power_reward"]:
            weighted_mech_power=self._mech_pow(jnts_vel=jnts_vel,jnts_effort=jnts_effort, drained=True)
            idx=self._reward_map["mech_pow"]
            sub_rewards[:, idx:(idx+1)] = self._env_opts["power_offset"]*(1-self._env_opts["power_scale"]*weighted_mech_power)

        if self._env_opts["add_jnt_v_reward"]:
            jnts_vel = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu)
            jnt_v=self._jnt_vel_penalty(jnts_vel=jnts_vel)
            idx=self._reward_map["jnt_v"]
            sub_rewards[:, idx:(idx+1)] = self._env_opts["jnt_vel_offset"]*(1-self._env_opts["jnt_vel_scale"]*jnt_v)
+
+ def _randomize_task_refs(self,
+ env_indxs: torch.Tensor = None):
+
+ # we randomize the reference in world frame, since it's much more intuitive
+ # (it will be rotated in base frame when provided to the agent and used for rew
+ # computation)
+
+ if self._env_opts["use_pof0"]: # sample from bernoulli distribution
+ torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"]
+ torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega)
+ if env_indxs is None:
+ random_uniform=torch.full_like(self._agent_twist_ref_current_w, fill_value=0.0)
+ torch.nn.init.uniform_(random_uniform, a=-1, b=1)
+ self._agent_twist_ref_current_w[:, :] = random_uniform*self._twist_ref_scale + self._twist_ref_offset
+ self._agent_twist_ref_current_w[:, 0:3] = self._agent_twist_ref_current_w[:, 0:3]*self._bernoulli_coeffs_linvel # linvel
+ self._agent_twist_ref_current_w[:, 3:6] = self._agent_twist_ref_current_w[:, 3:6]*self._bernoulli_coeffs_omega # omega
+ else:
+ random_uniform=torch.full_like(self._agent_twist_ref_current_w[env_indxs, :], fill_value=0.0)
+ torch.nn.init.uniform_(random_uniform, a=-1, b=1)
+ self._agent_twist_ref_current_w[env_indxs, :] = random_uniform * self._twist_ref_scale + self._twist_ref_offset
+ self._agent_twist_ref_current_w[env_indxs, 0:3] = self._agent_twist_ref_current_w[env_indxs, 0:3]*self._bernoulli_coeffs_linvel[env_indxs, :]
+ self._agent_twist_ref_current_w[env_indxs, 3:6] = self._agent_twist_ref_current_w[env_indxs, 3:6]*self._bernoulli_coeffs_omega[env_indxs, :] # omega
+
+ def _get_obs_names(self):
+
+ obs_names = [""] * self.obs_dim()
+
+ # proprioceptive stream of obs
+ next_idx=0
+
+ self._obs_map["gn_base"]=next_idx
+ obs_names[0] = "gn_x_base_loc"
+ obs_names[1] = "gn_y_base_loc"
+ obs_names[2] = "gn_z_base_loc"
+ next_idx+=3
+
+ self._obs_map["linvel_meas"]=next_idx
+ obs_names[next_idx] = "linvel_x_base_loc"
+ obs_names[next_idx+1] = "linvel_y_base_loc"
+ obs_names[next_idx+2] = "linvel_z_base_loc"
+ next_idx+=3
+
+ self._obs_map["omega_meas"]=next_idx
+ obs_names[next_idx] = "omega_x_base_loc"
+ obs_names[next_idx+1] = "omega_y_base_loc"
+ obs_names[next_idx+2] = "omega_z_base_loc"
+ next_idx+=3
+
+ jnt_names=self.get_observed_joints()
+ self._obs_map["q_jnt"]=next_idx
+ for i in range(self._n_jnts): # jnt obs (pos):
+ obs_names[next_idx+i] = f"q_jnt_{jnt_names[i]}"
+ next_idx+=self._n_jnts
+
+ self._obs_map["v_jnt"]=next_idx
+ for i in range(self._n_jnts): # jnt obs (v):
+ obs_names[next_idx+i] = f"v_jnt_{jnt_names[i]}"
+ next_idx+=self._n_jnts
+
+ # references
+ self._obs_map["twist_ref"]=next_idx
+ obs_names[next_idx] = "linvel_x_ref_base_loc"
+ obs_names[next_idx+1] = "linvel_y_ref_base_loc"
+ obs_names[next_idx+2] = "linvel_z_ref_base_loc"
+ obs_names[next_idx+3] = "omega_x_ref_base_loc"
+ obs_names[next_idx+4] = "omega_y_ref_base_loc"
+ obs_names[next_idx+5] = "omega_z_ref_base_loc"
+ next_idx+=6
+
+ # contact forces
+ if self._env_opts["add_mpc_contact_f_to_obs"]:
+ i = 0
+ self._obs_map["contact_f_mpc"]=next_idx
+ for contact in self._contact_names:
+ obs_names[next_idx+i] = f"fc_{contact}_x_base_loc"
+ obs_names[next_idx+i+1] = f"fc_{contact}_y_base_loc"
+ obs_names[next_idx+i+2] = f"fc_{contact}_z_base_loc"
+ i+=3
+ next_idx+=3*len(self._contact_names)
+
+ # data directly from MPC
+ if self._env_opts["add_fail_idx_to_obs"]:
+ self._obs_map["rhc_fail_idx"]=next_idx
+ obs_names[next_idx] = "rhc_fail_idx"
+ next_idx+=1
+ if self._env_opts["add_term_mpc_capsize"]:
+ self._obs_map["gn_base_mpc"]=next_idx
+ obs_names[next_idx] = "gn_x_rhc_base_loc"
+ obs_names[next_idx+1] = "gn_y_rhc_base_loc"
+ obs_names[next_idx+2] = "gn_z_rhc_base_loc"
+ next_idx+=3
+ if self._env_opts["use_rhc_avrg_vel_tracking"]:
+ self._obs_map["avrg_twist_mpc"]=next_idx
+ obs_names[next_idx] = "linvel_x_avrg_rhc"
+ obs_names[next_idx+1] = "linvel_y_avrg_rhc"
+ obs_names[next_idx+2] = "linvel_z_avrg_rhc"
+ obs_names[next_idx+3] = "omega_x_avrg_rhc"
+ obs_names[next_idx+4] = "omega_y_avrg_rhc"
+ obs_names[next_idx+5] = "omega_z_avrg_rhc"
+ next_idx+=6
+
+ if self._env_opts["add_flight_info"]:
+ self._obs_map["flight_info"]=next_idx
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_pos_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_len_remaining_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_len_nominal_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_apex_nominal_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_end_nominal_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+
+ if self._env_opts["add_flight_settings"]:
+ self._obs_map["flight_settings_req"]=next_idx
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_len_req_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_apex_req_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_end_req_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+
+ if self._env_opts["add_rhc_cmds_to_obs"]:
+ self._obs_map["rhc_cmds_q"]=next_idx
+ for i in range(self._n_jnts): # jnt obs (pos):
+ obs_names[next_idx+i] = f"rhc_cmd_q_{jnt_names[i]}"
+ next_idx+=self._n_jnts
+ self._obs_map["rhc_cmds_v"]=next_idx
+ for i in range(self._n_jnts): # jnt obs (pos):
+ obs_names[next_idx+i] = f"rhc_cmd_v_{jnt_names[i]}"
+ next_idx+=self._n_jnts
+ self._obs_map["rhc_cmds_eff"]=next_idx
+ for i in range(self._n_jnts): # jnt obs (pos):
+ obs_names[next_idx+i] = f"rhc_cmd_eff_{jnt_names[i]}"
+ next_idx+=self._n_jnts
+
+ # previous actions info
+ if self._env_opts["use_action_history"]:
+ self._obs_map["action_history"]=next_idx
+ action_names = self._get_action_names()
+ if self._env_opts["add_prev_actions_stats_to_obs"]:
+ self._obs_map["action_history_prev"]=next_idx
+ for act_idx in range(self.actions_dim()):
+ obs_names[next_idx+act_idx] = action_names[act_idx]+f"_prev_act"
+ next_idx+=self.actions_dim()
+ self._obs_map["action_history_avrg"]=next_idx
+ for act_idx in range(self.actions_dim()):
+ obs_names[next_idx+act_idx] = action_names[act_idx]+f"_avrg_act"
+ next_idx+=self.actions_dim()
+ self._obs_map["action_history_std"]=next_idx
+ for act_idx in range(self.actions_dim()):
+ obs_names[next_idx+act_idx] = action_names[act_idx]+f"_std_act"
+ next_idx+=self.actions_dim()
+ else:
+ for i in range(self._env_opts["actions_history_size"]):
+ for act_idx in range(self.actions_dim()):
+ obs_names[next_idx+act_idx] = action_names[act_idx]+f"_m{i+1}_act"
+ next_idx+=self.actions_dim()
+
+ if self._env_opts["use_action_smoothing"]:
+ self._obs_map["action_smoothing"]=next_idx
+ for smoothed_action in range(self.actions_dim()):
+ obs_names[next_idx+smoothed_action] = action_names[smoothed_action]+f"_smoothed"
+ next_idx+=self.actions_dim()
+
+ if self._env_opts["add_periodic_clock_to_obs"]:
+ self._obs_map["clock"]=next_idx
+ obs_names[next_idx] = "clock_cos"
+ obs_names[next_idx+1] = "clock_sin"
+ next_idx+=2
+ if self._env_opts["add_heightmap_obs"] and self._height_grid_size is not None:
+ self._obs_map["heightmap"]=next_idx
+ gs = self._height_grid_size
+ for r in range(gs):
+ for c in range(gs):
+ obs_names[next_idx] = f"height_r{r}_c{c}"
+ next_idx += 1
+
+ return obs_names
+
+ def _set_substep_obs(self):
+ # which obs are to be averaged over substeps?
+
+ self._is_substep_obs[self._obs_map["linvel_meas"]:self._obs_map["linvel_meas"]+3]=True
+ self._is_substep_obs[self._obs_map["omega_meas"]:self._obs_map["omega_meas"]+3]=True
+ self._is_substep_obs[self._obs_map["v_jnt"]:self._obs_map["v_jnt"]+self._n_jnts]=True # also good for noise
+
+ # self._is_substep_obs[self._obs_map["contact_f_mpc"]:self._obs_map["contact_f_mpc"]+3*len(self._contact_names)]=True
+
+ def _get_action_names(self):
+
+ action_names = [""] * self.actions_dim()
+ action_names[0] = "vx_cmd" # twist commands from agent to RHC controller
+ action_names[1] = "vy_cmd"
+ action_names[2] = "vz_cmd"
+ action_names[3] = "roll_omega_cmd"
+ action_names[4] = "pitch_omega_cmd"
+ action_names[5] = "yaw_omega_cmd"
+
+ next_idx=6
+
+ self._actions_map["contact_flag_start"]=next_idx
+ for i in range(len(self._contact_names)):
+ contact=self._contact_names[i]
+ action_names[next_idx] = f"contact_flag_{contact}"
+ next_idx+=1
+
+ return action_names
+
+ def _set_substep_rew(self):
+
+ # which rewards are to be computed at substeps frequency?
+ self._is_substep_rew[self._reward_map["task_error"]]=False
+ if self._env_opts["add_CoT_reward"]:
+ self._is_substep_rew[self._reward_map["CoT"]]=True
+ if self._env_opts["add_power_reward"]:
+ self._is_substep_rew[self._reward_map["mech_pow"]]=True
+ if self._env_opts["add_action_rate_reward"]:
+ self._is_substep_rew[self._reward_map["action_rate"]]=False
+ if self._env_opts["add_jnt_v_reward"]:
+ self._is_substep_rew[self._reward_map["jnt_v"]]=True
+
+ if self._env_opts["use_rhc_avrg_vel_tracking"]:
+ self._is_substep_rew[self._reward_map["rhc_avrg_vel_error"]]=False
+
+ def _get_rewards_names(self):
+
+ counter=0
+ reward_names = []
+
+ # adding rewards
+ reward_names.append("task_error")
+ self._reward_map["task_error"]=counter
+ self._reward_lb_map["task_error"]="task_error_reward_lb"
+ counter+=1
+ if self._env_opts["add_power_reward"] and self._env_opts["add_CoT_reward"]:
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ "Only one between CoT and power reward can be used!",
+ LogType.EXCEP,
+ throw_when_excep=True)
+ if self._env_opts["add_CoT_reward"]:
+ reward_names.append("CoT")
+ self._reward_map["CoT"]=counter
+ self._reward_lb_map["CoT"]="CoT_reward_lb"
+ counter+=1
+ if self._env_opts["add_power_reward"]:
+ reward_names.append("mech_pow")
+ self._reward_map["mech_pow"]=counter
+ self._reward_lb_map["mech_pow"]="power_reward_lb"
+ counter+=1
+ if self._env_opts["add_action_rate_reward"]:
+ reward_names.append("action_rate")
+ self._reward_map["action_rate"]=counter
+ self._reward_lb_map["action_rate"]="action_rate_reward_lb"
+ counter+=1
+ if self._env_opts["add_jnt_v_reward"]:
+ reward_names.append("jnt_v")
+ self._reward_map["jnt_v"]=counter
+ self._reward_lb_map["jnt_v"]="jnt_vel_reward_lb"
+ counter+=1
+ if self._env_opts["use_rhc_avrg_vel_tracking"]:
+ reward_names.append("rhc_avrg_vel_error")
+ self._reward_map["rhc_avrg_vel_error"]=counter
+ self._reward_lb_map["rhc_avrg_vel_error"]="rhc_avrg_vel_reward_lb"
+ counter+=1
+
+ return reward_names
+
+ def _get_sub_trunc_names(self):
+ sub_trunc_names = []
+ sub_trunc_names.append("ep_timeout")
+ if self._env_opts["single_task_ref_per_episode"]:
+ sub_trunc_names.append("task_ref_rand")
+ return sub_trunc_names
+
+ def _get_sub_term_names(self):
+ # to be overridden by child class
+ sub_term_names = []
+ sub_term_names.append("rhc_failure")
+ sub_term_names.append("robot_capsize")
+ if self._env_opts["add_term_mpc_capsize"]:
+ sub_term_names.append("rhc_capsize")
+
+ return sub_term_names
+
+ def _set_jnts_blacklist_pattern(self):
+ # used to exclude pos measurement from wheels
+ self._jnt_q_blacklist_patterns=["wheel"]
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/unitree_b2w_2026_03_28_11_09_54_ID.srdf b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/unitree_b2w_2026_03_28_11_09_54_ID.srdf
new file mode 100644
index 0000000000000000000000000000000000000000..0eccb9d4f451dbd6f85affd6972ecce13a609d04
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/unitree_b2w_2026_03_28_11_09_54_ID.srdf
@@ -0,0 +1,55 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/unitree_b2w_2026_03_28_11_09_54_ID.urdf b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/unitree_b2w_2026_03_28_11_09_54_ID.urdf
new file mode 100644
index 0000000000000000000000000000000000000000..6932d9779b22f20dae4458878e998b39517c997a
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/unitree_b2w_2026_03_28_11_09_54_ID.urdf
@@ -0,0 +1,1429 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ /b2w_gazebo
+ gazebo_ros_control/DefaultRobotHWSim
+
+
+
+
+
+ 10
+
+ base
+ 0 0 0 0 0 0
+ Gazebo/Yellow
+
+
+
+
+
+
+
+ trunk
+ /apply_force/trunk
+
+
+
+ true
+
+ true
+ 1000
+ true
+ __default_topic__
+
+ trunk_imu
+ imu_link
+ 1000.0
+ 0.0
+ 0 0 0
+ 0 0 0
+ imu_link
+
+ 0 0 0 0 0 0
+
+
+
+
+
+ 100
+
+
+ FR_calf_fixed_joint_lump__FR_foot_collision_1
+
+
+
+
+
+ 100
+
+
+ FL_calf_fixed_joint_lump__FL_foot_collision_1
+
+
+
+
+
+ 100
+
+
+ RR_calf_fixed_joint_lump__RR_foot_collision_1
+
+
+
+
+
+ 100
+
+
+ RL_calf_fixed_joint_lump__RL_foot_collision_1
+
+
+
+
+
+
+ Gazebo/Green
+ false
+
+
+ 0.2
+ 0.2
+
+
+
+
+ 0.2
+ 0.2
+ Gazebo/White
+
+
+ 0.2
+ 0.2
+ Gazebo/Red
+
+
+
+ 0.2
+ 0.2
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+ 0.6
+ 0.6
+ 1
+
+
+
+
+
+
+ 0.2
+ 0.2
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+ 0.6
+ 0.6
+ 1
+
+
+
+
+
+
+ 0.2
+ 0.2
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+ 0.6
+ 0.6
+ 1
+
+
+
+
+
+
+ 0.2
+ 0.2
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+ 0.6
+ 0.6
+ 1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/world_interface_base.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/world_interface_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..9dc8f41cb9b4a2c1518bd204e663bb6568902b3d
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/world_interface_base.py
@@ -0,0 +1,1719 @@
+from aug_mpc.controllers.rhc.augmpc_cluster_server import AugMpcClusterServer
+from aug_mpc.utils.shared_data.remote_stepping import RemoteStepperClnt
+from aug_mpc.utils.shared_data.remote_stepping import RemoteResetClnt
+from aug_mpc.utils.shared_data.remote_stepping import RemoteResetRequest
+from aug_mpc.utils.jnt_imp_control_base import JntImpCntrlBase
+from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds
+from aug_mpc.utils.xrdf_gen import generate_srdf, generate_urdf
+from aug_mpc.utils.math_utils import quaternion_difference
+from aug_mpc.utils.custom_arg_parsing import extract_custom_xacro_args, merge_xacro_cmds
+
+from aug_mpc.utils.filtering import FirstOrderFilter
+
+from mpc_hive.utilities.homing import RobotHomer
+from mpc_hive.utilities.shared_data.jnt_imp_control import JntImpCntrlData
+
+from EigenIPC.PyEigenIPC import VLevel, Journal, LogType, dtype
+from EigenIPC.PyEigenIPC import StringTensorServer
+from EigenIPC.PyEigenIPCExt.wrappers.shared_data_view import SharedTWrapper
+
+from typing import List, Dict, TypeVar
+
+import os
+import inspect
+import signal
+import time
+
+import numpy as np
+import torch
+
+from abc import ABC, abstractmethod
+
+JntImpCntrlChild = TypeVar('JntImpCntrlChild', bound='JntImpCntrlBase')
+
+class AugMPCWorldInterfaceBase(ABC):
+
+ def __init__(self,
+ robot_names: List[str],
+ robot_urdf_paths: List[str],
+ robot_srdf_paths: List[str],
+ jnt_imp_config_paths: List[str],
+ n_contacts: List[int],
+ cluster_dt: List[float],
+ use_remote_stepping: List[bool],
+ name: str = "AugMPCWorldInterfaceBase",
+ num_envs: int = 1,
+ debug = False,
+ verbose: bool = False,
+ vlevel: VLevel = VLevel.V1,
+ n_init_step: int = 0,
+ timeout_ms: int = 60000,
+ env_opts: Dict = None,
+ use_gpu: bool = True,
+ dtype: torch.dtype = torch.float32,
+ dump_basepath: str = "/tmp",
+ override_low_lev_controller: bool = False):
+
+ # checks on input args
+ # type checks
+ if not isinstance(robot_names, List):
+ exception = "robot_names must be a list!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not isinstance(robot_urdf_paths, List):
+ exception = "robot_urdf_paths must be a list!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not isinstance(robot_srdf_paths, List):
+ exception = "robot_srdf_paths must be a list!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not isinstance(cluster_dt, List):
+ exception = "cluster_dt must be a list!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not isinstance(use_remote_stepping, List):
+ exception = "use_remote_stepping must be a list!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not isinstance(n_contacts, List):
+ exception = "n_contacts must be a list (of integers)!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not isinstance(jnt_imp_config_paths, List):
+ exception = "jnt_imp_config_paths must be a list paths!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ # dim checks
+ if not len(robot_urdf_paths) == len(robot_names):
+ exception = f"robot_urdf_paths has len {len(robot_urdf_paths)}" + \
+ f" while robot_names {len(robot_names)}"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not len(robot_srdf_paths) == len(robot_names):
+ exception = f"robot_srdf_paths has len {len(robot_srdf_paths)}" + \
+ f" while robot_names {len(robot_names)}"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not len(cluster_dt) == len(robot_names):
+ exception = f"cluster_dt has len {len(cluster_dt)}" + \
+ f" while robot_names {len(robot_names)}"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not len(use_remote_stepping) == len(robot_names):
+ exception = f"use_remote_stepping has len {len(use_remote_stepping)}" + \
+ f" while robot_names {len(robot_names)}"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not len(robot_srdf_paths) == len(robot_names):
+ exception = f"robot_srdf_paths has len {len(robot_srdf_paths)}" + \
+ f" while robot_names {len(robot_names)}"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not len(jnt_imp_config_paths) == len(robot_names):
+ exception = f"jnt_imp_config_paths has len {len(jnt_imp_config_paths)}" + \
+ f" while robot_names {len(robot_names)}"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ self._remote_exit_flag=None
+
+ self._name=name
+ self._num_envs=num_envs
+ self._debug=debug
+ self._verbose=verbose
+ self._vlevel=vlevel
+ self._force_reconnection=True
+ self._timeout_ms=timeout_ms
+ self._use_gpu=use_gpu
+ self._device = "cuda" if self._use_gpu else "cpu"
+ self._dtype=dtype
+ self._robot_names=robot_names
+ self._env_opts={}
+ self._env_opts["deact_when_failure"]=True
+ self._env_opts["filter_jnt_vel"]=False
+ self._env_opts["filter_cutoff_freq"]=10.0 # [Hz]
+ self._env_opts["filter_sampling_rate"]=100 # rate at which state is filtered [Hz]
+ self._env_opts["add_remote_exit_flag"]=False # add shared data server to trigger a remote exit
+ self._env_opts["wheel_joint_patterns"]=["wheel"]
+ self._env_opts["filter_wheel_pos_ref"]=True
+ self._env_opts["zero_wheel_eff_ref"]=True
+
+ self._env_opts["enable_height_sensor"]=False
+ self._env_opts["height_sensor_resolution"]=0.16
+ self._env_opts["height_sensor_pixels"]=10
+ self._env_opts["height_sensor_lateral_offset"]=0.0
+ self._env_opts["height_sensor_forward_offset"]=0.0
+
+ self._env_opts["run_cluster_bootstrap"] = False
+
+ self._filter_step_ssteps_freq=None
+
+ self._env_opts.update(env_opts)
+
+ self.step_counter = 0 # global step counter
+ self._n_init_steps = n_init_step # n steps to be performed before applying solutions from control clusters
+ self._srdf_dump_paths = robot_srdf_paths
+ self._homers = {}
+ self._homing = None
+ self._jnt_imp_cntrl_shared_data = {}
+ self._jnt_imp_controllers = {}
+ self._jnt_imp_config_paths = {}
+
+ # control cluster data
+ self.cluster_sim_step_counters = {}
+ self.cluster_servers = {}
+ self._trigger_sol = {}
+ self._wait_sol = {}
+ self._cluster_dt = {}
+ self._robot_urdf_paths={}
+ self._robot_srdf_paths={}
+ self._contact_names={}
+ self._num_contacts={}
+
+ for i in range(len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ self._cluster_dt[robot_name]=cluster_dt[i]
+ self._robot_urdf_paths[robot_name]=robot_urdf_paths[i]
+ self._robot_srdf_paths[robot_name]=robot_srdf_paths[i]
+ self._contact_names[robot_name]=None
+ self._num_contacts[robot_name]=n_contacts[i]
+ self._jnt_imp_config_paths[robot_name]=jnt_imp_config_paths[i]
+ # db data
+ self.debug_data = {}
+ self.debug_data["time_to_step_world"] = np.nan
+ self.debug_data["time_to_get_states_from_env"] = np.nan
+ self.debug_data["cluster_sol_time"] = {}
+ self.debug_data["cluster_state_update_dt"] = {}
+ self.debug_data["sim_time"] = {}
+ self.debug_data["cluster_time"] = {}
+
+ self._env_timer = time.perf_counter()
+
+ # remote sim stepping options
+ self._timeout = timeout_ms # timeout for remote stepping
+ self._use_remote_stepping = use_remote_stepping
+ # should use remote stepping
+ self._remote_steppers = {}
+ self._remote_resetters = {}
+ self._remote_reset_requests = {}
+ self._is_first_trigger = {}
+
+ self._closed = False
+
+ self._this_child_path=os.path.abspath(inspect.getfile(self.__class__))
+ self._descr_dump_path=dump_basepath+"/"+f"{self.__class__.__name__}"
+ self._urdf_dump_paths = {}
+ self._srdf_dump_paths = {}
+ self.xrdf_cmd_vals = [] # by default empty, needs to be overriden by
+ # child class
+ self._world_iface_files_server=None
+
+ self._override_low_lev_controller=override_low_lev_controller
+
+ self._root_p = {}
+ self._root_q = {}
+ self._jnts_q = {}
+ self._root_p_prev = {} # used for num differentiation
+ self._root_q_prev = {} # used for num differentiation
+ self._jnts_q_prev = {} # used for num differentiation
+ self._root_v_prev = {} # used for num differentiation
+ self._root_omega_prev = {} # used for num differentiation
+ self._root_p_default = {}
+ self._root_q_default = {}
+ self._jnts_q_default = {}
+
+ self._gravity_normalized = {}
+ self._gravity_normalized_base_loc = {}
+
+ self._root_v = {}
+ self._root_v_base_loc = {}
+ self._root_v_default = {}
+ self._root_omega = {}
+ self._root_omega_base_loc = {}
+ self._root_omega_default = {}
+ self._root_a = {}
+ self._root_a_base_loc = {}
+ self._root_alpha = {}
+ self._root_alpha_base_loc = {}
+
+ self._jnts_v = {}
+ self._jnt_vel_filter = {}
+ self._jnts_v_default = {}
+ self._jnts_eff = {}
+ self._jnts_eff_default = {}
+
+ self._root_pos_offsets = {}
+ self._root_q_offsets = {}
+ self._root_q_offsets_yaw = {}
+ self._root_q_yaw_rel_ws = {}
+
+ self._parse_env_opts()
+
+ self._enable_height_shared = self._env_opts["enable_height_sensor"]
+ self._height_sensor_resolution = self._env_opts["height_sensor_resolution"]
+ self._height_sensor_pixels = self._env_opts["height_sensor_pixels"]
+
+ self._pre_setup() # child's method
+
+ self._init_world() # after this point all info from sim or robot is
+ # available
+
+ self._publish_world_interface_files()
+
+ setup_ok=self._setup()
+ if not setup_ok:
+ self.close()
+
+ self._exit_request=False
+ signal.signal(signal.SIGINT, self.signal_handler)
+
    def signal_handler(self, sig, frame):
        """SIGINT handler: request a graceful shutdown.

        Only sets a flag here; the actual teardown is performed later by
        step() (which calls close() when the flag is set), so no cleanup
        work happens inside signal context.
        """
        Journal.log(self.__class__.__name__,
            "signal_handler",
            "received SIGINT -> cleaning up",
            LogType.WARN)
        self._exit_request=True
+
    def __del__(self):
        # best-effort cleanup on garbage collection; close() is guarded by
        # self._closed, so a double invocation is harmless
        self.close()
+
    def is_closed(self):
        """Return True once close() has completed and shared resources are released."""
        return self._closed
+
    def close(self) -> None:
        """Release every shared-memory/IPC resource owned by this env.

        Idempotent (guarded by self._closed). Membership checks on the
        per-robot dictionaries make this safe to call even when __init__
        or _setup aborted before fully populating them (__del__ and the
        failed-setup path both end up here).
        """
        if not self._closed:
            for i in range(len(self._robot_names)):
                if self._robot_names[i] in self.cluster_servers:
                    self.cluster_servers[self._robot_names[i]].close()
                if self._use_remote_stepping[i]: # remote signaling
                    # steppers/resetters are created together with the reset
                    # requests, so one membership check covers all three
                    if self._robot_names[i] in self._remote_reset_requests:
                        self._remote_reset_requests[self._robot_names[i]].close()
                        self._remote_resetters[self._robot_names[i]].close()
                        self._remote_steppers[self._robot_names[i]].close()
                if self._robot_names[i] in self._jnt_imp_cntrl_shared_data:
                    jnt_imp_shared_data=self._jnt_imp_cntrl_shared_data[self._robot_names[i]]
                    if jnt_imp_shared_data is not None:
                        jnt_imp_shared_data.close()
            if self._remote_exit_flag is not None:
                self._remote_exit_flag.close()
            if self._world_iface_files_server is not None:
                self._world_iface_files_server.close()
            # child-specific teardown (simulator side)
            self._close()
            self._closed=True
+
+ def _collect_world_interface_files(self):
+ files = [self._this_child_path]
+ # prefer generated URDF/SRDF if available, fallback to provided xacros
+ if len(self._urdf_dump_paths) > 0:
+ files.extend(list(self._urdf_dump_paths.values()))
+ else:
+ files.extend(list(self._robot_urdf_paths.values()))
+ if len(self._srdf_dump_paths) > 0:
+ files.extend(list(self._srdf_dump_paths.values()))
+ else:
+ files.extend(list(self._robot_srdf_paths.values()))
+ files.extend(list(self._jnt_imp_config_paths.values()))
+ # remove duplicates while preserving order
+ unique_files=[]
+ for f in files:
+ if f not in unique_files:
+ unique_files.append(f)
+ return unique_files
+
    def _publish_world_interface_files(self):
        """Advertise the world-interface file paths on shared memory.

        Only runs when at least one robot uses remote stepping. The paths
        are joined into a single comma-separated string and written to a
        StringTensorServer (namespaced on the first robot); the write is
        retried until a remote reader is available.
        """

        if not any(self._use_remote_stepping):
            return
        self._world_iface_files_server=StringTensorServer(length=1,
            basename="SharedWorldInterfaceFilesDropDir",
            name_space=self._robot_names[0],
            verbose=self._verbose,
            vlevel=self._vlevel,
            force_reconnection=True)
        self._world_iface_files_server.run()
        combined_paths=", ".join(self._collect_world_interface_files())
        # busy-wait with a small sleep until the write succeeds
        while not self._world_iface_files_server.write_vec([combined_paths], 0):
            Journal.log(self.__class__.__name__,
                "_publish_world_interface_files",
                f"Failed to pub world interface files. Retrying...",
                LogType.WARN)
            time.sleep(0.1)
        Journal.log(self.__class__.__name__,
            "_publish_world_interface_files",
            f"World interface files advertised: {combined_paths}",
            LogType.STAT)
+
    def _setup(self) -> bool:
        """One-shot bring-up of all per-robot infrastructure.

        For each robot this: allocates gravity/yaw work buffers, starts the
        MPC cluster server and (optionally) the remote stepping/reset
        endpoints, loads the SRDF homing, spawns the joint impedance
        controller plus its shared-memory mirror, runs warmup sim steps,
        resets everything and triggers the first solution. Finally the
        optional remote-exit flag server is started.

        Returns:
            bool: True on success; False if any sub-step failed (caller
            is expected to close() the env in that case).
        """

        for i in range(len(self._robot_names)):
            robot_name = self._robot_names[i]

            # normalized gravity vector
            self._gravity_normalized[robot_name]=torch.full_like(self._root_v[robot_name], fill_value=0.0)
            self._gravity_normalized[robot_name][:, 2]=-1.0
            self._gravity_normalized_base_loc[robot_name]=self._gravity_normalized[robot_name].detach().clone()

            # Pre-allocate yaw-related buffers once and reuse them in root_q_yaw_rel().
            q_ref = self._root_q[robot_name]
            self._root_q_offsets_yaw[robot_name] = torch.zeros(
                (self._num_envs,), dtype=q_ref.dtype, device=q_ref.device)
            self._root_q_yaw_rel_ws[robot_name] = {
                "yaw_abs": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device),
                "yaw_rel": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device),
                "yaw_sin": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device),
                "yaw_cos": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device),
                "q_abs_unit": torch.zeros_like(q_ref),
                "q_yaw_abs": torch.zeros_like(q_ref),
                "q_yaw_rel": torch.zeros_like(q_ref),
                "q_yaw_abs_conj": torch.zeros_like(q_ref),
                "q_pr": torch.zeros_like(q_ref),
                "q_rel": torch.zeros_like(q_ref),
            }

            self.cluster_sim_step_counters[robot_name]=0
            self._is_first_trigger[robot_name] = True
            if not isinstance(self._cluster_dt[robot_name], (float)):
                exception = f"cluster_dt[{i}] should be a float!"
                Journal.log(self.__class__.__name__,
                    "_setup",
                    exception,
                    LogType.EXCEP,
                    throw_when_excep = False)
                return False
            # NOTE(review): no-op self-assignment below — looks like a leftover
            # from an earlier refactor; confirm before removing
            self._cluster_dt[robot_name] = self._cluster_dt[robot_name]
            self._trigger_sol[robot_name] = True # allow first trigger
            self._wait_sol[robot_name] = False

            # initialize a lrhc cluster server for communicating with rhc controllers
            self.cluster_servers[robot_name] = AugMpcClusterServer(cluster_size=self._num_envs,
                cluster_dt=self._cluster_dt[robot_name],
                control_dt=self.physics_dt(),
                jnt_names=self._robot_jnt_names(robot_name=robot_name),
                n_contacts=self._n_contacts(robot_name=robot_name),
                contact_linknames=self._contact_names[robot_name],
                verbose=self._verbose,
                vlevel=self._vlevel,
                debug=self._debug,
                robot_name=robot_name,
                use_gpu=self._use_gpu,
                force_reconnection=self._force_reconnection,
                timeout_ms=self._timeout,
                enable_height_sensor=self._enable_height_shared,
                height_grid_size=self._height_sensor_pixels,
                height_grid_resolution=self._height_sensor_resolution)
            self.cluster_servers[robot_name].run()
            self.debug_data["cluster_sol_time"][robot_name] = np.nan
            self.debug_data["cluster_state_update_dt"][robot_name] = np.nan
            self.debug_data["sim_time"][robot_name] = np.nan
            # remote sim stepping
            if self._use_remote_stepping[i]:
                self._remote_steppers[robot_name] = RemoteStepperClnt(namespace=robot_name,
                    verbose=self._debug,
                    vlevel=self._vlevel)
                self._remote_resetters[robot_name] = RemoteResetClnt(namespace=robot_name,
                    verbose=self._debug,
                    vlevel=self._vlevel)
                self._remote_reset_requests[robot_name] = RemoteResetRequest(namespace=robot_name,
                    n_env=self._num_envs,
                    is_server=True,
                    verbose=self._debug,
                    vlevel=self._vlevel,
                    force_reconnection=self._force_reconnection,
                    safe=False)
                self._remote_steppers[robot_name].run()
                self._remote_resetters[robot_name].run()
                self._remote_reset_requests[robot_name].run()
            else:
                self._remote_steppers[robot_name] = None
                self._remote_reset_requests[robot_name] = None
                self._remote_resetters[robot_name] = None

            # homing configuration from the SRDF, broadcast over all envs
            self._homers[robot_name] = RobotHomer(srdf_path=self._srdf_dump_paths[robot_name],
                jnt_names=self._robot_jnt_names(robot_name=robot_name),
                filter=True,
                verbose=self._verbose)
            robot_homing=torch.from_numpy(self._homers[robot_name].get_homing().reshape(1,-1))
            if "cuda" in self._device:
                robot_homing=robot_homing.cuda()
            # NOTE(review): self._homing is overwritten on every loop iteration,
            # so after the loop it holds only the LAST robot's homing — fine for
            # single-robot use; confirm intent for multi-robot setups
            self._homing=robot_homing.repeat(self._num_envs, 1)

            self._jnts_q_default[robot_name] = self._homing
            self._set_jnts_to_homing(robot_name=robot_name)
            self._set_root_to_defconfig(robot_name=robot_name)
            self._reset_sim()

            self._init_safe_cluster_actions(robot_name=robot_name)

            Journal.log(self.__class__.__name__,
                "_setup",
                f"Will use joint impedance config at {self._jnt_imp_config_paths[robot_name]} for {robot_name}",
                LogType.STAT)

            self._jnt_imp_controllers[robot_name] = self._generate_jnt_imp_control(robot_name=robot_name)
            # wheels are velocity-controlled; their position refs may be
            # filtered and effort refs zeroed depending on env opts
            self._jnt_imp_controllers[robot_name].set_velocity_controlled_joints(
                name_patterns=self._env_opts["wheel_joint_patterns"],
                filter_pos_ref=self._env_opts["filter_wheel_pos_ref"],
                zero_eff_ref=self._env_opts["zero_wheel_eff_ref"])
            self._jnt_imp_cntrl_shared_data[robot_name] = JntImpCntrlData(is_server=True,
                n_envs=self._num_envs,
                n_jnts=len(self._robot_jnt_names(robot_name=robot_name)),
                jnt_names=self._robot_jnt_names(robot_name=robot_name),
                namespace=robot_name,
                verbose=self._verbose,
                force_reconnection=self._force_reconnection,
                vlevel=self._vlevel,
                use_gpu=self._use_gpu,
                safe=False)
            self._jnt_imp_cntrl_shared_data[robot_name].run()

            # optional first-order low-pass filter on measured joint velocities
            self._jnt_vel_filter[robot_name]=None
            if self._env_opts["filter_jnt_vel"]:
                self._jnt_vel_filter[robot_name]=FirstOrderFilter(dt=1.0/self._env_opts["filter_sampling_rate"],
                    filter_BW=self._env_opts["filter_cutoff_freq"],
                    rows=self._num_envs,
                    cols=len(self._robot_jnt_names(robot_name=robot_name)),
                    device=self._device,
                    dtype=self._dtype)

            # number of sim steps between filter updates (must be >= 1)
            physics_rate=1.0/self.physics_dt()
            self._filter_step_ssteps_freq=int(physics_rate/self._env_opts["filter_sampling_rate"])
            if self._filter_step_ssteps_freq <=0:
                Journal.log(self.__class__.__name__,
                    "_setup",
                    f"The filter_sampling_rate should be smaller that the physics rate ({physics_rate} Hz)",
                    LogType.EXCEP,
                    throw_when_excep=True)

            for n in range(self._n_init_steps): # run some initialization steps
                if hasattr(self, "_alter_twist_warmup"):
                    self._alter_twist_warmup(robot_name=robot_name, env_indxs=None)
                self._step_world()

            self._read_jnts_state_from_robot(robot_name=robot_name,
                env_indxs=None)
            self._read_root_state_from_robot(robot_name=robot_name,
                env_indxs=None)
            # allow child to perform additional warmup validations (e.g., terrain/tilt)
            # retry_done = False
            if hasattr(self, "_post_warmup_validation"):
                failing = self._post_warmup_validation(robot_name=robot_name)
                if failing is not None and failing.numel() > 0:
                    # retry: reset only failing envs, rerun warmup, revalidate once
                    failing = failing.to(self._device)
                    Journal.log(self.__class__.__name__,
                        "_setup",
                        f"Warmup validation failed for {robot_name}, envs indexes {failing.tolist()}",
                        LogType.EXCEP,
                        throw_when_excep=True)
                else:
                    Journal.log(self.__class__.__name__,
                        "_setup",
                        f"Warmup validation passed for {robot_name}",
                        LogType.INFO)

            # write some inits for all robots
            # self._update_root_offsets(robot_name)
            self._synch_default_root_states(robot_name=robot_name)
            epsi=0.03 # adding a bit of height to avoid initial penetration
            self._root_p_default[robot_name][:, 2]=self._root_p_default[robot_name][:, 2]+epsi

            reset_ok=self._reset(env_indxs=None,
                robot_name=robot_name,
                reset_cluster=True,
                reset_cluster_counter=False,
                randomize=True,
                acquire_offsets=True) # resets everything, updates the cluster with fresh reset states
            # and acquire offsets
            if not reset_ok:
                return False

            # cluster setup here
            control_cluster=self.cluster_servers[robot_name]

            control_cluster.pre_trigger()
            to_be_activated=control_cluster.get_inactive_controllers()
            if to_be_activated is not None:
                control_cluster.activate_controllers(
                    idxs=to_be_activated)

            if self._env_opts["run_cluster_bootstrap"]:
                cluster_setup_ok=self._setup_mpc_cluster(robot_name)
                if not cluster_setup_ok:
                    return False
            self._set_cluster_actions(robot_name=robot_name) # write last cmds
            self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot

            if self._use_remote_stepping[i]:
                step_wait_ok = self._wait_for_remote_step_req(robot_name=robot_name)
                if not step_wait_ok:
                    return False

            self._set_startup_jnt_imp_gains(robot_name=robot_name) # set gains to
            # startup config (usually lower)

            control_cluster.pre_trigger()
            control_cluster.trigger_solution(bootstrap=False) # trigger first solution (in real-time iteration) before first call to step to ensure that first solution is ready when step is called the first time

        if self._env_opts["add_remote_exit_flag"]:
            # NOTE(review): `dtype` here is presumably the shared-memory dtype
            # enum imported at module level (shadowing the `dtype` ctor arg) —
            # confirm against the file's imports
            self._remote_exit_flag=SharedTWrapper(namespace = self._robot_names[0],# use first robot as name
                basename = "IbridoRemoteEnvExitFlag",
                is_server = True,
                n_rows = 1,
                n_cols = 1,
                verbose = True,
                vlevel = self._vlevel,
                safe = False,
                dtype=dtype.Bool,
                force_reconnection=True,
                fill_value = False)
            self._remote_exit_flag.run()

        self._setup_done=True

        return self._setup_done
+
+ def _setup_mpc_cluster(self, robot_name: str):
+
+ control_cluster = self.cluster_servers[robot_name]
+
+ # self._set_state_to_cluster(robot_name=robot_name)
+ rhc_state = control_cluster.get_state()
+ root_twist=rhc_state.root_state.get(data_type="twist", robot_idxs = None, gpu=self._use_gpu)
+ jnt_v=rhc_state.jnts_state.get(data_type="v", robot_idxs = None, gpu=self._use_gpu)
+ root_twist[:, :]=0 # override meas state to make sure MPC bootstrap uses zero velocity
+ jnt_v[:, :]=0
+
+ control_cluster.write_robot_state()
+
+ # trigger bootstrap solution (solvers will run up to convergence)
+ control_cluster.trigger_solution(bootstrap=True) # this will trigger the bootstrap solver with the initial state,
+ # which will run until convergence before returning
+ wait_ok=control_cluster.wait_for_solution() # blocking
+ if not wait_ok:
+ return False
+ failed = control_cluster.get_failed_controllers(gpu=self._use_gpu)
+ if failed is not None:
+ failed_idxs = torch.nonzero(failed).squeeze(-1)
+ if failed_idxs.numel() > 0:
+ Journal.log(self.__class__.__name__,
+ "_setup",
+ f"Bootstrap solution failed for {robot_name} | n_failed: {failed_idxs.numel()}, idxs: {failed_idxs.cpu().tolist()}",
+ LogType.EXCEP,
+ throw_when_excep=False)
+ return False
+
+ return True
+
    def step(self) -> bool:
        """Advance the environment by one simulation step.

        First polls the optional remote exit flag (closing everything if an
        exit was requested), then runs the cluster pre-step logic, steps the
        world and performs post-step bookkeeping. The debug path additionally
        times the world step.

        Returns:
            bool: True if a full step was performed; False if the env is
            closed/not running or a pre-step stage failed.
        """

        success=False

        if self._remote_exit_flag is not None:
            # check for exit request
            self._remote_exit_flag.synch_all(read=True, retry = False)
            self._exit_request=self._exit_request or \
                bool(self._remote_exit_flag.get_numpy_mirror()[0, 0].item())

        # covers both SIGINT and the remote flag
        if self._exit_request:
            self.close()

        if self.is_running() and (not self.is_closed()):
            if self._debug:
                pre_step_ok=self._pre_step_db()
                if not pre_step_ok:
                    return False
                self._env_timer=time.perf_counter()
                self._step_world()
                self.debug_data["time_to_step_world"] = \
                    time.perf_counter() - self._env_timer
                self._post_world_step_db()
                success=True
            else:
                pre_step_ok=self._pre_step()
                if not pre_step_ok:
                    return False
                self._step_world()
                self._post_world_step()
                success=True

        return success
+
    def render(self, mode:str="human") -> None:
        """Render the simulation (delegates to the child-implemented _render_sim)."""
        self._render_sim(mode)
+
    def reset(self,
            env_indxs: torch.Tensor = None,
            reset_cluster: bool = False,
            reset_cluster_counter = False,
            randomize: bool = False,
            reset_sim: bool = False) -> bool:
        """Reset all robots and, optionally, the controllers and the simulator.

        Args:
            env_indxs: env indexes to reset (None -> all envs).
            reset_cluster: also reset the remote controller cluster.
            reset_cluster_counter: zero the per-robot sim step counter.
            randomize: randomize reset states (e.g. yaw).
            reset_sim: additionally reset the simulator itself afterwards.

        Returns:
            bool: True on success, False if any per-robot reset failed
            (annotation corrected: this method does return a bool).
        """

        for i in range(len(self._robot_names)):
            robot_name=self._robot_names[i]
            reset_ok=self._reset(robot_name=robot_name,
                env_indxs=env_indxs,
                randomize=randomize,
                reset_cluster=reset_cluster,
                reset_cluster_counter=reset_cluster_counter)
            if not reset_ok:
                return False
            # restore startup (usually softer) impedance gains on reset envs
            self._set_startup_jnt_imp_gains(robot_name=robot_name,
                env_indxs=env_indxs)

        if reset_sim:
            self._reset_sim()

        return True
+
    def _reset_cluster(self,
            robot_name: str,
            env_indxs: torch.Tensor = None,
            reset_cluster_counter: bool = False) -> bool:
        """Reset the remote controllers for one robot and push fresh state.

        Order matters: controllers are reset first, then the current
        (already reset) measured state is written to shared memory so the
        controllers restart from it.

        Returns:
            bool: True on success, False if the controller reset failed.
        """

        control_cluster = self.cluster_servers[robot_name]

        reset_ok=control_cluster.reset_controllers(idxs=env_indxs)
        if not reset_ok:
            return False

        self._set_state_to_cluster(robot_name=robot_name,
            env_indxs=env_indxs)
        control_cluster.write_robot_state() # writes to shared memory

        if reset_cluster_counter:
            self.cluster_sim_step_counters[robot_name] = 0

        return True
+
    def _step_jnt_vel_filter(self,
            robot_name: str,
            env_indxs: torch.Tensor = None):
        """Feed the latest measured joint velocities into the low-pass filter.

        Callers must guarantee the filter exists for this robot
        (self._jnt_vel_filter[robot_name] is not None) before calling.
        """
        self._jnt_vel_filter[robot_name].update(refk=self.jnts_v(robot_name=robot_name, env_idxs=env_indxs),
            idxs=env_indxs)
+
    def _set_state_to_cluster(self,
            robot_name: str,
            env_indxs: torch.Tensor = None,
            base_loc: bool = True):
        """Copy the latest measured robot state into the cluster state buffers.

        Writes root pose (position relative to the acquired offsets),
        twist/accelerations (base-local when base_loc is True), normalized
        gravity, joint state (velocities optionally low-pass filtered),
        the optional height map and the contact wrenches. Does NOT write
        to shared memory itself — callers follow up with
        control_cluster.write_robot_state().
        """

        if self._debug:
            if not isinstance(env_indxs, (torch.Tensor, type(None))):
                msg = "Provided env_indxs should be a torch tensor of indexes!"
                # NOTE(review): self.journal is not initialized anywhere in view —
                # this debug-only raise may itself AttributeError; confirm
                raise Exception(f"[{self.__class__.__name__}]" + f"[{self.journal.exception}]: " + msg)

        control_cluster = self.cluster_servers[robot_name]
        # floating base
        rhc_state = control_cluster.get_state()
        # configuration
        rhc_state.root_state.set(data=self.root_p_rel(robot_name=robot_name, env_idxs=env_indxs),
            data_type="p", robot_idxs = env_indxs, gpu=self._use_gpu)
        rhc_state.root_state.set(data=self.root_q(robot_name=robot_name, env_idxs=env_indxs),
            data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu)
        # rhc_state.root_state.set(data=self.root_q_yaw_rel(robot_name=robot_name, env_idxs=env_indxs),
        #     data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu)
        # twist
        rhc_state.root_state.set(data=self.root_v(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc),
            data_type="v", robot_idxs = env_indxs, gpu=self._use_gpu)
        rhc_state.root_state.set(data=self.root_omega(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc),
            data_type="omega", robot_idxs = env_indxs, gpu=self._use_gpu)

        # angular accc.
        rhc_state.root_state.set(data=self.root_a(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc),
            data_type="a", robot_idxs = env_indxs, gpu=self._use_gpu)
        rhc_state.root_state.set(data=self.root_alpha(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc),
            data_type="alpha", robot_idxs = env_indxs, gpu=self._use_gpu)

        # gravity vec
        rhc_state.root_state.set(data=self.gravity(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc),
            data_type="gn", robot_idxs = env_indxs, gpu=self._use_gpu)

        # joints
        rhc_state.jnts_state.set(data=self.jnts_q(robot_name=robot_name, env_idxs=env_indxs),
            data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu)

        v_jnts=self.jnts_v(robot_name=robot_name, env_idxs=env_indxs)
        if self._jnt_vel_filter[robot_name] is not None: # apply filtering
            v_jnts=self._jnt_vel_filter[robot_name].get(idxs=env_indxs)
        rhc_state.jnts_state.set(data=v_jnts,
            data_type="v", robot_idxs = env_indxs, gpu=self._use_gpu)
        rhc_state.jnts_state.set(data=self.jnts_eff(robot_name=robot_name, env_idxs=env_indxs),
            data_type="eff", robot_idxs = env_indxs, gpu=self._use_gpu)

        # height map
        if self._enable_height_shared:
            # NOTE(review): self._height_imgs is not initialized in the code in
            # view — presumably populated by the child class; verify
            hdata = self._height_imgs[robot_name]
            if env_indxs is not None:
                hdata = hdata[env_indxs]
            flat = hdata.reshape(hdata.shape[0], -1)
            rhc_state.height_sensor.set(data=flat, data_type=None, robot_idxs=env_indxs, gpu=self._use_gpu)

        # Updating contact state for selected contact links
        self._update_contact_state(robot_name=robot_name, env_indxs=env_indxs)
+
+ def _update_contact_state(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+ for i in range(0, self.cluster_servers[robot_name].n_contact_sensors()):
+ contact_link = self.cluster_servers[robot_name].contact_linknames()[i]
+ f_contact = self._get_contact_f(robot_name=robot_name,
+ contact_link=contact_link,
+ env_indxs=env_indxs)
+ if f_contact is not None:
+ self.cluster_servers[robot_name].get_state().contact_wrenches.set(data=f_contact, data_type="f",
+ contact_name=contact_link,
+ robot_idxs = env_indxs,
+ gpu=self._use_gpu)
+
    def _init_safe_cluster_actions(self,
            robot_name: str):
        """Fill the cluster action buffers with safe defaults.

        Joint position targets are set to the homing configuration, and
        velocity/effort targets to zero, so the simulator has sensible
        commands before the controllers publish their first solution.
        """

        # this does not actually write on shared memory,
        # but it's enough to get safe actions for the simulator before the
        # cluster starts to receive data from the controllers
        control_cluster = self.cluster_servers[robot_name]
        rhc_cmds = control_cluster.get_actions()
        n_jnts = rhc_cmds.n_jnts()

        null_action = torch.zeros((self._num_envs, n_jnts),
            dtype=self._dtype,
            device=self._device)
        rhc_cmds.jnts_state.set(data=self._homing, data_type="q", gpu=self._use_gpu)
        rhc_cmds.jnts_state.set(data=null_action, data_type="v", gpu=self._use_gpu)
        rhc_cmds.jnts_state.set(data=null_action, data_type="eff", gpu=self._use_gpu)
+
    def _pre_step_db(self) -> bool:
        """Cluster pre-step logic, debug variant (with timing instrumentation).

        Per robot: optionally runs the overriding low-level impedance update
        at sim rate, steps the joint-velocity filter at its own rate and, on
        cluster instants, waits for the last MPC solution, applies it,
        pushes fresh measured state to the cluster, handles failures
        (remote handshake or local reset) and triggers the next solution.

        Returns:
            bool: True on success, False if waiting for a solution or a
            reset/handshake failed (annotation corrected: returns bool).
        """

        # cluster step logic here
        for i in range(len(self._robot_names)):
            robot_name = self._robot_names[i]

            if self._override_low_lev_controller:
                # if overriding low-lev jnt imp. this has to run at the highest
                # freq possible
                start=time.perf_counter()
                self._read_jnts_state_from_robot(robot_name=robot_name)
                self.debug_data["time_to_get_states_from_env"]= time.perf_counter()-start

                self._write_state_to_jnt_imp(robot_name=robot_name)
                self._apply_cmds_to_jnt_imp_control(robot_name=robot_name)

            if self._jnt_vel_filter[robot_name] is not None and \
                (self.cluster_sim_step_counters[robot_name]+1) % self._filter_step_ssteps_freq == 0:
                # filter joint vel at a fixed frequency wrt sim steps
                if not self._override_low_lev_controller:
                    # we need a fresh sensor reading
                    self._read_jnts_state_from_robot(robot_name=robot_name)
                self._step_jnt_vel_filter(robot_name=robot_name, env_indxs=None)

            control_cluster = self.cluster_servers[robot_name]
            if control_cluster.is_cluster_instant(self.cluster_sim_step_counters[robot_name]):
                wait_ok=control_cluster.wait_for_solution() # this is blocking
                if not wait_ok:
                    return False
                failed = control_cluster.get_failed_controllers(gpu=self._use_gpu)
                self._set_cluster_actions(robot_name=robot_name) # write last cmds to low level control
                if not self._override_low_lev_controller:
                    self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot
                    # we can update the jnt state just at the rate at which the cluster needs it
                    start=time.perf_counter()
                    self._read_jnts_state_from_robot(robot_name=robot_name, env_indxs=None)
                else:
                    # read state necessary for cluster
                    start=time.perf_counter()
                self._read_root_state_from_robot(robot_name=robot_name,
                    env_indxs=None)
                self.debug_data["time_to_get_states_from_env"]= time.perf_counter()-start
                start=time.perf_counter()
                self._set_state_to_cluster(robot_name=robot_name,
                    env_indxs=None)
                control_cluster.write_robot_state()
                self.debug_data["cluster_state_update_dt"][robot_name] = time.perf_counter()-start

                self._update_jnt_imp_cntrl_shared_data() # only if debug_mode_jnt_imp is enabled

                if self._use_remote_stepping[i]:
                    self._remote_steppers[robot_name].ack() # signal cluster stepping is finished
                    if failed is not None and self._env_opts["deact_when_failure"]: # deactivate robot completely
                        self._deactivate(env_indxs=failed,
                            robot_name=robot_name)
                    wait_reset_ok=self._process_remote_reset_req(robot_name=robot_name) # wait for remote reset request (blocking)
                    wait_step_ok=self._wait_for_remote_step_req(robot_name=robot_name)
                    if not wait_reset_ok or not wait_step_ok:
                        return False
                else:
                    # local handling: reset failed controllers in place
                    if failed is not None:
                        reset_ok=self._reset(env_indxs=failed,
                            robot_name=robot_name,
                            reset_cluster=True,
                            reset_cluster_counter=False,
                            randomize=True)
                        if not reset_ok:
                            return False
                        self._set_startup_jnt_imp_gains(robot_name=robot_name,
                            env_indxs=failed)

                    control_cluster.activate_controllers(idxs=control_cluster.get_inactive_controllers())

                control_cluster.pre_trigger() # performs pre-trigger steps, like retrieving
                # values of some rhc flags on shared memory
                control_cluster.trigger_solution() # trigger only active controllers

        return True
+
    def _pre_step(self) -> bool:
        """Cluster pre-step logic (non-debug twin of _pre_step_db, no timing).

        Per robot: optionally runs the overriding low-level impedance update
        at sim rate, steps the joint-velocity filter at its own rate and, on
        cluster instants, waits for the last MPC solution, applies it,
        pushes fresh measured state to the cluster, handles failures
        (remote handshake or local reset) and triggers the next solution.

        Returns:
            bool: True on success, False if waiting for a solution or a
            reset/handshake failed (annotation corrected: returns bool).
        """

        # cluster step logic here
        for i in range(len(self._robot_names)):
            robot_name = self._robot_names[i]
            if self._override_low_lev_controller:
                # if overriding low-lev jnt imp. this has to run at the highest
                # freq possible
                self._read_jnts_state_from_robot(robot_name=robot_name)
                self._write_state_to_jnt_imp(robot_name=robot_name)
                self._apply_cmds_to_jnt_imp_control(robot_name=robot_name)

            if self._jnt_vel_filter[robot_name] is not None and \
                (self.cluster_sim_step_counters[robot_name]+1) % self._filter_step_ssteps_freq == 0:
                # filter joint vel at a fixed frequency wrt sim steps
                if not self._override_low_lev_controller:
                    # we need a fresh sensor reading
                    self._read_jnts_state_from_robot(robot_name=robot_name)
                self._step_jnt_vel_filter(robot_name=robot_name, env_indxs=None)

            control_cluster = self.cluster_servers[robot_name]
            if control_cluster.is_cluster_instant(self.cluster_sim_step_counters[robot_name]):
                wait_ok=control_cluster.wait_for_solution() # this is blocking
                if not wait_ok:
                    return False
                failed = control_cluster.get_failed_controllers(gpu=self._use_gpu)
                self._set_cluster_actions(robot_name=robot_name) # set last cmds to low level control
                if not self._override_low_lev_controller:
                    self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot
                    # we can update the jnt state just at the rate at which the cluster needs it
                    self._read_jnts_state_from_robot(robot_name=robot_name, env_indxs=None)
                # read state necessary for cluster
                self._read_root_state_from_robot(robot_name=robot_name,
                    env_indxs=None)
                # write last robot state to the cluster of controllers
                self._set_state_to_cluster(robot_name=robot_name,
                    env_indxs=None)
                control_cluster.write_robot_state() # write on shared mem

                if self._use_remote_stepping[i]:
                    self._remote_steppers[robot_name].ack() # signal cluster stepping is finished
                    if failed is not None and self._env_opts["deact_when_failure"]:
                        self._deactivate(env_indxs=failed,
                            robot_name=robot_name)
                    wait_reset_ok=self._process_remote_reset_req(robot_name=robot_name) # wait for remote reset request (blocking)
                    wait_step_ok=self._wait_for_remote_step_req(robot_name=robot_name)
                    if not wait_reset_ok or not wait_step_ok:
                        return False
                else:
                    # local handling: reset failed controllers in place
                    if failed is not None:
                        reset_ok=self._reset(env_indxs=failed,
                            robot_name=robot_name,
                            reset_cluster=True,
                            reset_cluster_counter=False,
                            randomize=True)
                        if not reset_ok:
                            return False
                        self._set_startup_jnt_imp_gains(robot_name=robot_name,
                            env_indxs=failed)
                    control_cluster.activate_controllers(idxs=control_cluster.get_inactive_controllers())

                control_cluster.pre_trigger() # performs pre-trigger steps, like retrieving
                # values of some rhc flags on shared memory
                control_cluster.trigger_solution() # trigger only active controllers

        return True
+
    def _post_world_step_db(self) -> None:
        """Per-sim-step bookkeeping, debug variant.

        Advances the per-robot sim step counters and the global step counter
        (once per step), and records sim time and the last cluster solution
        time when debugging is enabled. (Annotation corrected: nothing is
        returned.)
        """

        for i in range(len(self._robot_names)):
            robot_name = self._robot_names[i]
            control_cluster = self.cluster_servers[robot_name]
            self.cluster_sim_step_counters[robot_name]+=1 # this has to be update with sim freq
            if self._debug:
                self.debug_data["sim_time"][robot_name]=self.world_time(robot_name=robot_name)
                self.debug_data["cluster_sol_time"][robot_name] = \
                    control_cluster.solution_time()

        self.step_counter +=1
+
+ def _post_world_step(self) -> bool:
+
+ for i in range(len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ self.cluster_sim_step_counters[robot_name]+=1
+ self.step_counter +=1
+
    def _reset(self,
            robot_name: str,
            env_indxs: torch.Tensor = None,
            randomize: bool = False,
            reset_cluster: bool = False,
            reset_cluster_counter = False,
            acquire_offsets: bool = False) -> bool:
        """Reset one robot: sim state, impedance control, cached measurements.

        Args:
            robot_name: target robot namespace.
            env_indxs: env indexes to reset (None -> all envs).
            randomize: randomize the reset state.
            reset_cluster: also reset the remote controllers (state is pushed
                to them after the local reset).
            reset_cluster_counter: zero the per-robot sim step counter.
            acquire_offsets: re-acquire root pose offsets from the fresh state.

        Returns:
            bool: True on success, False if the cluster reset failed.
        """

        # resets the state of target robot and env to the defaults
        self._reset_state(env_indxs=env_indxs,
            robot_name=robot_name,
            randomize=randomize)

        # and jnt imp. controllers
        self._reset_jnt_imp_control(robot_name=robot_name,
            env_indxs=env_indxs)

        # read reset state
        self._read_root_state_from_robot(robot_name=robot_name,
            env_indxs=env_indxs)
        self._read_jnts_state_from_robot(robot_name=robot_name,
            env_indxs=env_indxs)

        if self._jnt_vel_filter[robot_name] is not None:
            self._jnt_vel_filter[robot_name].reset(idxs=env_indxs)

        if acquire_offsets:
            self._update_root_offsets(robot_name=robot_name,
                env_indxs=env_indxs)

        if reset_cluster: # reset controllers remotely
            reset_ok=self._reset_cluster(env_indxs=env_indxs,
                robot_name=robot_name,
                reset_cluster_counter=reset_cluster_counter)
            if not reset_ok:
                return False

        return True
+
+ def _randomize_yaw(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ root_q_default = self._root_q_default[robot_name]
+ if env_indxs is None:
+ env_indxs = torch.arange(root_q_default.shape[0])
+
+ num_indices = env_indxs.shape[0]
+ yaw_angles = torch.rand((num_indices,),
+ device=root_q_default.device) * 2 * torch.pi # uniformly distributed random angles
+
+ # Compute cos and sin once
+ cos_half = torch.cos(yaw_angles / 2)
+ root_q_default[env_indxs, :] = torch.stack((cos_half,
+ torch.zeros_like(cos_half),
+ torch.zeros_like(cos_half),
+ torch.sin(yaw_angles / 2)), dim=1).reshape(num_indices, 4)
+
    def _deactivate(self,
            robot_name: str,
            env_indxs: torch.Tensor = None):
        """Disable joint impedance control for the given envs.

        With the impedance controllers deactivated no commands are applied,
        so the robot falls; used when controllers fail.
        """

        # deactivate jnt imp controllers for given robots and envs (makes the robot fall)
        self._jnt_imp_controllers[robot_name].deactivate(robot_indxs=env_indxs)
+
    def _n_contacts(self, robot_name: str) -> List[int]:
        # NOTE(review): returns the single per-robot entry of the n_contacts
        # ctor argument, so the List[int] annotation looks off — confirm the
        # element type before changing it
        return self._num_contacts[robot_name]
+
+ def root_p(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None):
+
+ if env_idxs is None:
+ return self._root_p[robot_name]
+ else:
+ return self._root_p[robot_name][env_idxs, :]
+
+ def root_p_rel(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None):
+
+ if env_idxs is None:
+ rel_pos = torch.sub(self.root_p(robot_name=robot_name),
+ self._root_pos_offsets[robot_name])
+ else:
+ rel_pos = torch.sub(self.root_p(robot_name=robot_name,
+ env_idxs=env_idxs),
+ self._root_pos_offsets[robot_name][env_idxs, :])
+ return rel_pos
+
+ def root_q(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None):
+
+ if env_idxs is None:
+ return self._root_q[robot_name]
+ else:
+ return self._root_q[robot_name][env_idxs, :]
+
+ def root_q_rel(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None):
+
+ if env_idxs is None:
+ return quaternion_difference(self._root_q_offsets[robot_name],
+ self.root_q(robot_name=robot_name))
+ rel_q = quaternion_difference(self._root_q_offsets[robot_name][env_idxs, :],
+ self.root_q(robot_name=robot_name,
+ env_idxs=env_idxs))
+ return rel_q
+
+ def _quat_to_yaw_wxyz(self, q: torch.Tensor, out: torch.Tensor = None):
+ # Quaternion convention is w, x, y, z.
+ w, x, y, z = q[:, 0], q[:, 1], q[:, 2], q[:, 3]
+ num = 2.0 * (w * z + x * y)
+ den = 1.0 - 2.0 * (y * y + z * z)
+ if out is None:
+ return torch.atan2(num, den)
+ return torch.atan2(num, den, out=out)
+
+ def _yaw_to_quat_wxyz(self, yaw: torch.Tensor, like_q: torch.Tensor,
+ out: torch.Tensor = None):
+ q = out
+ if q is None:
+ q = torch.zeros((yaw.shape[0], 4), dtype=like_q.dtype, device=like_q.device)
+ else:
+ q.zero_()
+ q[:, 0] = torch.cos(yaw / 2.0)
+ q[:, 3] = torch.sin(yaw / 2.0)
+ return q
+
+ def _quat_conjugate_wxyz(self, q: torch.Tensor, out: torch.Tensor = None):
+ qi = out
+ if qi is None:
+ qi = torch.empty_like(q)
+ qi[:, :] = q
+ qi[:, 1:] = -qi[:, 1:]
+ return qi
+
+ def _quat_multiply_wxyz(self, q1: torch.Tensor, q2: torch.Tensor,
+ out: torch.Tensor = None):
+ q_out = out
+ if q_out is None:
+ q_out = torch.empty_like(q1)
+ w1, x1, y1, z1 = q1[:, 0], q1[:, 1], q1[:, 2], q1[:, 3]
+ w2, x2, y2, z2 = q2[:, 0], q2[:, 1], q2[:, 2], q2[:, 3]
+ q_out[:, 0] = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
+ q_out[:, 1] = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
+ q_out[:, 2] = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
+ q_out[:, 3] = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
+ return q_out
+
+ def _normalize_quat_wxyz(self, q: torch.Tensor, out: torch.Tensor = None):
+ q_norm = out
+ if q_norm is None:
+ q_norm = torch.empty_like(q)
+ q_norm[:, :] = q
+ q_norm /= torch.clamp(torch.norm(q_norm, dim=1, keepdim=True), min=1e-9)
+ return q_norm
+
    def root_q_yaw_rel(self,
            robot_name: str,
            env_idxs: torch.Tensor = None):
        """Root orientation with the startup yaw removed, pitch/roll preserved.

        Computes q_rel = q_yaw(yaw - yaw_start) * q_pr, where q_pr is the
        pitch/roll component of the current orientation. Quaternions follow
        the w, x, y, z convention.

        The env_idxs-is-None branch reuses the preallocated per-robot
        workspace dict (``out=`` everywhere, no fresh allocations); the
        indexed branch allocates intermediates.
        """
        # Return quaternion with startup yaw removed while preserving current pitch/roll.
        if env_idxs is None:
            ws = self._root_q_yaw_rel_ws[robot_name]
            q_abs = self._root_q[robot_name]
            yaw_start = self._root_q_offsets_yaw[robot_name]

            self._normalize_quat_wxyz(q=q_abs, out=ws["q_abs_unit"])
            self._quat_to_yaw_wxyz(q=ws["q_abs_unit"], out=ws["yaw_abs"])

            # yaw_rel = yaw_abs - yaw_start, then wrapped into (-pi, pi]
            # via atan2(sin, cos)
            torch.sub(ws["yaw_abs"], yaw_start, out=ws["yaw_rel"])
            torch.sin(ws["yaw_rel"], out=ws["yaw_sin"])
            torch.cos(ws["yaw_rel"], out=ws["yaw_cos"])
            torch.atan2(ws["yaw_sin"], ws["yaw_cos"], out=ws["yaw_rel"])

            # Build pure-yaw quaternions for:
            # 1) the current absolute heading and 2) the startup-relative heading.
            self._yaw_to_quat_wxyz(yaw=ws["yaw_abs"], like_q=ws["q_abs_unit"], out=ws["q_yaw_abs"])
            self._yaw_to_quat_wxyz(yaw=ws["yaw_rel"], like_q=ws["q_abs_unit"], out=ws["q_yaw_rel"])

            # Isolate pitch/roll by removing the absolute yaw from the current orientation.
            # For unit quaternions q_pr = q_yaw_abs^{-1} * q_abs.
            self._quat_conjugate_wxyz(q=ws["q_yaw_abs"], out=ws["q_yaw_abs_conj"])
            self._quat_multiply_wxyz(q1=ws["q_yaw_abs_conj"], q2=ws["q_abs_unit"], out=ws["q_pr"])
            # Recompose orientation with relative yaw + current pitch/roll.
            self._quat_multiply_wxyz(q1=ws["q_yaw_rel"], q2=ws["q_pr"], out=ws["q_rel"])

            return self._normalize_quat_wxyz(q=ws["q_rel"], out=ws["q_rel"])

        # Indexed branch: root_q with a tensor index returns a copy (advanced
        # indexing), so the in-place normalize below does not touch stored state.
        q_abs = self.root_q(robot_name=robot_name, env_idxs=env_idxs)
        q_abs = self._normalize_quat_wxyz(q=q_abs, out=q_abs)

        yaw_abs = self._quat_to_yaw_wxyz(q_abs)
        yaw_start = self._root_q_offsets_yaw[robot_name][env_idxs]
        yaw_rel = yaw_abs - yaw_start
        # wrap the yaw difference into (-pi, pi]
        yaw_rel = torch.atan2(torch.sin(yaw_rel), torch.cos(yaw_rel))

        q_yaw_abs = self._yaw_to_quat_wxyz(yaw_abs, like_q=q_abs)
        q_yaw_rel = self._yaw_to_quat_wxyz(yaw_rel, like_q=q_abs)
        q_pr = self._quat_multiply_wxyz(self._quat_conjugate_wxyz(q_yaw_abs), q_abs)
        q_rel = self._quat_multiply_wxyz(q_yaw_rel, q_pr)

        return self._normalize_quat_wxyz(q_rel)
+
+ def root_v(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None,
+ base_loc: bool = True):
+
+ root_v=self._root_v[robot_name]
+ if base_loc:
+ root_v=self._root_v_base_loc[robot_name]
+ if env_idxs is None:
+ return root_v
+ else:
+ return root_v[env_idxs, :]
+
+ def root_omega(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None,
+ base_loc: bool = True):
+
+ root_omega=self._root_omega[robot_name]
+ if base_loc:
+ root_omega=self._root_omega_base_loc[robot_name]
+ if env_idxs is None:
+ return root_omega
+ else:
+ return root_omega[env_idxs, :]
+
+ def root_a(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None,
+ base_loc: bool = True):
+
+ root_a=self._root_a[robot_name]
+ if base_loc:
+ root_a=self._root_a_base_loc[robot_name]
+ if env_idxs is None:
+ return root_a
+ else:
+ return root_a[env_idxs, :]
+
+ def root_alpha(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None,
+ base_loc: bool = True):
+
+ root_alpha=self._root_alpha[robot_name]
+ if base_loc:
+ root_alpha=self._root_alpha_base_loc[robot_name]
+ if env_idxs is None:
+ return root_alpha
+ else:
+ return root_alpha[env_idxs, :]
+
+ def gravity(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None,
+ base_loc: bool = True):
+
+ gravity_loc=self._gravity_normalized[robot_name]
+ if base_loc:
+ gravity_loc=self._gravity_normalized_base_loc[robot_name]
+ if env_idxs is None:
+ return gravity_loc
+ else:
+ return gravity_loc[env_idxs, :]
+
+ def jnts_q(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None):
+
+ if env_idxs is None:
+ return self._jnts_q[robot_name]
+ else:
+ return self._jnts_q[robot_name][env_idxs, :]
+
+ def jnts_v(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None):
+
+ if env_idxs is None:
+ return self._jnts_v[robot_name]
+ else:
+ return self._jnts_v[robot_name][env_idxs, :]
+
+ def jnts_eff(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None): # (measured) efforts
+
+ if env_idxs is None:
+ return self._jnts_eff[robot_name]
+ else:
+ return self._jnts_eff[robot_name][env_idxs, :]
+
+ def _wait_for_remote_step_req(self,
+ robot_name: str):
+ if not self._remote_steppers[robot_name].wait(self._timeout):
+ Journal.log(self.__class__.__name__,
+ "_wait_for_remote_step_req",
+ "Didn't receive any remote step req within timeout!",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ return False
+ return True
+
+ def _process_remote_reset_req(self,
+ robot_name: str):
+
+ if not self._remote_resetters[robot_name].wait(self._timeout):
+ Journal.log(self.__class__.__name__,
+ "_process_remote_reset_req",
+ "Didn't receive any remote reset req within timeout!",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ return False
+
+ reset_requests = self._remote_reset_requests[robot_name]
+ reset_requests.synch_all(read=True, retry=True) # read reset requests from shared mem
+ to_be_reset = reset_requests.to_be_reset(gpu=self._use_gpu)
+ if to_be_reset is not None:
+ reset_ok=self._reset(env_indxs=to_be_reset,
+ robot_name=robot_name,
+ reset_cluster=True,
+ reset_cluster_counter=False,
+ randomize=True)
+ if not reset_ok:
+ return False
+ self._set_startup_jnt_imp_gains(robot_name=robot_name,
+ env_indxs=to_be_reset) # set gains to startup config (usually lower gains)
+
+ control_cluster = self.cluster_servers[robot_name]
+ control_cluster.activate_controllers(idxs=to_be_reset) # activate controllers
+ # (necessary if failed)
+
+ self._remote_resetters[robot_name].ack() # signal reset performed
+
+ return True
+
+ def _update_jnt_imp_cntrl_shared_data(self):
+ if self._debug:
+ for i in range(0, len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ # updating all the jnt impedance data - > this may introduce some overhead
+ imp_data = self._jnt_imp_cntrl_shared_data[robot_name].imp_data_view
+ # set data
+ imp_data.set(data_type="pos_err",
+ data=self._jnt_imp_controllers[robot_name].pos_err(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="vel_err",
+ data=self._jnt_imp_controllers[robot_name].vel_err(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="pos_gains",
+ data=self._jnt_imp_controllers[robot_name].pos_gains(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="vel_gains",
+ data=self._jnt_imp_controllers[robot_name].vel_gains(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="eff_ff",
+ data=self._jnt_imp_controllers[robot_name].eff_ref(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="pos",
+ data=self._jnt_imp_controllers[robot_name].pos(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="pos_ref",
+ data=self._jnt_imp_controllers[robot_name].pos_ref(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="vel",
+ data=self._jnt_imp_controllers[robot_name].vel(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="vel_ref",
+ data=self._jnt_imp_controllers[robot_name].vel_ref(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="eff",
+ data=self._jnt_imp_controllers[robot_name].eff(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="imp_eff",
+ data=self._jnt_imp_controllers[robot_name].imp_eff(),
+ gpu=self._use_gpu)
+ # copy from GPU to CPU if using gpu
+ if self._use_gpu:
+ imp_data.synch_mirror(from_gpu=True,non_blocking=True)
+ # even if it's from GPU->CPu we can use non-blocking since it's just for db
+ # purposes
+ # write copies to shared memory
+ imp_data.synch_all(read=False, retry=False)
+
+ def _set_startup_jnt_imp_gains(self,
+ robot_name:str,
+ env_indxs: torch.Tensor = None):
+
+ startup_p_gains=self._jnt_imp_controllers[robot_name].startup_p_gains()
+ startup_d_gains=self._jnt_imp_controllers[robot_name].startup_d_gains()
+
+ if env_indxs is not None:
+ self._jnt_imp_controllers[robot_name].set_gains(robot_indxs=env_indxs,
+ pos_gains=startup_p_gains[env_indxs, :],
+ vel_gains=startup_d_gains[env_indxs, :])
+ else:
+ self._jnt_imp_controllers[robot_name].set_gains(robot_indxs=env_indxs,
+ pos_gains=startup_p_gains[:, :],
+ vel_gains=startup_d_gains[:, :])
+
+ def _write_state_to_jnt_imp(self,
+ robot_name: str):
+
+ # always update ,imp. controller internal state (jnt imp control is supposed to be
+ # always running)
+ self._jnt_imp_controllers[robot_name].update_state(pos=self.jnts_q(robot_name=robot_name),
+ vel = self.jnts_v(robot_name=robot_name),
+ eff = self.jnts_eff(robot_name=robot_name))
+
+ def _set_cluster_actions(self,
+ robot_name):
+ control_cluster = self.cluster_servers[robot_name]
+ actions=control_cluster.get_actions()
+ active_controllers=control_cluster.get_active_controllers(gpu=self._use_gpu)
+
+ if active_controllers is not None:
+ self._jnt_imp_controllers[robot_name].set_refs(
+ pos_ref=actions.jnts_state.get(data_type="q", gpu=self._use_gpu)[active_controllers, :],
+ vel_ref=actions.jnts_state.get(data_type="v", gpu=self._use_gpu)[active_controllers, :],
+ eff_ref=actions.jnts_state.get(data_type="eff", gpu=self._use_gpu)[active_controllers, :],
+ robot_indxs=active_controllers)
+
    def _jnt_imp_reset_overrride(self, robot_name: str):
        # Hook called at the end of a jnt imp controller reset; child classes
        # may override to inject extra reset logic. No-op by default.
        # NOTE(review): name carries a triple-r typo ("overrride"); kept as-is
        # since callers/overriders reference it by this exact name.
        pass
+
+ def _apply_cmds_to_jnt_imp_control(self, robot_name:str):
+
+ self._jnt_imp_controllers[robot_name].apply_cmds()
+
+ def _update_root_offsets(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ if self._debug:
+ for_robots = ""
+ if env_indxs is not None:
+ if not isinstance(env_indxs, torch.Tensor):
+ msg = "Provided env_indxs should be a torch tensor of indexes!"
+ Journal.log(self.__class__.__name__,
+ "update_root_offsets",
+ msg,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if self._use_gpu:
+ if not env_indxs.device.type == "cuda":
+ error = "Provided env_indxs should be on GPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ else:
+ if not env_indxs.device.type == "cpu":
+ error = "Provided env_indxs should be on CPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs.tolist())
+ if self._verbose:
+ Journal.log(self.__class__.__name__,
+ "update_root_offsets",
+ f"updating root offsets " + for_robots,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ # only planar position used
+ if env_indxs is None:
+ self._root_pos_offsets[robot_name][:, 0:2] = self._root_p[robot_name][:, 0:2]
+ self._normalize_quat_wxyz(q=self._root_q[robot_name], out=self._root_q_offsets[robot_name])
+ self._quat_to_yaw_wxyz(q=self._root_q_offsets[robot_name],
+ out=self._root_q_offsets_yaw[robot_name])
+
+ else:
+ self._root_pos_offsets[robot_name][env_indxs, 0:2] = self._root_p[robot_name][env_indxs, 0:2]
+ q_root_norm=self._normalize_quat_wxyz(self._root_q[robot_name][env_indxs, :])
+ self._root_q_offsets[robot_name][env_indxs, :] = q_root_norm
+ self._root_q_offsets_yaw[robot_name][env_indxs] = self._quat_to_yaw_wxyz(q=q_root_norm)
+
+ def _reset_jnt_imp_control(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ if self._debug:
+ for_robots = ""
+ if env_indxs is not None:
+ if not isinstance(env_indxs, torch.Tensor):
+ Journal.log(self.__class__.__name__,
+ "reset_jnt_imp_control",
+ "Provided env_indxs should be a torch tensor of indexes!",
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if self._use_gpu:
+ if not env_indxs.device.type == "cuda":
+ error = "Provided env_indxs should be on GPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ else:
+ if not env_indxs.device.type == "cpu":
+ error = "Provided env_indxs should be on CPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs)
+
+ if self._verbose:
+ Journal.log(self.__class__.__name__,
+ "reset_jnt_imp_control",
+ f"resetting joint impedances " + for_robots,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ # resets all internal data, refs to defaults
+ self._jnt_imp_controllers[robot_name].reset(robot_indxs=env_indxs)
+
+ #restore jnt imp refs to homing
+ if env_indxs is None:
+ self._jnt_imp_controllers[robot_name].set_refs(pos_ref=self._homing[:, :],
+ robot_indxs = None)
+ else:
+ self._jnt_imp_controllers[robot_name].set_refs(pos_ref=self._homing[env_indxs, :],
+ robot_indxs = env_indxs)
+
+ # self._write_state_to_jnt_imp(robot_name=robot_name)
+ # actually applies reset commands to the articulation
+ self._write_state_to_jnt_imp(robot_name=robot_name)
+ self._jnt_imp_reset_overrride(robot_name=robot_name)
+ self._apply_cmds_to_jnt_imp_control(robot_name=robot_name)
+
+ def _synch_default_root_states(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ if self._debug:
+ for_robots = ""
+ if env_indxs is not None:
+ if not isinstance(env_indxs, torch.Tensor):
+ msg = "Provided env_indxs should be a torch tensor of indexes!"
+ Journal.log(self.__class__.__name__,
+ "synch_default_root_states",
+ msg,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if self._use_gpu:
+ if not env_indxs.device.type == "cuda":
+ error = "Provided env_indxs should be on GPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ else:
+ if not env_indxs.device.type == "cpu":
+ error = "Provided env_indxs should be on CPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs.tolist())
+ if self._verbose:
+ Journal.log(self.__class__.__name__,
+ "synch_default_root_states",
+ f"updating default root states " + for_robots,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ if env_indxs is None:
+ self._root_p_default[robot_name][:, :] = self._root_p[robot_name]
+ self._root_q_default[robot_name][:, :] = self._root_q[robot_name]
+ else:
+ self._root_p_default[robot_name][env_indxs, :] = self._root_p[robot_name][env_indxs, :]
+ self._root_q_default[robot_name][env_indxs, :] = self._root_q[robot_name][env_indxs, :]
+
+ def _generate_rob_descriptions(self,
+ robot_name: str,
+ urdf_path: str,
+ srdf_path: str):
+
+ custom_xacro_args=extract_custom_xacro_args(self._env_opts)
+ Journal.log(self.__class__.__name__,
+ "_generate_rob_descriptions",
+ "generating URDF for robot "+ f"{robot_name}, from URDF {urdf_path}...",
+ LogType.STAT,
+ throw_when_excep = True)
+ xrdf_cmds=self._xrdf_cmds(robot_name=robot_name)
+ xrdf_cmds=merge_xacro_cmds(prev_cmds=xrdf_cmds,
+ new_cmds=custom_xacro_args)
+ self._urdf_dump_paths[robot_name]=generate_urdf(robot_name=robot_name,
+ xacro_path=urdf_path,
+ dump_path=self._descr_dump_path,
+ xrdf_cmds=xrdf_cmds)
+ Journal.log(self.__class__.__name__,
+ "_generate_rob_descriptions",
+ "generating SRDF for robot "+ f"{robot_name}, from SRDF {srdf_path}...",
+ LogType.STAT,
+ throw_when_excep = True)
+ # we also generate SRDF files, which are useful for control
+ self._srdf_dump_paths[robot_name]=generate_srdf(robot_name=robot_name,
+ xacro_path=srdf_path,
+ dump_path=self._descr_dump_path,
+ xrdf_cmds=xrdf_cmds)
+
+ def _xrdf_cmds(self, robot_name:str):
+ urdfpath=self._robot_urdf_paths[robot_name]
+ # we assume directory tree of the robot package is like
+ # robot-ros-pkg/robot_urdf/urdf/robot.urdf.xacro
+ parts = urdfpath.split('/')
+ urdf_descr_root_path = '/'.join(parts[:-2])
+ cmds = get_xrdf_cmds(urdf_descr_root_path=urdf_descr_root_path)
+ return cmds
+
    @abstractmethod
    def current_tstep(self) -> int:
        # Abstract: current control/sim step index; must be provided by child.
        pass
+
    @abstractmethod
    def world_time(self, robot_name: str) -> float:
        # Default: elapsed sim time as step counter * physics dt. Although
        # marked abstract, it carries a body — children may override or reuse
        # it via super().
        return self.cluster_sim_step_counters[robot_name]*self.physics_dt()
+
    @abstractmethod
    def is_running(self) -> bool:
        # Abstract: whether the simulation/control loop is currently running.
        pass
+
    @abstractmethod
    def _get_contact_f(self,
            robot_name: str,
            contact_link: str,
            env_indxs: torch.Tensor) -> torch.Tensor:
        # Abstract: contact force for the given link on the given envs.
        # Default body returns None, i.e. "no contact data available".
        return None
+
    @abstractmethod
    def physics_dt(self) -> float:
        # Abstract: physics integration step size [s].
        pass
+
    @abstractmethod
    def rendering_dt(self) -> float:
        # Abstract: rendering step size [s].
        pass
+
    @abstractmethod
    def set_physics_dt(self, physics_dt: float):
        # Abstract: set the physics integration step size [s].
        pass
+
    @abstractmethod
    def set_rendering_dt(self, rendering_dt: float):
        # Abstract: set the rendering step size [s].
        pass
+
    @abstractmethod
    def _robot_jnt_names(self, robot_name: str) -> List[str]:
        # Abstract: ordered joint names for the given robot.
        pass
+
    @abstractmethod
    def _read_root_state_from_robot(self,
            robot_name: str,
            env_indxs: torch.Tensor = None):
        # Abstract: refresh the cached root state from the robot/simulator.
        # IMPORTANT: Child interfaces should provide root quaternions in w, x, y, z convention.
        pass
+
    @abstractmethod
    def _read_jnts_state_from_robot(self,
            robot_name: str,
            env_indxs: torch.Tensor = None):
        # Abstract: refresh the cached joint state from the robot/simulator.
        pass
+
    @abstractmethod
    def _init_robots_state(self):
        # Abstract hook: initialization of the robots' state (child-defined).
        pass
+
    @abstractmethod
    def _reset_state(self,
            robot_name: str,
            env_indxs: torch.Tensor = None,
            randomize: bool = False):
        # Abstract: reset robot state on the given envs, optionally randomizing.
        pass
+
    @abstractmethod
    def _init_world(self):
        # Abstract hook: world/scene initialization (child-defined).
        pass
+
    @abstractmethod
    def _reset_sim(self) -> None:
        # Abstract hook: full simulator reset (child-defined).
        pass
+
    @abstractmethod
    def _set_jnts_to_homing(self, robot_name: str):
        # Abstract: drive joints back to the homing configuration.
        pass
+
    @abstractmethod
    def _set_root_to_defconfig(self, robot_name: str):
        # Abstract: restore the root to its default configuration.
        pass
+
    @abstractmethod
    def _parse_env_opts(self):
        # Abstract hook: parsing of environment options (child-defined).
        pass
+
    @abstractmethod
    def _pre_setup(self):
        # Abstract hook: work to be done before setup (child-defined).
        pass
+
    @abstractmethod
    def _generate_jnt_imp_control(self) -> JntImpCntrlChild:
        # Abstract: construct the joint impedance controller for this env.
        pass
+
    @abstractmethod
    def _render_sim(self, mode: str = "human") -> None:
        # Abstract: render the simulation (gym-like "mode" argument).
        pass
+
    @abstractmethod
    def _close(self) -> None:
        # Abstract hook: teardown/cleanup (child-defined).
        pass
+
    @abstractmethod
    def _step_world(self) -> None:
        # Abstract: advance the simulation world by one step.
        pass
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/xbot2_basic.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/xbot2_basic.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ade37f40e9eeccec3b1c6dea4e50b2bf84caec7b
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/xbot2_basic.yaml
@@ -0,0 +1,48 @@
+XBotInterface:
+ urdf_path: $PWD/kyon.urdf
+ srdf_path: $PWD/kyon.srdf
+
+ModelInterface:
+ model_type: RBDL
+ is_model_floating_base: true
+
+# defaults
+motor_pd:
+ hip_roll*: [500, 10]
+ hip_pitch*: [500, 10]
+ knee_pitch*: [500, 10]
+ wheel_joint*: [0, 30]
+
+# startup pd config
+startup_motor_pd:
+ hip_roll*: [200, 30]
+ hip_pitch*: [200, 30]
+ knee_pitch*: [200, 30]
+ wheel_joint*: [0, 30]
+
+# hal
+xbotcore_device_configs:
+ sim: $PWD/hal/kyon_gz.yaml
+ dummy: $PWD/hal/kyon_dummy.yaml
+
+# threads
+xbotcore_threads:
+ rt_main: {sched: fifo , prio: 60, period: 0.001}
+ nrt_main: {sched: other, prio: 0 , period: 0.005}
+
+# plugins
+xbotcore_plugins:
+
+ homing:
+ thread: rt_main
+ type: homing
+
+ ros_io: {thread: nrt_main, type: ros_io}
+
+ ros_control: {thread: nrt_main, type: ros_control, params: {autostart: {type: bool, value: true}}}
+
+# global parameters
+xbotcore_param:
+ /xbot/hal/joint_safety/filter_autostart: {value: true, type: bool}
+ /xbot/hal/joint_safety/filter_cutoff_hz: {value: 2.0, type: double}
+ /xbot/hal/enable_safety: {value: false, type: bool}
\ No newline at end of file
diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79/metadata.yaml b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1f016635e773c4d6e9f6796aaee342efefde1841
--- /dev/null
+++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79/metadata.yaml
@@ -0,0 +1,74 @@
+rosbag2_bagfile_information:
+ version: 5
+ storage_identifier: sqlite3
+ duration:
+ nanoseconds: 89899902344
+ starting_time:
+ nanoseconds_since_epoch: 49804687
+ message_count: 116015
+ topics_with_message_count:
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_robot_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14501
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_rhc_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14501
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_rhc_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14502
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_rhc_contacts
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14502
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_rhc_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14502
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_hl_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14502
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_robot_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14502
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_heightmap
+ type: visualization_msgs/msg/Marker
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14502
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_HandShake
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 1
+ compression_format: zstd
+ compression_mode: FILE
+ relative_file_paths:
+ - rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79_0.db3.zstd
+ files:
+ - path: rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79_0.db3
+ starting_time:
+ nanoseconds_since_epoch: 49804687
+ duration:
+ nanoseconds: 89899902344
+ message_count: 116015
\ No newline at end of file
diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79_0.db3.zstd b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79_0.db3.zstd
new file mode 100644
index 0000000000000000000000000000000000000000..a96f1e57621af4c692f8adf95c2b0fee1881d283
--- /dev/null
+++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79_0.db3.zstd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:035860e2f2276589edddabd0c2112bc5058cf078fb93c939352f6106dde04369
+size 5205351
diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/metadata.yaml b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c1eb44b52ed33927dacdeab7262228b997fc20b4
--- /dev/null
+++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/metadata.yaml
@@ -0,0 +1,68 @@
+rosbag2_bagfile_information:
+ version: 5
+ storage_identifier: sqlite3
+ duration:
+ nanoseconds: 89960205078
+ starting_time:
+ nanoseconds_since_epoch: 40039062
+ message_count: 114220
+ topics_with_message_count:
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_robot_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_robot_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_rhc_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_rhc_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_rhc_contacts
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_rhc_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_hl_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_HandShake
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 1
+ compression_format: zstd
+ compression_mode: FILE
+ relative_file_paths:
+ - rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3.zstd
+ files:
+ - path: rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3
+ starting_time:
+ nanoseconds_since_epoch: 40039062
+ duration:
+ nanoseconds: 89960205078
+ message_count: 114220
\ No newline at end of file
diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3 b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3
new file mode 100644
index 0000000000000000000000000000000000000000..da0649b6341767e8ea8fb31dcddb4c61f8370856
--- /dev/null
+++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15d925353f9f409466fee90dbd502eb6f19f089a2f1035a53b168b0786c641e4
+size 39010304
diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3.zstd b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3.zstd
new file mode 100644
index 0000000000000000000000000000000000000000..385acbea8c65090a30584ea7841ff7ce9ac7463e
--- /dev/null
+++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3.zstd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9052d81fd0fa7a91866b7860bd22c2f3876a6c75adc93dff5fb0f87374c9bf65
+size 2238099
diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4/metadata.yaml b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f1d7119b777a73c216216399651751589d831f31
--- /dev/null
+++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4/metadata.yaml
@@ -0,0 +1,68 @@
+rosbag2_bagfile_information:
+ version: 5
+ storage_identifier: sqlite3
+ duration:
+ nanoseconds: 89959960938
+ starting_time:
+ nanoseconds_since_epoch: 40039062
+ message_count: 120835
+ topics_with_message_count:
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_robot_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_robot_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_rhc_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_rhc_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_rhc_contacts
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_rhc_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_hl_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_HandShake
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 1
+ compression_format: zstd
+ compression_mode: FILE
+ relative_file_paths:
+ - rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4_0.db3.zstd
+ files:
+ - path: rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4_0.db3
+ starting_time:
+ nanoseconds_since_epoch: 40039062
+ duration:
+ nanoseconds: 89959960938
+ message_count: 120835
\ No newline at end of file
diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4_0.db3.zstd b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4_0.db3.zstd
new file mode 100644
index 0000000000000000000000000000000000000000..d277e22eee06530a112d409061f64d97d6e9217c
--- /dev/null
+++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4_0.db3.zstd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aebb896e85c9a107c8e248d5dcde3ff446baeeb7324380315d15299dc54c067e
+size 2349264