diff --git a/.gitattributes b/.gitattributes
index fc5a340abcf2e5202e33ecd96c02b09d67df318b..d4f40a3bb073a205f61f920e4d257f3de68ce09f 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -32,6 +32,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
+*.db3 filter=lfs diff=lfs merge=lfs -text
+*.db3.zstd filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
*_model filter=lfs diff=lfs merge=lfs -text
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/b2w_rhc.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/b2w_rhc.py
new file mode 100644
index 0000000000000000000000000000000000000000..5935983ef50bbc0c982d34eefeb6ed796182b215
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/b2w_rhc.py
@@ -0,0 +1,108 @@
+from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc import HybridQuadRhc
+
+import numpy as np
+
+from typing import Dict
+
+from kyonrlstepping.utils.sysutils import PathsGetter
+
+class B2WRhc(HybridQuadRhc):
+
+ def __init__(self,
+ srdf_path: str,
+ urdf_path: str,
+ robot_name: str, # used for shared memory namespaces
+ codegen_dir: str,
+ n_nodes: float = 31,
+ dt: float = 0.03,
+ injection_node: int = 10,
+ max_solver_iter = 1, # defaults to rt-iteration
+ open_loop: bool = True,
+ close_loop_all: bool = False,
+ dtype = np.float32,
+ verbose = False,
+ debug = False,
+ refs_in_hor_frame = True,
+ timeout_ms: int = 60000,
+ custom_opts: Dict = {}):
+
+ paths = PathsGetter()
+ self._files_suffix=""
+ if open_loop:
+ self._files_suffix="_open"
+ config_path=paths.RHCCONFIGPATH_B2W_CONTINUOUS+self._files_suffix+".yaml"
+
+ self._fix_wheels=False
+
+ super().__init__(srdf_path=srdf_path,
+ urdf_path=urdf_path,
+ config_path=config_path,
+ robot_name=robot_name, # used for shared memory namespaces
+ codegen_dir=codegen_dir,
+ n_nodes=n_nodes,
+ dt=dt,
+ injection_node=injection_node,
+ max_solver_iter=max_solver_iter, # defaults to rt-iteration
+ open_loop=open_loop,
+ close_loop_all=close_loop_all,
+ dtype=dtype,
+ verbose=verbose,
+ debug=debug,
+ refs_in_hor_frame=refs_in_hor_frame,
+ timeout_ms=timeout_ms,
+ custom_opts=custom_opts)
+
+ self._fail_idx_scale=1e-9
+ self._fail_idx_thresh_open_loop=1e0
+ self._fail_idx_thresh_closed_loop=10
+
+ if open_loop:
+ self._fail_idx_thresh=self._fail_idx_thresh_open_loop
+ else:
+ self._fail_idx_thresh=self._fail_idx_thresh_closed_loop
+
+ # adding some additional config files for jnt imp control
+ self._rhc_fpaths.append(paths.JNT_IMP_CONFIG_XBOT+".yaml")
+ self._rhc_fpaths.append(paths.JNT_IMP_CONFIG+".yaml")
+
+ def _set_rhc_pred_idx(self):
+ self._pred_node_idx=round((self._n_nodes-1)*2/3)
+
+ def _set_rhc_cmds_idx(self):
+ self._rhc_cmds_node_idx=2
+
+ def _config_override(self):
+ paths = PathsGetter()
+ # if ("control_wheels" in self._custom_opts) and \
+ # ("false" in self._custom_opts["control_wheels"] or \
+ # "False" in self._custom_opts["control_wheels"]):
+ # self.config_path = paths.RHCCONFIGPATH_B2W_NO_WHEELS+self._files_suffix+".yaml"
+ # self._fix_wheels=True
+
+ def _init_problem(self):
+
+ fixed_jnts_patterns=None
+ wheels_patterns=["foot_joint"]
+ if self._fix_wheels:
+ fixed_jnts_patterns=[]
+ fixed_jnts_patterns.append("foot_joint")
+ wheels_patterns=None
+
+ flight_duration_sec=0.4 # [s]
+ flight_duration=int(flight_duration_sec/self._dt)
+ post_flight_duration_sec=0.2 # [s]
+ post_flight_duration=int(post_flight_duration_sec/self._dt)
+
+ step_height=0.15
+ if ("step_height" in self._custom_opts):
+ step_height=self._custom_opts["step_height"]
+
+ super()._init_problem(fixed_jnt_patterns=fixed_jnts_patterns,
+ wheels_patterns=wheels_patterns,
+ foot_linkname="FL_foot",
+ flight_duration=flight_duration,
+ post_flight_stance=post_flight_duration,
+ step_height=step_height,
+ keep_yaw_vert=False,
+ phase_force_reg=2e-2,
+ vel_bounds_weight=1.0)
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/b2w_rhc_continuous.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/b2w_rhc_continuous.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..22018d950c1c754514e76107e24adbf7c8aac0f3
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/b2w_rhc_continuous.yaml
@@ -0,0 +1,335 @@
+solver:
+ type: ilqr
+ ipopt.linear_solver: ma57
+ ipopt.tol: 0.1
+# ilqr.merit_der_threshold: 1e-3
+# ilqr.defect_norm_threshold: 1e-3
+ ipopt.constr_viol_tol: 0.01
+ ilqr.constraint_violation_threshold: 1e-2
+# ipopt.hessian_approximation: exact
+ ipopt.print_level: 5
+ ipopt.suppress_all_output: 'yes'
+ ipopt.sb: 'yes'
+ ilqr.suppress_all_output: 'yes'
+ ilqr.codegen_enabled: true
+ ilqr.codegen_workdir: /tmp/tyhio
+ ilqr.enable_gn: true
+ ilqr.hxx_reg_base: 0.0
+ ilqr.n_threads: 0
+ print_time: 0
+
+constraints:
+ - FL_foot_ball
+ - FR_foot_ball
+ - RR_foot_ball
+ - RL_foot_ball
+
+costs:
+ - z_FL_foot_ball
+ - z_FR_foot_ball
+ - z_RR_foot_ball
+ - z_RL_foot_ball
+ # - vz_FL_foot_ball
+ # - vz_FR_foot_ball
+ # - vz_RR_foot_ball
+ # - vz_RL_foot_ball
+ # - xy_FL_foot_ball
+ # - xy_FR_foot_ball
+ # - xy_RR_foot_ball
+ # - xy_RL_foot_ball
+ # - vxy_FL_foot_ball
+ # - vxy_FR_foot_ball
+ # - vxy_RR_foot_ball
+ # - vxy_RL_foot_ball
+ - base_lin_velxy
+ - base_lin_velz
+ - base_omega
+ # - base_horizontal_capture
+ - base_capture
+ - joint_posture_capture
+ - v_reg
+ - a_reg
+
+#closed loop
+.define:
+ - &w_base_omega 20
+ - &w_base_vxy 15
+ - &w_base_vz 35
+ - &w_base_z 5
+ - &w_base_hor_capture 200.
+ - &w_base_capture 200.
+ - &w_contact_z 400.0
+ - &w_contact_vz 1000.0
+ - &w_contact_xy 80.0
+ - &w_contact_vxy 80.0
+ - &wheel_radius 0.1125
+
+base_lin_velxy:
+ type: Cartesian
+ distal_link: base
+ indices: [0, 1]
+ nodes: ${range(1, N-5)}
+ cartesian_type: velocity
+ weight: *w_base_vxy
+
+base_lin_velz:
+ type: Cartesian
+ distal_link: base
+ indices: [2]
+ nodes: ${range(1, N-5)}
+ cartesian_type: velocity
+ weight: *w_base_vz
+
+base_omega:
+ type: Cartesian
+ distal_link: base
+ indices: [3, 4, 5]
+ nodes: ${range(1, N-5)}
+ cartesian_type: velocity
+ weight: *w_base_omega
+
+base_capture:
+ type: Cartesian
+ distal_link: base
+ indices: [0, 1, 2, 3, 4, 5]
+ nodes: ${range(N-5, N+1)}
+ cartesian_type: velocity
+ weight: *w_base_capture
+
+base_horizontal_capture:
+ type: Cartesian
+ distal_link: base_link
+ indices: [3, 4]
+ nodes: ${range(N-5, N+1)}
+ # cartesian_type: position
+ weight: *w_base_hor_capture
+
+# ==================================
+rolling_contact_FL_foot:
+ type: Rolling
+ frame: FL_foot
+ radius: *wheel_radius
+
+rolling_contact_FR_foot:
+ type: Rolling
+ frame: FR_foot
+ radius: *wheel_radius
+
+rolling_contact_RR_foot:
+ type: Rolling
+ frame: RR_foot
+ radius: *wheel_radius
+
+rolling_contact_RL_foot:
+ type: Rolling
+ frame: RL_foot
+ radius: *wheel_radius
+
+# ==================================
+
+interaction_FL_foot:
+ type: VertexForce
+ frame: FL_foot_ball
+ fn_min: 50
+ enable_fc: true
+ friction_coeff: 0.2
+ vertex_frames:
+ - FL_foot
+
+interaction_FR_foot:
+ type: VertexForce
+ frame: FR_foot_ball
+ fn_min: 50
+ enable_fc: true
+ friction_coeff: 0.2
+ vertex_frames:
+ - FR_foot
+
+interaction_RR_foot:
+ type: VertexForce
+ frame: RR_foot_ball
+ fn_min: 50
+ enable_fc: true
+ friction_coeff: 0.2
+ vertex_frames:
+ - RR_foot
+
+interaction_RL_foot:
+ type: VertexForce
+ frame: RL_foot_ball
+ fn_min: 50
+ enable_fc: true
+ friction_coeff: 0.2
+ vertex_frames:
+ - RL_foot
+
+FL_foot_ball:
+ type: Contact
+ subtask: [interaction_FL_foot, rolling_contact_FL_foot]
+
+FR_foot_ball:
+ type: Contact
+ subtask: [interaction_FR_foot, rolling_contact_FR_foot]
+
+RR_foot_ball:
+ type: Contact
+ subtask: [interaction_RR_foot, rolling_contact_RR_foot]
+
+RL_foot_ball:
+ type: Contact
+ subtask: [interaction_RL_foot, rolling_contact_RL_foot]
+
+# when using continuous joints
+joint_posture_capture:
+ type: Postural
+ weight: [100.0, 80.0, 35.0,
+ 100.0, 80.0, 35.0,
+ 100.0, 80.0, 35.0,
+ 100.0, 80.0, 35.0]
+ indices: [0, 1, 2,
+ 5, 6, 7,
+ 10, 11, 12,
+ 15, 16, 17]
+ nodes: ${range(N-5, N+1)}
+
+v_reg:
+ type: Regularization
+ variable_name: v
+ nodes: ${range(0, N+1)}
+ indices: [0, 1, 2, 3, 4, 5,
+ 6, 7, 8, 9,
+ 10, 11, 12, 13,
+ 14, 15, 16, 17,
+ 18, 19, 20, 21]
+ weight: [3e-1, 3e-1, 3e-1, 3e-1, 3e-1, 3e-1,
+ 6e-1, 6e-1, 6e-1, 8e-2,
+ 6e-1, 6e-1, 6e-1, 8e-2,
+ 6e-1, 6e-1, 6e-1, 8e-2,
+ 6e-1, 6e-1, 6e-1, 8e-2]
+
+a_reg:
+ type: Regularization
+ variable_name: a
+ nodes: ${range(0, N+1)}
+ indices: [0, 1, 2, 3, 4, 5,
+ 6, 7, 8, 9,
+ 10, 11, 12, 13,
+ 14, 15, 16, 17,
+ 18, 19, 20, 21]
+ weight: [5e-1, 5e-1, 5e-1, 5e-1, 5e-1, 5e-1,
+ 8e-2, 8e-2, 8e-2, 1e-1,
+ 8e-2, 8e-2, 8e-2, 1e-1,
+ 8e-2, 8e-2, 8e-2, 1e-1,
+ 8e-2, 8e-2, 8e-2, 1e-1]
+
+z_FL_foot_ball:
+ type: Cartesian
+ distal_link: FL_foot
+ indices: [2]
+ cartesian_type: position
+ weight: *w_contact_z
+
+z_FR_foot_ball:
+ type: Cartesian
+ distal_link: FR_foot
+ indices: [2]
+ cartesian_type: position
+ weight: *w_contact_z
+
+z_RR_foot_ball:
+ type: Cartesian
+ distal_link: RR_foot
+ indices: [2]
+ cartesian_type: position
+ weight: *w_contact_z
+
+z_RL_foot_ball:
+ type: Cartesian
+ distal_link: RL_foot
+ indices: [2]
+ cartesian_type: position
+ weight: *w_contact_z
+
+vz_FL_foot_ball:
+ type: Cartesian
+ distal_link: FL_foot
+ indices: [2]
+ cartesian_type: velocity
+ weight: *w_contact_vz
+
+vz_FR_foot_ball:
+ type: Cartesian
+ distal_link: FR_foot
+ indices: [2]
+ cartesian_type: velocity
+ weight: *w_contact_vz
+
+vz_RR_foot_ball:
+ type: Cartesian
+ distal_link: RR_foot
+ indices: [2]
+ cartesian_type: velocity
+ weight: *w_contact_vz
+
+vz_RL_foot_ball:
+ type: Cartesian
+ distal_link: RL_foot
+ indices: [2]
+ cartesian_type: velocity
+ weight: *w_contact_vz
+
+xy_FL_foot_ball:
+ type: Cartesian
+ distal_link: FL_foot
+ indices: [0, 1]
+ cartesian_type: position
+ weight: *w_contact_xy
+
+xy_FR_foot_ball:
+ type: Cartesian
+ distal_link: FR_foot
+ indices: [0, 1]
+ cartesian_type: position
+ weight: *w_contact_xy
+
+xy_RR_foot_ball:
+ type: Cartesian
+ distal_link: RR_foot
+ indices: [0, 1]
+ cartesian_type: position
+ weight: *w_contact_xy
+
+xy_RL_foot_ball:
+ type: Cartesian
+ distal_link: RL_foot
+ indices: [0, 1]
+ cartesian_type: position
+ weight: *w_contact_xy
+
+vxy_FL_foot_ball:
+ type: Cartesian
+ distal_link: FL_foot
+ indices: [0, 1]
+ cartesian_type: velocity
+ weight: *w_contact_vxy
+
+vxy_FR_foot_ball:
+ type: Cartesian
+ distal_link: FR_foot
+ indices: [0, 1]
+ cartesian_type: velocity
+ weight: *w_contact_vxy
+
+vxy_RR_foot_ball:
+ type: Cartesian
+ distal_link: RR_foot
+ indices: [0, 1]
+ cartesian_type: velocity
+ weight: *w_contact_vxy
+
+vxy_RL_foot_ball:
+ type: Cartesian
+ distal_link: RL_foot
+ indices: [0, 1]
+ cartesian_type: velocity
+ weight: *w_contact_vxy
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/bundle.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/bundle.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..552ab2939e273e6b594c11a328e81a1825dda834
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/bundle.yaml
@@ -0,0 +1,92 @@
+bundle_format: augmpc_model_bundle_v1
+bundle_name: d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv
+checkpoint_file: d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv_model
+preserved_training_cfgs:
+ - ibrido_run__2026_03_28_11_09_54_ID/training_cfg_unitree_b2w.sh
+framework:
+ repos:
+ adarl:
+ commit: 72c1a8eeb0723f86d1ba8af52a4777eed7677632
+ branch: master
+ remote: https://gitlab.com/crzz/adarl.git
+ dirty: false
+ AugMPC:
+ commit: 909ab6c0bab771ee6080e6d5bc43c142477084d6
+ branch: ibrido
+ remote: git@github.com:AndrePatri/AugMPC.git
+ dirty: true
+ AugMPCEnvs:
+ commit: 9785e0bd27939ebfdbfec112a29fee189371fb97
+ branch: ibrido
+ remote: git@github.com:AndrePatri/AugMPCEnvs.git
+ dirty: true
+ casadi:
+ commit: 79cebe3b416dee22abc87de0076b80a5b97bd345
+ branch: optional_float
+ remote: git@github.com:AndrePatri/casadi.git
+ dirty: false
+ CentauroHybridMPC:
+ commit: 9262cb617a6ef1b75887b806181f32ed2566b9af
+ branch: ibrido
+ remote: git@github.com:ADVRHumanoids/CentauroHybridMPC.git
+ dirty: false
+ EigenIPC:
+ commit: 7c1c3ecd08716e61ed164a6e0bb788aa716705ca
+ branch: devel
+ remote: git@github.com:AndrePatri/EigenIPC.git
+ dirty: false
+ horizon:
+ commit: 7f9e7bc89061fd1776d7578aca5566c0d401c150
+ branch: ibrido
+ remote: git@github.com:AndrePatri/horizon.git
+ dirty: false
+ IBRIDO:
+ commit: 2cb0ec5366b4f46cbb09933c35b91b9613f42071
+ branch: main
+ remote: git@github.com:AndrePatri/IBRIDO.git
+ dirty: false
+ iit-centauro-ros-pkg:
+ commit: 6069807ebb37a6d7df39430a02685e09ed9b167a
+ branch: ibrido_ros2
+ remote: git@github.com:AndrePatri/iit-centauro-ros-pkg.git
+ dirty: false
+ iit-dagana-ros-pkg:
+ commit: f7ecd6c84a0b7f3320c1b7de6449cbcd4445d2fe
+ branch: ibrido_ros2
+ remote: git@github.com:AndrePatri/iit-dagana-ros-pkg.git
+ dirty: false
+ iit-kyon-description:
+ commit: 50fd7c8909b3ddfd1ebbe67c61d6b775b86df6b1
+ branch: ibrido_ros2
+ remote: git@github.com:ADVRHumanoids/iit-kyon-ros-pkg.git
+ dirty: false
+ iit-kyon-ros-pkg:
+ commit: 298917efffb63dbca540e0aedbd21b41bf393fbf
+ branch: ibrido_ros2_simple
+ remote: git@github.com:ADVRHumanoids/iit-kyon-ros-pkg.git
+ dirty: false
+ KyonRLStepping:
+ commit: ffbb9e1db6ed501cce16043337ff0c0702a4164a
+ branch: ibrido
+ remote: git@github.com:ADVRHumanoids/KyonRLStepping.git
+ dirty: false
+ MPCHive:
+ commit: 45b4fc692850cef607020dda2e32fd708e7fff62
+ branch: devel
+ remote: git@github.com:AndrePatri/MPCHive.git
+ dirty: false
+ MPCViz:
+ commit: 806d03efcc9d8ab1fb04991a159c19ba89bfb85b
+ branch: ros2_humble
+ remote: git@github.com:AndrePatri/MPCViz.git
+ dirty: false
+ phase_manager:
+ commit: 9925d18c0d7a55d013cbaa4998c61d85a3a8944f
+ branch: ibrido
+ remote: git@github.com:AndrePatri/phase_manager.git
+ dirty: false
+ unitree_ros:
+ commit: e5904dc2f181981dfbb23d4287c6823a7f4afed4
+ branch: ibrido
+ remote: git@github.com:AndrePatri/unitree_ros.git
+ dirty: false
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv_model b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv_model
new file mode 100644
index 0000000000000000000000000000000000000000..0a37e2718bb68aae0edb1af72f47e961278b30bc
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv_model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b2ac3966e2ada1097268085a9149415e96da2e3ad707245200dc27902db83ba
+size 1651565
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/fake_pos_tracking_env.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/fake_pos_tracking_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c5acd9248a46d3a9b2c3669fe43ae8ed502e6b6
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/fake_pos_tracking_env.py
@@ -0,0 +1,202 @@
+import os
+
+from typing import Dict
+
+import torch
+
+from EigenIPC.PyEigenIPC import VLevel
+
+from mpc_hive.utilities.math_utils_torch import world2base_frame
+
+from aug_mpc_envs.training_envs.twist_tracking_env import TwistTrackingEnv
+
+class FakePosTrackingEnv(TwistTrackingEnv):
+ """Converts random planar position goals into twist references so the agent learns to drive the robot toward targets while managing contact scheduling."""
+
+ def __init__(self,
+ namespace: str,
+ actions_dim: int = 10,
+ verbose: bool = False,
+ vlevel: VLevel = VLevel.V1,
+ use_gpu: bool = True,
+ dtype: torch.dtype = torch.float32,
+ debug: bool = True,
+ override_agent_refs: bool = False,
+ timeout_ms: int = 60000,
+ env_opts: Dict = {}):
+
+ self._add_env_opt(env_opts, "max_distance", default=10.0) # [m]
+ self._add_env_opt(env_opts, "min_distance", default=0.0) # [m]
+ self._add_env_opt(env_opts, "max_vref", default=1.0) # [m/s]
+ self._add_env_opt(env_opts, "max_dp", default=5.0) # [m] after this, v ref saturates
+ self._add_env_opt(env_opts, "max_dt", default=env_opts["max_dp"]/ env_opts["max_vref"])
+
+ TwistTrackingEnv.__init__(self,
+ namespace=namespace,
+ actions_dim=actions_dim, # twist + contact flags
+ verbose=verbose,
+ vlevel=vlevel,
+ use_gpu=use_gpu,
+ dtype=dtype,
+ debug=debug,
+ override_agent_refs=override_agent_refs,
+ timeout_ms=timeout_ms,
+ env_opts=env_opts)
+
+ def get_file_paths(self):
+ paths=TwistTrackingEnv.get_file_paths(self)
+ paths.append(os.path.abspath(__file__))
+ return paths
+
+ def _custom_post_init(self):
+ TwistTrackingEnv._custom_post_init(self)
+
+ # position targets to be reached (wrt robot's pos at ep start)
+ self._p_trgt_w=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2].detach().clone()
+ self._p_delta_w=self._p_trgt_w.detach().clone()
+ self._dp_norm=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device)
+ self._dp_versor=self._p_trgt_w.detach().clone()
+
+ self._trgt_d=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device)
+ self._trgt_theta=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device)
+
+ def _update_loc_twist_refs(self):
+ # this is called at each env substep
+
+ self._compute_twist_ref_w()
+
+ if not self._override_agent_refs:
+ agent_p_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="p",
+ gpu=self._use_gpu)
+ agent_p_ref_current[:, 0:2]=self._p_trgt_w
+
+ # then convert it to base ref local for the agent
+ robot_q = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
+ # rotate agent ref from world to robot base
+ world2base_frame(t_w=self._agent_twist_ref_current_w, q_b=robot_q,
+ t_out=self._agent_twist_ref_current_base_loc)
+ # write it to agent refs tensors
+ self._agent_refs.rob_refs.root_state.set(data_type="twist", data=self._agent_twist_ref_current_base_loc,
+ gpu=self._use_gpu)
+
+ def _compute_twist_ref_w(self, env_indxs: torch.Tensor = None):
+
+ # angular refs are not altered
+ if env_indxs is None:
+ # we update the position error using the current base position
+ self._p_delta_w[:, :]=self._p_trgt_w-\
+ self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2]
+
+ self._dp_norm[:, :]=self._p_delta_w.norm(dim=1,keepdim=True)+1e-6
+ self._dp_versor[:, :]=self._p_delta_w/self._dp_norm
+
+ # apply for vref saturation
+ to_be_saturated=self._dp_norm[:, :]>self._env_opts["max_dp"]
+ self._dp_norm[to_be_saturated.flatten(), :]=self._env_opts["max_dp"]
+
+ # we compute the twist refs for the agent depending of the position error
+ self._agent_twist_ref_current_w[:, 0:2]=self._dp_norm*self._dp_versor/self._env_opts["max_dt"]
+ self._agent_twist_ref_current_w[:, 2:3]=0 # no vertical vel
+
+ # apply pof0 using last value of bernoully coeffs
+ self._agent_twist_ref_current_w[:, 0:3] = self._agent_twist_ref_current_w[:, 0:3]*self._bernoulli_coeffs_linvel # linvel
+ self._agent_twist_ref_current_w[:, 3:6] = self._agent_twist_ref_current_w[:, 3:6]*self._bernoulli_coeffs_omega # omega
+ else:
+ self._p_delta_w[env_indxs, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[env_indxs, 0:2] -\
+ self._p_trgt_w[env_indxs, :]
+
+ # apply for vref saturation
+ to_be_saturated=torch.logical_and((self._dp_norm[:, :]>self._env_opts["max_dp"]).flatten(),env_indxs)
+ self._dp_norm[to_be_saturated.flatten(), :]=self._env_opts["max_dp"]
+
+ self._dp_norm[env_indxs, :]=self._p_delta_w[env_indxs, :].norm(dim=1,keepdim=True)+1e-6
+ self._dp_versor[env_indxs, :]=self._p_delta_w[env_indxs, :]/self._dp_norm[env_indxs, :]
+
+ self._agent_twist_ref_current_w[env_indxs, 0:2]=self._dp_norm[env_indxs, :]*self._dp_versor[env_indxs, :]/self._env_opts["max_dt"]
+ self._agent_twist_ref_current_w[env_indxs, 2:3]=0 # no vertical vel
+
+ # apply pof0 using last value of bernoully coeffs
+ self._agent_twist_ref_current_w[env_indxs, 0:3] = self._agent_twist_ref_current_w[env_indxs, 0:3]*self._bernoulli_coeffs_linvel[env_indxs, :]
+ self._agent_twist_ref_current_w[env_indxs, 3:6] = self._agent_twist_ref_current_w[env_indxs, 3:6]*self._bernoulli_coeffs_omega[env_indxs, :] # omega
+
+ def _override_refs(self,
+ env_indxs: torch.Tensor = None):
+
+ # runs at every post_step
+ self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem
+ if self._use_gpu:
+ # copies latest refs to GPU
+ self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False)
+
+ agent_p_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="p",
+ gpu=self._use_gpu)
+
+ agent_yaw_omega_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="omega",
+ gpu=self._use_gpu)
+
+ # self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] + \
+ # agent_p_ref_current[:, 0:2]
+ self._p_trgt_w[:, :]=agent_p_ref_current[:, 0:2] # set p target target from shared mem
+
+ self._agent_twist_ref_current_w[:, 5:6]=agent_yaw_omega_ref_current[:, 2:3] # set yaw ang. vel target from shared mem
+
+ def _debug_agent_refs(self):
+ if self._use_gpu:
+ self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False)
+ self._agent_refs.rob_refs.root_state.synch_all(read=False, retry = True)
+
+ def _randomize_task_refs(self,
+ env_indxs: torch.Tensor = None):
+
+ # we randomize the target position/omega in world frame
+ if env_indxs is None:
+ self._trgt_d.uniform_(self._env_opts["min_distance"], self._env_opts["max_distance"])
+ self._trgt_theta.uniform_(0.0, 2*torch.pi)
+
+ self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] +\
+ torch.cat((self._trgt_d*torch.cos(self._trgt_theta)
+ ,self._trgt_d*torch.sin(self._trgt_theta)), dim=1)
+
+ # randomize just omega
+ random_uniform=torch.full_like(self._agent_twist_ref_current_w[:, 3:6], fill_value=0.0)
+ torch.nn.init.uniform_(random_uniform, a=-1, b=1)
+ self._agent_twist_ref_current_w[:, 3:6] = random_uniform*self._twist_ref_scale[:, 3:6] + self._twist_ref_offset[:, 3:6]
+
+ # sample for all envs pof0
+ if self._env_opts["use_pof0"]: # sample from bernoulli distribution and update coefficients
+ torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"]
+ torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega)
+
+ else:
+
+ if env_indxs.any():
+ integer_idxs=torch.nonzero(env_indxs).flatten()
+
+ trgt_d_selected=self._trgt_d[integer_idxs, :]
+ trgt_d_selected.uniform_(self._env_opts["min_distance"], self._env_opts["max_distance"])
+ self._trgt_d[integer_idxs, :]=trgt_d_selected
+
+ trgt_theta_selected=self._trgt_theta[integer_idxs, :]
+ trgt_theta_selected.uniform_(0.0, 2*torch.pi)
+ self._trgt_theta[integer_idxs, :]=trgt_theta_selected
+
+ self._p_trgt_w[integer_idxs, 0:1]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[integer_idxs, 0:1] +\
+ self._trgt_d[integer_idxs, :]*torch.cos(self._trgt_theta[integer_idxs, :])
+ self._p_trgt_w[integer_idxs, 1:2]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[integer_idxs, 1:2] +\
+ self._trgt_d[integer_idxs, :]*torch.sin(self._trgt_theta[integer_idxs, :])
+
+ # randomize just omega
+ random_uniform=torch.full_like(self._agent_twist_ref_current_w[env_indxs, 3:6], fill_value=0.0)
+ torch.nn.init.uniform_(random_uniform, a=-1, b=1)
+ self._agent_twist_ref_current_w[env_indxs, 3:6] = random_uniform*self._twist_ref_scale[:, 3:6] + self._twist_ref_offset[:, 3:6]
+
+ # sample for all envs pof0, then reset to 1 for envs which are not to be randomized
+ if self._env_opts["use_pof0"]: # sample from bernoulli distribution and update coefficients
+ torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"]
+ torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega)
+ self._bernoulli_coeffs_linvel[~env_indxs, :]=1
+ self._bernoulli_coeffs_omega[~env_indxs, :]=1
+
+ self._compute_twist_ref_w(env_indxs=env_indxs) # update linear vel twist refs based on pos error
+
+
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/hybrid_quad_rhc.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/hybrid_quad_rhc.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cec4e610e97e3100c1468ee4e828fe2e64a28c4
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/hybrid_quad_rhc.py
@@ -0,0 +1,1324 @@
+from mpc_hive.controllers.rhc import RHController
+
+from aug_mpc.controllers.rhc.horizon_based.horizon_imports import *
+
+from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc_refs import HybridQuadRhcRefs
+from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import Journal, LogType
+
+import numpy as np
+
+import os
+
+import time
+
+from typing import Dict, List
+
+class HybridQuadRhc(RHController):
+
+ def __init__(self,
+ srdf_path: str,
+ urdf_path: str,
+ config_path: str,
+ robot_name: str, # used for shared memory namespaces
+ codegen_dir: str,
+ n_nodes:float = 25,
+ injection_node:int = 10,
+ dt: float = 0.02,
+ max_solver_iter = 1, # defaults to rt-iteration
+ open_loop: bool = True,
+ close_loop_all: bool = False,
+ dtype = np.float32,
+ verbose = False,
+ debug = False,
+ refs_in_hor_frame = True,
+ timeout_ms: int = 60000,
+ custom_opts: Dict = {}):
+
+ self._refs_in_hor_frame = refs_in_hor_frame
+
+ self._injection_node = injection_node
+
+ self._open_loop = open_loop
+ self._close_loop_all = close_loop_all
+
+ self._codegen_dir = codegen_dir
+ if not os.path.exists(self._codegen_dir):
+ os.makedirs(self._codegen_dir)
+ # else:
+ # # Directory already exists, delete it and recreate
+ # shutil.rmtree(self._codegen_dir)
+ # os.makedirs(self._codegen_dir)
+
+ self.step_counter = 0
+ self.sol_counter = 0
+
+ self.max_solver_iter = max_solver_iter
+
+ self._timer_start = time.perf_counter()
+ self._prb_update_time = time.perf_counter()
+ self._phase_shift_time = time.perf_counter()
+ self._task_ref_update_time = time.perf_counter()
+ self._rti_time = time.perf_counter()
+
+ self.robot_name = robot_name
+
+ self.config_path = config_path
+
+ self.urdf_path = urdf_path
+ # read urdf and srdf files
+ with open(self.urdf_path, 'r') as file:
+ self.urdf = file.read()
+ self._base_init = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0])
+
+ self._c_timelines = dict()
+ self._f_reg_timelines = dict()
+
+ self._custom_opts={"replace_continuous_joints": False,
+ "use_force_feedback": False,
+ "lin_a_feedback": False,
+ "is_open_loop": self._open_loop, # fully open (just for db)
+ "fully_closed": False, # closed loop with full feedback (just for db)
+ "closed_partial": False, # closed loop with partial feedback
+ "adaptive_is": True, # closed loop with adaptation
+ "estimate_v_root": False, # when adaptive_is or closed_partial, estimate vbase
+ "alpha_from_outside": False, # alpha set ext. from shared memory
+ "alpha_half": 1.0,
+ "only_vel_wheels": True, # whether wheels (if present) are just vel controlled
+ "use_jnt_v_feedback": False
+ }
+
+ self._custom_opts.update(custom_opts)
+
+ self._alpha_half=self._custom_opts["alpha_half"]
+
+ if self._open_loop:
+ self._custom_opts["fully_closed"]=False
+ self._custom_opts["adaptive_is"]=False
+ self._custom_opts["closed_partial"]=False
+ else:
+ self._custom_opts["is_open_loop"]=False
+ if self._custom_opts["fully_closed"]:
+ self._custom_opts["adaptive_is"]=False
+ self._custom_opts["closed_partial"]=False
+ self._custom_opts["lin_a_feedback"]=False
+ if self._custom_opts["closed_partial"]:
+ self._custom_opts["adaptive_is"]=False
+ self._custom_opts["fully_closed"]=False
+ self._custom_opts["lin_a_feedback"]=False
+ if self._custom_opts["adaptive_is"]:
+ self._custom_opts["closed_partial"]=False
+ self._custom_opts["fully_closed"]=False
+
+ super().__init__(srdf_path=srdf_path,
+ n_nodes=n_nodes,
+ dt=dt,
+ namespace=self.robot_name,
+ dtype=dtype,
+ verbose=verbose,
+ debug=debug,
+ timeout_ms=timeout_ms)
+
+ self._rhc_fpaths.append(self.config_path)
+
+ def _config_override(self):
+ pass
+
+ def _post_problem_init(self):
+
+ self.rhc_costs={}
+ self.rhc_constr={}
+
+ self._fail_idx_scale=0.0
+ self._expl_idx_window_size=int(1*self._n_nodes)
+ self._explosion_idx_buffer=np.zeros((1,self._expl_idx_window_size))
+ self._expl_idx_counter=0
+ self._expl_idx_buffer_counter=0
+
+ self._pred_node_idx=self._n_nodes-1
+
+ self._nq=self.nq()
+ self._nq_jnts=self._nq-7# assuming floating base
+ self._nv=self.nv()
+ self._nv_jnts=self._nv-6
+
+ self._alphas_q_root=np.zeros((7, 1), dtype=self._dtype)
+ self._alphas_q_jnts=np.zeros((self._nq_jnts, 1), dtype=self._dtype)
+ self._alphas_twist_root=np.zeros((6, 1), dtype=self._dtype)
+ self._alphas_v_jnts=np.zeros((self._nv_jnts, 1), dtype=self._dtype)
+ self._alphas_a=np.zeros((self._nv, 1), dtype=self._dtype)
+ self._alphas_q_root[:, :]=1.0 # default to all open
+ self._alphas_q_jnts[:, :]=1.0
+ self._alphas_twist_root[:, :]=1.0
+ self._alphas_v_jnts[:, :]=1.0
+ self._alphas_a[:, :]=1.0
+
+ def _init_problem(self,
+ fixed_jnt_patterns: List[str] = None,
+ wheels_patterns: List[str] = None,
+ foot_linkname: str = None,
+ flight_duration: int = 10,
+ post_flight_stance: int = 3,
+ step_height: float = 0.12,
+ keep_yaw_vert: bool = False,
+ yaw_vertical_weight: float = 2.0,
+ vertical_landing: bool = False,
+ vertical_land_weight: float = 1.0,
+ phase_force_reg: float = 1e-2,
+ vel_bounds_weight: float = 1.0):
+
+ self._fixed_jnt_patterns=fixed_jnt_patterns
+
+ self._config_override()
+
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f" Will use horizon config file at {self.config_path}",
+ LogType.INFO,
+ throw_when_excep=True)
+
+ self._vel_bounds_weight=vel_bounds_weight
+ self._phase_force_reg=phase_force_reg
+ self._yaw_vertical_weight=yaw_vertical_weight
+ self._vertical_land_weight=vertical_land_weight
+ # overrides parent
+ self._prb = Problem(self._n_intervals,
+ receding=True,
+ casadi_type=cs.SX)
+ self._prb.setDt(self._dt)
+
+ if "replace_continuous_joints" in self._custom_opts:
+ # continous joints are parametrized in So2
+ if self._custom_opts["replace_continuous_joints"]:
+ self.urdf = self.urdf.replace('continuous', 'revolute')
+ else:
+ self.urdf = self.urdf.replace('continuous', 'revolute')
+
+ self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf) # used for getting joint names
+ self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names())
+
+ self._init_robot_homer()
+
+ # handle fixed joints
+ fixed_joint_map={}
+ if self._fixed_jnt_patterns is not None:
+ for jnt_name in self._get_robot_jnt_names():
+ for fixed_jnt_pattern in self._fixed_jnt_patterns:
+ if fixed_jnt_pattern in jnt_name:
+ fixed_joint_map.update({f"{jnt_name}":
+ self._homer.get_homing_val(jnt_name=jnt_name)})
+ break # do not search for other pattern matches
+
+ if not len(fixed_joint_map)==0: # we need to recreate kin dyn and homers
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f"Will fix following joints: \n{str(fixed_joint_map)}",
+ LogType.INFO,
+ throw_when_excep=True)
+ # with the fixed joint map
+ self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf,fixed_joints=fixed_joint_map)
+ # assign again controlled joints names
+ self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names())
+ # updated robot homer for controlled joints
+ self._init_robot_homer()
+
+ # handle continuous joints (need to change homing and retrieve
+ # cont jnts indexes) and homing
+ self._continuous_joints=self._get_continuous_jnt_names()
+ # reduced
+ self._continuous_joints_idxs=[]
+ self._continuous_joints_idxs_cos=[]
+ self._continuous_joints_idxs_sin=[]
+ self._continuous_joints_idxs_red=[]
+ self._rev_joints_idxs=[]
+ self._rev_joints_idxs_red=[]
+ # qfull
+ self._continuous_joints_idxs_qfull=[]
+ self._continuous_joints_idxs_cos_qfull=[]
+ self._continuous_joints_idxs_sin_qfull=[]
+ self._continuous_joints_idxs_red_qfull=[]
+ self._rev_joints_idxs_qfull=[]
+ self._rev_joints_idxs_red_qfull=[]
+ jnt_homing=[""]*(len(self._homer.get_homing())+len(self._continuous_joints))
+ jnt_names=self._get_robot_jnt_names()
+ for i in range(len(jnt_names)):
+ jnt=jnt_names[i]
+ index=self._get_jnt_id(jnt)# accounting for floating joint
+ homing_idx=index-7 # homing is only for actuated joints
+ homing_value=self._homer.get_homing_val(jnt)
+ if jnt in self._continuous_joints:
+ jnt_homing[homing_idx]=np.cos(homing_value).item()
+ jnt_homing[homing_idx+1]=np.sin(homing_value).item()
+ # just actuated joints
+ self._continuous_joints_idxs.append(homing_idx) # cos
+ self._continuous_joints_idxs.append(homing_idx+1) # sin
+ self._continuous_joints_idxs_cos.append(homing_idx)
+ self._continuous_joints_idxs_sin.append(homing_idx+1)
+ self._continuous_joints_idxs_red.append(i)
+ # q full
+ self._continuous_joints_idxs_qfull.append(index) # cos
+ self._continuous_joints_idxs_qfull.append(index+1) # sin
+ self._continuous_joints_idxs_cos_qfull.append(index)
+ self._continuous_joints_idxs_sin_qfull.append(index+1)
+ self._continuous_joints_idxs_red_qfull.append(i+7)
+ else:
+ jnt_homing[homing_idx]=homing_value
+ # just actuated joints
+ self._rev_joints_idxs.append(homing_idx)
+ self._rev_joints_idxs_red.append(i)
+ # q full
+ self._rev_joints_idxs_qfull.append(index)
+ self._rev_joints_idxs_red_qfull.append(i+7)
+
+ self._jnts_q_reduced=None
+ if not len(self._continuous_joints)==0:
+ cont_joints=", ".join(self._continuous_joints)
+
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f"The following continuous joints were found: \n{cont_joints}",
+ LogType.INFO,
+ throw_when_excep=True)
+ # preallocating data
+ self._jnts_q_reduced=np.zeros((1,self.nv()-6),dtype=self._dtype)
+ self._jnts_q_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype)
+ self._full_q_reduced=np.zeros((7+len(jnt_names), self._n_nodes),dtype=self._dtype)
+ self._jnts_q_delta_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype)
+ else:
+ self._custom_opts["replace_continuous_joints"]=True
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f"No continuous joints were found.",
+ LogType.INFO,
+ throw_when_excep=True)
+
+ # retrieve wheels indexes (not considering continuous joints,
+ # ok just for v, eff, etc..)
+ self._wheel_patterns=wheels_patterns
+ self._wheels_idxs_v=self._get_wheels_jnt_v_idxs(wheel_patterns=self._wheel_patterns)
+ self._f0 = [0, 0, self._kin_dyn.mass()/4*9.81]
+
+ # we can create an init for the base
+ init = self._base_init.tolist() + jnt_homing
+
+ if foot_linkname is not None:
+ FK = self._kin_dyn.fk(foot_linkname) # just to get robot reference height
+ ground_level = FK(q=init)['ee_pos']
+ self._base_init[2] = -ground_level[2] # override init
+
+ self._model = FullModelInverseDynamics(problem=self._prb,
+ kd=self._kin_dyn,
+ q_init=self._homer.get_homing_map(),
+ base_init=self._base_init)
+
+ self._ti = TaskInterface(prb=self._prb,
+ model=self._model,
+ max_solver_iter=self.max_solver_iter,
+ debug = self._debug,
+ verbose = self._verbose,
+ codegen_workdir = self._codegen_dir)
+ self._ti.setTaskFromYaml(self.config_path)
+
+ # setting initial base pos ref if exists
+ base_pos = self._ti.getTask('base_height')
+ if base_pos is not None:
+ base_pos.setRef(np.atleast_2d(self._base_init).T)
+
+ self._pm = pymanager.PhaseManager(self._n_nodes, debug=False) # intervals or nodes?????
+
+ self._gm = GaitManager(self._ti,
+ self._pm,
+ self._injection_node,
+ keep_yaw_vert=keep_yaw_vert,
+ yaw_vertical_weight=self._yaw_vertical_weight,
+ vertical_landing=vertical_landing,
+ landing_vert_weight=self._vertical_land_weight,
+ phase_force_reg=self._phase_force_reg,
+ custom_opts=self._custom_opts,
+ flight_duration=flight_duration,
+ post_flight_stance=post_flight_stance,
+ step_height=step_height,
+ dh=0.0)
+
+ self._ti.model.q.setBounds(self._ti.model.q0, self._ti.model.q0, nodes=0)
+ self._ti.model.v.setBounds(self._ti.model.v0, self._ti.model.v0, nodes=0)
+ self._ti.model.q.setInitialGuess(self._ti.model.q0)
+ self._ti.model.v.setInitialGuess(self._ti.model.v0)
+ for _, cforces in self._ti.model.cmap.items():
+ n_contact_f=len(cforces)
+ for c in cforces:
+ c.setInitialGuess(np.array(self._f0)/n_contact_f)
+
+ vel_lims = self._model.kd.velocityLimits()
+ import horizon.utils as utils
+ self._prb.createResidual('vel_lb_barrier', self._vel_bounds_weight*utils.utils.barrier(vel_lims[7:] - self._model.v[7:]))
+ self._prb.createResidual('vel_ub_barrier', self._vel_bounds_weight*utils.utils.barrier1(-1 * vel_lims[7:] - self._model.v[7:]))
+
+ self._meas_lin_a_par=None
+ # if self._custom_opts["lin_a_feedback"]:
+ # # acceleration feedback on first node
+ # self._meas_lin_a_par=self._prb.createParameter(name="lin_a_feedback",
+ # dim=3, nodes=0)
+ # base_lin_a_prb=self._prb.getInput().getVars()[0:3]
+ # self._prb.createConstraint('lin_acceleration_feedback', base_lin_a_prb - self._meas_lin_a_par,
+ # nodes=[0])
+
+ # if not self._open_loop:
+ # # we create a residual cost to be used as an attractor to the measured state on the first node
+ # # hard constraints injecting meas. states are pure EVIL!
+ # prb_state=self._prb.getState()
+ # full_state=prb_state.getVars()
+ # state_dim=prb_state.getBounds()[0].shape[0]
+ # meas_state=self._prb.createParameter(name="measured_state",
+ # dim=state_dim, nodes=0)
+ # self._prb.createResidual('meas_state_attractor', meas_state_attractor_weight * (full_state - meas_state),
+ # nodes=[0])
+
+ self._ti.finalize()
+ self._ti.bootstrap()
+
+ self._ti.init_inv_dyn_for_res() # we initialize some objects for sol. postprocessing purposes
+ self._ti.load_initial_guess()
+
+ self.n_dofs = self._get_ndofs() # after loading the URDF and creating the controller we
+ # know n_dofs -> we assign it (by default = None)
+
+ self.n_contacts = len(self._model.cmap.keys())
+
+ # remove variables bounds (before they were necessary to have a good
+ # quality bootstrap solution)
+ self._q_inf=np.zeros((self.nq(), 1))
+ self._q_inf[:, :]=np.inf
+ self._v_inf=np.zeros((self.nv(), 1))
+ self._v_inf[:, :]=np.inf
+ self._ti.model.q.setBounds(-self._q_inf, self._q_inf, nodes=0)
+ self._ti.model.v.setBounds(-self._v_inf, self._v_inf, nodes=0)
+
+ # self.horizon_anal = analyzer.ProblemAnalyzer(self._prb)
+
+ def get_file_paths(self):
+ # can be overriden by child
+ paths = super().get_file_paths()
+ return paths
+
+ def _get_quat_remap(self):
+ # overrides parent
+ return [1, 2, 3, 0] # mapping from robot quat. to Horizon's quaternion convention
+
+ def _zmp(self, model):
+
+ num = cs.SX([0, 0])
+ den = cs.SX([0])
+ pos_contact = dict()
+ force_val = dict()
+
+ q = cs.SX.sym('q', model.nq)
+ v = cs.SX.sym('v', model.nv)
+ a = cs.SX.sym('a', model.nv)
+
+ com = model.kd.centerOfMass()(q=q, v=v, a=a)['com']
+
+ n = cs.SX([0, 0, 1])
+ for c in model.fmap.keys():
+ pos_contact[c] = model.kd.fk(c)(q=q)['ee_pos']
+ force_val[c] = cs.SX.sym('force_val', 3)
+ num += (pos_contact[c][0:2] - com[0:2]) * cs.dot(force_val[c], n)
+ den += cs.dot(force_val[c], n)
+
+ zmp = com[0:2] + (num / den)
+ input_list = []
+ input_list.append(q)
+ input_list.append(v)
+ input_list.append(a)
+
+ for elem in force_val.values():
+ input_list.append(elem)
+
+ f = cs.Function('zmp', input_list, [zmp])
+ return f
+
+ def _add_zmp(self):
+
+ input_zmp = []
+
+ input_zmp.append(self._model.q)
+ input_zmp.append(self._model.v)
+ input_zmp.append(self._model.a)
+
+ for f_var in self._model.fmap.values():
+ input_zmp.append(f_var)
+
+ c_mean = cs.SX([0, 0, 0])
+ for c_name, f_var in self._model.fmap.items():
+ fk_c_pos = self._kin_dyn.fk(c_name)(q=self._model.q)['ee_pos']
+ c_mean += fk_c_pos
+
+ c_mean /= len(self._model.cmap.keys())
+
+ zmp_nominal_weight = 10.
+ zmp_fun = self._zmp(self._model)(*input_zmp)
+
+ if 'wheel_joint_1' in self._model.kd.joint_names():
+ zmp_residual = self._prb.createIntermediateResidual('zmp', zmp_nominal_weight * (zmp_fun[0:2] - c_mean[0:2]))
+
+ def _quaternion_multiply(self,
+ q1, q2):
+ x1, y1, z1, w1 = q1
+ x2, y2, z2, w2 = q2
+ w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
+ x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
+ y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
+ z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
+ return np.array([x, y, z, w])
+
+ def _get_continuous_jnt_names(self):
+ import xml.etree.ElementTree as ET
+ root = ET.fromstring(self.urdf)
+ continuous_joints = []
+ for joint in root.findall('joint'):
+ joint_type = joint.get('type')
+ if joint_type == 'continuous':
+ joint_name = joint.get('name')
+ continuous_joints.append(joint_name)
+ return continuous_joints
+
+ def _get_wheels_jnt_v_idxs(self, wheel_patterns: List[str]):
+ jnt_names=self._get_robot_jnt_names()
+ wheels_idxs=[]
+ for i in range(len(jnt_names)):
+ jnt_name=jnt_names[i]
+ for wheel_pattern in wheel_patterns:
+ if wheel_pattern in jnt_name:
+ wheels_idxs.append(i)
+ break
+ return wheels_idxs
+
+ def _get_jnt_id(self, jnt_name):
+ return self._kin_dyn.joint_iq(jnt_name)
+
+ def _init_rhc_task_cmds(self):
+
+ rhc_refs = HybridQuadRhcRefs(gait_manager=self._gm,
+ robot_index_shm=self.controller_index,
+ robot_index_view=0, # when using optimize_mem the view if always of shape 1x...
+ namespace=self.namespace,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ use_force_feedback=self._custom_opts["use_force_feedback"],
+ optimize_mem=True)
+
+ rhc_refs.run()
+
+ rhc_refs.rob_refs.set_jnts_remapping(jnts_remapping=self._to_controller)
+ rhc_refs.rob_refs.set_q_remapping(q_remapping=self._get_quat_remap())
+
+ rhc_refs.set_default_refs(p_ref=np.atleast_2d(self._base_init)[:, 0:3],
+ q_ref=np.atleast_2d(self._base_init)[:, 3:7])
+
+ return rhc_refs
+
+ def get_vertex_fnames_from_ti(self):
+ tasks=self._ti.task_list
+ contact_f_names=[]
+ for task in tasks:
+ if isinstance(task, ContactTask):
+ interaction_task=task.dynamics_tasks[0]
+ contact_f_names.append(interaction_task.vertex_frames[0])
+ return contact_f_names
+
+ def _get_contact_names(self):
+ # should get contact names from vertex frames
+ # list(self._ti.model.cmap.keys())
+ return self.get_vertex_fnames_from_ti()
+
+ def _get_robot_jnt_names(self):
+
+ joints_names = self._kin_dyn.joint_names()
+ to_be_removed = ["universe",
+ "reference",
+ "world",
+ "floating",
+ "floating_base"]
+ for name in to_be_removed:
+ if name in joints_names:
+ joints_names.remove(name)
+
+ return joints_names
+
+ def _get_ndofs(self):
+ return len(self._model.joint_names)
+
+ def nq(self):
+ return self._kin_dyn.nq()
+
+ def nv(self):
+ return self._kin_dyn.nv()
+
+ def _get_robot_mass(self):
+
+ return self._kin_dyn.mass()
+
+ def _get_root_full_q_from_sol(self, node_idx=1):
+
+ root_q_full=self._ti.solution['q'][0:7, node_idx].reshape(1, 7).astype(self._dtype)
+
+ np.nan_to_num(root_q_full, nan=1e3, posinf=1e3, neginf=-1e3, copy=False)
+ np.clip(a=root_q_full, a_min=-1e3, a_max=1e3, out=root_q_full)
+
+ return root_q_full
+
+ def _get_full_q_from_sol(self, node_idx=1):
+
+ return self._ti.solution['q'][:, node_idx].reshape(1, -1).astype(self._dtype)
+
+ def _get_root_twist_from_sol(self, node_idx=1):
+ # provided in world frame
+ twist_base_local=self._get_v_from_sol()[0:6, node_idx].reshape(1, 6)
+ # if world_aligned:
+ # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4]
+ # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix()
+ # twist_base_local[:, 0:3] = r_base_rhc @ twist_base_local[0, 0:3]
+ # twist_base_local[:, 3:6] = r_base_rhc @ twist_base_local[0, 3:6]
+ return twist_base_local
+
+ def _get_root_a_from_sol(self, node_idx=0):
+ # provided in world frame
+ a_base_local=self._get_a_from_sol()[0:6, node_idx].reshape(1, 6)
+ # if world_aligned:
+ # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4]
+ # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix()
+ # a_base_local[:, 0:3] = r_base_rhc @ a_base_local[0, 0:3]
+ # a_base_local[:, 3:6] = r_base_rhc @ v[0, 3:6]
+ return a_base_local
+
    def _get_jnt_q_from_sol(self, node_idx=0,
        reduce: bool = True,
        clamp: bool = True):
        """Actuated-joint positions at solution node node_idx.

        Args:
            node_idx: solution node to read.
            reduce: when True and continuous joints are present, their
                (cos, sin) parametrization is mapped back to a single angle
                via arctan2.
            clamp: in the non-reduced path, wrap angles with fmod(., 2*pi);
                the reduced path always wraps the revolute joints.

        Returns:
            A 1 x n row vector (dtype self._dtype). In the reduced path this
            is the preallocated self._jnts_q_reduced buffer -- callers must
            not hold on to or mutate it.
        """
        full_jnts_q=self._ti.solution['q'][7:, node_idx:node_idx+1].reshape(1,-1).astype(self._dtype)

        # sanitize solver output: replace NaN/inf and clamp to +-1e3
        np.nan_to_num(full_jnts_q, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) # in place
        np.clip(a=full_jnts_q, a_max=1e3, a_min=-1e3, out=full_jnts_q) # in place

        if self._custom_opts["replace_continuous_joints"] or (not reduce):
            # no So2-parametrized joints (or caller wants the raw layout)
            if clamp:
                return np.fmod(full_jnts_q, 2*np.pi)
            else:
                return full_jnts_q
        else:
            # (cos, sin) pairs of the continuous joints, one row per joint
            cos_sin=full_jnts_q[:,self._continuous_joints_idxs].reshape(-1,2)
            # copy rev joint vals (wrapped to [0, 2*pi))
            self._jnts_q_reduced[:, self._rev_joints_idxs_red]=np.fmod(full_jnts_q[:, self._rev_joints_idxs], 2*np.pi).reshape(1, -1)
            # and continuous joints: angle recovered from their (cos, sin) pair
            self._jnts_q_reduced[:, self._continuous_joints_idxs_red]=np.arctan2(cos_sin[:, 1], cos_sin[:, 0]).reshape(1,-1)
            return self._jnts_q_reduced
+
+ def _get_jnt_v_from_sol(self, node_idx=1):
+
+ jnt_v_sol=self._get_v_from_sol()[6:, node_idx].reshape(1,
+ self._nv_jnts)
+ np.nan_to_num(jnt_v_sol, nan=1e5, posinf=1e5, neginf=-1e5, copy=False) # in place
+ # np.clip(a=jnt_v_sol, a_max=1e5, a_min=-1e5, out=jnt_v_sol) # in place
+
+ return jnt_v_sol
+
+ def _get_jnt_a_from_sol(self, node_idx=1):
+
+ return self._get_a_from_sol()[6:, node_idx].reshape(1,
+ self._nv_jnts)
+
+ def _get_jnt_eff_from_sol(self, node_idx=0):
+
+ efforts_on_node = self._ti.eval_efforts_on_node(node_idx=node_idx)
+
+ # if self._custom_opts["only_vel_wheels"]:
+
+ jnt_efforts=efforts_on_node[6:, 0]
+
+ if self._custom_opts["only_vel_wheels"] and self._wheels_idxs_v:
+ jnt_efforts[self._wheels_idxs_v]=0.0
+
+ return jnt_efforts.reshape(1,
+ self._nv_jnts).astype(self._dtype)
+
+ def _get_rhc_cost(self):
+
+ return self._ti.solution["opt_cost"]
+
+ def _get_rhc_constr_viol(self):
+
+ return self._ti.solution["residual_norm"]
+
+ def _get_rhc_nodes_cost(self):
+
+ cost = self._ti.solver_rti.getCostValOnNodes()
+ return cost.reshape((1, -1))
+
+ def _get_rhc_nodes_constr_viol(self):
+
+ constr_viol = self._ti.solver_rti.getConstrValOnNodes()
+ return constr_viol.reshape((1, -1))
+
+ def _get_rhc_niter_to_sol(self):
+
+ return self._ti.solution["n_iter2sol"]
+
+ def _set_ig_bootstrap(self,
+ q_state: np.ndarray = None,
+ v_state: np.ndarray = None):
+
+ xig = self._ti.solution['x_opt'].copy()
+ uig = self._ti.solution['u_opt'].copy()
+
+ # Normalize and keep quaternion in the same hemisphere as the previous
+ # solution to avoid artificial 180-deg jumps in the bootstrap warm start.
+ q_state_boot = q_state.copy()
+ q_prev = xig[3:7, 0]
+ q_now = q_state_boot[3:7, 0]
+
+ q_now_norm = np.linalg.norm(q_now)
+ if q_now_norm > 1e-9:
+ q_state_boot[3:7, :] /= q_now_norm
+ else:
+ q_state_boot[3:7, :] = np.array([[0.0], [0.0], [0.0], [1.0]], dtype=self._dtype)
+
+ q_prev_norm = np.linalg.norm(q_prev)
+ if q_prev_norm > 1e-9:
+ q_prev = q_prev / q_prev_norm
+
+ q_now = q_state_boot[3:7, 0]
+ if np.dot(q_prev, q_now) < 0.0:
+ q_state_boot[3:7, :] *= -1.0
+
+ xig[0:self._nq, :] = q_state_boot
+ xig[self._nq:self._nq + self._nv, :] = 0.0 # 0 velocity on first nodes
+ uig[0:self._nv, :]=0.0 # 0 acceleration
+
+ # assigning ig
+ self._prb.getState().setInitialGuess(xig)
+ self._prb.getInput().setInitialGuess(uig)
+ # self._prb.getVariables("a").setInitialGuess(np.zeros((self._nv, 1), dtype=self._dtype))
+ for _, cforces in self._ti.model.cmap.items():
+ n_contact_f = len(cforces)
+ if n_contact_f == 0:
+ continue
+ f_guess = np.array(self._f0, dtype=self._dtype) / n_contact_f
+ for c in cforces:
+ c.setInitialGuess(f_guess)
+
+ # print("initial guesses")
+ # print(self._nq)
+ # print(self._nv)
+ # print("q")
+ # qig=self._ti.model.q.getInitialGuess()
+ # print(qig.shape)
+ # print(qig)
+ # print("v")
+ # print(self._ti.model.v.getInitialGuess())
+ # print("a")
+ # print(self._ti.model.a.getInitialGuess())
+ # for _, cforces in self._ti.model.cmap.items():
+ # for c in cforces:
+ # print("force")
+ # print(c.getInitialGuess())
+
+ return xig, uig
+
+ def _set_ig(self):
+
+ shift_num = -1 # shift data by one node
+
+ x_opt = self._ti.solution['x_opt']
+ u_opt = self._ti.solution['u_opt']
+
+ # building ig for state
+ xig = np.roll(x_opt,
+ shift_num, axis=1) # rolling state sol.
+ for i in range(abs(shift_num)):
+ # state on last node is copied to the elements
+ # which are "lost" during the shift operation
+ xig[:, -1 - i] = x_opt[:, -1]
+ # building ig for inputs
+ uig = np.roll(u_opt,
+ shift_num, axis=1) # rolling state sol.
+ for i in range(abs(shift_num)):
+ # state on last node is copied to the elements
+ # which are "lost" during the shift operation
+ uig[:, -1 - i] = u_opt[:, -1]
+
+ # assigning ig
+ self._prb.getState().setInitialGuess(xig)
+ self._prb.getInput().setInitialGuess(uig)
+
+ return xig, uig
+
+ def _update_open_loop(self,
+ bootstrap: bool = False):
+
+ q_state, v_state, a_state=self._set_is_open()
+
+ if not bootstrap:
+ self._set_ig()
+ else:
+ self._set_ig_bootstrap(q_state=q_state, v_state=v_state)
+
+ # robot_state=xig[:, 0]
+ # # open loop update:
+ # self._prb.setInitialState(x0=robot_state) # (xig has been shifted, so node 0
+ # # is node 1 in the last opt solution)
+
+ return q_state, v_state, a_state
+
+ def _update_closed_loop(self,
+ bootstrap: bool = False):
+
+ # set initial state
+ q_state=None
+ v_state=None
+ a_state=None
+ if self._custom_opts["adaptive_is"]:
+ # adaptive closed loop
+ q_state, v_state, a_state=self._set_is_adaptive()
+ elif self._custom_opts["fully_closed"]:
+ q_state, v_state, a_state=self._set_is_full()
+ elif self._custom_opts["closed_partial"]:
+ q_state, v_state, a_state=self._set_is_partial()
+ else:
+ Journal.log(self.__class__.__name__,
+ "_update_closed_loop",
+ "Neither adaptive_is, fully_closed, or closed_partial.",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ q_state, v_state, a_state=self._set_is()
+
+ # set initial guess for controller
+ if not bootstrap:
+ self._set_ig()
+ else:
+ self._set_ig_bootstrap(q_state=q_state, v_state=v_state)
+
+ return q_state, v_state, a_state
+
+ def _set_is_open(self):
+
+ # overriding states with rhc data
+ q_full_root=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1)
+ q_jnts=self._get_jnt_q_from_sol(node_idx=1, reduce=False).reshape(-1, 1)
+
+ twist_root=self._get_root_twist_from_sol(node_idx=1).reshape(-1, 1)
+ v_jnts=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1)
+
+ # rhc variables to be set
+ q=self._prb.getVariables("q") # .setBounds()
+ root_q_full_rhc=q[0:7] # root full q
+ jnts_q_rhc=q[7:] # jnts q
+ vel=self._prb.getVariables("v")
+ root_twist_rhc=vel[0:6] # lin v.
+ jnts_v_rhc=vel[6:] # jnts v
+
+ self.rhc_refs.set_alpha(alpha=1.0) # fully open
+
+ # close state on known quantities
+ root_q_full_rhc.setBounds(lb=q_full_root,
+ ub=q_full_root, nodes=0)
+ jnts_q_rhc.setBounds(lb=q_jnts,
+ ub=q_jnts, nodes=0)
+ root_twist_rhc.setBounds(lb=twist_root,
+ ub=twist_root, nodes=0)
+ jnts_v_rhc.setBounds(lb=v_jnts,
+ ub=v_jnts, nodes=0)
+
+ # return state used for feedback
+ q_state=np.concatenate((q_full_root, q_jnts),
+ axis=0)
+ v_state=np.concatenate((twist_root, v_jnts),
+ axis=0)
+
+ return (q_state, v_state, None)
+
    def _set_is_full(self):
        """Fully closed-loop initial state: bound node 0 of the problem to the
        measured robot state (root pose, twist, joint state) from shared memory.

        Returns:
            (q_state, v_state, a_state): measured state vectors used for feedback.
        """
        # measurements (column vectors) from the shared robot state
        q_full_root = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1)

        if (not len(self._continuous_joints)==0): # we need to expand measured continuous joints to So2 (cos, sin)
            self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:]
            self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos
            self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin
            q_jnts=self._jnts_q_expanded.reshape(-1,1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_full_q_rhc=q[0:7] # root p + quaternion
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_v_rhc=vel[0:3] # lin v.
        root_omega_rhc=vel[3:6] # omega
        jnts_v_rhc=vel[6:] # jnts v
        acc=self._prb.getVariables("a")
        lin_a_prb=acc[0:3] # lin acc

        self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive

        # pin node 0 to the measured state (lb == ub)
        root_full_q_rhc.setBounds(lb=q_full_root,
            ub=q_full_root, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts,
            ub=q_jnts, nodes=0)
        root_v_rhc.setBounds(lb=v_root,
            ub=v_root, nodes=0)
        root_omega_rhc.setBounds(lb=omega,
            ub=omega, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts,
            ub=v_jnts, nodes=0)
        if self._custom_opts["lin_a_feedback"]:
            # write base lin. acceleration bounds from measurements
            lin_a_prb.setBounds(lb=a_root[0:3, :],
                ub=a_root[0:3, :],
                nodes=0)

        # return state used for feedback
        q_state=np.concatenate((q_full_root, q_jnts),
            axis=0)
        v_state=np.concatenate((v_root, omega, v_jnts),
            axis=0)
        a_state=np.concatenate((a_root, a_jnts),
            axis=0)

        return (q_state, v_state, a_state)
+
    def _set_is_partial(self):
        """Partially closed-loop initial state: mix measurements with the
        previous solution. Base position is always taken open-loop from the
        solution; base lin. velocity and joint velocities are open-loop or
        measured depending on custom_opts flags.

        Returns:
            (q_state, v_state, a_state): state vectors used for feedback.
        """
        # measurements (column vectors) from the shared robot state
        p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1)
        q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1)

        if (not len(self._continuous_joints)==0): # we need to expand measured continuous joints to So2 (cos, sin)
            self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:]
            self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos
            self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin
            q_jnts=self._jnts_q_expanded.reshape(-1,1)

        # overriding states with rhc data (-> all overridden states are open loop)
        root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1)
        root_p_from_rhc=root_q_full_from_rhc[0:3, :]
        p_root[:, :]=root_p_from_rhc # position is always open loop

        if not self._custom_opts["estimate_v_root"]:
            # open-loop base lin. velocity from the previous solution
            v_root[:, :]=self._get_root_twist_from_sol(node_idx=1)[:, 0:3].reshape(-1, 1)
        # override v jnts with the ones from controller
        if not self._custom_opts["use_jnt_v_feedback"]:
            v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_p_rhc=q[0:3] # root p
        root_q_rhc=q[3:7] # root orientation
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_v_rhc=vel[0:3] # lin v.
        root_omega_rhc=vel[3:6] # omega
        jnts_v_rhc=vel[6:] # jnts v
        acc=self._prb.getVariables("a")
        lin_a_prb=acc[0:3] # lin acc

        self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive

        # pin node 0 (lb == ub) on the mixed measured/solution state
        root_p_rhc.setBounds(lb=p_root,
            ub=p_root, nodes=0)
        root_q_rhc.setBounds(lb=q_root,
            ub=q_root, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts,
            ub=q_jnts, nodes=0)

        if self._custom_opts["estimate_v_root"]:
            root_v_rhc.setBounds(lb=-self._v_inf[0:3],
                ub=self._v_inf[0:3], nodes=0) # leaving lin v of the base free (estimated from constraints)
        else: # get it from controller
            root_v_rhc.setBounds(lb=v_root,
                ub=v_root, nodes=0)
        root_omega_rhc.setBounds(lb=omega,
            ub=omega, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts,
            ub=v_jnts, nodes=0)
        if self._custom_opts["lin_a_feedback"]:
            # write base lin. acceleration bounds from measurements
            lin_a_prb.setBounds(lb=a_root[0:3, :],
                ub=a_root[0:3, :],
                nodes=0)

        # return state used for feedback
        q_state=np.concatenate((p_root, q_root, q_jnts),
            axis=0)
        v_state=np.concatenate((v_root, omega, v_jnts),
            axis=0)
        a_state=np.concatenate((a_root, a_jnts),
            axis=0)

        return (q_state, v_state, a_state)
+
    def _set_is_adaptive(self):
        """Adaptive closed-loop initial state: blend measurements with the
        previous solution using a gain alpha in [0, 1] (0 -> fully measured,
        1 -> fully open loop). Alpha is either read from outside
        (custom_opts["alpha_from_outside"]) or auto-tuned from the
        measurement/prediction defect via a tanh schedule.

        Returns:
            (q_state, v_state, a_state): measured state vectors used for feedback.
        """
        # measurements (column vectors) from the shared robot state
        p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1)
        q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_p_rhc=q[0:3] # root p
        root_q_rhc=q[3:7] # root orientation
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_v_rhc=vel[0:3] # lin v.
        root_omega_rhc=vel[3:6] # omega
        jnts_v_rhc=vel[6:] # jnts v
        acc=self._prb.getVariables("a")
        lin_a_prb=acc[0:3] # lin acc

        # getting prediction defects (measured state minus predicted state, per quantity)
        root_q_delta=self.rhc_pred_delta.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        jnt_q_delta=self.rhc_pred_delta.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        jnt_v_delta=self.rhc_pred_delta.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root_delta = self.rhc_pred_delta.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega_root_delta = self.rhc_pred_delta.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root_delta = self.rhc_pred_delta.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        # close state on known quantities, estimate some (e.g. lin vel) and
        # go open loop if things start to explode
        alpha_now=1.0
        delta=0.0
        if self._custom_opts["alpha_from_outside"]:
            alpha_now=self.rhc_refs.get_alpha()
        else: # "autotuned" alpha
            if self._custom_opts["estimate_v_root"]: # we compute delta based on jnt v (since we use meas.)
                delta=np.max(np.abs(jnt_v_delta))
            else:
                delta=np.max(np.abs(omega_root_delta))
            # smooth 0 -> 1 transition of alpha as the defect grows past the half point
            alpha_now=(np.tanh(2*self._alpha_half*(delta-self._alpha_half))+1)/2.0

        bound_relaxation=-np.log(1-alpha_now+1e-16) # [~0, ~inf] if alpha_now [0, 1]
        self.rhc_refs.set_alpha(alpha=alpha_now) # also writes on shared mem for db
        self.rhc_refs.set_bound_relax(bound_relax=bound_relaxation) # also writes on shared mem for db

        self._alphas_q_root[:]=alpha_now # for now single alpha for everything
        self._alphas_q_jnts[:]=alpha_now
        self._alphas_twist_root[:]=alpha_now
        self._alphas_v_jnts[:]=alpha_now
        self._alphas_a[:]=alpha_now
        if not self._custom_opts["estimate_v_root"]:
            self._alphas_twist_root[0:3]=1.0 # open
            self._alphas_v_jnts[:]=1.0 # open

        # position is always open loop
        root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1)
        root_p_from_rhc=root_q_full_from_rhc[0:3, :]
        p_root[:, :]=root_p_from_rhc

        # expanding measured q if continuous joints are present
        if (not len(self._continuous_joints)==0): # we need to expand measured continuous joints to So2 (cos, sin)
            self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:]
            self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos
            self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin

            # continuous joints position is always open loop, but we need a delta vector of matching dimension
            q_jnts_from_rhc=self._get_jnt_q_from_sol(node_idx=1).reshape(-1, 1)

            self._jnts_q_delta_expanded[self._rev_joints_idxs, :]=jnt_q_delta[self._rev_joints_idxs_red ,:]

            # defect of the So2 components: cos/sin of solution angle minus cos/sin of measured angle
            self._jnts_q_delta_expanded[self._continuous_joints_idxs_cos, :]=\
                np.cos(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \
                np.cos(q_jnts[self._continuous_joints_idxs_red, :])
            self._jnts_q_delta_expanded[self._continuous_joints_idxs_sin, :]=\
                np.sin(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \
                np.sin(q_jnts[self._continuous_joints_idxs_red, :])

            q_jnts=self._jnts_q_expanded.reshape(-1,1) # overwriting with expanded jnts
            jnt_q_delta=self._jnts_q_delta_expanded.reshape(-1, 1) # overwriting with expanded jnts

            self._alphas_q_jnts[self._continuous_joints_idxs_cos, :]=1.0 # open loop
            self._alphas_q_jnts[self._continuous_joints_idxs_sin, :]=1.0 # open loop

        # pin node 0 (lb == ub) on measurement + alpha-scaled defect
        root_p_rhc.setBounds(lb=p_root,
            ub=p_root, nodes=0)
        root_q_rhc.setBounds(lb=q_root+self._alphas_q_root[3:7]*root_q_delta,
            ub=q_root+self._alphas_q_root[3:7]*root_q_delta, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts+self._alphas_q_jnts*jnt_q_delta,
            ub=q_jnts+self._alphas_q_jnts*jnt_q_delta, nodes=0)
        if self._custom_opts["estimate_v_root"]:
            root_v_rhc.setBounds(lb=-self._v_inf[0:3],
                ub=self._v_inf[0:3], nodes=0)
        else:
            root_v_rhc.setBounds(lb=v_root+self._alphas_twist_root[0:3, :]*v_root_delta,
                ub=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, nodes=0)
        root_omega_rhc.setBounds(lb=omega+self._alphas_twist_root[3:6, :]*omega_root_delta,
            ub=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts+self._alphas_v_jnts*jnt_v_delta,
            ub=v_jnts+self._alphas_v_jnts*jnt_v_delta, nodes=0)
        if self._custom_opts["lin_a_feedback"]:
            lin_a_prb.setBounds(lb=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :],
                ub=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :],
                nodes=0)

        # return state used for feedback
        q_state=np.concatenate((p_root, q_root, q_jnts),
            axis=0)
        v_state=np.concatenate((v_root, omega, v_jnts),
            axis=0)
        a_state=np.concatenate((a_root, a_jnts),
            axis=0)

        return (q_state, v_state, a_state)
+
+ def _solve(self):
+
+ if self._debug:
+ return self._db_solve(bootstrap=False)
+ else:
+ return self._min_solve(bootstrap=False)
+
+ def _bootstrap(self):
+
+ if self._debug:
+ return self._db_solve(bootstrap=True)
+ else:
+ return self._min_solve(bootstrap=True)
+
+ def _min_solve(self, bootstrap: bool = False):
+ # minimal solve version -> no debug
+ robot_qstate=None
+ robot_vstate=None
+ robot_astate=None
+ if self._open_loop:
+ robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and
+ # initial conditions using data from the solution itself
+ else:
+ robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and
+ # initial conditions using robot measurements
+
+ self._pm.shift() # shifts phases of one dt
+ if self._refs_in_hor_frame:
+ # q_base=self.robot_state.root_state.get(data_type="q",
+ # robot_idxs=self.controller_index).reshape(-1, 1)
+ # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1)
+ # using internal base pose from rhc. in case of closed loop, it will be the meas state
+ force_norm=None
+ if self._custom_opts["use_force_feedback"]:
+ contact_forces=self.robot_state.contact_wrenches.get(data_type="f",
+ robot_idxs=self.controller_index_np,
+ contact_name=None).reshape(self.n_contacts,3)
+ force_norm=np.linalg.norm(contact_forces, axis=1)
+ self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate,
+ force_norm=force_norm)
+ else:
+ self.rhc_refs.step()
+
+ try:
+ if not bootstrap:
+ converged = self._ti.rti() # RTI step
+ else:
+ converged = self._ti.bootstrap() # full solve (to convergence)
+ self.sol_counter = self.sol_counter + 1
+ return not self._check_rhc_failure()
+ except Exception as e: # fail in case of exceptions
+ return False
+
+ def _db_solve(self, bootstrap: bool = False):
+
+ self._timer_start = time.perf_counter()
+
+ robot_qstate=None
+ robot_vstate=None
+ robot_astate=None
+ if self._open_loop:
+ robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and
+ # initial conditions using data from the solution itself
+ else:
+ robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and
+ # initial conditions using robot measurements
+
+ self._prb_update_time = time.perf_counter()
+ self._pm.shift() # shifts phases of one dt
+ self._phase_shift_time = time.perf_counter()
+
+ if self._refs_in_hor_frame:
+ # q_base=self.robot_state.root_state.get(data_type="q",
+ # robot_idxs=self.controller_index).reshape(-1, 1)
+ # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1)
+ # using internal base pose from rhc. in case of closed loop, it will be the meas state
+ force_norm=None
+ if self._custom_opts["use_force_feedback"]:
+ contact_forces=self.robot_state.contact_wrenches.get(data_type="f",
+ robot_idxs=self.controller_index_np,
+ contact_name=None).reshape(self.n_contacts,3)
+ force_norm=np.linalg.norm(contact_forces, axis=1)
+ self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate,
+ force_norm=force_norm)
+ else:
+ self.rhc_refs.step()
+
+ self._task_ref_update_time = time.perf_counter()
+
+ try:
+ if not bootstrap:
+ converged = self._ti.rti() # RTI step
+ else:
+ converged = self._ti.bootstrap() # full solve bootstrap
+ self._rti_time = time.perf_counter()
+ self.sol_counter = self.sol_counter + 1
+ self._update_db_data()
+ return not self._check_rhc_failure()
+ except Exception as e: # fail in case of exceptions
+ if self._verbose:
+ solve_mode = "RTI" if not bootstrap else "Bootstrap"
+ exception = f"{solve_mode}() for controller {self.controller_index} failed" + \
+ f" with exception {type(e).__name__}"
+ Journal.log(self.__class__.__name__,
+ "solve",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = False)
+ self._update_db_data()
+ return False
+
+ def _get_fail_idx(self):
+
+ self._explosion_idx_buffer[:, self._expl_idx_buffer_counter]=self._get_explosion_idx()
+ self._expl_idx_buffer_counter+=1
+ self._expl_idx_counter+=1
+ if self._expl_idx_counter%self._expl_idx_window_size==0:
+ self._expl_idx_buffer_counter=0 # restart from 0
+
+ running_avrg=np.mean(self._explosion_idx_buffer).item()
+
+ return running_avrg
+
+ def _get_explosion_idx(self):
+ explosion_index = self._get_rhc_constr_viol() + self._get_rhc_cost()*self._fail_idx_scale
+ return explosion_index
+
+ def _update_db_data(self):
+
+ self._profiling_data_dict["problem_update_dt"] = self._prb_update_time - self._timer_start
+ self._profiling_data_dict["phases_shift_dt"] = self._phase_shift_time - self._prb_update_time
+ self._profiling_data_dict["task_ref_update"] = self._task_ref_update_time - self._phase_shift_time
+ self._profiling_data_dict["rti_solve_dt"] = self._rti_time - self._task_ref_update_time
+ self.rhc_costs.update(self._ti.solver_rti.getCostsValues())
+ self.rhc_constr.update(self._ti.solver_rti.getConstraintsValues())
+
+ def _reset(self):
+
+ # reset task interface (ig, solvers, etc..) +
+ # phase manager and sets bootstrap as solution
+ self._gm.reset()
+ self._explosion_idx_buffer[:, :]=self._get_explosion_idx() # reset with data from reset solution
+ self._expl_idx_counter=0.0
+ self._expl_idx_buffer_counter=0
+
+ def _get_cost_info(self):
+
+ cost_dict = self._ti.solver_rti.getCostsValues()
+ cost_names = list(cost_dict.keys())
+ cost_dims = [1] * len(cost_names) # costs are always scalar
+ return cost_names, cost_dims
+
+ def _get_constr_info(self):
+
+ constr_dict = self._ti.solver_rti.getConstraintsValues()
+ constr_names = list(constr_dict.keys())
+ constr_dims = [-1] * len(constr_names)
+ i = 0
+ for constr in constr_dict:
+ constr_val = constr_dict[constr]
+ constr_shape = constr_val.shape
+ constr_dims[i] = constr_shape[0]
+ i+=1
+ return constr_names, constr_dims
+
+ def _get_q_from_sol(self):
+ full_q=self._ti.solution['q'].astype(self._dtype)
+ if self._custom_opts["replace_continuous_joints"]:
+ return full_q
+ else:
+ cont_jnts=full_q[self._continuous_joints_idxs_qfull, :]
+ cos=cont_jnts[::2, :]
+ sin=cont_jnts[1::2, :]
+ # copy root
+ self._full_q_reduced[0:7, :]=full_q[0:7, :]
+ # copy rev joint vals
+ self._full_q_reduced[self._rev_joints_idxs_red_qfull, :]=full_q[self._rev_joints_idxs_qfull, :]
+ # and continuous
+ angle=np.arctan2(sin, cos)
+ self._full_q_reduced[self._continuous_joints_idxs_red_qfull, :]=angle
+ return self._full_q_reduced
+
+ def _get_v_from_sol(self):
+ return self._ti.solution['v'].astype(self._dtype)
+
+ def _get_a_from_sol(self):
+ return self._ti.solution['a'].astype(self._dtype)
+
+ def _get_a_dot_from_sol(self):
+ return None
+
+ def _get_f_from_sol(self):
+ # to be overridden by child class
+ contact_names =self._get_contacts() # we use controller-side names
+ try:
+ data=[]
+ for key in contact_names:
+ contact_f=self._ti.solution["f_" + key].astype(self._dtype)
+ np.nan_to_num(contact_f, nan=1e6, posinf=1e6, neginf=-1e6, copy=False)
+ np.clip(a=contact_f, a_max=1e6, a_min=-1e6, out=contact_f)
+ data.append(contact_f)
+ return np.concatenate(data, axis=0)
+ except:
+ return None
+
+ def _get_f_dot_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_eff_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_cost_from_sol(self,
+ cost_name: str):
+ return self.rhc_costs[cost_name]
+
+ def _get_constr_from_sol(self,
+ constr_name: str):
+ return self.rhc_constr[constr_name]
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/ibrido_run__2026_03_28_11_09_54_ID/training_cfg_unitree_b2w.sh b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/ibrido_run__2026_03_28_11_09_54_ID/training_cfg_unitree_b2w.sh
new file mode 100644
index 0000000000000000000000000000000000000000..afdd7ea0925f57b483bdac7b0bde43dad50549d4
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/ibrido_run__2026_03_28_11_09_54_ID/training_cfg_unitree_b2w.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+export EVAL=0
+export DET_EVAL=1
+export EVAL_ON_CPU=1
+export OVERRIDE_ENV=1
+export OVERRIDE_AGENT_REFS=1
+export MPATH="/root/training_data/"
+export MNAME=""
+
+export WANDB_KEY="25f235316292344cea6dfa68e7c95409b3374d03"
+export SHM_NS="unitree_b2w" # shared mem namespace used for all shared data on CPU
+export N_ENVS=800 # number of env to run in parallel
+export RNAME="B2WPartialCloopWheels" # a descriptive base name for this run
+export SEED=34 # random n generator seed to be used for this run
+export REMOTE_STEPPING=1
+export COMPRESSION_RATIO=0.6
+export ACTOR_LWIDTH=128
+export ACTOR_DEPTH=3
+export CRITIC_LWIDTH=256
+export CRITIC_DEPTH=4
+export OBS_NORM=1
+export OBS_RESCALING=0
+export CRITIC_ACTION_RESCALE=1
+export WEIGHT_NORM=1
+export LAYER_NORM=0
+export BATCH_NORM=0
+export IS_CLOSED_LOOP=1
+export DEBUG=1
+export RMDEBUG=1
+export DUMP_ENV_CHECKPOINTS=1
+export TOT_STEPS=20000000
+export USE_RND=0
+export DEMO_ENVS_PERC=0.0
+export DEMO_STOP_THRESH=2.0
+export EXPL_ENVS_PERC=0.0
+export ACTION_REPEAT=3
+export USE_SAC=1
+export USE_DUMMY=0
+export DISCOUNT_FACTOR=0.98
+export USE_PERIOD_RESETS=0
+export COMMENT='unitree b2w, CLOSED more vz tracking, new tuning, gamma 0.98, NO entropy annhealing [-0.3, -0.5], max pos 10 m, max_cmd_v 1.0, max_cmd_omega 0.5, task_track_omega_z_weight 1.0, CoT 0.1 0.1' # any training comment
+export URDF_PATH="${HOME}/ibrido_ws/src/unitree_ros/robots/b2w_description/xacro/robot.urdf.xacro" # name of the description package for the robot
+export SRDF_PATH="${HOME}/ibrido_ws/src/unitree_ros/robots/b2w_description/xacro/robot.srdf.xacro" # base path where the description package for the robot are located
+export JNT_IMP_CF_PATH="${HOME}/ibrido_ws/src/KyonRLStepping/kyonrlstepping/config/jnt_imp_config_open_b2w.yaml" # path to yaml file for jnt imp configuration
+if (( $IS_CLOSED_LOOP )); then
+ export JNT_IMP_CF_PATH="${HOME}/ibrido_ws/src/KyonRLStepping/kyonrlstepping/config/jnt_imp_config_b2w.yaml"
+fi
+
+export CLUSTER_CL_FNAME="kyonrlstepping.controllers.horizon_based.b2w_rhc_cluster_client" # base path where the description package for the robot are located
+export CLUSTER_DT=0.035
+export N_NODES=31
+export CLUSTER_DB=1
+export PHYSICS_DT=0.001
+export USE_GPU_SIM=1
+# export CODEGEN_OVERRIDE_BDIR="none"
+export CODEGEN_OVERRIDE_BDIR="${HOME}/aux_data/B2WRHCLusterClient_${SHM_NS}/CodeGen/${SHM_NS}Rhc"
+# export TRAIN_ENV_FNAME="twist_tracking_env"
+# export TRAIN_ENV_CNAME="TwistTrackingEnv"
+export TRAIN_ENV_FNAME="fake_pos_tracking_env"
+export TRAIN_ENV_CNAME="FakePosTrackingEnv"
+# export TRAIN_ENV_FNAME="fake_pos_track_env_phase_control_with_demo"
+# export TRAIN_ENV_CNAME="FakePosTrackEnvPhaseControlWithDemo"
+# export TRAIN_ENV_FNAME="fake_pos_tracking_with_demo"
+# export TRAIN_ENV_CNAME="FakePosTrackingEnvWithDemo"
+# export TRAIN_ENV_FNAME="linvel_env_with_demo"
+# export TRAIN_ENV_CNAME="TwistTrackingEnvWithDemo"
+# export TRAIN_ENV_FNAME="flight_phase_control_env"
+# export TRAIN_ENV_CNAME="FlightPhaseControl"
+export PUB_HEIGHTMAP=0
+export BAG_SDT=90.0
+export BRIDGE_DT=0.1
+export DUMP_DT=50.0
+export ENV_IDX_BAG=6
+export ENV_IDX_BAG_DEMO=-1
+export ENV_IDX_BAG_EXPL=-1
+export SRDF_PATH_ROSBAG="${HOME}/aux_data/B2WRHClusterClient_${SHM_NS}/$SHM_NS.srdf" # base path where the description package for the robot are located
+export CUSTOM_ARGS_NAMES="step_height rendering_dt wheels fixed_flights adaptive_is lin_a_feedback closed_partial use_flat_ground estimate_v_root fully_closed use_gpu control_wheels base_link_name deduce_base_link self_collide"
+export CUSTOM_ARGS_DTYPE="float float xacro bool bool bool bool bool bool bool bool bool str bool bool"
+export CUSTOM_ARGS_VALS="0.12 0.1 true true true false true true false false true true base true true"
+export SET_ULIM=1
+export ULIM_N=131072 # maximum number of open file descriptors for each process (shared memory)
+export TIMEOUT_MS=120000 # timeout after which each script autokills ([ms])
+
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/isaac_world_interface.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/isaac_world_interface.py
new file mode 100644
index 0000000000000000000000000000000000000000..51938e9ba9b4ffbdc9491e6df6326ec0223bd033
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/isaac_world_interface.py
@@ -0,0 +1,2355 @@
+# Copyright (C) 2023 Andrea Patrizi (AndrePatri, andreapatrizi1b6e6@gmail.com)
+#
+# This file is part of AugMPCEnvs and distributed under the GNU General Public License version 2.
+#
+# AugMPCEnvs is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# AugMPCEnvs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with AugMPCEnvs. If not, see <https://www.gnu.org/licenses/>.
+#
+from isaacsim import SimulationApp
+
+import carb
+
+import os
+import math
+
+import torch
+import numpy as np
+
+from typing import Dict, List
+from typing_extensions import override
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import LogType
+from EigenIPC.PyEigenIPC import Journal
+
+from aug_mpc_envs.utils.math_utils import quat_to_omega
+from aug_mpc_envs.utils.height_grid_visualizer import HeightGridVisualizer
+from aug_mpc_envs.utils.height_sensor import HeightGridSensor
+
+from aug_mpc.world_interfaces.world_interface_base import AugMPCWorldInterfaceBase
+from mpc_hive.utilities.math_utils_torch import world2base_frame,world2base_frame3D
+
+class IsaacSimEnv(AugMPCWorldInterfaceBase):
+
+ def __init__(self,
+ robot_names: List[str],
+ robot_urdf_paths: List[str],
+ robot_srdf_paths: List[str],
+ jnt_imp_config_paths: List[str],
+ n_contacts: List[int],
+ cluster_dt: List[float],
+ use_remote_stepping: List[bool],
+ name: str = "IsaacSimEnv",
+ num_envs: int = 1,
+ debug = False,
+ verbose: bool = False,
+ vlevel: VLevel = VLevel.V1,
+ n_init_step: int = 0,
+ timeout_ms: int = 60000,
+ env_opts: Dict = None,
+ use_gpu: bool = True,
+ dtype: torch.dtype = torch.float32,
+ override_low_lev_controller: bool = False):
+
+ self._render_step_counter = 0
+
+ super().__init__(name=name,
+ robot_names=robot_names,
+ robot_urdf_paths=robot_urdf_paths,
+ robot_srdf_paths=robot_srdf_paths,
+ jnt_imp_config_paths=jnt_imp_config_paths,
+ n_contacts=n_contacts,
+ cluster_dt=cluster_dt,
+ use_remote_stepping=use_remote_stepping,
+ num_envs=num_envs,
+ debug=debug,
+ verbose=verbose,
+ vlevel=vlevel,
+ n_init_step=n_init_step,
+ timeout_ms=timeout_ms,
+ env_opts=env_opts,
+ use_gpu=use_gpu,
+ dtype=dtype,
+ override_low_lev_controller=override_low_lev_controller)
+
+ def is_running(self):
+ return self._simulation_app.is_running()
+
+ def _pre_setup(self):
+ self._backend="torch"
+ enable_livestream = self._env_opts["enable_livestream"]
+ enable_viewport = self._env_opts["enable_viewport"]
+ base_isaac_exp = f'{os.environ["EXP_PATH"]}/omni.isaac.sim.python.aug_mpc_envs.kit'
+ base_isaac_exp_headless = f'{os.environ["EXP_PATH"]}/omni.isaac.sim.python.aug_mpc_envs.headless.kit'
+
+ experience=base_isaac_exp
+ if self._env_opts["headless"]:
+ info = f"Will run in headless mode."
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ info,
+ LogType.STAT,
+ throw_when_excep = True)
+ if enable_livestream:
+ experience = ""
+ elif enable_viewport:
+ exception = f"Using viewport is not supported yet."
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ else:
+ experience=base_isaac_exp_headless
+
+ self._simulation_app = SimulationApp({"headless": self._env_opts["headless"]},
+ experience=experience)
+ # all imports depending on isaac sim kits have to be done after simulationapp
+ # from omni.isaac.core.tasks.base_task import BaseTask
+ self._import_isaac_pkgs()
+ info = "Using IsaacSim experience file @ " + experience
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ info,
+ LogType.STAT,
+ throw_when_excep = True)
+ # carb.settings.get_settings().set("/persistent/omnihydra/useSceneGraphInstancing", True)
+
+ if enable_livestream:
+ info = "Livestream enabled"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ info,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ self._simulation_app.set_setting("/app/livestream/enabled", True)
+ self._simulation_app.set_setting("/app/window/drawMouse", True)
+ self._simulation_app.set_setting("/app/livestream/proto", "ws")
+ self._simulation_app.set_setting("/app/livestream/websocket/framerate_limit", 120)
+ self._simulation_app.set_setting("/ngx/enabled", False)
+ enable_extension("omni.kit.livestream.native")
+ enable_extension("omni.services.streaming.manager")
+ self._render = ((not self._env_opts["headless"]) or self._env_opts["render_to_file"]) or enable_livestream or enable_viewport
+
+ self._record = False
+ self._world = None
+ self._physics_context = None
+ self._scene = None
+ self._task = None
+ self._metadata = None
+
+ self._robots_art_views = {}
+ self._blink_rigid_prim_views = {}
+ self._robots_articulations = {}
+ self._robots_geom_prim_views = {}
+ self.omni_contact_sensors = {}
+
+ self._solver_position_iteration_count=self._env_opts["solver_position_iteration_count"]
+ self._solver_velocity_iteration_count=self._env_opts["solver_velocity_iteration_count"]
+ self._solver_stabilization_thresh=self._env_opts["stabilization_threshold"]
+ self._solver_position_iteration_counts={}
+ self._solver_velocity_iteration_counts={}
+ self._solver_stabilization_threshs={}
+ self._robot_bodynames={}
+ self._robot_n_links={}
+ self._robot_n_dofs={}
+ self._robot_dof_names={}
+ self._distr_offset={} # describes how robots within each env are distributed
+ self._spawning_radius=self._env_opts["spawning_radius"] # [m] -> default distance between roots of robots in a single
+ self._height_sensors={}
+ self._height_imgs={}
+ self._height_vis={}
+ self._height_vis_step={}
+ self._height_vis_step={}
+ self._height_vis={}
+ self._terrain_hit_margin = 1.0
+ self._terrain_hit_log_period = 1000
+ self._terrain_hit_active = {}
+ self._terrain_hit_counts = {}
+ self._terrain_hit_counts_last_logged = {}
+ for robot_name in self._robot_names:
+ self._terrain_hit_active[robot_name] = torch.zeros((self._num_envs,), device=self._device, dtype=torch.bool)
+ self._terrain_hit_counts[robot_name] = torch.zeros((self._num_envs,), device=self._device, dtype=torch.int32)
+ self._terrain_hit_counts_last_logged[robot_name] = None
+
+ def _import_isaac_pkgs(self):
+ # we use global, so that we can create the simulation app inside (and so
+ # access Isaac's kit) and also expose to all methods the imports
+ global World, omni_kit, get_context, UsdLux, Sdf, Gf, UsdPhysics, PhysicsSchemaTools, UsdShade, Vt
+ global enable_extension, set_camera_view, _urdf, move_prim, GridCloner, prim_utils
+ global get_current_stage, Scene, ArticulationView, RigidPrimView, rep
+ global OmniContactSensors, RlTerrains,OmniJntImpCntrl
+ global PhysxSchema, UsdGeom
+ global _sensor, _dynamic_control
+ global get_prim_at_path
+
+ from pxr import PhysxSchema, UsdGeom, UsdShade, Vt
+
+ from omni.isaac.core.world import World
+ from omni.usd import get_context
+ from pxr import UsdLux, Sdf, Gf, UsdPhysics, PhysicsSchemaTools
+ from omni.isaac.core.utils.extensions import enable_extension
+ from omni.isaac.core.utils.viewports import set_camera_view
+ import omni.kit as omni_kit
+ from omni.importer.urdf import _urdf
+ from omni.isaac.core.utils.prims import move_prim
+ from omni.isaac.cloner import GridCloner
+ import omni.isaac.core.utils.prims as prim_utils
+ from omni.isaac.core.utils.stage import get_current_stage
+ from omni.isaac.core.scenes.scene import Scene
+ from omni.isaac.core.articulations import ArticulationView
+ from omni.isaac.core.prims import RigidPrimView
+
+ import omni.replicator.core as rep
+
+ from omni.isaac.core.utils.prims import get_prim_at_path
+
+ from omni.isaac.sensor import _sensor
+
+ from omni.isaac.dynamic_control import _dynamic_control
+
+ from aug_mpc_envs.utils.contact_sensor import OmniContactSensors
+ from aug_mpc_envs.utils.omni_jnt_imp_cntrl import OmniJntImpCntrl
+ from aug_mpc_envs.utils.terrains import RlTerrains
+
+ def _parse_env_opts(self):
+ isaac_opts={}
+ isaac_opts["envs_ns"]="/World/envs"
+ isaac_opts["template_env_ns"]=isaac_opts["envs_ns"] + "/env_0"
+ isaac_opts["base_linkname"]="base_link"
+ isaac_opts["deduce_base_link"]=False
+ isaac_opts["ground_plane_prim_path"]="/World/terrain"
+ isaac_opts["physics_prim_path"]="/physicsScene"
+ isaac_opts["use_gpu"]=True
+ isaac_opts["use_gpu_pipeline"]=True
+ isaac_opts["device"]="cuda"
+ isaac_opts["is_fixed_base"]=False
+ isaac_opts["merge_fixed_jnts"]=True
+ isaac_opts["self_collide"]=True
+ isaac_opts["sim_device"]="cuda" if isaac_opts["use_gpu"] else "cpu"
+ isaac_opts["physics_dt"]=1e-3
+ isaac_opts["gravity"] = np.array([0.0, 0.0, -9.81])
+ isaac_opts["use_fabric"]=True# Enable/disable reading of physics buffers directly. Default is True.
+ isaac_opts["replicate_physics"]=True
+ # isaac_opts["worker_thread_count"]=4
+ isaac_opts["solver_type"]=1 # 0: PGS, 1:TGS, defaults to TGS. PGS faster but TGS more stable
+ isaac_opts["enable_stabilization"]=True
+ # isaac_opts["bounce_threshold_velocity"] = 0.2
+ # isaac_opts["friction_offset_threshold"] = 0.04
+ # isaac_opts["friction_correlation_distance"] = 0.025
+ # isaac_opts["enable_sleeping"] = True
+ # Per-actor settings ( can override in actor_options )
+ isaac_opts["solver_position_iteration_count"] = 4 # defaults to 4
+ isaac_opts["solver_velocity_iteration_count"] = 3 # defaults to 1
+ isaac_opts["sleep_threshold"] = 1e-5 # Mass-normalized kinetic energy threshold below which an actor may go to sleep.
+ # Allowed range [0, max_float).
+ isaac_opts["stabilization_threshold"] = 1e-4
+ # Per-body settings ( can override in actor_options )
+ # isaac_opts["enable_gyroscopic_forces"] = True
+ # isaac_opts["density"] = 1000 # density to be used for bodies that do not specify mass or density
+ # isaac_opts["max_depenetration_velocity"] = 100.0
+ # isaac_opts["solver_velocity_iteration_count"] = 1
+ # GPU buffers settings
+ isaac_opts["gpu_max_rigid_contact_count"] = 512 * 2048
+ isaac_opts["gpu_max_rigid_patch_count"] = 80 * 2048
+ isaac_opts["gpu_found_lost_pairs_capacity"] = 102400
+ isaac_opts["gpu_found_lost_aggregate_pairs_capacity"] = 102400
+ isaac_opts["gpu_total_aggregate_pairs_capacity"] = 102400
+ # isaac_opts["gpu_max_soft_body_contacts"] = 1024 * 1024
+ # isaac_opts["gpu_max_particle_contacts"] = 1024 * 1024
+ # isaac_opts["gpu_heap_capacity"] = 64 * 1024 * 1024
+ # isaac_opts["gpu_temp_buffer_capacity"] = 16 * 1024 * 1024
+ # isaac_opts["gpu_max_num_partitions"] = 8
+
+ isaac_opts["env_spacing"]=15.0
+ isaac_opts["spawning_height"]=1.0
+ isaac_opts["spawning_radius"]=1.0
+ isaac_opts["spawn_height_check_half_extent"]=0.2
+ isaac_opts["spawn_height_cushion"]=0.03
+
+ isaac_opts["enable_height_vis"]=False
+ isaac_opts["height_vis_radius"]=0.03
+ isaac_opts["height_vis_update_period"]=1
+ isaac_opts["collision_refinement_level"]=3 # increase cylinder tesselation for smoother wheel contacts
+
+ # rendering helpers
+ isaac_opts["render_to_file"]=False
+ isaac_opts["use_follow_camera"]=False # if True, follow robot during rendering in human mode
+ isaac_opts["render_follow_env_idx"]=0
+ isaac_opts["render_follow_robot_idx"]=0
+ isaac_opts["render_follow_offset"]=[-0.2, 3.0, 0.1]
+ isaac_opts["render_follow_target_offset"]=[-0.2, -1.0, 0.0]
+ isaac_opts["rendering_dt"]=15*isaac_opts["physics_dt"]
+ isaac_opts["camera_prim_path"]="/OmniverseKit_Persp"
+ isaac_opts["render_resolution"]=[1280, 720] # [1024, 576]
+
+ isaac_opts["render_panoramic_cam"]=True
+ isaac_opts["render_panoramic_cam_height"]=2.0
+ isaac_opts["render_panoramic_cam_target_xy"]=[10.0, 14.]
+ isaac_opts["render_panoramic_cam_target_z"]=1.2
+
+ # ground opts
+ isaac_opts["use_flat_ground"]=True
+ isaac_opts["static_friction"]=0.5
+ isaac_opts["dynamic_friction"]=0.5
+ isaac_opts["restitution"]=0.1
+ isaac_opts["ground_type"]="random"
+ isaac_opts["ground_size"]=800
+ isaac_opts["terrain_border"]=isaac_opts["ground_size"]/2
+ isaac_opts["dh_ground"]=0.03
+ isaac_opts["step_height_lb"]=0.08
+ isaac_opts["step_height_ub"]=0.15
+ isaac_opts["step_width_lb"]=0.6
+ isaac_opts["step_width_ub"]= 1.5
+ isaac_opts["contact_prims"] = []
+ isaac_opts["sensor_radii"] = 0.1
+ isaac_opts["contact_offsets"] = {}
+
+ isaac_opts["enable_livestream"] = False
+ isaac_opts["enable_viewport"] = False
+
+ isaac_opts["use_diff_vels"] = False
+
+ # random perturbations (impulse, durations, directions are sampled uniformly, force/torque computed accordingly)
+ isaac_opts["use_random_pertub"]=False
+ isaac_opts["pert_planar_only"]=True # if True, linear pushes only in xy plane and no torques
+
+ isaac_opts["pert_wrenches_rate"]=10.0 # on average 1 pert every pert_wrenches_rate seconds
+ isaac_opts["pert_wrenches_min_duration"]=0.6
+ isaac_opts["pert_wrenches_max_duration"]=3.5 # [s]
+ isaac_opts["pert_force_max_weight_scale"]=1.2 # clip force norm to scale*weight
+ isaac_opts["pert_force_min_weight_scale"]=0.2 # optional min force norm as scale*weight
+ isaac_opts["pert_torque_max_weight_scale"]=1.0 # clip torque norm to scale*weight*max_ang_impulse_lever
+
+ isaac_opts["pert_target_delta_v"]=2.0 # [m/s] desired max impulse = m*delta_v
+ isaac_opts["det_pert_rate"]=True
+
+ # max impulse (unitless scale multiplied by weight to get N*s): delta_v/g
+ isaac_opts["max_lin_impulse_norm"]=isaac_opts["pert_target_delta_v"]/9.81
+ isaac_opts["lin_impulse_mag_min"]=1.0 # [0, 1] -> min fraction of max_lin_impulse_norm when sampling
+ isaac_opts["lin_impulse_mag_max"]=1.0 # [0, 1] -> max fraction of max_lin_impulse_norm when sampling
+
+ isaac_opts["max_ang_impulse_lever"]=0.2 # [m]
+ isaac_opts["max_ang_impulse_norm"]=isaac_opts["max_lin_impulse_norm"]*isaac_opts["max_ang_impulse_lever"]
+ isaac_opts["terrain_hit_log_period"]=1000
+
+ # opts definition end
+
+ isaac_opts.update(self._env_opts) # update defaults with provided opts
+ isaac_opts["rendering_freq"]=int(isaac_opts["rendering_dt"]/isaac_opts["physics_dt"])
+ # isaac_opts["rendering_dt"]=isaac_opts["physics_dt"] # forcing rendering_dt==physics_dt
+ # for some mystic reason simulation is influenced by the rendering dt (why ??????)
+
+ # modify things
+ isaac_opts["cloning_offset"] = np.array([[0.0, 0.0, isaac_opts["spawning_height"]]]*self._num_envs)
+ if not isaac_opts["use_gpu"]: # don't use GPU at all
+ isaac_opts["use_gpu_pipeline"]=False
+ isaac_opts["device"]="cpu"
+ isaac_opts["sim_device"]="cpu"
+ else: # use GPU
+ if isaac_opts["use_gpu_pipeline"]:
+ isaac_opts["device"]="cuda"
+ isaac_opts["sim_device"]="cuda"
+ else: # cpu pipeline
+ isaac_opts["device"]="cpu"
+ isaac_opts["sim_device"]="cpu"
+ isaac_opts["use_gpu"]=False
+ # isaac_opts["sim_device"]=isaac_opts["device"]
+ # overwrite env opts in case some sim params were missing
+ self._env_opts=isaac_opts
+
+ # update device flag based on sim opts
+ self._device=isaac_opts["device"]
+ self._use_gpu=isaac_opts["use_gpu"]
+
+ def _calc_robot_distrib(self):
+
+ import math
+ # we distribute robots in a single env. along the
+ # circumference of a circle of given radius
+ n_robots = len(self._robot_names)
+ offset_baseangle = 2 * math.pi / n_robots
+ for i in range(n_robots):
+ offset_angle = offset_baseangle * (i + 1)
+ robot_offset_wrt_center = torch.tensor([self._spawning_radius * math.cos(offset_angle),
+ self._spawning_radius * math.sin(offset_angle), 0],
+ device=self._device,
+ dtype=self._dtype)
+ # list with n references to the original tensor
+ tensor_list = [robot_offset_wrt_center] * self._num_envs
+ self._distr_offset[self._robot_names[i]] = torch.stack(tensor_list, dim=0)
+
+ def _init_world(self):
+
+ self._cloner = GridCloner(spacing=self._env_opts["env_spacing"])
+ self._cloner.define_base_env(self._env_opts["envs_ns"])
+ prim_utils.define_prim(self._env_opts["template_env_ns"])
+ self._envs_prim_paths = self._cloner.generate_paths(self._env_opts["envs_ns"] + "/env",
+ self._num_envs)
+
+ # parse device based on sim_param settings
+
+ info = "Using sim device: " + str(self._env_opts["sim_device"])
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ info,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ self._world = World(
+ physics_dt=self._env_opts["physics_dt"],
+ rendering_dt=self._env_opts["physics_dt"], # == physics dt (rendering is actually done manually by this env)
+ backend=self._backend,
+ device=str(self._env_opts["sim_device"]),
+ physics_prim_path=self._env_opts["physics_prim_path"],
+ set_defaults = False, # set to True to use the defaults settings [physics_dt = 1.0/ 60.0,
+ # stage units in meters = 0.01 (i.e in cms), rendering_dt = 1.0 / 60.0, gravity = -9.81 m / s
+ # ccd_enabled, stabilization_enabled, gpu dynamics turned off,
+ # broadcast type is MBP, solver type is TGS]
+ sim_params=self._env_opts
+ )
+
+ big_info = "[World] Creating Isaac simulation " + self._name + "\n" + \
+ "use_gpu_pipeline: " + str(self._env_opts["use_gpu_pipeline"]) + "\n" + \
+ "device: " + str(self._env_opts["sim_device"]) + "\n" +\
+ "backend: " + str(self._backend) + "\n" +\
+ "integration_dt: " + str(self.physics_dt()) + "\n" + \
+ "rendering_dt: " + str(self.rendering_dt()) + "\n"
+ Journal.log(self.__class__.__name__,
+ "_init_world",
+ big_info,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ # we get the physics context to expose additional low-level ##
+ # settings of the simulation
+ self._physics_context = self._world.get_physics_context()
+ self._physics_scene_path = self._physics_context.prim_path
+ # self._physics_context.enable_gpu_dynamics(True)
+ self._physics_context.enable_stablization(True)
+ self._physics_scene_prim = self._physics_context.get_current_physics_scene_prim()
+ self._solver_type = self._physics_context.get_solver_type()
+
+ if "gpu_max_rigid_contact_count" in self._env_opts:
+ self._physics_context.set_gpu_max_rigid_contact_count(self._env_opts["gpu_max_rigid_contact_count"])
+ if "gpu_max_rigid_patch_count" in self._env_opts:
+ self._physics_context.set_gpu_max_rigid_patch_count(self._env_opts["gpu_max_rigid_patch_count"])
+ if "gpu_found_lost_pairs_capacity" in self._env_opts:
+ self._physics_context.set_gpu_found_lost_pairs_capacity(self._env_opts["gpu_found_lost_pairs_capacity"])
+ if "gpu_found_lost_aggregate_pairs_capacity" in self._env_opts:
+ self._physics_context.set_gpu_found_lost_aggregate_pairs_capacity(self._env_opts["gpu_found_lost_aggregate_pairs_capacity"])
+ if "gpu_total_aggregate_pairs_capacity" in self._env_opts:
+ self._physics_context.set_gpu_total_aggregate_pairs_capacity(self._env_opts["gpu_total_aggregate_pairs_capacity"])
+ if "gpu_max_soft_body_contacts" in self._env_opts:
+ self._physics_context.set_gpu_max_soft_body_contacts(self._env_opts["gpu_max_soft_body_contacts"])
+ if "gpu_max_particle_contacts" in self._env_opts:
+ self._physics_context.set_gpu_max_particle_contacts(self._env_opts["gpu_max_particle_contacts"])
+ if "gpu_heap_capacity" in self._env_opts:
+ self._physics_context.set_gpu_heap_capacity(self._env_opts["gpu_heap_capacity"])
+ if "gpu_temp_buffer_capacity" in self._env_opts:
+ self._physics_context.set_gpu_temp_buffer_capacity(self._env_opts["gpu_temp_buffer_capacity"])
+ if "gpu_max_num_partitions" in self._env_opts:
+ self._physics_context.set_gpu_max_num_partitions(self._env_opts["gpu_max_num_partitions"])
+
+ # overwriting defaults
+ # self._physics_context.set_gpu_max_rigid_contact_count(2 * self._physics_context.get_gpu_max_rigid_contact_count())
+ # self._physics_context.set_gpu_max_rigid_patch_count(2 * self._physics_context.get_gpu_max_rigid_patch_count())
+ # self._physics_context.set_gpu_found_lost_pairs_capacity(2 * self._physics_context.get_gpu_found_lost_pairs_capacity())
+ # self._physics_context.set_gpu_found_lost_aggregate_pairs_capacity(20 * self._physics_context.get_gpu_found_lost_aggregate_pairs_capacity())
+ # self._physics_context.set_gpu_total_aggregate_pairs_capacity(20 * self._physics_context.get_gpu_total_aggregate_pairs_capacity())
+ # self._physics_context.set_gpu_heap_capacity(2 * self._physics_context.get_gpu_heap_capacity())
+ # self._physics_context.set_gpu_temp_buffer_capacity(20 * self._physics_context.get_gpu_heap_capacity())
+ # self._physics_context.set_gpu_max_num_partitions(20 * self._physics_context.get_gpu_temp_buffer_capacity())
+
+ # GPU buffers
+ self._gpu_max_rigid_contact_count = self._physics_context.get_gpu_max_rigid_contact_count()
+ self._gpu_max_rigid_patch_count = self._physics_context.get_gpu_max_rigid_patch_count()
+ self._gpu_found_lost_pairs_capacity = self._physics_context.get_gpu_found_lost_pairs_capacity()
+ self._gpu_found_lost_aggregate_pairs_capacity = self._physics_context.get_gpu_found_lost_aggregate_pairs_capacity()
+ self._gpu_total_aggregate_pairs_capacity = self._physics_context.get_gpu_total_aggregate_pairs_capacity()
+ self._gpu_max_soft_body_contacts = self._physics_context.get_gpu_max_soft_body_contacts()
+ self._gpu_max_particle_contacts = self._physics_context.get_gpu_max_particle_contacts()
+ self._gpu_heap_capacity = self._physics_context.get_gpu_heap_capacity()
+ self._gpu_temp_buffer_capacity = self._physics_context.get_gpu_temp_buffer_capacity()
+ # self._gpu_max_num_partitions = physics_context.get_gpu_max_num_partitions() # BROKEN->method does not exist
+
+ big_info2 = "[physics context]:" + "\n" + \
+ "gpu_max_rigid_contact_count: " + str(self._gpu_max_rigid_contact_count) + "\n" + \
+ "gpu_max_rigid_patch_count: " + str(self._gpu_max_rigid_patch_count) + "\n" + \
+ "gpu_found_lost_pairs_capacity: " + str(self._gpu_found_lost_pairs_capacity) + "\n" + \
+ "gpu_found_lost_aggregate_pairs_capacity: " + str(self._gpu_found_lost_aggregate_pairs_capacity) + "\n" + \
+ "gpu_total_aggregate_pairs_capacity: " + str(self._gpu_total_aggregate_pairs_capacity) + "\n" + \
+ "gpu_max_soft_body_contacts: " + str(self._gpu_max_soft_body_contacts) + "\n" + \
+ "gpu_max_particle_contacts: " + str(self._gpu_max_particle_contacts) + "\n" + \
+ "gpu_heap_capacity: " + str(self._gpu_heap_capacity) + "\n" + \
+ "gpu_temp_buffer_capacity: " + str(self._gpu_temp_buffer_capacity) + "\n" + \
+ "use_gpu_sim: " + str(self._world.get_physics_context().use_gpu_sim) + "\n" + \
+ "use_gpu_pipeline: " + str(self._world.get_physics_context().use_gpu_pipeline) + "\n" + \
+ "use_fabric: " + str(self._world.get_physics_context().use_fabric) + "\n" + \
+ "world device: " + str(self._world.get_physics_context().device) + "\n" + \
+ "physics context device: " + str(self._world.get_physics_context().device) + "\n"
+
+ Journal.log(self.__class__.__name__,
+ "set_task",
+ big_info2,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ self._scene = self._world.scene
+ self._physics_context = self._world.get_physics_context()
+
+ self._stage = get_context().get_stage()
+
+ # strong, uniform lighting: bright sun + dome fill to cover the whole terrain
+ prim_utils.define_prim("/World/Lighting", "Xform")
+ sun_path = "/World/Lighting/SunLight"
+ dome_path = "/World/Lighting/AmbientDome"
+
+ distantLight = UsdLux.DistantLight.Define(self._stage, Sdf.Path(sun_path))
+ distantLight.CreateIntensityAttr(450.0)
+ distantLight.CreateAngleAttr(0.5) # soften shadows a bit
+ distantLight.CreateColorAttr(Gf.Vec3f(1.0, 1.0, 1.0))
+ # Shadow attr naming differs across versions; set the underlying USD attribute directly.
+ distantLight.GetPrim().CreateAttribute("shadow:enable", Sdf.ValueTypeNames.Bool).Set(True)
+
+ domeLight = UsdLux.DomeLight.Define(self._stage, Sdf.Path(dome_path))
+ domeLight.CreateIntensityAttr(200.0)
+ domeLight.CreateExposureAttr(1.0)
+ domeLight.CreateColorAttr(Gf.Vec3f(1.0, 1.0, 1.0))
+
+ self._configure_scene()
+
+ # if "enable_viewport" in sim_params:
+ # self._render = sim_params["enable_viewport"]
+
+ def _get_baselink_candidate(self,
+ robot_name: str):
+
+ stage=get_current_stage()
+ all_prims = [prim.GetPath().pathString for prim in stage.Traverse()]
+ filtered_prims = [prim for prim in all_prims if f"/{robot_name}/" in prim]
+
+ matching=min(filtered_prims, key=len) if filtered_prims else None
+
+ return matching.split('/')[-1]
+
    def _configure_scene(self) -> None:
        """Build the full simulation scene.

        Sets up the ground (a flat default plane or a procedurally generated
        terrain), imports the robot URDFs and clones them across all
        environments, creates articulation/rigid-prim views plus contact and
        height-grid sensors, configures the viewport camera and optional
        offline rendering, then resets the sim and initializes robot state
        buffers. Sets ``self.scene_setup_completed`` to True on success.
        """

        # environment
        # per-robot URDF-import flags, replicated from the env options
        self._fix_base = [self._env_opts["is_fixed_base"]] * len(self._robot_names)
        self._self_collide = [self._env_opts["self_collide"]] * len(self._robot_names)
        self._merge_fixed = [self._env_opts["merge_fixed_jnts"]] * len(self._robot_names)

        Journal.log(self.__class__.__name__,
            "_configure_scene",
            "cloning environments...",
            LogType.STAT,
            throw_when_excep = True)

        self._ground_plane_prim_paths=[]
        self._ground_plane=None
        self.terrain_generator = None
        if not self._env_opts["use_flat_ground"]:
            # ensure terrain is large enough to contain all env clones
            spacing = float(self._env_opts["env_spacing"])
            num_envs = self._num_envs
            num_per_row = max(1, int(math.sqrt(num_envs)))
            num_rows = int(math.ceil(num_envs / num_per_row))
            num_cols = int(math.ceil(num_envs / num_rows))
            row_offset = 0.5 * spacing * (num_rows - 1)
            col_offset = 0.5 * spacing * (num_cols - 1)
            margin = spacing # leave a full spacing as border cushion
            required_size = 2.0 * (max(row_offset, col_offset) + margin)

            if required_size > self._env_opts["ground_size"]:
                old_size = self._env_opts["ground_size"]
                self._env_opts["ground_size"] = required_size
                # keep the terrain border consistent with the enlarged ground
                self._env_opts["terrain_border"] = self._env_opts["ground_size"] / 2.0
                Journal.log(self.__class__.__name__,
                    "_configure_scene",
                    f"Ground size increased from {old_size} to {required_size} to fit {num_envs} envs (spacing {spacing}).",
                    LogType.WARN,
                    throw_when_excep = True)

            # symmetric height perturbation range around 0 for the generators below
            min_height=-self._env_opts["dh_ground"]
            max_height=self._env_opts["dh_ground"]
            step=max_height-min_height
            if self._env_opts["ground_type"]=="random":
                terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_random_unif"
                self._ground_plane_prim_paths.append(terrain_prim_path)
                self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path)
                self._ground_plane=self.terrain_generator.create_random_uniform_terrain(terrain_size=self._env_opts["ground_size"],
                    min_height=min_height,
                    max_height=max_height,
                    step=step,
                    position=np.array([0.0, 0.0,0.0]),
                    static_friction=self._env_opts["static_friction"],
                    dynamic_friction=self._env_opts["dynamic_friction"],
                    restitution=self._env_opts["restitution"])
            elif self._env_opts["ground_type"]=="random_patches":
                terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_random_unif_patches"
                self._ground_plane_prim_paths.append(terrain_prim_path)
                self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path)
                self._ground_plane=self.terrain_generator.create_random_patched_terrain(terrain_size=self._env_opts["ground_size"],
                    min_height=min_height,
                    max_height=max_height,
                    step=step,
                    position=np.array([0.0, 0.0,0.0]),
                    static_friction=self._env_opts["static_friction"],
                    dynamic_friction=self._env_opts["dynamic_friction"],
                    restitution=self._env_opts["restitution"],
                    patch_ratio=0.8,
                    patch_size=10
                    )
            elif self._env_opts["ground_type"]=="slopes":
                terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_slopes"
                self._ground_plane_prim_paths.append(terrain_prim_path)
                self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path)
                self._ground_plane=self.terrain_generator.create_sloped_terrain(terrain_size=self._env_opts["ground_size"],
                    slope=-0.5,
                    position=np.array([0.0, 0.0,0.0]),
                    static_friction=self._env_opts["static_friction"],
                    dynamic_friction=self._env_opts["dynamic_friction"],
                    restitution=self._env_opts["restitution"]
                    )
            elif self._env_opts["ground_type"]=="stairs":
                terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stairs"
                self._ground_plane_prim_paths.append(terrain_prim_path)
                self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path)
                self._ground_plane=self.terrain_generator.create_stairs_terrain(terrain_size=self._env_opts["ground_size"],
                    position=np.array([0.0, 0.0,0.0]),
                    static_friction=self._env_opts["static_friction"],
                    dynamic_friction=self._env_opts["dynamic_friction"],
                    restitution=self._env_opts["restitution"],
                    )
            elif self._env_opts["ground_type"]=="stepup":
                terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stepup"
                self._ground_plane_prim_paths.append(terrain_prim_path)
                self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path)
                self._ground_plane=self.terrain_generator.create_stepup_terrain(
                    terrain_size=self._env_opts["ground_size"],
                    stairs_ratio=0.3,
                    min_steps=1,
                    max_steps=1,
                    pyramid_platform_size=15.0,
                    position=np.array([0.0, 0.0, 0.0]),
                    static_friction=self._env_opts["static_friction"],
                    dynamic_friction=self._env_opts["dynamic_friction"],
                    restitution=self._env_opts["restitution"],
                    step_height=0.15
                    )
            elif self._env_opts["ground_type"]=="stepup_prim":
                terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stepup_prim"
                self._ground_plane_prim_paths.append(terrain_prim_path)
                self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path)
                self._ground_plane=self.terrain_generator.create_stepup_prim_terrain(
                    terrain_size=self._env_opts["ground_size"],
                    stairs_ratio=0.9,
                    platform_size=50.0,
                    step_height_lb=self._env_opts["step_height_lb"],
                    step_height_ub=self._env_opts["step_height_ub"],
                    min_step_width=self._env_opts.get("step_width_lb", None),
                    max_step_width=self._env_opts.get("step_width_ub", None),
                    position=np.array([0.0, 0.0, 0.0]),
                    static_friction=self._env_opts["static_friction"],
                    dynamic_friction=self._env_opts["dynamic_friction"],
                    restitution=self._env_opts["restitution"],
                    n_steps=25,
                    area_factor=0.7,
                    random_n_steps=False
                    )
                # apply a custom checker material to the terrain primitives
                # (visual-only; overlays make the texture visible on cube tiles)
                mat_path = self._ensure_lightblue_checker_material()
                self._apply_checker_material_to_terrain(terrain_root_path=terrain_prim_path, material_path=mat_path)
                self._add_checker_overlay_plane(terrain_root_path=terrain_prim_path, material_path=mat_path)
                self._add_checker_overlays_on_tiles(terrain_root_path=terrain_prim_path, material_path=mat_path)
            else:
                ground_type=self._env_opts["ground_type"]
                # throw_when_excep=True -> unknown terrain types abort here
                # despite the "will default to flat ground" wording
                Journal.log(self.__class__.__name__,
                    "_configure_scene",
                    f"Terrain type {ground_type} not supported. Will default to flat ground.",
                    LogType.EXCEP,
                    throw_when_excep = True)

            # add offsets to intial height depending on the terrain heightmap
            if self.terrain_generator is not None:
                stage = get_current_stage()
                up_axis = UsdGeom.GetStageUpAxis(stage)

                spacing = self._env_opts["env_spacing"]
                num_envs = self._num_envs

                # same grid layout as the cloner uses, recomputed here
                num_per_row = max(1, int(np.sqrt(num_envs)))
                num_rows = int(np.ceil(num_envs / num_per_row))
                num_cols = int(np.ceil(num_envs / num_rows))

                row_offset = 0.5 * spacing * (num_rows - 1)
                col_offset = 0.5 * spacing * (num_cols - 1)

                offsets = np.array(self._env_opts["cloning_offset"], dtype=float)

                for env_idx in range(num_envs):
                    row = env_idx // num_cols
                    col = env_idx % num_cols
                    x = row_offset - row * spacing
                    y = col * spacing - col_offset

                    half_extent = self._env_opts["spawn_height_check_half_extent"]
                    # NOTE(review): both branches are identical — the up-axis
                    # distinction appears unfinished; confirm intended handling
                    # for Y-up stages
                    if up_axis == UsdGeom.Tokens.z:
                        height = self.terrain_generator.get_max_height_in_rect(x, y, half_extent=half_extent)
                    else:
                        height = self.terrain_generator.get_max_height_in_rect(x, y, half_extent=half_extent)

                    # lift the spawn point above the local terrain maximum
                    offsets[env_idx][2] += height + self._env_opts["spawn_height_cushion"]

                self._env_opts["cloning_offset"] = offsets

        else:
            # flat-ground fallback: Isaac's default ground plane
            defaul_prim_path=self._env_opts["ground_plane_prim_path"]+"_default"
            self._ground_plane_prim_paths.append(defaul_prim_path)
            self._ground_plane=self._scene.add_default_ground_plane(z_position=0,
                name="terrain",
                prim_path=defaul_prim_path,
                static_friction=self._env_opts["static_friction"],
                dynamic_friction=self._env_opts["dynamic_friction"],
                restitution=self._env_opts["restitution"])
            self._terrain_collision=None

        # apply/read back the physics material on whichever ground was created
        physics_material=UsdPhysics.MaterialAPI.Apply(self._ground_plane.prim)
        physics_material.CreateDynamicFrictionAttr(self._env_opts["dynamic_friction"])
        physics_material.CreateStaticFrictionAttr(self._env_opts["static_friction"])
        physics_material.CreateRestitutionAttr(self._env_opts["restitution"])
        # self._ground_plane.apply_physics_material(physics_material)

        self._terrain_collision=PhysxSchema.PhysxCollisionAPI.Get(get_current_stage(), self._ground_plane.prim_path)
        self._terrain_material=UsdPhysics.MaterialAPI.Get(get_current_stage(), self._ground_plane.prim_path)
        self._terrain_physix_material=PhysxSchema.PhysxMaterialAPI.Get(get_current_stage(), self._ground_plane.prim_path)

        # import, clone and instrument each robot
        for i in range(len(self._robot_names)):
            robot_name = self._robot_names[i]

            urdf_path = self._robot_urdf_paths[robot_name]
            srdf_path = self._robot_srdf_paths[robot_name]
            fix_base = self._fix_base[i]
            self_collide = self._self_collide[i]
            merge_fixed = self._merge_fixed[i]

            self._generate_rob_descriptions(robot_name=robot_name,
                urdf_path=urdf_path,
                srdf_path=srdf_path)
            self._import_urdf(robot_name,
                fix_base=fix_base,
                self_collide=self_collide,
                merge_fixed=merge_fixed)

            # NOTE(review): clone() runs once per robot; with multiple robots
            # the template env is re-cloned on each iteration — confirm intended
            self._cloner.clone(
                source_prim_path=self._env_opts["template_env_ns"],
                prim_paths=self._envs_prim_paths,
                replicate_physics=self._env_opts["replicate_physics"],
                position_offsets=self._env_opts["cloning_offset"]
            ) # we can clone the environment in which all the robos are

            base_link_name=self._env_opts["base_linkname"]
            if self._env_opts["deduce_base_link"]:
                base_link_name=self._get_baselink_candidate(robot_name=robot_name)

            self._robots_art_views[robot_name] = ArticulationView(name = robot_name + "ArtView",
                prim_paths_expr = self._env_opts["envs_ns"] + "/env_.*"+ "/" + robot_name + "/" + base_link_name,
                reset_xform_properties=False)
            self._robots_articulations[robot_name] = self._scene.add(self._robots_art_views[robot_name])

            # height grid sensor (terrain may be None if using flat ground)
            self._height_sensors[robot_name] = HeightGridSensor(
                terrain_utils=self.terrain_generator if not self._env_opts["use_flat_ground"] else None,
                grid_size=int(self._env_opts["height_sensor_pixels"]),
                resolution=float(self._env_opts["height_sensor_resolution"]),
                forward_offset=float(self._env_opts["height_sensor_forward_offset"]),
                lateral_offset=float(self._env_opts["height_sensor_lateral_offset"]),
                n_envs=self._num_envs,
                device=self._device,
                dtype=self._dtype)
            # ensure shared-data flags are set if a height sensor is active
            self._env_opts["height_sensor_pixels"] = int(self._env_opts["height_sensor_pixels"])
            self._env_opts["height_sensor_resolution"] = float(self._env_opts["height_sensor_resolution"])
            self._enable_height_shared = True
            self._height_sensor_pixels = self._env_opts["height_sensor_pixels"]
            self._height_sensor_resolution = self._env_opts["height_sensor_resolution"]
            self._height_vis_step[robot_name] = 0
            if self._env_opts["enable_height_vis"]:
                self._height_vis[robot_name] = HeightGridVisualizer(
                    robot_name=robot_name,
                    num_envs=self._num_envs,
                    grid_size=int(self._env_opts["height_sensor_pixels"]),
                    resolution=float(self._env_opts["height_sensor_resolution"]),
                    marker_radius=float(self._env_opts["height_vis_radius"]),
                    forward_offset=float(self._env_opts["height_sensor_forward_offset"]),
                    lateral_offset=float(self._env_opts["height_sensor_lateral_offset"]),
                    device=self._device,
                    dtype=self._dtype)

            self._blink_rigid_prim_views[robot_name] = RigidPrimView(prim_paths_expr=self._env_opts["envs_ns"] + "/env_.*"+ "/" + robot_name + "/" + base_link_name,
                name = robot_name + "RigidPrimView") # base link prim views
            self._scene.add(self._blink_rigid_prim_views[robot_name]) # need to add so it is properly initialized when resetting world

            # self._robots_geom_prim_views[robot_name] = GeometryPrimView(name = robot_name + "GeomView",
            #     prim_paths_expr = self._env_ns + "/env*"+ "/" + robot_name,
            #     # prepare_contact_sensors = True
            # )
            # self._robots_geom_prim_views[robot_name].apply_collision_apis() # to be able to apply contact sensors

            # init contact sensors
            self._init_contact_sensors(robot_name=robot_name) # IMPORTANT: this has to be called
            # after calling the clone() method and initializing articulation views!!

        # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_1/collisions/mesh_1")
        # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_2/collisions/mesh_1")
        # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_3/collisions/mesh_1")
        # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_4/collisions/mesh_1")

        # filter collisions between default ground plane and custom terrains
        # self._cloner.filter_collisions(physicsscene_path = self._physics_context.prim_path,
        #     collision_root_path = "/World/terrain_collisions",
        #     prim_paths=[self._ground_plane_prim_paths[1]],
        #     global_paths=[self._ground_plane_prim_paths[0]]
        #     )

        # delete_prim(self._env_opts["ground_plane_prim_path"] + "/SphereLight") # we remove the default spherical light

        # set default camera viewport position and target
        camera_position=[4.2, 4.2, 1.5]
        camera_target=[0, 0, 0]
        # use a single camera prim (configurable) for both viewport and render products
        camera_prim = self._env_opts["camera_prim_path"]
        self._set_initial_camera_params(camera_position=camera_position,
            camera_target=camera_target,
            camera_prim_path=camera_prim)

        if self._env_opts["render_to_file"]:
            # base output dir
            from datetime import datetime
            timestamp = datetime.now().strftime("h%H_m%M_s%S_%d_%m_%Y")
            self._render_output_dir = f"/tmp/IsaacRenderings/{timestamp}"
            res = tuple(int(x) for x in self._env_opts["render_resolution"])

            # create render product from chosen camera prim
            self._render_product = rep.create.render_product(camera_prim,
                res, name='rendering_camera')
            self._render_writer = rep.WriterRegistry.get("BasicWriter")
            self._render_writer.initialize(output_dir=self._render_output_dir,
                rgb=True)
            self._render_writer.attach([self._render_product])
            # optional top-down capture
            if self._env_opts["render_panoramic_cam"]:
                td_height = float(self._env_opts["render_panoramic_cam_height"])
                td_dir = self._render_output_dir + "/panoramic_cam"
                td_offset = self._env_opts["render_panoramic_cam_target_xy"]
                td_target_z = float(self._env_opts["render_panoramic_cam_target_z"])
                pos = [8.0, 11.0, td_height]
                self._panoramic_cam_camera = rep.create.camera(focal_length=12,
                    name='rendering_camera_panoramic_cam',
                    clipping_range = (1, 200),
                    position = pos,
                    look_at = [td_offset[0], td_offset[1], td_target_z])
                self._panoramic_cam_render_product = rep.create.render_product(self._panoramic_cam_camera,
                    res, name='rendering_camera_panoramic_cam')
                self._panoramic_cam_writer = rep.WriterRegistry.get("BasicWriter")
                self._panoramic_cam_writer.initialize(output_dir=td_dir, rgb=True)
                self._panoramic_cam_writer.attach([self._panoramic_cam_render_product])

        self.apply_collision_filters(self._physics_context.prim_path,
            "/World/collisions")

        self._reset_sim()

        self._fill_robot_info_from_world()
        # initializes robot state data

        # update solver options
        self._update_art_solver_options()
        self._get_solver_info() # get again solver option before printing everything
        self._print_envs_info() # debug print

        # for n in range(self._n_init_steps): # run some initialization steps
        #     self._step_sim()

        self._init_robots_state()

        self.scene_setup_completed = True

        # NOTE(review): source tag says "set_up_scene" while this method is
        # _configure_scene — string kept as-is, confirm before changing
        Journal.log(self.__class__.__name__,
            "set_up_scene",
            "finished scene setup...",
            LogType.STAT,
            throw_when_excep = True)

        # low-level Isaac interfaces used elsewhere in the class
        self._is = _sensor.acquire_imu_sensor_interface()
        self._dyn_control=_dynamic_control.acquire_dynamic_control_interface()
+
+ def _set_contact_links_material(self, prim_path: str):
+ prim=get_prim_at_path(prim_path)
+ physics_material=UsdPhysics.MaterialAPI.Apply(prim)
+ physics_material.CreateDynamicFrictionAttr(0)
+ physics_material.CreateStaticFrictionAttr(0)
+ physics_material.CreateRestitutionAttr(1.0)
+ physxMaterialAPI=PhysxSchema.PhysxMaterialAPI.Apply(prim)
+ physxMaterialAPI.CreateFrictionCombineModeAttr().Set("multiply") # average, min, multiply, max
+ physxMaterialAPI.CreateRestitutionCombineModeAttr().Set("multiply")
+
+ def _get_lightblue_checker_texture_path(self) -> str:
+ tex_rel_path = os.path.join(os.path.dirname(__file__), "..", "assets", "textures", "ibrido_terrain_texture.png")
+ return os.path.abspath(tex_rel_path)
+
    def _ensure_lightblue_checker_material(self):
        """Create (or reuse) a light-blue checker material for primitive terrains.

        Builds a UsdPreviewSurface shader graph
        (st reader -> 2D UV transform -> UV texture -> PBR surface) bound to
        the texture returned by _get_lightblue_checker_texture_path().

        Returns:
            str: the material prim path ("/World/Looks/IbridoCheckerMaterial").
        """
        stage = get_current_stage()
        mat_path = "/World/Looks/IbridoCheckerMaterial"
        mat_prim = stage.GetPrimAtPath(mat_path)
        # idempotent: reuse the material if a previous call created it
        if mat_prim.IsValid():
            return mat_path

        texture_path = self._get_lightblue_checker_texture_path()

        material = UsdShade.Material.Define(stage, mat_path)

        # reads the mesh "st" primvar (UV coordinates)
        st_reader = UsdShade.Shader.Define(stage, f"{mat_path}/PrimvarReader_st")
        st_reader.CreateIdAttr("UsdPrimvarReader_float2")
        st_reader.CreateInput("varname", Sdf.ValueTypeNames.Token).Set("st")
        st_reader.CreateOutput("result", Sdf.ValueTypeNames.Float2)

        uv_transform = UsdShade.Shader.Define(stage, f"{mat_path}/UVTransform")
        uv_transform.CreateIdAttr("UsdTransform2d")
        # keep UV scale at 1 here; tiling is controlled via mesh UVs
        uv_transform.CreateInput("in", Sdf.ValueTypeNames.Float2).ConnectToSource(st_reader.GetOutput("result"))
        uv_transform.CreateInput("scale", Sdf.ValueTypeNames.Float2).Set(Gf.Vec2f(1.0, 1.0))
        uv_transform.CreateOutput("result", Sdf.ValueTypeNames.Float2)

        tex = UsdShade.Shader.Define(stage, f"{mat_path}/CheckerTex")
        tex.CreateIdAttr("UsdUVTexture")
        tex.CreateInput("file", Sdf.ValueTypeNames.Asset).Set(Sdf.AssetPath(texture_path))
        tex.CreateInput("st", Sdf.ValueTypeNames.Float2).ConnectToSource(uv_transform.GetOutput("result"))
        tex.CreateInput("wrapS", Sdf.ValueTypeNames.Token).Set("repeat")
        tex.CreateInput("wrapT", Sdf.ValueTypeNames.Token).Set("repeat")
        # nearest filtering keeps the checker edges crisp
        tex.CreateInput("minFilter", Sdf.ValueTypeNames.Token).Set("nearest")
        tex.CreateInput("magFilter", Sdf.ValueTypeNames.Token).Set("nearest")
        # disable mipmaps to avoid blurring sharp edges
        tex.CreateInput("mipFilter", Sdf.ValueTypeNames.Token).Set("nearest")
        tex.CreateInput("enableMipMap", Sdf.ValueTypeNames.Bool).Set(False)
        # light-blue fallback color used when the texture file cannot be read
        tex.CreateInput("fallback", Sdf.ValueTypeNames.Color4f).Set(Gf.Vec4f(0.69, 0.85, 1.0, 1.0))
        tex.CreateOutput("rgb", Sdf.ValueTypeNames.Float3)
        tex.CreateOutput("a", Sdf.ValueTypeNames.Float)

        pbr = UsdShade.Shader.Define(stage, f"{mat_path}/PBRShader")
        pbr.CreateIdAttr("UsdPreviewSurface")
        pbr.CreateInput("diffuseColor", Sdf.ValueTypeNames.Color3f).ConnectToSource(tex.GetOutput("rgb"))
        pbr.CreateInput("roughness", Sdf.ValueTypeNames.Float).Set(0.45)
        pbr.CreateInput("metallic", Sdf.ValueTypeNames.Float).Set(0.0)
        pbr.CreateOutput("surface", Sdf.ValueTypeNames.Token)

        material.CreateSurfaceOutput().ConnectToSource(pbr.GetOutput("surface"))

        return mat_path
+
+ def _ensure_groundplane_material(self):
+ """Guarantee a ground-plane material exists (default checker) and return its path."""
+ stage = get_current_stage()
+ mat_path = "/World/Looks/groundPlaneMaterial"
+ mat_prim = stage.GetPrimAtPath(mat_path)
+ if mat_prim.IsValid():
+ return mat_path
+
+ # create a temporary default ground plane to spawn the checker material, then delete the geom
+ tmp_gp_path = "/World/tmp_ground_for_material"
+ self._scene.add_default_ground_plane(z_position=-1000.0,
+ name="tmp_ground_for_material",
+ prim_path=tmp_gp_path,
+ static_friction=self._env_opts["static_friction"],
+ dynamic_friction=self._env_opts["dynamic_friction"],
+ restitution=self._env_opts["restitution"])
+
+ mat_prim = stage.GetPrimAtPath(mat_path)
+ prim_utils.delete_prim(tmp_gp_path)
+
+ return mat_path if mat_prim.IsValid() else None
+
+ def _apply_checker_material_to_terrain(self, terrain_root_path: str, material_path: str):
+ """Bind the checker material to all terrain prims (visual only)."""
+
+ stage = get_current_stage()
+ mat_prim = stage.GetPrimAtPath(material_path)
+ if not mat_prim.IsValid():
+ return
+ material = UsdShade.Material(mat_prim)
+ for prim in stage.Traverse():
+ path = prim.GetPath().pathString
+ if not path.startswith(terrain_root_path):
+ continue
+ binding = UsdShade.MaterialBindingAPI.Apply(prim)
+ binding.Bind(material, UsdShade.Tokens.strongerThanDescendants)
+
    def _add_checker_overlay_plane(self, terrain_root_path: str, material_path: str):
        """Create a thin visual-only mesh with UVs so the checker pattern shows up even on cube prims.

        Args:
            terrain_root_path: root prim path of the generated terrain.
            material_path: prim path of the checker material to bind.
        """
        stage = get_current_stage()
        plane_path = f"{terrain_root_path}/visual_checker"
        plane_prim = stage.GetPrimAtPath(plane_path)
        if plane_prim.IsValid():
            # if already exists, just (re)bind material
            mat_prim = stage.GetPrimAtPath(material_path)
            if mat_prim.IsValid():
                UsdShade.MaterialBindingAPI.Apply(plane_prim).Bind(UsdShade.Material(mat_prim), UsdShade.Tokens.strongerThanDescendants)
            return

        # try to read base slab dimensions/position to size the overlay
        slab_path = terrain_root_path + "_slab"
        slab_prim = stage.GetPrimAtPath(slab_path)
        # fall back to env-opt ground size if the slab prim is missing
        center = Gf.Vec3f(0.0, 0.0, 0.0)
        width = float(self._env_opts.get("ground_size", 50.0))
        length = width
        thickness = 0.1
        if slab_prim.IsValid():
            xformable = UsdGeom.Xformable(slab_prim)
            # NOTE(review): assumes the slab encodes its size in a scale op and
            # its position in a translate op — confirm against the terrain generator
            for op in xformable.GetOrderedXformOps():
                if op.GetOpType() == UsdGeom.XformOp.TypeTranslate:
                    center = Gf.Vec3f(op.Get())
                elif op.GetOpType() == UsdGeom.XformOp.TypeScale:
                    scale = op.Get()
                    width = float(scale[0])
                    length = float(scale[1])
                    thickness = float(scale[2])

        half_w = 0.5 * width
        half_l = 0.5 * length
        z = center[2] + 0.5 * thickness + 1e-3 # slightly above the slab to avoid z-fighting

        # single quad covering the slab footprint
        plane = UsdGeom.Mesh.Define(stage, plane_path)
        plane.CreatePointsAttr([
            Gf.Vec3f(center[0] - half_w, center[1] - half_l, z),
            Gf.Vec3f(center[0] + half_w, center[1] - half_l, z),
            Gf.Vec3f(center[0] + half_w, center[1] + half_l, z),
            Gf.Vec3f(center[0] - half_w, center[1] + half_l, z),
        ])
        plane.CreateFaceVertexCountsAttr([4])
        plane.CreateFaceVertexIndicesAttr([0, 1, 2, 3])
        plane.CreateDoubleSidedAttr(True)

        # increase tiling density; adjustable via env opt
        # NOTE(review): the density factor 3.0 is hard-coded here, while
        # _add_checker_overlays_on_tiles reads "checker_uv_density" — confirm
        uv_repeats = max(1, int((width / 10.0) * 3.0))
        primvars = UsdGeom.PrimvarsAPI(plane)
        st = primvars.CreatePrimvar("st", Sdf.ValueTypeNames.TexCoord2fArray, UsdGeom.Tokens.vertex)
        st.Set(Vt.Vec2fArray([
            Gf.Vec2f(0.0, 0.0),
            Gf.Vec2f(uv_repeats, 0.0),
            Gf.Vec2f(uv_repeats, uv_repeats),
            Gf.Vec2f(0.0, uv_repeats),
        ]))
        st.SetInterpolation(UsdGeom.Tokens.vertex)

        mat_prim = stage.GetPrimAtPath(material_path)
        if mat_prim.IsValid():
            material = UsdShade.Material(mat_prim)
            UsdShade.MaterialBindingAPI.Apply(plane.GetPrim()).Bind(material, UsdShade.Tokens.strongerThanDescendants)
+
    def _add_checker_overlays_on_tiles(self, terrain_root_path: str, material_path: str):
        """Add visual quads on top of each tile cube so the checker texture appears on raised steps.

        Args:
            terrain_root_path: root prim path of the generated terrain.
            material_path: prim path of the checker material to bind.
        """
        stage = get_current_stage()
        mat_prim = stage.GetPrimAtPath(material_path)
        if not mat_prim.IsValid():
            return
        material = UsdShade.Material(mat_prim)
        for prim in stage.Traverse():
            path = prim.GetPath().pathString
            # only Cube prims under the terrain root, skipping walls and the base slab
            if not path.startswith(terrain_root_path):
                continue
            if prim.GetTypeName() != "Cube":
                continue
            name = path.split("/")[-1]
            if "wall" in name or name.endswith("_slab"):
                continue
            xformable = UsdGeom.Xformable(prim)
            center = Gf.Vec3f(0.0, 0.0, 0.0)
            scale = Gf.Vec3f(1.0, 1.0, 1.0)
            # NOTE(review): assumes tile cubes encode placement via translate
            # and size via scale xform ops — confirm against the terrain generator
            for op in xformable.GetOrderedXformOps():
                if op.GetOpType() == UsdGeom.XformOp.TypeTranslate:
                    center = Gf.Vec3f(op.Get())
                elif op.GetOpType() == UsdGeom.XformOp.TypeScale:
                    scale = Gf.Vec3f(op.Get())
            width, length, height = float(scale[0]), float(scale[1]), float(scale[2])
            half_w = 0.5 * width
            half_l = 0.5 * length
            # quad sits just above the tile's top face to avoid z-fighting
            z = center[2] + 0.5 * height + 1e-3
            overlay_path = f"{path}_checker"
            if stage.GetPrimAtPath(overlay_path).IsValid():
                # overlay already present: only refresh the material binding
                UsdShade.MaterialBindingAPI.Apply(stage.GetPrimAtPath(overlay_path)).Bind(material, UsdShade.Tokens.strongerThanDescendants)
                continue
            mesh = UsdGeom.Mesh.Define(stage, overlay_path)
            mesh.CreatePointsAttr([
                Gf.Vec3f(center[0] - half_w, center[1] - half_l, z),
                Gf.Vec3f(center[0] + half_w, center[1] - half_l, z),
                Gf.Vec3f(center[0] + half_w, center[1] + half_l, z),
                Gf.Vec3f(center[0] - half_w, center[1] + half_l, z),
            ])
            mesh.CreateFaceVertexCountsAttr([4])
            mesh.CreateFaceVertexIndicesAttr([0, 1, 2, 3])
            mesh.CreateDoubleSidedAttr(True)
            # tiling density scales with tile width; tunable via env opt
            uv_repeats = max(1, int((width / 10.0) * float(self._env_opts.get("checker_uv_density", 3.0))))
            primvars = UsdGeom.PrimvarsAPI(mesh)
            st = primvars.CreatePrimvar("st", Sdf.ValueTypeNames.TexCoord2fArray, UsdGeom.Tokens.vertex)
            st.Set(Vt.Vec2fArray([
                Gf.Vec2f(0.0, 0.0),
                Gf.Vec2f(uv_repeats, 0.0),
                Gf.Vec2f(uv_repeats, uv_repeats),
                Gf.Vec2f(0.0, uv_repeats),
            ]))
            st.SetInterpolation(UsdGeom.Tokens.vertex)
            UsdShade.MaterialBindingAPI.Apply(mesh.GetPrim()).Bind(material, UsdShade.Tokens.strongerThanDescendants)
+
+ def _is_link(self, prim):
+ return prim.GetTypeName() == 'Xform'
+
+ def _is_joint(self, prim):
+ return prim.GetTypeName() == 'PhysicsRevoluteJoint'
+
+ def _create_collision_group(self, group_path, link_paths):
+ """
+ Create a collision group under the given group_path that contains the links.
+ Args:
+ group_path (str): Path to create the collision group.
+ link_paths (List[str]): List of link paths to include in this group.
+ """
+ collision_group = Sdf.PrimSpec(
+ self._stage.GetRootLayer().GetPrimAtPath(group_path),
+ group_path.split("/")[-1],
+ Sdf.SpecifierDef,
+ "PhysicsCollisionGroup"
+ )
+ # Add the links to the collision group
+ for link_path in link_paths:
+ includes_rel = Sdf.RelationshipSpec(collision_group, "collection:colliders:includes", False)
+ includes_rel.targetPathList.Append(link_path)
+
+ def _add_collision_filter(self, group_path, link1, link2):
+ """
+ Filters collision between two successive links.
+
+ Args:
+ group_path (str): Path of the collision group.
+ link1 (str): Path of the first link.
+ link2 (str): Path of the second link.
+ """
+ # Create a relationship to filter collisions between the two links
+ filtered_groups = Sdf.RelationshipSpec(
+ self._stage.GetPrimAtPath(group_path),
+ "physics:filteredGroups",
+ False
+ )
+ filtered_groups.targetPathList.Append(link1)
+ filtered_groups.targetPathList.Append(link2)
+
+ def _render_sim(self, mode="human"):
+
+ if mode == "human":
+ # follow requested robot/env
+ if self._env_opts["use_follow_camera"]:
+ ridx = int(self._env_opts["render_follow_robot_idx"])
+ eidx = int(self._env_opts["render_follow_env_idx"])
+ if ridx < len(self._robot_names) and eidx < self._num_envs:
+ rname = self._robot_names[ridx]
+ pos = self._root_p.get(rname, None)
+ if pos is not None and pos.shape[0] > eidx:
+ base = pos[eidx].detach().cpu()
+ offset = torch.as_tensor(self._env_opts["render_follow_offset"],
+ device=base.device, dtype=base.dtype)
+ target_offset = torch.as_tensor(self._env_opts["render_follow_target_offset"],
+ device=base.device, dtype=base.dtype)
+ quat = self._root_q.get(rname, None)
+ if quat is not None and quat.shape[0] > eidx:
+ q = quat[eidx].detach().cpu()
+ w, x, y, z = q.unbind(-1)
+ yaw = torch.atan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z))
+ cy, sy = torch.cos(yaw), torch.sin(yaw)
+ rot = torch.tensor([[cy, -sy], [sy, cy]], device=base.device, dtype=base.dtype)
+ offset_xy = torch.matmul(rot, offset[:2])
+ target_offset_xy = torch.matmul(rot, target_offset[:2])
+ offset = torch.stack((offset_xy[0], offset_xy[1], offset[2]))
+ target_offset = torch.stack((target_offset_xy[0], target_offset_xy[1], target_offset[2]))
+ eye = (base + offset).tolist()
+ target = (base + target_offset).tolist()
+ set_camera_view(eye=eye, target=target, camera_prim_path=self._env_opts["camera_prim_path"])
+
+ self._world.render()
+ # optional height grid visualization
+ if self._env_opts["enable_height_vis"]:
+ for robot_name, vis in self._height_vis.items():
+ # use latest stored states
+ if robot_name not in self._height_imgs or robot_name not in self._height_sensors:
+ continue
+ heights = self._height_imgs.get(robot_name, None)
+ if heights is None or heights.numel() == 0:
+ continue
+ pos_src = self._root_p.get(robot_name, None)
+ quat_src = self._root_q.get(robot_name, None)
+ if pos_src is None or quat_src is None:
+ continue
+ step = self._height_vis_step.get(robot_name, 0)
+ period = max(1, int(self._env_opts["height_vis_update_period"]))
+ if step % period == 0:
+ try:
+ vis.update(
+ base_positions=pos_src,
+ base_quats=quat_src,
+ heights=heights)
+ except Exception as exc:
+ print(f"[height_vis] update failed for {robot_name}: {exc}")
+ self._height_vis_step[robot_name] = step + 1
+ return None
+ elif mode == "rgb_array":
+ # check if viewport is enabled -- if not, then complain because we won't get any data
+ if not self._render or not self._record:
+ exception = f"Cannot render '{mode}' when rendering is not enabled. Please check the provided" + \
+ "arguments to the environment class at initialization."
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ # obtain the rgb data
+ rgb_data = self._rgb_annotator.get_data()
+ # convert to numpy array
+ rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape)
+ # return the rgb data
+ return rgb_data[:, :, :3]
+ else:
+ return None
+
+ def _create_viewport_render_product(self, resolution=None):
+ """Create a render product of the viewport for rendering."""
+
+ try:
+
+ # create render product
+ camera_prim = self._env_opts["camera_prim_path"]
+ res = resolution
+ if res is None:
+ res = tuple(int(x) for x in self._env_opts["render_resolution"])
+ self._render_product = rep.create.render_product(camera_prim, res)
+ # create rgb annotator -- used to read data from the render product
+ self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu")
+ self._rgb_annotator.attach([self._render_product])
+ self._record = True
+ except Exception as e:
+ carb.log_info("omni.replicator.core could not be imported. Skipping creation of render product.")
+ carb.log_info(str(e))
+
+ def _close(self):
+ if self._simulation_app.is_running():
+ self._simulation_app.close()
+
+ def _step_world(self):
+ self._world.step(render=False, step_sim=True)
+
+ if (self._render) and (self._render_step_counter%self._env_opts["rendering_freq"]==0):
+ # if self._env_opts["render_to_file"]:
+ # rep.orchestrator.step()
+ self._render_sim() # manually trigger rendering (World.step(render=True) for some reason
+ # will step the simulation for a dt==rendering_dt)
+ self._render_step_counter += 1
+
+ def _generate_jnt_imp_control(self, robot_name: str):
+
+ jnt_imp_controller = OmniJntImpCntrl(articulation=self._robots_articulations[robot_name],
+ device=self._device,
+ dtype=self._dtype,
+ enable_safety=True,
+ urdf_path=self._urdf_dump_paths[robot_name],
+ config_path=self._jnt_imp_config_paths[robot_name],
+ enable_profiling=False,
+ debug_checks=self._debug,
+ override_art_controller=self._override_low_lev_controller)
+
+ return jnt_imp_controller
+
+ def _reset_sim(self):
+ self._world.reset(soft=False)
+
+ def _reset_state(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None,
+ randomize: bool = False):
+
+ if env_indxs is not None:
+ if self._debug:
+ if self._use_gpu:
+ if not env_indxs.device.type == "cuda":
+ error = "Provided env_indxs should be on GPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ else:
+ if not env_indxs.device.type == "cpu":
+ error = "Provided env_indxs should be on CPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ if randomize:
+ self._randomize_yaw(robot_name=robot_name,env_indxs=env_indxs)
+
+ # root q
+ self._robots_art_views[robot_name].set_world_poses(positions = self._root_p_default[robot_name][env_indxs, :],
+ orientations=self._root_q_default[robot_name][env_indxs, :],
+ indices = env_indxs)
+ # jnts q
+ self._robots_art_views[robot_name].set_joint_positions(positions = self._jnts_q_default[robot_name][env_indxs, :],
+ indices = env_indxs)
+ # root v and omega
+ self._robots_art_views[robot_name].set_joint_velocities(velocities = self._jnts_v_default[robot_name][env_indxs, :],
+ indices = env_indxs)
+ # jnts v
+ concatenated_vel = torch.cat((self._root_v_default[robot_name][env_indxs, :],
+ self._root_omega_default[robot_name][env_indxs, :]), dim=1)
+ self._robots_art_views[robot_name].set_velocities(velocities = concatenated_vel,
+ indices = env_indxs)
+ # jnts eff
+ self._robots_art_views[robot_name].set_joint_efforts(efforts = self._jnts_eff_default[robot_name][env_indxs, :],
+ indices = env_indxs)
+ self._reset_perturbations(robot_name=robot_name, env_indxs=env_indxs)
+ else:
+
+ if randomize:
+ self._randomize_yaw(robot_name=robot_name,env_indxs=None)
+
+ # root q
+ self._robots_art_views[robot_name].set_world_poses(positions = self._root_p_default[robot_name][:, :],
+ orientations=self._root_q_default[robot_name][:, :],
+ indices = None)
+ # jnts q
+ self._robots_art_views[robot_name].set_joint_positions(positions = self._jnts_q_default[robot_name][:, :],
+ indices = None)
+ # root v and omega
+ self._robots_art_views[robot_name].set_joint_velocities(velocities = self._jnts_v_default[robot_name][:, :],
+ indices = None)
+ # jnts v
+ concatenated_vel = torch.cat((self._root_v_default[robot_name][:, :],
+ self._root_omega_default[robot_name][:, :]), dim=1)
+ self._robots_art_views[robot_name].set_velocities(velocities = concatenated_vel,
+ indices = None)
+ # jnts eff
+ self._robots_art_views[robot_name].set_joint_efforts(efforts = self._jnts_eff_default[robot_name][:, :],
+ indices = None)
+ self._reset_perturbations(robot_name=robot_name, env_indxs=None)
+
+ # we update the robots state
+ self._read_root_state_from_robot(env_indxs=env_indxs,
+ robot_name=robot_name)
+ self._read_jnts_state_from_robot(env_indxs=env_indxs,
+ robot_name=robot_name)
+
+ def _reset_perturbations(self, robot_name: str, env_indxs: torch.Tensor = None):
+ """Clear perturbation state and wrenches for selected envs."""
+ if robot_name not in self._pert_active:
+ return
+ if env_indxs is None:
+ self._pert_active[robot_name].zero_()
+ self._pert_steps_remaining[robot_name].zero_()
+ self._pert_forces_world[robot_name].zero_()
+ self._pert_torques_world[robot_name].zero_()
+ self._pert_det_counter[robot_name].zero_()
+ else:
+ self._pert_active[robot_name][env_indxs] = False
+ self._pert_steps_remaining[robot_name][env_indxs] = 0
+ self._pert_forces_world[robot_name][env_indxs, :] = 0
+ self._pert_torques_world[robot_name][env_indxs, :] = 0
+ self._pert_det_counter[robot_name][env_indxs] = 0
+
    def _process_perturbations(self):
        """Advance the random external-wrench (perturbation) state machine and apply wrenches.

        For each robot, every call:
          1. decrements step counters of active perturbations (in place);
          2. deactivates and zeroes perturbations that just expired;
          3. triggers new perturbations -- either with deterministic spacing
             ("det_pert_rate") or probabilistically -- sampling impulse direction,
             magnitude and duration, then converting impulse -> force/torque by
             dividing by the sampled duration in seconds;
          4. applies the resulting world-frame forces/torques to the base links.

        Buffers (_pert_active, _pert_steps_remaining, _pert_forces_world, ...)
        are pre-allocated per robot and updated in place to avoid per-step
        allocations.
        """

        # Iterate over each robot view
        for i in range(len(self._robot_names)):
            robot_name = self._robot_names[i]

            # Pre-fetch views for code clarity (references, not copies)
            active = self._pert_active[robot_name]
            steps_rem = self._pert_steps_remaining[robot_name]
            forces_world = self._pert_forces_world[robot_name]
            torques_world = self._pert_torques_world[robot_name]
            # planar-only mode is optional in the env opts; defaults to False
            planar_only = self._env_opts["pert_planar_only"] if "pert_planar_only" in self._env_opts else False

            # --- 1. Update Active Counters (In-Place) ---
            if active.any():
                # In-place subtraction
                steps_rem[active] -= 1

            # --- 2. Reset Finished Perturbations (In-Place) ---
            # Logic: Active AND (Steps <= 0)
            # Note: Creating 'newly_ended' boolean mask is a tiny unavoidable allocation
            newly_ended = active & (steps_rem <= 0)

            if newly_ended.any():
                # Use masked_fill_ for in-place zeroing
                active.masked_fill_(newly_ended, False)
                forces_world[newly_ended, :]=0
                torques_world[newly_ended, :]=0
                steps_rem.masked_fill_(newly_ended, 0)

            # --- 3. Trigger New Perturbations ---

            det_rate = self._env_opts["det_pert_rate"]
            if det_rate:
                # deterministic spacing: count physics steps and trigger when threshold reached (if not already active)
                det_counter = self._pert_det_counter[robot_name]
                det_counter += 1
                trigger_mask = (det_counter >= self._pert_det_steps) & (~active)
            else:
                # Reuse scratch buffer for probability check
                # Assumes self._pert_scratch is (num_envs, 1) pre-allocated
                self._pert_scratch[robot_name].uniform_(0.0, 1.0) # used for triggering new perturbations
                # Check probs against threshold
                # Flatten scratch to (N,) to match 'active' mask
                trigger_mask = (self._pert_scratch[robot_name].flatten() < self._pert_wrenches_prob) & (~active)

            if trigger_mask.any():

                # Cache weights (references)
                weight = self._weights[robot_name]

                # we now treat the configured "max_*_impulse_*" as the maximum **impulse** (N·s or N·m·s),
                # and convert impulse -> force/torque by dividing by the sampled duration (seconds).
                # Use per-robot weight scaling as before.
                lin_impulse_max = self._env_opts["max_lin_impulse_norm"] * weight
                ang_impulse_max = self._env_opts["max_ang_impulse_norm"] * weight

                # --- Force (Impulse) Direction Generation (Reuse _pert_lindir buffer) ---
                lindir = self._pert_lindir[robot_name] # (N, 3)

                if planar_only:
                    # planar push direction from random yaw
                    # NOTE: the scratch buffer is clobbered here; it is re-sampled
                    # before each later use, so reuse is safe
                    yaw_angles = self._pert_scratch[robot_name].uniform_(0.0, 2*math.pi).flatten()
                    lindir[:, 0] = torch.cos(yaw_angles)
                    lindir[:, 1] = torch.sin(yaw_angles)
                    lindir[:, 2] = 0.0
                else:
                    # 1. Fill with Standard Normal noise in-place
                    lindir.normal_()
                    # 2. Normalize in-place
                    norms = torch.norm(lindir, dim=1, keepdim=True).clamp_min_(1e-6)
                    lindir.div_(norms)

                # 3. Sample linear impulse magnitudes (reuse scratch)
                # scratch has shape (N,1) - uniform [0,1]
                self._pert_scratch[robot_name].uniform_(self._env_opts["lin_impulse_mag_min"], self._env_opts["lin_impulse_mag_max"])
                # impulse vectors = unit_dir * (rand * lin_impulse_max)

                lindir.mul_(self._pert_scratch[robot_name] * lin_impulse_max) # now contains linear impulses (N,3)

                # --- Angular (Impulse) Direction Generation (Reuse _pert_angdir buffer) ---
                angdir = self._pert_angdir[robot_name] # (N, 3)

                if planar_only:
                    angdir.zero_() # no torque when planar-only is requested
                else:
                    # 1. Fill with Standard Normal noise
                    angdir.normal_()
                    # 2. Normalize
                    norms = torch.norm(angdir, dim=1, keepdim=True).clamp_min_(1e-6)
                    angdir.div_(norms)

                # 3. Sample angular impulse magnitudes (reuse scratch)
                self._pert_scratch[robot_name].uniform_(0.0, 1.0)
                angdir.mul_(self._pert_scratch[robot_name] * ang_impulse_max) # now contains angular impulses (N,3)

                # --- Duration Generation (Reuse _pert_durations) ---
                # Keep integer steps sampling (same shape/device/dtype)
                self._pert_durations[robot_name] = torch.randint_like(
                    self._pert_durations[robot_name],
                    low=self._pert_min_steps,
                    high=self._pert_max_steps + 1
                )

                # --- convert to float
                duration_steps = self._pert_durations[robot_name].to(dtype=lindir.dtype, device=lindir.device)
                # duration in seconds (shape (N,))
                duration_seconds = duration_steps * self.physics_dt()
                # avoid divide-by-zero
                duration_seconds = duration_seconds.clamp_min_(1e-6)

                # compute per-step forces/torques = impulse / duration_seconds
                # lindir currently holds linear impulses, angdir holds angular impulses
                forces_to_apply = lindir / duration_seconds
                torques_to_apply = angdir / duration_seconds

                # Optional clipping based on robot weight (min/max)
                f_norm = torch.norm(forces_to_apply, dim=1, keepdim=True).clamp_min_(1e-9)
                target_norm = f_norm

                # clamp force norm to [weight*min_scale, weight*max_scale] when the
                # corresponding scale is positive (0 disables that bound)
                f_clip_scale_max = self._env_opts["pert_force_max_weight_scale"]
                if f_clip_scale_max > 0.0:
                    force_max = self._weights[robot_name] * f_clip_scale_max # (N,1)
                    target_norm = torch.minimum(target_norm, force_max)

                f_clip_scale_min = self._env_opts["pert_force_min_weight_scale"]
                if f_clip_scale_min > 0.0:
                    force_min = self._weights[robot_name] * f_clip_scale_min
                    target_norm = torch.maximum(target_norm, force_min)

                forces_to_apply = forces_to_apply * (target_norm / f_norm)

                # torque norm is only clipped from above (scaled by a lever arm)
                t_clip_scale = self._env_opts["pert_torque_max_weight_scale"]
                if t_clip_scale > 0.0:
                    torque_max = self._weights[robot_name] * self._env_opts["max_ang_impulse_lever"] * t_clip_scale
                    t_norm = torch.norm(torques_to_apply, dim=1, keepdim=True).clamp_min_(1e-9)
                    t_scale = torch.minimum(torch.ones_like(t_norm), torque_max / t_norm)
                    torques_to_apply = torques_to_apply * t_scale

                # --- Update State Buffers ---
                # Use boolean indexing to scatter only triggered values
                active[trigger_mask] = True
                steps_rem[trigger_mask] = self._pert_durations[robot_name][trigger_mask, :].flatten()
                forces_world[trigger_mask, :] = forces_to_apply[trigger_mask, :]
                torques_world[trigger_mask, :] = torques_to_apply[trigger_mask, :]
                if det_rate:
                    det_counter[trigger_mask] = 0

            # --- 4. Apply Wrenches (Vectorized) ---
            # Only call API if there are active perturbations to minimize overhead
            # NOTE(review): the wrench API is in fact called unconditionally each
            # step (with zeroed rows for inactive envs) -- confirm that is intended

            forces_world[~active, :]=0
            torques_world[~active, :]=0

            self._blink_rigid_prim_views[robot_name].apply_forces_and_torques_at_pos(
                forces=forces_world,
                torques=torques_world,
                positions=None, # body frame origin
                is_global=True
            )
+
+ @override
+ def _pre_step(self):
+
+ if self._env_opts["use_random_pertub"]:
+ self._process_perturbations()
+
+ success=super()._pre_step()
+
+ return success
+
+ @override
+ def _pre_step_db(self):
+
+ if self._env_opts["use_random_pertub"]:
+ self._process_perturbations()
+
+ success=super()._pre_step_db()
+
+ return success
+
+ @override
+ def _update_contact_state(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ super()._update_contact_state(robot_name, env_indxs)
+
+ if self._env_opts["use_random_pertub"]:
+ # write APPLIED perturbations to root wrench (mainly for debug)
+ self.cluster_servers[robot_name].get_state().contact_wrenches_root.set(data=self._pert_forces_world[robot_name][env_indxs, :],
+ data_type="f",
+ contact_name="root",
+ robot_idxs = env_indxs,
+ gpu=self._use_gpu)
+ self.cluster_servers[robot_name].get_state().contact_wrenches_root.set(data=self._pert_torques_world[robot_name][env_indxs, :],
+ data_type="t",
+ contact_name="root",
+ robot_idxs = env_indxs,
+ gpu=self._use_gpu)
+
    def _post_warmup_validation(self, robot_name: str):
        """Validate warmup state: base height, tilt, and (if available) contacts.

        Checks each env's base height against the terrain (plus a cushion) and
        the base tilt against a fixed threshold; logs a warning listing the
        failing envs and returns their indices (remediation code is currently
        disabled).

        Args:
            robot_name: robot whose post-warmup state is validated.

        Returns:
            torch.Tensor: 1D tensor of env indices failing either check.
        """
        # NOTE(review): 'envs' is currently unused
        envs = torch.arange(self._num_envs, device=self._device)

        # terrain height query: max terrain height in a small rectangle around
        # each (x, y); zero everywhere on flat ground / missing generator
        def _terrain_height(xy: torch.Tensor):
            if self._env_opts["use_flat_ground"] or self.terrain_generator is None:
                return torch.zeros((xy.shape[0],), device=xy.device, dtype=self._dtype)
            heights = []
            half_extent = self._env_opts["spawn_height_check_half_extent"]
            for k in range(xy.shape[0]):
                h = self.terrain_generator.get_max_height_in_rect(
                    float(xy[k, 0]), float(xy[k, 1]), half_extent=half_extent)
                heights.append(h)
            return torch.as_tensor(heights, device=xy.device, dtype=self._dtype)

        # base height check: base z must clear terrain + cushion
        base_xy = self._root_p[robot_name][:, 0:2]
        base_z = self._root_p[robot_name][:, 2]
        ground_z = _terrain_height(base_xy)
        margin = float(self._env_opts["spawn_height_cushion"])
        bad_z = base_z < (ground_z + margin)

        # tilt check (angle between base up and world up)
        q = self._root_q[robot_name]
        # quaternion to up vector (quat assumed [w, x, y, z], consistent with
        # the [w, i, j, k] convention noted in _get_root_state)
        w, x, y, z = q[:, 0], q[:, 1], q[:, 2], q[:, 3]
        up = torch.stack([
            2 * (x * z - w * y),
            2 * (y * z + w * x),
            1 - 2 * (x * x + y * y)
        ], dim=1)
        cos_tilt = up[:, 2].clamp(-1.0, 1.0)
        tilt_thresh_deg = 35.0
        bad_tilt = cos_tilt < math.cos(math.radians(tilt_thresh_deg))

        # contact check (only if sensors exist)
        # bad_contact = torch.zeros_like(base_z, dtype=torch.bool)
        # if robot_name in self.omni_contact_sensors and self.omni_contact_sensors[robot_name] is not None:
        #     counts = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device)
        #     for link in self._contact_names.get(robot_name, []):
        #         f_contact = self._get_contact_f(robot_name=robot_name,
        #                                        contact_link=link,
        #                                        env_indxs=None)
        #         if f_contact is None:
        #             continue
        #         # use normal component (assume z-up); ignore tangential forces
        #         active = f_contact[:, 2] > 1e-3
        #         counts += active.int()
        #     bad_contact = counts < 3

        failing = torch.nonzero(bad_z | bad_tilt, as_tuple=False).flatten()
        if failing.numel() > 0:
            # remediate: lift to terrain+margin, upright (preserve yaw), zero root velocities
            # (remediation is currently disabled -- failures are only reported)
            # yaw = torch.atan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z))
            # safe_z = (ground_z + margin)[failing]
            # self._root_p[robot_name][failing, 2] = safe_z
            # cos_h = torch.cos(yaw[failing] / 2)
            # sin_h = torch.sin(yaw[failing] / 2)
            # upright = torch.zeros((failing.shape[0], 4), device=self._device, dtype=self._dtype)
            # upright[:, 0] = cos_h
            # upright[:, 3] = sin_h
            # self._root_q[robot_name][failing, :] = upright
            # self._root_v[robot_name][failing, :] = 0.0
            # self._root_omega[robot_name][failing, :] = 0.0

            msgs = [] # print db message
            if bad_z.any():
                msgs.append(f"low_z envs {torch.nonzero(bad_z, as_tuple=False).flatten().tolist()}")
            if bad_tilt.any():
                msgs.append(f"tilt envs {torch.nonzero(bad_tilt, as_tuple=False).flatten().tolist()}")
            Journal.log(self.__class__.__name__,
                "_post_warmup_validation",
                f"Warmup validation failures for {robot_name}: " + "; ".join(msgs),
                LogType.WARN,
                throw_when_excep=False)
        return failing
+
+ def _import_urdf(self,
+ robot_name: str,
+ fix_base = False,
+ self_collide = False,
+ merge_fixed = True):
+
+ import_config=_urdf.ImportConfig()
+ # status,import_config=omni_kit.commands.execute("URDFCreateImportConfig")
+
+ Journal.log(self.__class__.__name__,
+ "update_root_offsets",
+ "importing robot URDF",
+ LogType.STAT,
+ throw_when_excep = True)
+ _urdf.acquire_urdf_interface()
+ # we overwrite some settings which are bound to be fixed
+ import_config.merge_fixed_joints = merge_fixed # makes sim more stable
+ # in case of fixed joints with light objects
+ import_config.import_inertia_tensor = True
+ # import_config.convex_decomp = False
+ import_config.fix_base = fix_base
+ import_config.self_collision = self_collide
+ # import_config.distance_scale = 1
+ # import_config.make_default_prim = True
+ # import_config.create_physics_scene = True
+ # import_config.default_drive_strength = 1047.19751
+ # import_config.default_position_drive_damping = 52.35988
+ # import_config.default_drive_type = _urdf.UrdfJointTargetType.JOINT_DRIVE_POSITION
+ # import URDF
+ success, robot_prim_path_default = omni_kit.commands.execute(
+ "URDFParseAndImportFile",
+ urdf_path=self._urdf_dump_paths[robot_name],
+ import_config=import_config,
+ # get_articulation_root=True,
+ )
+
+ robot_base_prim_path = self._env_opts["template_env_ns"] + "/" + robot_name
+
+ if success:
+ Journal.log(self.__class__.__name__,
+ "_import_urdf",
+ "Successfully importedf URDF into IsaacSim",
+ LogType.STAT)
+ else:
+ Journal.log(self.__class__.__name__,
+ "_import_urdf",
+ "Failed to import URDF into IsaacSim",
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ # moving default prim to base prim path for cloning
+ move_prim(robot_prim_path_default, # from
+ robot_base_prim_path) # to
+
+ robot_base_prim = prim_utils.get_prim_at_path(robot_base_prim_path)
+ children = prim_utils.get_prim_children(robot_base_prim)
+ # log imported prim children to the journal (print was getting truncated in logs)
+ Journal.log(self.__class__.__name__,
+ "_import_urdf",
+ f"Imported robot URDF children: {children}",
+ LogType.STAT)
+
+ # improve collision tesselation for cylinders (e.g., wheels) if requested
+ # self._apply_collision_refinement(robot_base_prim_path,
+ # self._env_opts["collision_refinement_level"])
+
+ return success
+
+ def _apply_collision_refinement(self, robot_base_prim_path: str, refinement_level: int):
+ """Set refinement level on collision cylinders to avoid coarse faceting."""
+ if refinement_level is None:
+ return
+ stage = get_current_stage()
+ coll_prefix = robot_base_prim_path + "/collisions"
+ count = 0
+ for prim in stage.Traverse():
+ if not prim.IsValid():
+ continue
+ path_str = prim.GetPath().pathString
+ if not path_str.startswith(coll_prefix):
+ continue
+ if prim.GetTypeName() == "Cylinder":
+ attr = prim.GetAttribute("refinementLevel")
+ if not attr.IsValid():
+ attr = prim.CreateAttribute("refinementLevel", Sdf.ValueTypeNames.Int)
+ attr.Set(int(refinement_level))
+ count += 1
+ Journal.log(self.__class__.__name__,
+ "_apply_collision_refinement",
+ f"Applied refinement level {refinement_level} to {count} cylinder collision prims under {coll_prefix}",
+ LogType.STAT)
+
+ def apply_collision_filters(self,
+ physicscene_path: str,
+ coll_root_path: str):
+
+ self._cloner.filter_collisions(physicsscene_path = physicscene_path,
+ collision_root_path = coll_root_path,
+ prim_paths=self._envs_prim_paths,
+ global_paths=self._ground_plane_prim_paths # can collide with these prims
+ )
+
+ def _read_root_state_from_robot(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ self._get_root_state(numerical_diff=self._env_opts["use_diff_vels"],
+ env_indxs=env_indxs,
+ robot_name=robot_name)
+
+ # height grid sensor readout
+ if robot_name in self._height_sensors:
+ pos_src = self._root_p[robot_name] if env_indxs is None else self._root_p[robot_name][env_indxs]
+ quat_src = self._root_q[robot_name] if env_indxs is None else self._root_q[robot_name][env_indxs]
+ heights = self._height_sensors[robot_name].read(pos_src, quat_src)
+ if self._env_opts["use_flat_ground"]:
+ heights.zero_()
+ if env_indxs is None:
+ self._height_imgs[robot_name] = heights
+ else:
+ # clone to avoid overlapping write/read views
+ self._height_imgs[robot_name][env_indxs] = heights.clone()
+
+ # print("height image")
+ # print(self._height_imgs[robot_name][0, :, : ])
+
+ def _read_jnts_state_from_robot(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ self._get_robots_jnt_state(
+ numerical_diff=self._env_opts["use_diff_vels"],
+ env_indxs=env_indxs,
+ robot_name=robot_name)
+
    def _get_root_state(self,
                robot_name: str,
                env_indxs: torch.Tensor = None,
                numerical_diff: bool = False,
                base_loc: bool = True):
        """Read the root state from the simulator into the cached buffers.

        Updates root position/orientation (world frame), linear/angular velocity
        and acceleration (either read from sim or obtained by finite
        differencing), the corresponding "previous" buffers, and optionally the
        base-local twist/acceleration and base-local gravity direction.

        Args:
            robot_name: robot to read.
            env_indxs: env indices to update (all envs if None).
            numerical_diff: if True, velocities are differentiated numerically
                from poses instead of read from the simulator.
            base_loc: if True, also rotate twist, acceleration and gravity into
                the base-local frame.
        """
        dt=self._cluster_dt[robot_name] # getting diff state always at cluster rate

        # measurements from simulator are in world frame
        if env_indxs is not None:

            pose = self._robots_art_views[robot_name].get_world_poses(
                clone = True,
                indices=env_indxs) # tuple: (pos, quat), quat is [w, i, j, k] in Isaac4.2

            self._root_p[robot_name][env_indxs, :] = pose[0]

            # warn when any robot approaches the (positive) terrain border
            # NOTE(review): only the +x/+y border is checked, not negative
            # coordinates -- confirm the terrain is one-sided
            going_to_fly=self._root_p[robot_name][env_indxs, 0:2]>(self._env_opts["terrain_border"]-0.1)
            if going_to_fly.any():
                flying=going_to_fly.sum().item()
                warn = f"N. {flying} robots ({robot_name}) are about to go out of the terrain!!"
                Journal.log(self.__class__.__name__,
                    "_get_root_state",
                    warn,
                    LogType.WARN,
                    throw_when_excep = True)
            self._root_q[robot_name][env_indxs, :] = pose[1] # root orientation
            if not numerical_diff:
                # we get velocities from the simulation. This is not good since
                # these can actually represent artifacts which do not have physical meaning.
                # It's better to obtain them by differentiation to avoid issues with controllers, etc...
                self._root_v[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_linear_velocities(
                    clone = True,
                    indices=env_indxs) # root lin. velocity
                self._root_omega[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_angular_velocities(
                    clone = True,
                    indices=env_indxs) # root ang. velocity

                # for now obtain root a numerically
                self._root_a[robot_name][env_indxs, :] = (self._root_v[robot_name][env_indxs, :] - \
                    self._root_v_prev[robot_name][env_indxs, :]) / dt
                self._root_alpha[robot_name][env_indxs, :] = (self._root_omega[robot_name][env_indxs, :] - \
                    self._root_omega_prev[robot_name][env_indxs, :]) / dt

                self._root_v_prev[robot_name][env_indxs, :] = self._root_v[robot_name][env_indxs, :]
                self._root_omega_prev[robot_name][env_indxs, :] = self._root_omega[robot_name][env_indxs, :]
            else:
                # differentiate numerically
                self._root_v[robot_name][env_indxs, :] = (self._root_p[robot_name][env_indxs, :] - \
                    self._root_p_prev[robot_name][env_indxs, :]) / dt
                self._root_omega[robot_name][env_indxs, :] = quat_to_omega(
                    self._root_q_prev[robot_name][env_indxs, :],
                    self._root_q[robot_name][env_indxs, :],
                    dt)

                self._root_a[robot_name][env_indxs, :] = (self._root_v[robot_name][env_indxs, :] - \
                    self._root_v_prev[robot_name][env_indxs, :]) / dt
                self._root_alpha[robot_name][env_indxs, :] = (self._root_omega[robot_name][env_indxs, :] - \
                    self._root_omega_prev[robot_name][env_indxs, :]) / dt

                # update "previous" data for numerical differentiation
                self._root_p_prev[robot_name][env_indxs, :] = self._root_p[robot_name][env_indxs, :]
                self._root_q_prev[robot_name][env_indxs, :] = self._root_q[robot_name][env_indxs, :]
                self._root_v_prev[robot_name][env_indxs, :] = self._root_v[robot_name][env_indxs, :]
                self._root_omega_prev[robot_name][env_indxs, :] = self._root_omega[robot_name][env_indxs, :]
            self._track_terrain_hits(robot_name=robot_name, env_indxs=env_indxs)

        else:
            # updating data for all environments
            pose = self._robots_art_views[robot_name].get_world_poses(
                clone = True) # tuple: (pos, quat)
            self._root_p[robot_name][:, :] = pose[0]
            self._root_q[robot_name][:, :] = pose[1] # root orientation

            # warn when any robot approaches the (positive) terrain border
            going_to_fly=self._root_p[robot_name][:, 0:2]>(self._env_opts["terrain_border"]-0.1)
            if going_to_fly.any():
                flying=going_to_fly.sum().item()
                warn = f"N. {flying} robots ({robot_name}) are about to go out of the terrain!!"
                Journal.log(self.__class__.__name__,
                    "_get_root_state",
                    warn,
                    LogType.WARN,
                    throw_when_excep = True)

            if not numerical_diff:
                # we get velocities from the simulation. This is not good since
                # these can actually represent artifacts which do not have physical meaning.
                # It's better to obtain them by differentiation to avoid issues with controllers, etc...
                self._root_v[robot_name][:, :] = self._robots_art_views[robot_name].get_linear_velocities(
                    clone = True) # root lin. velocity
                self._root_omega[robot_name][:, :] = self._robots_art_views[robot_name].get_angular_velocities(
                    clone = True) # root ang. velocity

                self._root_a[robot_name][:, :] = (self._root_v[robot_name][:, :] - \
                    self._root_v_prev[robot_name][:, :]) / dt
                self._root_alpha[robot_name][:, :] = (self._root_omega[robot_name][:, :] - \
                    self._root_omega_prev[robot_name][:, :]) / dt

                self._root_v_prev[robot_name][:, :] = self._root_v[robot_name][:, :]
                self._root_omega_prev[robot_name][:, :] = self._root_omega[robot_name][:, :]
            else:
                # differentiate numerically
                self._root_v[robot_name][:, :] = (self._root_p[robot_name][:, :] - \
                    self._root_p_prev[robot_name][:, :]) / dt
                self._root_omega[robot_name][:, :] = quat_to_omega(self._root_q_prev[robot_name][:, :],
                    self._root_q[robot_name][:, :],
                    dt)

                self._root_a[robot_name][:, :] = (self._root_v[robot_name][:, :] - \
                    self._root_v_prev[robot_name][:, :]) / dt
                self._root_alpha[robot_name][:, :] = (self._root_omega[robot_name][:, :] - \
                    self._root_omega_prev[robot_name][:, :]) / dt

                # update "previous" data for numerical differentiation
                self._root_p_prev[robot_name][:, :] = self._root_p[robot_name][:, :]
                self._root_q_prev[robot_name][:, :] = self._root_q[robot_name][:, :]
                self._root_v_prev[robot_name][:, :] = self._root_v[robot_name][:, :]
                self._root_omega_prev[robot_name][:, :] = self._root_omega[robot_name][:, :]
            self._track_terrain_hits(robot_name=robot_name, env_indxs=None)

        if base_loc:
            # rotate robot twist in base local
            twist_w=torch.cat((self._root_v[robot_name],
                self._root_omega[robot_name]),
                dim=1)
            twist_base_loc=torch.cat((self._root_v_base_loc[robot_name],
                self._root_omega_base_loc[robot_name]),
                dim=1)
            world2base_frame(t_w=twist_w,q_b=self._root_q[robot_name],t_out=twist_base_loc)
            self._root_v_base_loc[robot_name]=twist_base_loc[:, 0:3]
            self._root_omega_base_loc[robot_name]=twist_base_loc[:, 3:6]

            # rotate robot a in base local
            a_w=torch.cat((self._root_a[robot_name],
                self._root_alpha[robot_name]),
                dim=1)
            a_base_loc=torch.cat((self._root_a_base_loc[robot_name],
                self._root_alpha_base_loc[robot_name]),
                dim=1)
            world2base_frame(t_w=a_w,q_b=self._root_q[robot_name],t_out=a_base_loc)
            self._root_a_base_loc[robot_name]=a_base_loc[:, 0:3]
            self._root_alpha_base_loc[robot_name]=a_base_loc[:, 3:6]

            # rotate gravity in base local
            world2base_frame3D(v_w=self._gravity_normalized[robot_name],q_b=self._root_q[robot_name],
                v_out=self._gravity_normalized_base_loc[robot_name])
+
+ def _get_robots_jnt_state(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None,
+ numerical_diff: bool = False):
+
+ dt= self.physics_dt() if self._override_low_lev_controller else self._cluster_dt[robot_name]
+
+ # measurements from simulator are in world frame
+ if env_indxs is not None:
+
+ self._jnts_q[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_joint_positions(
+ clone = True,
+ indices=env_indxs) # joint positions
+ if not numerical_diff:
+ self._jnts_v[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_joint_velocities(
+ clone = True,
+ indices=env_indxs) # joint velocities
+ else:
+ # differentiate numerically
+ self._jnts_v[robot_name][env_indxs, :] = (self._jnts_q[robot_name][env_indxs, :] - \
+ self._jnts_q_prev[robot_name][env_indxs, :]) / dt
+ # update "previous" data for numerical differentiation
+ self._jnts_q_prev[robot_name][env_indxs, :] = self._jnts_q[robot_name][env_indxs, :]
+
+ self._jnts_eff[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_measured_joint_efforts(
+ clone = True,
+ joint_indices=None,
+ indices=env_indxs) # measured joint efforts (computed by joint force solver)
+
+ else:
+ self._jnts_q[robot_name][:, :] = self._robots_art_views[robot_name].get_joint_positions(
+ clone = True) # joint positions
+ if not numerical_diff:
+ self._jnts_v[robot_name][:, :] = self._robots_art_views[robot_name].get_joint_velocities(
+ clone = True) # joint velocities
+ else:
+ self._jnts_v[robot_name][:, :] = (self._jnts_q[robot_name][:, :] - \
+ self._jnts_q_prev[robot_name][:, :]) / dt
+
+ self._jnts_q_prev[robot_name][:, :] = self._jnts_q[robot_name][:, :]
+
+ self._jnts_eff[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_measured_joint_efforts(
+ clone = True) # measured joint efforts (computed by joint force solver)
+
+ def _get_contact_f(self,
+ robot_name: str,
+ contact_link: str,
+ env_indxs: torch.Tensor) -> torch.Tensor:
+
+ if self.omni_contact_sensors[robot_name] is not None:
+ return self.omni_contact_sensors[robot_name].get(dt=self.physics_dt(),
+ contact_link=contact_link,
+ env_indxs=env_indxs,
+ clone=False)
+
+ def _set_jnts_to_homing(self, robot_name: str):
+ self._robots_art_views[robot_name].set_joints_default_state(positions=self._homing,
+ velocities = torch.zeros((self._homing.shape[0], self._homing.shape[1]), \
+ dtype=self._dtype, device=self._device),
+ efforts = torch.zeros((self._homing.shape[0], self._homing.shape[1]), \
+ dtype=self._dtype, device=self._device))
+
+ def _set_root_to_defconfig(self, robot_name: str):
+ self._robots_art_views[robot_name].set_default_state(positions=self._root_p_default[robot_name],
+ orientations=self._root_q_default[robot_name])
+
+ def _alter_twist_warmup(self, robot_name: str, env_indxs: torch.Tensor = None):
+ """Zero angular velocities and joint velocities for the given robot/envs."""
+ if env_indxs is None:
+ twist = self._robots_art_views[robot_name].get_velocities(clone=True)
+ twist[:, 0] = 0.0
+ twist[:, 1] = 0.0
+ twist[:, 3:] = 0.0 # zero angular part, preserve current linear
+ self._robots_art_views[robot_name].set_velocities(velocities=twist, indices=None)
+
+ # jnt_vel=self._robots_art_views[robot_name].get_joint_velocities(
+ # indices = None, clone=True)
+ # jnt_vel[:, :] = 0.0
+
+ # self._robots_art_views[robot_name].set_joint_velocities(
+ # velocities = jnt_vel,
+ # indices = None)
+
+ else:
+ twist = self._robots_art_views[robot_name].get_velocities(clone=True, indices=env_indxs)
+ # twist[:, 0:2] = 0.0
+ twist[:, 0] = 0.0
+ twist[:, 1] = 0.0
+ twist[:, 3:] = 0.0
+ self._robots_art_views[robot_name].set_velocities(velocities=twist, indices=env_indxs)
+
+ # jnt_vel=self._robots_art_views[robot_name].get_joint_velocities(
+ # indices = env_indxs, clone=True)
+ # jnt_vel[:, :] = 0.0
+
+ # self._robots_art_views[robot_name].set_joint_velocities(
+ # velocities = jnt_vel,
+ # indices = env_indxs)
+
+ def _get_solver_info(self):
+ for i in range(0, len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ self._solver_position_iteration_counts[robot_name] = self._robots_art_views[robot_name].get_solver_position_iteration_counts()
+ self._solver_velocity_iteration_counts[robot_name] = self._robots_art_views[robot_name].get_solver_velocity_iteration_counts()
+ self._solver_stabilization_threshs[robot_name] = self._robots_art_views[robot_name].get_stabilization_thresholds()
+
+ def _update_art_solver_options(self):
+
+ # sets new solver iteration options for specifc articulations
+ self._get_solver_info() # gets current solver info for the articulations of the
+ # environments, so that dictionaries are filled properly
+
+ for i in range(0, len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ # increase by a factor
+ self._solver_position_iteration_counts[robot_name] = torch.full((self._num_envs,), self._solver_position_iteration_count)
+ self._solver_velocity_iteration_counts[robot_name] = torch.full((self._num_envs,), self._solver_velocity_iteration_count)
+ self._solver_stabilization_threshs[robot_name] = torch.full((self._num_envs,), self._solver_stabilization_thresh)
+ self._robots_art_views[robot_name].set_solver_position_iteration_counts(self._solver_position_iteration_counts[robot_name])
+ self._robots_art_views[robot_name].set_solver_velocity_iteration_counts(self._solver_velocity_iteration_counts[robot_name])
+ self._robots_art_views[robot_name].set_stabilization_thresholds(self._solver_stabilization_threshs[robot_name])
+ self._get_solver_info() # gets again solver info for articulation, so that it's possible to debug if
+ # the operation was successful
+
+ def _print_envs_info(self):
+
+ ground_info = f"[Ground info]" + "\n" + \
+ "static friction coeff.: " + str(self._terrain_material.GetStaticFrictionAttr().Get()) + "\n" + \
+ "dynamics friction coeff.: " + str(self._terrain_material.GetDynamicFrictionAttr().Get()) + "\n" + \
+ "restitution coeff.: " + str(self._terrain_material.GetRestitutionAttr().Get()) + "\n" +\
+ "friction comb. mode: " + str(self._terrain_physix_material.GetFrictionCombineModeAttr().Get()) + "\n" + \
+ "damping comb. mode: " + str(self._terrain_physix_material.GetDampingCombineModeAttr().Get()) + "\n" + \
+ "restitution comb. mode: " + str(self._terrain_physix_material.GetRestitutionCombineModeAttr().Get()) + "\n"
+
+ Journal.log(self.__class__.__name__,
+ "_print_envs_info",
+ ground_info,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ for i in range(0, len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ task_info = f"[{robot_name}]" + "\n" + \
+ "bodies: " + str(self._robots_art_views[robot_name].body_names) + "\n" + \
+ "n. prims: " + str(self._robots_art_views[robot_name].count) + "\n" + \
+ "prims names: " + str(self._robots_art_views[robot_name].prim_paths) + "\n" + \
+ "n. bodies: " + str(self._robots_art_views[robot_name].num_bodies) + "\n" + \
+ "n. dofs: " + str(self._robots_art_views[robot_name].num_dof) + "\n" + \
+ "dof names: " + str(self._robots_art_views[robot_name].dof_names) + "\n" + \
+ "solver_position_iteration_counts: " + str(self._solver_position_iteration_counts[robot_name]) + "\n" + \
+ "solver_velocity_iteration_counts: " + str(self._solver_velocity_iteration_counts[robot_name]) + "\n" + \
+ "stabiliz. thresholds: " + str(self._solver_stabilization_threshs[robot_name])
+ # print("dof limits: " + str(self._robots_art_views[robot_name].get_dof_limits()))
+ # print("effort modes: " + str(self._robots_art_views[robot_name].get_effort_modes()))
+ # print("dof gains: " + str(self._robots_art_views[robot_name].get_gains()))
+ # print("dof max efforts: " + str(self._robots_art_views[robot_name].get_max_efforts()))
+ # print("dof gains: " + str(self._robots_art_views[robot_name].get_gains()))
+ # print("physics handle valid: " + str(self._robots_art_views[robot_name].is_physics_handle_valid())
+ Journal.log(self.__class__.__name__,
+ "_print_envs_info",
+ task_info,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ def _fill_robot_info_from_world(self):
+ for i in range(0, len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ self._robot_bodynames[robot_name] = self._robots_art_views[robot_name].body_names
+ self._robot_n_links[robot_name] = self._robots_art_views[robot_name].num_bodies
+ self._robot_n_dofs[robot_name] = self._robots_art_views[robot_name].num_dof
+ self._robot_dof_names[robot_name] = self._robots_art_views[robot_name].dof_names
+
+ def _set_initial_camera_params(self,
+ camera_position=[10, 10, 3],
+ camera_target=[0, 0, 0],
+ camera_prim_path="/OmniverseKit_Persp"):
+ set_camera_view(eye=camera_position,
+ target=camera_target,
+ camera_prim_path=camera_prim_path)
+
+ def _init_contact_sensors(self, robot_name: str):
+ self.omni_contact_sensors[robot_name]=None
+ sensor_radii={}
+ contact_offsets={}
+ self._contact_names[robot_name]=self._env_opts["contact_prims"]
+ for contact_prim in self._env_opts["contact_prims"]:
+ sensor_radii[contact_prim]=self._env_opts["sensor_radii"]
+ contact_offsets[contact_prim]=np.array([0.0, 0.0, 0.0])
+ if not (len(self._env_opts["contact_prims"])==0):
+ self.omni_contact_sensors[robot_name]=OmniContactSensors(
+ name=robot_name,
+ n_envs=self._num_envs,
+ contact_prims=self._env_opts["contact_prims"],
+ contact_offsets=contact_offsets,
+ sensor_radii=sensor_radii,
+ device=self._device,
+ dtype=self._dtype,
+ enable_debug=self._debug,
+ filter_paths=self._ground_plane_prim_paths)
+ self.omni_contact_sensors[robot_name].create_contact_sensors(
+ self._world,
+ envs_namespace=self._env_opts["envs_ns"])
+
    def _init_robots_state(self):
        """Allocate and initialize per-robot state buffers.

        For every robot: reads current root pose/velocities and joint state
        from its articulation view; stores measured, previous and default
        copies; allocates zero-initialized buffers for accelerations, the
        height-sensor grid, perturbation wrenches and reference offsets.
        Also precomputes perturbation timing constants and robot masses and
        weights. Must be called after the articulation views are valid.
        """

        self._masses = {}
        self._weights = {}

        self._pert_active = {} # bool mask: (num_envs,)
        self._pert_steps_remaining = {} # int steps: (num_envs,)
        self._pert_forces_world = {} # (num_envs,3)
        self._pert_torques_world = {} # (num_envs,3)
        self._pert_force_local = {} # (num_envs,3) (if needed)
        self._pert_torque_local = {} # (num_envs,3)
        self._pert_lindir = {} # unit linear direction of perturbation force
        self._pert_angdir = {} # unit angular direction of perturbation torque
        self._pert_durations = {}
        self._pert_scratch = {}
        self._pert_det_counter = {}

        # convert durations in seconds to integer physics steps (min 1 step)
        self._pert_min_steps = max(1, int(math.ceil(self._env_opts["pert_wrenches_min_duration"] / self.physics_dt())))
        self._pert_max_steps = max(self._pert_min_steps, int(math.ceil(self._env_opts["pert_wrenches_max_duration"] / self.physics_dt())))

        pert_wrenches_step_rate=self._env_opts["pert_wrenches_rate"]/self.physics_dt() # 1 pert every n physics steps
        self._pert_det_steps = max(1, int(round(pert_wrenches_step_rate)))
        self._pert_wrenches_prob=min(1.0, 1.0/pert_wrenches_step_rate) # sampling prob to be used when not deterministic

        # computes per-robot spawn offsets used below for default root positions
        self._calc_robot_distrib()

        for i in range(0, len(self._robot_names)):

            robot_name = self._robot_names[i]

            pose = self._robots_art_views[robot_name].get_world_poses(
                clone = True) # tuple: (pos, quat)

            # root p (measured, previous, default)
            self._root_p[robot_name] = pose[0]
            self._root_p_prev[robot_name] = torch.clone(pose[0])
            # print(self._root_p_default[robot_name].device)
            self._root_p_default[robot_name] = torch.clone(pose[0]) + self._distr_offset[robot_name]
            # root q (measured, previous, default)
            self._root_q[robot_name] = pose[1] # root orientation
            self._root_q_prev[robot_name] = torch.clone(pose[1])
            self._root_q_default[robot_name] = torch.clone(pose[1])
            # jnt q (measured, previous, default)
            self._jnts_q[robot_name] = self._robots_art_views[robot_name].get_joint_positions(
                clone = True) # joint positions
            self._jnts_q_prev[robot_name] = self._robots_art_views[robot_name].get_joint_positions(
                clone = True)
            self._jnts_q_default[robot_name] = torch.full((self._jnts_q[robot_name].shape[0],
                                                self._jnts_q[robot_name].shape[1]),
                                                0.0,
                                                dtype=self._dtype,
                                                device=self._device)

            # root v (measured, default)
            self._root_v[robot_name] = self._robots_art_views[robot_name].get_linear_velocities(
                clone = True) # root lin. velocity
            self._root_v_base_loc[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0)
            self._root_v_prev[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0)
            self._root_v_default[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0)
            # root omega (measured, default)
            self._root_omega[robot_name] = self._robots_art_views[robot_name].get_angular_velocities(
                clone = True) # root ang. velocity
            self._root_omega_prev[robot_name] = torch.full_like(self._root_omega[robot_name], fill_value=0.0)

            self._root_omega_base_loc[robot_name] = torch.full_like(self._root_omega[robot_name], fill_value=0.0)
            self._root_omega_default[robot_name] = torch.full((self._root_omega[robot_name].shape[0], self._root_omega[robot_name].shape[1]),
                                                0.0,
                                                dtype=self._dtype,
                                                device=self._device)
            # root a (measured,)
            self._root_a[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0)
            self._root_a_base_loc[robot_name] = torch.full_like(self._root_a[robot_name], fill_value=0.0)
            self._root_alpha[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0)
            self._root_alpha_base_loc[robot_name] = torch.full_like(self._root_alpha[robot_name], fill_value=0.0)

            # height grid sensor storage: (num_envs, pixels, pixels)
            grid_sz = int(self._env_opts["height_sensor_pixels"])
            self._height_imgs[robot_name] = torch.zeros((self._num_envs, grid_sz, grid_sz),
                                                dtype=self._dtype,
                                                device=self._device)

            # joints v (measured, default)
            self._jnts_v[robot_name] = self._robots_art_views[robot_name].get_joint_velocities(
                clone = True) # joint velocities
            self._jnts_v_default[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]),
                                                0.0,
                                                dtype=self._dtype,
                                                device=self._device)

            # joints efforts (measured, default)
            self._jnts_eff[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]),
                                                0.0,
                                                dtype=self._dtype,
                                                device=self._device)
            self._jnts_eff_default[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]),
                                                0.0,
                                                dtype=self._dtype,
                                                device=self._device)
            self._root_pos_offsets[robot_name] = torch.zeros((self._num_envs, 3),
                                device=self._device) # reference position offsets

            self._root_q_offsets[robot_name] = torch.zeros((self._num_envs, 4),
                                device=self._device)
            self._root_q_offsets[robot_name][:, 0] = 1.0 # init to valid identity quaternion

            # boolean active flag per env
            self._pert_active[robot_name] = torch.zeros((self._num_envs,), dtype=torch.bool, device=self._device)
            # remaining steps as integer tensor
            self._pert_steps_remaining[robot_name] = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device)
            # world force & torque (N and N*m) stored as floats
            self._pert_forces_world[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device)
            self._pert_torques_world[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device)
            # local frame copies (if you want to store local-frame versions)
            self._pert_force_local[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device)
            self._pert_torque_local[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device)

            self._pert_lindir[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device)
            self._pert_angdir[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device)

            self._pert_durations[robot_name] = torch.zeros((self._num_envs, 1), dtype=torch.int32, device=self._device)

            self._pert_scratch[robot_name] = torch.zeros((self._num_envs, 1), dtype=self._dtype, device=self._device)
            self._pert_det_counter[robot_name] = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device)

            # total mass per env = sum of body masses
            self._masses[robot_name] = torch.sum(self._robots_art_views[robot_name].get_body_masses(clone=True), dim=1).to(dtype=self._dtype, device=self._device)

            # weight magnitude per env: m * |g_z|, shaped (num_envs, 1)
            self._weights[robot_name] = (self._masses[robot_name] * abs(self._env_opts["gravity"][2].item())).reshape((self._num_envs, 1))
+
+ def _track_terrain_hits(self, robot_name: str, env_indxs: torch.Tensor = None):
+ """Track transitions into the terrain boundary margin (1 m) and count hits per env."""
+ if self._env_opts["use_flat_ground"]:
+ return
+ border = float(self._env_opts["terrain_border"])
+ threshold = max(0.0, border - self._terrain_hit_margin)
+ state = self._terrain_hit_active[robot_name]
+ counts = self._terrain_hit_counts[robot_name]
+ if env_indxs is None:
+ pos_xy = self._root_p[robot_name][:, 0:2]
+ hitting = torch.any(torch.abs(pos_xy) > threshold, dim=1)
+ new_hits = (~state) & hitting
+ if new_hits.any():
+ counts[new_hits] += 1
+ state.copy_(hitting)
+ else:
+ pos_xy = self._root_p[robot_name][env_indxs, 0:2]
+ hitting = torch.any(torch.abs(pos_xy) > threshold, dim=1)
+ prev_state = state[env_indxs]
+ new_hits = (~prev_state) & hitting
+ if new_hits.any():
+ counts[env_indxs[new_hits]] += 1
+ state[env_indxs] = hitting
+
+ def _maybe_log_terrain_hits(self):
+ """Log boundary hits at low frequency only when counters change."""
+ if self._env_opts["use_flat_ground"]:
+ return
+ period = int(self._env_opts["terrain_hit_log_period"])
+ if period <= 0 or (self.step_counter % period != 0):
+ return
+ for robot_name in self._robot_names:
+ active = self._terrain_hit_active.get(robot_name, None)
+ if active is None:
+ continue
+ active_now = int(active.sum().item())
+ if active_now == 0:
+ continue
+ counts = self._terrain_hit_counts[robot_name]
+ last = self._terrain_hit_counts_last_logged.get(robot_name, None)
+ if last is not None and torch.equal(counts, last):
+ continue
+ total_hits = int(counts.sum().item())
+ msg = f"{active_now} {robot_name} robots within {self._terrain_hit_margin}m of terrain border. Total hits: {total_hits}."
+ Journal.log(self.__class__.__name__,
+ "_terrain_hits",
+ msg,
+ LogType.WARN,
+ throw_when_excep = True)
+ self._terrain_hit_counts_last_logged[robot_name] = counts.clone()
+
+ @override
+ def _post_world_step(self) -> bool:
+ res = super()._post_world_step()
+ self._maybe_log_terrain_hits()
+ return res
+
+ @override
+ def _post_world_step_db(self) -> bool:
+ res = super()._post_world_step_db()
+ self._maybe_log_terrain_hits()
+ return res
+
+ def current_tstep(self):
+ self._world.current_time_step_index
+
+ def world_time(self, robot_name: str):
+ return self._world.current_time
+
+ def physics_dt(self):
+ return self._world.get_physics_dt()
+
+ def rendering_dt(self):
+ return self._env_opts["rendering_dt"]
+
+ def set_physics_dt(self, physics_dt:float):
+ self._world.set_simulation_dt(physics_dt=physics_dt,rendering_dt=None)
+
+ def set_rendering_dt(self, rendering_dt:float):
+ self._world.set_simulation_dt(physics_dt=None,rendering_dt=rendering_dt)
+
+ def _robot_jnt_names(self, robot_name: str):
+ return self._robots_art_views[robot_name].dof_names
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/jnt_imp_config.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/jnt_imp_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe4c2fdf7657e65e5e558c3daf8b27cf57ee22ec
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/jnt_imp_config.yaml
@@ -0,0 +1,17 @@
+XBotInterface:
+ urdf_path: $PWD/xmj_env_files/kyon.urdf
+ srdf_path: $PWD/xmj_env_files/kyon.srdf
+
+# defaults
+motor_pd:
+ "hip_roll*": [200, 60]
+ "hip_pitch*": [200, 60]
+ "knee_pitch*": [200, 60]
+ "wheel_joint*": [0, 30]
+
+# startup pd config
+startup_motor_pd:
+ "hip_roll*": [100, 10]
+ "hip_pitch*": [100, 10]
+ "knee_pitch*": [100, 10]
+ "wheel_joint*": [0, 30]
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/jnt_imp_config_b2w.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/jnt_imp_config_b2w.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bc07eec1283f3b176a387b8b2cc404391a5d7b3a
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/jnt_imp_config_b2w.yaml
@@ -0,0 +1,17 @@
+XBotInterface:
+ urdf_path: $PWD/xmj_env_files/b2w/unitree_b2w.urdf
+ srdf_path: $PWD/xmj_env_files/b2w/unitree_b2w.srdf
+
+# defaults
+motor_pd:
+ "*hip_joint": [200, 60]
+ "*thigh_joint": [200, 60]
+ "*calf_joint": [200, 60]
+ "*foot_joint": [0, 30]
+
+# startup pd config
+startup_motor_pd:
+ "*hip_joint": [200, 60]
+ "*thigh_joint": [200, 60]
+ "*calf_joint": [200, 60]
+ "*foot_joint": [0, 30]
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_control_cluster.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_control_cluster.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c484f45d52cd3cff4da3401453a30258d1e53ab
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_control_cluster.py
@@ -0,0 +1,106 @@
+import os
+import argparse
+import multiprocessing as mp
+import importlib.util
+import inspect
+
+from aug_mpc.utils.custom_arg_parsing import generate_custom_arg_dict
+
+from EigenIPC.PyEigenIPC import Journal, LogType
+
+this_script_name = os.path.splitext(os.path.basename(os.path.abspath(__file__)))[0]
+
# Function to dynamically import a module from a specific file path
def import_env_module(env_path):
    """Load and execute the Python file at ``env_path`` as a module named
    'env_module', returning the executed module object."""
    spec = importlib.util.spec_from_file_location("env_module", env_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
+
if __name__ == "__main__":

    # CLI for spawning a cluster of RHC controllers bound to shared memory.
    parser = argparse.ArgumentParser(description="Set CPU affinity for the script.")
    parser.add_argument('--ns', type=str, help='Namespace to be used for cluster shared memory')
    parser.add_argument('--urdf_path', type=str, help='Robot description package path for URDF ')
    parser.add_argument('--srdf_path', type=str, help='Robot description package path for SRDF ')
    parser.add_argument('--size', type=int, help='cluster size', default=1)

    # Replacing argparse.BooleanOptionalAction with 'store_true' and 'store_false' for compatibility with Python 3.8
    parser.add_argument('--cloop',action='store_true', help='whether to use RHC controllers in closed loop mode')
    parser.add_argument('--cluster_dt', type=float, default=0.05, help='dt used by MPC controllers for discretization')
    parser.add_argument('--n_nodes', type=int, default=31, help='n nodes used by MPC controllers')

    parser.add_argument('--verbose',action='store_true', help='run in verbose mode')

    parser.add_argument('--enable_debug',action='store_true', help='enable debug mode for cluster client and all controllers')

    parser.add_argument('--dmpdir', type=str, help='directory where data is dumped', default="/root/aux_data")

    parser.add_argument('--no_mp_fork',action='store_true', help='whether to multiprocess with forkserver context')

    parser.add_argument('--set_affinity',action='store_true', help='set affinity to a core for each controller')

    parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="")
    parser.add_argument('--timeout_ms', type=int, help='connection timeout after which the script self-terminates', default=60000)
    parser.add_argument('--codegen_override_dir', type=str, help='Path to base dir where codegen is to be loaded', default="")

    parser.add_argument('--custom_args_names', nargs='+', default=None,
                        help='list of custom arguments names')
    parser.add_argument('--custom_args_vals', nargs='+', default=None,
                        help='list of custom arguments values')
    parser.add_argument('--custom_args_dtype', nargs='+', default=None,
                        help='list of custom arguments data types')

    parser.add_argument('--cluster_client_fname', type=str,
                        default="aug_mpc.controllers.rhc.hybrid_quad_client",
                        help="cluster client file import pattern (without extension)")

    args = parser.parse_args()

    # build the custom-options dict from the --custom_args_* lists
    # (name/value/dtype consistency is validated inside the helper)
    custom_opts = generate_custom_arg_dict(args=args)
    custom_opts.update({"cloop": args.cloop,
                "cluster_dt": args.cluster_dt,
                "n_nodes": args.n_nodes,
                "codegen_override_dir": args.codegen_override_dir})
    if args.no_mp_fork: # this needs to be in the main
        mp.set_start_method('spawn')
    else:
        # mp.set_start_method('forkserver')
        mp.set_start_method('fork')

    # import the cluster client module by dotted name and require that it
    # defines exactly one class (which is taken to be the client class)
    cluster_module=importlib.import_module(args.cluster_client_fname)
    # Get all classes defined in the module
    classes_in_module = [name for name, obj in inspect.getmembers(cluster_module, inspect.isclass)
                        if obj.__module__ == cluster_module.__name__]
    if len(classes_in_module) == 1:
        cluster_classname=classes_in_module[0]
        ClusterClient = getattr(cluster_module, cluster_classname)
        cluster_client = ClusterClient(namespace=args.ns,
                cluster_size=args.size,
                urdf_xacro_path=args.urdf_path,
                srdf_xacro_path=args.srdf_path,
                open_loop=not args.cloop,
                use_mp_fork = not args.no_mp_fork,
                verbose=args.verbose,
                debug=args.enable_debug,
                base_dump_dir=args.dmpdir,
                timeout_ms=args.timeout_ms,
                custom_opts=custom_opts,
                codegen_override=args.codegen_override_dir,
                set_affinity=args.set_affinity)
        cluster_client.run() # spawns the controllers (blocking)

    else:
        # ambiguous module: refuse to guess which class is the client
        class_list_str = ", ".join(classes_in_module)
        Journal.log("launch_control_cluster.py",
            "",
            f"Found more than one class in cluster client file {args.cluster_client_fname}. Found: {class_list_str}",
            LogType.EXCEP,
            throw_when_excep = False)
        exit()

    # control_cluster_client =
    # control_cluster_client.run() # spawns the controllers on separate processes (blocking)
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_train_env.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_train_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..1864f2e4f20d85297c2b5568666ea16d9bd8f644
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_train_env.py
@@ -0,0 +1,358 @@
+from aug_mpc.utils.determinism import deterministic_run
+
+from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo
+from mpc_hive.utilities.shared_data.cluster_data import SharedClusterInfo
+
+from EigenIPC.PyEigenIPC import VLevel, Journal, LogType
+from EigenIPC.PyEigenIPC import StringTensorServer
+
+import os, argparse, sys, types, inspect
+
+from perf_sleep.pyperfsleep import PerfSleep
+
+import importlib.util
+import torch
+import signal
+
+algo = None # global to make it accessible by signal handler
+exit_request=False
+dummy_step_exit_req=False
+
+def handle_sigint(signum, frame):
+ global exit_request, dummy_step_exit_req
+ Journal.log("launch_train_env.py",
+ "",
+ f"Received sigint. Will stop training.",
+ LogType.WARN)
+ exit_request=True
+ dummy_step_exit_req=True # in case dummy_step_loop was used
+
+# Function to dynamically import a module from a specific file path
+# def import_env_module(env_path):
+# spec = importlib.util.spec_from_file_location("env_module", env_path)
+# env_module = importlib.util.module_from_spec(spec)
+# spec.loader.exec_module(env_module)
+# return env_module
+
+def import_env_module(env_path, local_env_root: str = None):
+ """
+ env_path: full path to the child env .py file to exec
+ local_env_root: directory where local copies of aug_mpc_envs.training_envs modules live
+ """
+ if local_env_root is not None:
+ local_env_root = os.path.abspath(local_env_root)
+ # override aug_mpc_envs.training_envs package to point to the local_env_root
+ pkg_name = "aug_mpc_envs.training_envs"
+ if pkg_name not in sys.modules:
+ mod = types.ModuleType(pkg_name)
+ mod.__path__ = [local_env_root] # tell Python to look here first
+ sys.modules[pkg_name] = mod
+ else:
+ existing = getattr(sys.modules[pkg_name], "__path__", None)
+ if existing is None:
+ sys.modules[pkg_name].__path__ = [local_env_root]
+ elif local_env_root not in existing:
+ existing.insert(0, local_env_root)
+
+ # load the module as usual
+ spec = importlib.util.spec_from_file_location("env_module", env_path)
+ env_module = importlib.util.module_from_spec(spec)
+ spec.loader.exec_module(env_module)
+ return env_module
+
+def log_env_hierarchy(env_class, env_path, env_type="training"):
+ """
+ Logs the env class, its file, and full inheritance hierarchy with file paths.
+ env_class: the child env class
+ env_path: file path where the child class was loaded from
+ env_type: string label, e.g., "training", "evaluation", "resumed_training"
+ """
+ def get_bases_recursive(cls):
+ """Recursively get all base classes with their file paths."""
+ info = []
+ for base in cls.__bases__:
+ try:
+ file = inspect.getfile(base)
+ except TypeError:
+ file = "built-in or unknown"
+ info.append(f"{base.__name__} (from {file})")
+ # Recurse unless it's object
+ if base is not object:
+ info.extend(get_bases_recursive(base))
+ return info
+
+ hierarchy_info = get_bases_recursive(env_class)
+ hierarchy_str = " -> ".join(hierarchy_info) if hierarchy_info else "No parents"
+
+ Journal.log(
+ "launch_train_env.py",
+ "",
+ f"loading {env_type} env {env_class.__name__} (from {env_path}) "
+ f"with hierarchy: {hierarchy_str}",
+ LogType.INFO,
+ throw_when_excep=True
+ )
+
+def dummy_step_loop(env):
+ global dummy_step_exit_req
+ while True:
+ if dummy_step_exit_req:
+ return True
+ step_ok=env.step(action=env.safe_action) # not a busy loop because of MPC in the step
+ if not step_ok:
+ return False
+
+if __name__ == "__main__":
+
+ signal.signal(signal.SIGINT, handle_sigint)
+
+ # Parse command line arguments for CPU affinity
+ parser = argparse.ArgumentParser(description="Set CPU affinity for the script.")
+
+ parser.add_argument('--run_name', type=str, default=None, help='Name of training run')
+ parser.add_argument('--ns', type=str, help='Namespace to be used for shared memory')
+ parser.add_argument('--timeout_ms', type=int, help='Connection timeout after which the script self-terminates', default=60000)
+ parser.add_argument('--drop_dir', type=str, help='Directory root where all run data will be dumped')
+ parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="")
+ parser.add_argument('--seed', type=int, help='Seed', default=1)
+ parser.add_argument('--use_cpu',action='store_true', help='If set, all the training (data included) will be performed on CPU')
+
+ parser.add_argument('--db',action='store_true', help='Whether to enable local data logging for the algorithm (reward metrics, etc.)')
+ parser.add_argument('--env_db',action='store_true', help='Whether to enable env db data logging on shared mem (e.g. reward metrics are not available for reading anymore)')
+ parser.add_argument('--full_env_db',action='store_true', help='Whether to enable detailed episodic data storage (data over single transitions)')
+ parser.add_argument('--rmdb',action='store_true', help='Whether to enable remote debug (e.g. data logging on remote servers)')
+
+ parser.add_argument('--tot_tsteps', type=int, help='Total number of timesteps to be collected', default=int(30e6))
+ parser.add_argument('--action_repeat', type=int, help='Frame skipping (1-> no skip)', default=1)
+ parser.add_argument('--discount_factor', type=float, help='', default=0.99)
+ parser.add_argument('--obs_norm',action='store_true', help='Whether to enable the use of running normalizer in agent')
+ parser.add_argument('--obs_rescale',action='store_true', help='Whether to rescale observation depending on their expected range')
+    parser.add_argument('--add_weight_norm',action='store_true', help='Whether to add weight normalization to agent internal layers')
+    parser.add_argument('--add_layer_norm',action='store_true', help='Whether to add layer normalization to agent internal layers')
+    parser.add_argument('--add_batch_norm',action='store_true', help='Whether to add batch normalization to agent internal layers')
+
+ parser.add_argument('--act_rescale_critic',action='store_true', help='Whether to rescale actions provided to critic (if SAC) to be in range [-1, 1]')
+ parser.add_argument('--use_period_resets',action='store_true', help='')
+
+ parser.add_argument('--sac',action='store_true', help='Use SAC, otherwise PPO, unless dummy is set')
+ parser.add_argument('--dummy',action='store_true', help='Use dummy agent (useful for testing and debugging environments)')
+
+ parser.add_argument('--dump_checkpoints',action='store_true', help='Whether to dump model checkpoints during training')
+
+ parser.add_argument('--demo_envs_perc', type=float, help='[0, 1]', default=0.0)
+ parser.add_argument('--demo_stop_thresh', type=float, default=None,
+                        help='Performance threshold above which demonstration envs should be deactivated.')
+
+ parser.add_argument('--expl_envs_perc', type=float, help='[0, 1]', default=0)
+
+ parser.add_argument('--use_rnd',action='store_true', help='Whether to use RND for exploration')
+
+ parser.add_argument('--eval',action='store_true', help='Whether to perform an evaluation run')
+ parser.add_argument('--n_eval_timesteps', type=int, help='Total number of timesteps to be evaluated', default=int(1e6))
+ parser.add_argument('--det_eval',action='store_true', help='Whether to perform a deterministic eval (only action mean is used). Only valid if --eval.')
+ parser.add_argument('--allow_expl_during_eval',action='store_true', help='Whether to allow expl envs during evaluation (useful to tune exploration)')
+
+ parser.add_argument('--resume',action='store_true', help='Resume a previous training using a checkpoint')
+ parser.add_argument('--mpath', type=str, help='Model path to be used for policy evaluation', default=None)
+ parser.add_argument('--mname', type=str, help='Model name', default=None)
+ parser.add_argument('--override_env',action='store_true', help='Whether to override env when running evaluation')
+
+ parser.add_argument('--anomaly_detect',action='store_true', help='Whether to enable anomaly detection (useful for debug)')
+
+ parser.add_argument('--compression_ratio', type=float,
+                        help='If e.g. 0.8, the first layer will be of dimension [input_features_size x (input_features_size*compression_ratio)]', default=-1.0)
+ parser.add_argument('--actor_lwidth', type=int, help='Actor network layer width', default=128)
+ parser.add_argument('--critic_lwidth', type=int, help='Critic network layer width', default=256)
+ parser.add_argument('--actor_n_hlayers', type=int, help='Actor network size', default=3)
+ parser.add_argument('--critic_n_hlayers', type=int, help='Critic network size', default=4)
+
+ parser.add_argument('--env_fname', type=str, default="twist_tracking_env", help='Training env file name (without extension)')
+ parser.add_argument('--env_classname', type=str, default="TwistTrackingEnv", help='Training env class name')
+ parser.add_argument('--override_agent_actions',action='store_true', help='Whether to override agent actions with custom ones from shared mem (useful for db)')
+ parser.add_argument('--override_agent_refs',action='store_true', help='Whether to override automatically generated agent refs (useful for debug)')
+
+ parser.add_argument('--step_while_setup',action='store_true', help='Continue stepping env with default actions while setting up agent, etc..')
+ parser.add_argument('--reset_on_init',action='store_true', help='Whether to reset the environment on initialization')
+
+ args = parser.parse_args()
+ args_dict = vars(args)
+
+ if args.eval and args.resume:
+ Journal.log("launch_train_env.py",
+ "",
+ f"Cannot set both --eval and --resume flags. Exiting.",
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ deterministic_run(seed=args.seed, torch_det_algos=False)
+
+ anomaly_detect=False
+ if args.anomaly_detect:
+ torch.autograd.set_detect_anomaly(True)
+
+ if (not args.mpath is None) and (not args.mname is None):
+ mpath_full = os.path.join(args.mpath, args.mname)
+ else:
+ mpath_full=None
+
+ env_fname=args.env_fname
+ env_classname = args.env_classname
+ env_path=""
+ env_module=None
+ if (not args.eval and not args.resume) or (args.override_env):
+        # if starting a fresh training or overriding the env, load a fresh env from aug_mpc
+ env_path = f"aug_mpc_envs.training_envs.{env_fname}"
+ env_module = importlib.import_module(env_path)
+ else:
+ if args.mpath is None:
+ Journal.log("launch_train_env.py",
+ "",
+ f"no mpath provided! Cannot load env. Either provide a mpath or run with --override_env",
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ env_path = os.path.join(args.mpath, env_fname + ".py")
+ env_module = import_env_module(env_path, local_env_root=args.mpath)
+
+ EnvClass = getattr(env_module, env_classname)
+ env_type = "training" if not args.eval else "evaluation"
+ if args.resume:
+ env_type = "resumed_training"
+ log_env_hierarchy(EnvClass, env_path, env_type) # db print of env class
+
+ env = EnvClass(namespace=args.ns,
+ verbose=True,
+ vlevel=VLevel.V2,
+ use_gpu=not args.use_cpu,
+ debug=args.env_db,
+ override_agent_refs=args.override_agent_refs,
+ timeout_ms=args.timeout_ms,
+ env_opts=args_dict)
+ if not env.is_ready(): # something went wrong
+ exit()
+
+ dummy_step_thread = None
+ if args.step_while_setup:
+ import threading
+        # spawn step thread (we don't need true parallelization, a thread is fine)
+ # start the dummy stepping in a separate thread so setup can continue concurrently
+ dummy_step_thread = threading.Thread(target=dummy_step_loop, args=(env,), daemon=True)
+ dummy_step_thread.start()
+
+ # getting some sim info for debugging
+ sim_data = {}
+ sim_info_shared = SharedEnvInfo(namespace=args.ns,
+ is_server=False,
+ safe=False)
+ sim_info_shared.run()
+ sim_info_keys = sim_info_shared.param_keys
+ sim_info_data = sim_info_shared.get().flatten()
+ for i in range(len(sim_info_keys)):
+ sim_data[sim_info_keys[i]] = sim_info_data[i]
+
+    # getting some cluster info for debugging
+ cluster_data={}
+ cluste_info_shared = SharedClusterInfo(namespace=args.ns,
+ is_server=False,
+ safe=False)
+ cluste_info_shared.run()
+ cluster_info_keys = cluste_info_shared.param_keys
+ cluster_info_data = cluste_info_shared.get().flatten()
+ for i in range(len(cluster_info_keys)):
+ cluster_data[cluster_info_keys[i]] = cluster_info_data[i]
+
+ custom_args={}
+ custom_args["uname_host"]="user_host"
+ try:
+ username = os.getlogin() # add machine info to db data
+ hostname = os.uname().nodename
+ user_host = f"{username}@{hostname}"
+ custom_args["uname_host"]=user_host
+ except:
+ pass
+
+ algo=None
+ if not args.dummy:
+ if args.sac:
+ from aug_mpc.training_algs.sac.sac import SAC
+
+ algo = SAC(env=env,
+ debug=args.db,
+ remote_db=args.rmdb,
+ seed=args.seed)
+ else:
+ from aug_mpc.training_algs.ppo.ppo import PPO
+
+ algo = PPO(env=env,
+ debug=args.db,
+ remote_db=args.rmdb,
+ seed=args.seed)
+ else:
+ from aug_mpc.training_algs.dummy.dummy import Dummy
+
+ algo=Dummy(env=env,
+ debug=args.db,
+ remote_db=args.rmdb,
+ seed=args.seed)
+
+ custom_args.update(args_dict)
+ custom_args.update(cluster_data)
+ custom_args.update(sim_data)
+
+ run_name=env_classname if args.run_name is None else args.run_name
+ algo.setup(run_name=run_name,
+ ns=args.ns,
+ verbose=True,
+ drop_dir_name=args.drop_dir,
+ custom_args=custom_args,
+ comment=args.comment,
+ eval=args.eval,
+ resume=args.resume,
+ model_path=mpath_full,
+ n_eval_timesteps=args.n_eval_timesteps,
+ dump_checkpoints=args.dump_checkpoints,
+ norm_obs=args.obs_norm,
+ rescale_obs=args.obs_rescale)
+
+ full_drop_dir=algo.drop_dir()
+ shared_drop_dir = StringTensorServer(length=1,
+ basename="SharedTrainingDropDir",
+ name_space=args.ns,
+ verbose=True,
+ vlevel=VLevel.V2,
+ force_reconnection=True)
+ shared_drop_dir.run()
+
+ while True:
+ if not shared_drop_dir.write_vec([full_drop_dir], 0):
+ ns=1000000000
+ PerfSleep.thread_sleep(ns)
+ continue
+ else:
+ break
+
+ if args.step_while_setup:
+ # stop dummy step thread and give algo authority on step
+ dummy_step_exit_req=True
+ # wait for thread to join
+ if dummy_step_thread is not None:
+ dummy_step_thread.join()
+ Journal.log("launch_train_env.py",
+ "",
+ f"Dummy env step thread joined. Moving step authority to algo.",
+ LogType.INFO)
+
+ eval=args.eval
+ if args.override_agent_actions:
+ eval=True
+ if not eval:
+ while not exit_request:
+ if not algo.learn():
+ break
+ else: # eval phase
+ with torch.no_grad(): # no need for grad computation
+ while not exit_request:
+ if not algo.eval():
+ break
+
+ algo.done() # make sure to terminate training properly
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_world_interface.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_world_interface.py
new file mode 100644
index 0000000000000000000000000000000000000000..1836cedcdd2191f40b35db291e86f4ff4a7b8295
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/launch_world_interface.py
@@ -0,0 +1,207 @@
+import os
+import argparse
+import importlib.util
+import inspect
+
+from aug_mpc.utils.rt_factor import RtFactor
+from aug_mpc.utils.custom_arg_parsing import generate_custom_arg_dict
+from aug_mpc.utils.determinism import deterministic_run
+
+from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import Journal, LogType
+
+script_name = os.path.splitext(os.path.basename(os.path.abspath(__file__)))[0]
+
+# Function to dynamically import a module from a specific file path
+def import_world_module(env_path):
+ spec = importlib.util.spec_from_file_location("world_module", env_path)
+ world_module = importlib.util.module_from_spec(spec)
+ spec.loader.exec_module(world_module)
+ return world_module
+
+if __name__ == '__main__':
+
+ parser = argparse.ArgumentParser(description="Sim. env launcher")
+ # Add arguments
+ parser.add_argument('--robot_name', type=str, help='Alias to be used for the robot and also shared memory')
+ parser.add_argument('--urdf_path', type=str, help='path to the URDF file description for each robot')
+ parser.add_argument('--srdf_path', type=str, help='path to the SRDF file description for each robot (used for homing)')
+ parser.add_argument('--jnt_imp_config_path', type=str, help='path to a valid YAML file containing information on jnt impedance gains')
+ parser.add_argument('--num_envs', type=int, default=1)
+ parser.add_argument('--n_contacts', type=int, default=4)
+ parser.add_argument('--cluster_dt', type=float, default=0.03, help='dt at which the control cluster runs')
+ parser.add_argument('--dmpdir', type=str, help='directory where data is dumped', default="/root/aux_data")
+ parser.add_argument('--remote_stepping',action='store_true',
+ help='Whether to use remote stepping for cluster triggering (to be set during training)')
+
+ # Replacing argparse.BooleanOptionalAction with 'store_true' and 'store_false' for compatibility with Python 3.8
+ parser.add_argument('--use_gpu',action='store_true', help='Whether to use gpu simulation')
+
+ parser.add_argument('--enable_debug',action='store_true', help='Whether to enable debug mode (may introduce significant overhead)')
+
+ parser.add_argument('--headless',action='store_true', help='Whether to run simulation in headless mode')
+
+ parser.add_argument('--verbose',action='store_true', help='Enable verbose mode')
+
+ parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="")
+ parser.add_argument('--timeout_ms', type=int, help='connection timeout after which the script self-terminates', default=60000)
+ parser.add_argument('--physics_dt', type=float, default=5e-4, help='')
+
+ parser.add_argument('--use_custom_jnt_imp',action='store_true',
+ help='Whether to override the default PD controller with a custom one')
+
+ parser.add_argument('--diff_vels',action='store_true',
+ help='Whether to obtain velocities by differentiation or not')
+
+ parser.add_argument('--init_timesteps', type=int, help='initialization timesteps', default=None)
+ parser.add_argument('--seed', type=int, help='seed', default=0)
+
+ parser.add_argument('--custom_args_names', nargs='+', default=None,
+ help='list of custom arguments names')
+ parser.add_argument('--custom_args_vals', nargs='+', default=None,
+ help='list of custom arguments values')
+ parser.add_argument('--custom_args_dtype', nargs='+', default=None,
+ help='list of custom arguments data types')
+
+ parser.add_argument('--world_iface_fname', type=str,
+ default="aug_mpc_envs.world_interfaces.isaac_world_interface",
+ help="world interface file import pattern (without extension)")
+
+ args = parser.parse_args()
+
+ deterministic_run(seed=args.seed, torch_det_algos=False)
+
+ default_init_duration=3.0 # [s]
+ default_init_tsteps=int(default_init_duration/args.physics_dt)
+ init_tsteps=args.init_timesteps
+ if init_tsteps is None:
+ init_tsteps=default_init_tsteps
+ # Ensure custom_args_names, custom_args_vals, and custom_args_dtype have the same length
+ custom_opt = generate_custom_arg_dict(args=args)
+
+ Journal.log("launch_world_interface.py",
+ "",
+ f"Will warmup world interface for {default_init_duration}s ({default_init_tsteps} physics steps)",
+ LogType.STAT)
+
+ robot_names = [args.robot_name]
+ robot_urdf_paths = [args.urdf_path]
+ robot_srdf_paths = [args.srdf_path]
+ control_clust_dts = [float(args.cluster_dt)]
+ use_remote_stepping = [args.remote_stepping]
+ n_contacts = [args.n_contacts]
+ jnt_imp_config_paths = [args.jnt_imp_config_path]
+ num_envs = args.num_envs
+ control_clust_dt = args.cluster_dt # [s]. Dt at which RHC controllers run
+ headless = args.headless
+
+ # simulation parameters
+ remote_env_params = {}
+ remote_env_params["physics_dt"] = args.physics_dt # physics_dt?
+ remote_env_params["n_envs"] = num_envs
+ remote_env_params["use_gpu"] = args.use_gpu
+ remote_env_params["substepping_dt"] = control_clust_dts[0]
+ remote_env_params["headless"] = headless
+ remote_env_params["debug_enabled"] = args.enable_debug
+ remote_env_params["seed"] = args.seed
+ remote_env_params.update(custom_opt)
+ # sim info to be broadcasted on shared memory
+ # adding some data to dict for debugging
+
+ shared_sim_infos = []
+ for i in range(len(robot_names)):
+ shared_sim_infos.append(SharedEnvInfo(
+ namespace=robot_names[i],
+ is_server=True,
+ env_params_dict=remote_env_params,
+ verbose=True,
+ vlevel=VLevel.V2,
+ force_reconnection=True))
+ shared_sim_infos[i].run()
+
+ world_module=importlib.import_module(args.world_iface_fname)
+ classes_in_module = [name for name, obj in inspect.getmembers(world_module, inspect.isclass)
+ if obj.__module__ == world_module.__name__]
+ if len(classes_in_module) == 1:
+ cluster_classname=classes_in_module[0]
+ WorldInterface = getattr(world_module, cluster_classname)
+ else:
+ class_list_str = ", ".join(classes_in_module)
+ Journal.log("launch_world_interface.py",
+ "",
+ f"Found more than one class in world file {args.world_iface_fname}. Found: {class_list_str}",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ exit()
+
+ world_interface = WorldInterface(robot_names=robot_names,
+ robot_urdf_paths=robot_urdf_paths,
+ robot_srdf_paths=robot_srdf_paths,
+ cluster_dt=control_clust_dts,
+ jnt_imp_config_paths=jnt_imp_config_paths,
+ n_contacts=n_contacts,
+ use_remote_stepping=use_remote_stepping,
+ name=classes_in_module[0],
+ num_envs=num_envs,
+ debug=args.enable_debug,
+ verbose=args.verbose,
+ vlevel=VLevel.V2,
+ n_init_step=init_tsteps,
+ timeout_ms=args.timeout_ms,
+ env_opts=remote_env_params,
+ use_gpu=args.use_gpu,
+ override_low_lev_controller=args.use_custom_jnt_imp) # create environment
+ # reset_ok=world_interface.reset(reset_sim=True)
+ # if not reset_ok:
+ # world_interface.close()
+ # exit()
+
+ rt_factor = RtFactor(dt_nom=world_interface.physics_dt(),
+ window_size=100)
+
+ while True:
+
+ if rt_factor.reset_due():
+ rt_factor.reset()
+
+ step_ok=world_interface.step()
+
+ if not step_ok:
+ break
+
+ rt_factor.update()
+
+ for i in range(len(robot_names)):
+ robot_name=robot_names[i]
+ n_steps = world_interface.cluster_sim_step_counters[robot_name]
+ sol_counter = world_interface.cluster_servers[robot_name].solution_counter()
+ trigger_counter = world_interface.cluster_servers[robot_name].trigger_counter()
+ shared_sim_infos[i].write(dyn_info_name=["sim_rt_factor",
+ "total_rt_factor",
+ "env_stepping_dt",
+ "world_stepping_dt",
+ "time_to_get_states_from_env",
+ "cluster_state_update_dt",
+ "cluster_sol_time",
+ "n_sim_steps",
+ "n_cluster_trigger_steps",
+ "n_cluster_sol_steps",
+ "sim_time",
+ "cluster_time"],
+ val=[rt_factor.get(),
+ rt_factor.get() * num_envs,
+ rt_factor.get_avrg_step_time(),
+ world_interface.debug_data["time_to_step_world"],
+ world_interface.debug_data["time_to_get_states_from_env"],
+ world_interface.debug_data["cluster_state_update_dt"][robot_name],
+ world_interface.debug_data["cluster_sol_time"][robot_name],
+ n_steps,
+ trigger_counter,
+ sol_counter,
+ world_interface.debug_data["sim_time"][robot_name],
+ sol_counter*world_interface.cluster_servers[robot_name].cluster_dt()
+ ])
+
+ world_interface.close()
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/__init__.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/augmpc_cluster_client.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/augmpc_cluster_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d88c7b914b4bb37297e0371dfd1acbd7eafee04
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/augmpc_cluster_client.py
@@ -0,0 +1,123 @@
+from mpc_hive.cluster_client.control_cluster_client import ControlClusterClient
+from aug_mpc.utils.xrdf_gen import generate_srdf, generate_urdf
+from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds
+from aug_mpc.utils.custom_arg_parsing import extract_custom_xacro_args, merge_xacro_cmds
+
+from EigenIPC.PyEigenIPC import Journal, LogType
+
+from typing import List, Dict
+
+import os
+
+from abc import abstractmethod
+
+class AugMpcClusterClient(ControlClusterClient):
+
+ def _import_aux_libs(self):
+ super()._import_aux_libs()
+
+ def __init__(self,
+ namespace: str,
+ urdf_xacro_path: str,
+ srdf_xacro_path: str,
+ cluster_size: int,
+ set_affinity: bool = False,
+ use_mp_fork: bool = False,
+ isolated_cores_only: bool = False,
+ core_ids_override_list: List[int] = None,
+ verbose: bool = False,
+ debug: bool = False,
+ codegen_base_dirname: str = "CodeGen",
+ base_dump_dir: str = "/tmp",
+ codegen_override: str = None,
+ custom_opts: Dict = {}):
+
+ self._base_dump_dir = base_dump_dir
+
+ self._temp_path = base_dump_dir + "/" + f"{self.__class__.__name__}" + f"_{namespace}"
+
+ self._codegen_base_dirname = codegen_base_dirname
+ self._codegen_basedir = self._temp_path + "/" + self._codegen_base_dirname
+
+ self._codegen_override = codegen_override # can be used to manually override
+ # the default codegen dir
+
+ if not os.path.exists(self._temp_path):
+ os.makedirs(self._temp_path)
+ if not os.path.exists(self._codegen_basedir):
+ os.makedirs(self._codegen_basedir)
+
+ self._urdf_xacro_path = urdf_xacro_path
+ self._srdf_xacro_path = srdf_xacro_path
+ self._urdf_path=""
+ self._srdf_path=""
+
+ super().__init__(namespace = namespace,
+ cluster_size=cluster_size,
+ isolated_cores_only = isolated_cores_only,
+ set_affinity = set_affinity,
+ use_mp_fork = use_mp_fork,
+ core_ids_override_list = core_ids_override_list,
+ verbose = verbose,
+ debug = debug,
+ custom_opts=custom_opts)
+ self._generate_srdf(namespace=namespace)
+
+ self._generate_urdf(namespace=namespace)
+
+
+ def codegen_dir(self):
+
+ return self._codegen_basedir
+
+ def codegen_dir_override(self):
+
+ return self._codegen_override
+
+ def _generate_srdf(self,namespace:str):
+
+ custom_xacro_args=extract_custom_xacro_args(self._custom_opts)
+ cmds=merge_xacro_cmds(prev_cmds=self._xrdf_cmds(),
+ new_cmds=custom_xacro_args)
+
+ # call _xrdf_cmds_override in case some cmds need to be overridden
+ override_cmds=self._xrdf_cmds_override()
+ cmds=merge_xacro_cmds(prev_cmds=cmds,
+ new_cmds=override_cmds)
+
+ self._srdf_path=generate_srdf(robot_name=namespace,
+ xacro_path=self._srdf_xacro_path,
+ dump_path=self._temp_path,
+ xrdf_cmds=cmds)
+
+ def _generate_urdf(self,namespace:str):
+
+ custom_xacro_args=extract_custom_xacro_args(self._custom_opts)
+ cmds=merge_xacro_cmds(prev_cmds=self._xrdf_cmds(),
+ new_cmds=custom_xacro_args)
+
+ # call _xrdf_cmds_override in case some cmds need to be overridden
+ override_cmds=self._xrdf_cmds_override()
+ cmds=merge_xacro_cmds(prev_cmds=cmds,
+ new_cmds=override_cmds)
+
+ self._urdf_path=generate_urdf(robot_name=namespace,
+ xacro_path=self._urdf_xacro_path,
+ dump_path=self._temp_path,
+ xrdf_cmds=cmds)
+
+ @abstractmethod
+ def _xrdf_cmds(self):
+
+        # to be implemented by the child class (
+ # for an example have a look at utils/centauro_xrdf_gen.py)
+
+ pass
+
+ def _xrdf_cmds_override(self):
+
+        # to be overridden by the child class
+
+ to_be_overridden = ["dummy_cmd:=true"]
+
+ return to_be_overridden
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/augmpc_cluster_server.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/augmpc_cluster_server.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea5cf7226db1ae834b7bb599c33b426022fcc25d
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/augmpc_cluster_server.py
@@ -0,0 +1,43 @@
+from mpc_hive.cluster_server.control_cluster_server import ControlClusterServer
+from typing import List
+from EigenIPC.PyEigenIPC import VLevel
+
+class AugMpcClusterServer(ControlClusterServer):
+
+ def __init__(self,
+ robot_name: str,
+ cluster_size: int,
+ cluster_dt: float,
+ control_dt: float,
+ jnt_names: List[str],
+ n_contacts: int,
+ contact_linknames: List[str] = None,
+ verbose: bool = False,
+ vlevel: VLevel = VLevel.V1,
+ debug: bool = False,
+ use_gpu: bool = True,
+ force_reconnection: bool = True,
+ timeout_ms: int = 60000,
+ enable_height_sensor: bool = False,
+ height_grid_size: int = None,
+ height_grid_resolution: float = None):
+
+ self.robot_name = robot_name
+
+ super().__init__(
+ namespace=self.robot_name,
+ cluster_size=cluster_size,
+ cluster_dt=cluster_dt,
+ control_dt=control_dt,
+ jnt_names=jnt_names,
+ n_contacts = n_contacts,
+ contact_linknames = contact_linknames,
+ verbose=verbose,
+ vlevel=vlevel,
+ debug=debug,
+ use_gpu=use_gpu,
+ force_reconnection=force_reconnection,
+ timeout_ms=timeout_ms,
+ enable_height_sensor=enable_height_sensor,
+ height_grid_size=height_grid_size,
+ height_grid_resolution=height_grid_resolution)
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/__init__.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/config/rhc_horizon_config.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/config/rhc_horizon_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db15baee95a0dcc257d9c1ccb788a7aabdb6e456
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/config/rhc_horizon_config.yaml
@@ -0,0 +1,200 @@
+# A dummy example of a rhc controller configuration built on top of horizon
+# and iLQR
+
+solver:
+ type: ilqr
+ ilqr.tol: 0.01
+ ilqr.constr_viol_tol: 0.01
+ ilqr.suppress_all_output: 'yes'
+ ilqr.codegen_enabled: true
+# ilqr.codegen_workdir: /tmp/tyhio
+ ilqr.enable_gn: true
+ ilqr.hxx_reg_base: 0.0
+ ilqr.n_threads: 0
+ print_time: 0
+
+constraints:
+ - ball_1_contact
+ - ball_2_contact
+ - ball_3_contact
+ - ball_4_contact
+ - z_ball_1
+ - z_ball_2
+ - z_ball_3
+ - z_ball_4
+
+costs:
+ - joint_regularization
+ - joint_posture
+ - base_position
+ - base_orientation
+
+.define:
+ - &w_base_pos 10
+ - &w_base_ori 1
+ - &w_ball_z 1
+ # - &w_postural 0.0001
+ - &w_jnt_v_reg 0.01
+ - &w_jnt_a_reg 0.001
+ - &w_jnt_f_reg 0.0001
+ - &wheel_radius 0.124
+
+base_position:
+ type: Cartesian
+ distal_link: base_link
+ indices: [0, 1, 2]
+ nodes: ${N}
+ weight: *w_base_pos
+
+base_orientation:
+ type: Cartesian
+ distal_link: base_link
+ indices: [3, 4, 5]
+ nodes: ${N}
+ weight: *w_base_ori
+
+# ===============================
+
+rolling_contact_1:
+ type: Rolling
+ frame: wheel_1
+ radius: *wheel_radius
+
+rolling_contact_2:
+ type: Rolling
+ frame: wheel_2
+ radius: *wheel_radius
+
+rolling_contact_3:
+ type: Rolling
+ frame: wheel_3
+ radius: *wheel_radius
+
+rolling_contact_4:
+ type: Rolling
+ frame: wheel_4
+ radius: *wheel_radius
+
+# contact_1:
+# type: Cartesian
+# distal_link: ball_1
+# indices: [0, 1, 2]
+# cartesian_type: velocity
+
+# contact_2:
+# type: Cartesian
+# distal_link: ball_2
+# indices: [0, 1, 2]
+# cartesian_type: velocity
+
+# contact_3:
+# type: Cartesian
+# distal_link: ball_3
+# indices: [0, 1, 2]
+# cartesian_type: velocity
+
+# contact_4:
+# type: Cartesian
+# distal_link: ball_4
+# indices: [0, 1, 2]
+# cartesian_type: velocity
+
+# ==================================
+
+interaction_wheel_1:
+ type: VertexForce
+ frame: ball_1
+ fn_min: 10.0
+ enable_fc: true
+ friction_coeff: 0.5
+ vertex_frames:
+ - wheel_1
+
+interaction_wheel_2:
+ type: VertexForce
+ frame: ball_2
+ fn_min: 10.0
+ enable_fc: true
+ friction_coeff: 0.5
+ vertex_frames:
+ - wheel_2
+
+interaction_wheel_3:
+ type: VertexForce
+ frame: ball_3
+ fn_min: 10.0
+ enable_fc: true
+ friction_coeff: 0.5
+ vertex_frames:
+ - wheel_3
+
+interaction_wheel_4:
+ type: VertexForce
+ frame: ball_4
+ fn_min: 10.0
+ enable_fc: true
+ friction_coeff: 0.5
+ vertex_frames:
+ - wheel_4
+
+ball_1_contact:
+ type: Contact
+ subtask: [interaction_wheel_1, rolling_contact_1]
+
+ball_2_contact:
+ type: Contact
+ subtask: [interaction_wheel_2, rolling_contact_2]
+
+ball_3_contact:
+ type: Contact
+ subtask: [interaction_wheel_3, rolling_contact_3]
+
+ball_4_contact:
+ type: Contact
+ subtask: [interaction_wheel_4, rolling_contact_4]
+
+# joint_posture:
+# type: Postural
+# weight: *w_postural
+# indices: [0, 1, 2,
+# 4, 5, 6,
+# 8, 9, 10,
+# 12, 13, 14]
+# nodes: ${range(N-8, N)}
+
+# TODO: wrong, as the order COUNTS. If I add the contacts after the joint regularization, they won't get considered.
+joint_regularization:
+ type: Regularization
+ nodes: all # maybe not on first nodes??
+ weight:
+ velocity: *w_jnt_v_reg
+ acceleration: *w_jnt_a_reg
+ force: *w_jnt_f_reg
+
+z_ball_1:
+ type: Cartesian
+ distal_link: ball_1
+ indices: [2]
+ cartesian_type: position
+ weight: *w_ball_z
+
+z_ball_2:
+ type: Cartesian
+ distal_link: ball_2
+ indices: [2]
+ cartesian_type: position
+ weight: *w_ball_z
+
+z_ball_3:
+ type: Cartesian
+ distal_link: ball_3
+ indices: [2]
+ cartesian_type: position
+ weight: *w_ball_z
+
+z_ball_4:
+ type: Cartesian
+ distal_link: ball_4
+ indices: [2]
+ cartesian_type: position
+ weight: *w_ball_z
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/gait_manager.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/gait_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..448a35a2251bef0c51e573d6b32d93fcb2e7d9ce
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/gait_manager.py
@@ -0,0 +1,566 @@
+import numpy as np
+
+from aug_mpc.controllers.rhc.horizon_based.horizon_imports import *
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import Journal, LogType
+
+from typing import Dict
+
+class GaitManager:
+
+ def __init__(self,
+ task_interface: TaskInterface,
+ phase_manager: pymanager.PhaseManager,
+ injection_node: int = None,
+ keep_yaw_vert: bool = False,
+ yaw_vertical_weight: float = None,
+ vertical_landing: bool = False,
+ landing_vert_weight: float = None,
+ phase_force_reg: float = None,
+ flight_duration: int = 15,
+ post_flight_stance: int = 3,
+ step_height: float = 0.1,
+ dh: float = 0.0,
+ custom_opts: Dict = {}):
+
+ self._custom_opts=custom_opts
+
+ self._is_open_loop=False
+ if "is_open_loop" in self._custom_opts:
+ self._is_open_loop=self._custom_opts["is_open_loop"]
+
+ self.task_interface = task_interface
+ self._n_nodes_prb=self.task_interface.prb.getNNodes()
+
+ self._phase_manager = phase_manager
+ self._model=self.task_interface.model
+ self._q0=self._model.q0
+ self._kin_dyn=self.task_interface.model.kd
+
+ # phase weights and regs
+ self._keep_yaw_vert=keep_yaw_vert
+ self._yaw_vertical_weight=yaw_vertical_weight
+ self._vertical_landing=vertical_landing
+ self._landing_vert_weight=landing_vert_weight
+ self._phase_force_reg=phase_force_reg
+ self._total_weight = np.atleast_2d(np.array([0, 0, self._kin_dyn.mass() * 9.81])).T
+
+ self._f_reg_ref={}
+
+ # flight parameters
+ self._post_flight_stance=post_flight_stance
+ self._flight_info_now=None
+ self._flight_duration_max=self._n_nodes_prb-(injection_node+1)
+ self._flight_duration_min=3
+ self._flight_duration_default=flight_duration
+ # apex bounds/defaults
+ self._step_height_default=step_height
+ self._step_height_min=0.0
+ self._step_height_max=0.5
+ # end height bounds/defaults
+ self._dh_default=dh
+ self._dh_min=0.0
+ self._dh_max=0.5
+ # landing dx, dy bounds/defaults
+ self._land_dx_default=0.0
+ self._land_dx_min=-0.5
+ self._land_dx_max=0.5
+ self._land_dy_default=0.0
+ self._land_dy_min=-0.5
+ self._land_dy_max=0.5
+
+ # timeline data
+ self._contact_timelines = dict()
+ self.timeline_names=[]
+
+ self._flight_phases = {}
+ self._touchdown_phases = {}
+ self._contact_phases = {}
+ self._fk_contacts = {}
+ self._fkd_contacts = {}
+ self._f_reg_ref = {}
+
+ # reference traj
+ self._tg = trajectoryGenerator.TrajectoryGenerator()
+ self._traj_der= [None, 0, 0]
+ self._traj_second_der=[None, 0, 0]
+ self._third_traj_der=[None, None, 0]
+
+ self._ref_trjs = {}
+ self._ref_vtrjs = {}
+
+ if injection_node is None:
+ self._injection_node = round(self.task_interface.prb.getNNodes()/2.0)
+ else:
+ self._injection_node = injection_node
+
+ self._init_contact_timelines()
+
+ self._reset_contact_timelines()
+
+    def _init_contact_timelines(self):
+        """Create one timeline per contact with stance/flight (and optional touchdown)
+        phases, per-vertex force regularization residuals, and pos/vel reference
+        trajectory buffers, then allocate the flight-info array.
+        """
+        short_stance_duration=1
+        flight_phase_short_duration=1
+
+        self.n_contacts=len(self._model.cmap.keys())
+        self._dt=float(self.task_interface.prb.getDt())
+
+        # contact name -> slot index inside the flight-info buffer
+        self._name_to_idx_map={}
+
+        j=0
+        for contact in self._model.cmap.keys():
+
+            # FK helpers (position and frame velocity) for this contact frame
+            self._fk_contacts[contact]=self._kin_dyn.fk(contact)
+            self._fkd_contacts[contact]=self._kin_dyn.frameVelocity(contact, self._model.kd_frame)
+            self.timeline_names.append(contact)
+            self._contact_timelines[contact]=self._phase_manager.createTimeline(f'{contact}_timeline')
+            # stances
+            self._contact_phases[contact] = self._contact_timelines[contact].createPhase(short_stance_duration,
+                f'stance_{contact}_short')
+
+            # the stance phase activates the contact task (must exist in the config)
+            if self.task_interface.getTask(f'{contact}') is not None:
+                self._contact_phases[contact].addItem(self.task_interface.getTask(f'{contact}'))
+            else:
+                Journal.log(self.__class__.__name__,
+                    "_init_contact_timelines",
+                    f"contact task {contact} not found",
+                    LogType.EXCEP,
+                    throw_when_excep=True)
+            # one force-regularization residual per vertex force of this contact,
+            # tracking a parameter reference (assigned via set_f_reg)
+            i=0
+            self._f_reg_ref[contact]=[]
+            for force in self._model.cmap[contact]:
+                f_ref=self.task_interface.prb.createParameter(name=f"{contact}_force_reg_f{i}_ref",
+                    dim=3)
+                force_reg=self.task_interface.prb.createResidual(f'{contact}_force_reg_f{i}', self._phase_force_reg * (force - f_ref),
+                    nodes=[])
+                self._f_reg_ref[contact].append(f_ref)
+                # NOTE(review): called once per vertex while _f_reg_ref is still
+                # growing; the final call (all refs present) sets the lasting values.
+                self.set_f_reg(contact_name=contact, scale=4)
+                self._contact_phases[contact].addCost(force_reg, nodes=list(range(0, short_stance_duration)))
+                i+=1
+
+            # flights
+            self._flight_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration,
+                f'flight_{contact}_short')
+
+            # sanity checks (z trajectory)
+            # NOTE(review): these flags are instance attributes overwritten on
+            # every loop iteration — after init they reflect only the LAST
+            # contact; assumes all contacts share the same task layout (confirm).
+            self._zpos_task_found=True
+            self._zvel_task_found=True
+            self._xypos_task_found=True
+            self._xyvel_task_found=True
+            if self.task_interface.getTask(f'z_{contact}') is None:
+                self._zpos_task_found=False
+            if self.task_interface.getTask(f'vz_{contact}') is None:
+                self._zvel_task_found=False
+            if self.task_interface.getTask(f'xy_{contact}') is None:
+                self._xypos_task_found=False
+            if self.task_interface.getTask(f'vxy_{contact}') is None:
+                self._xyvel_task_found=False
+            if (not self._zpos_task_found) and (not self._zvel_task_found):
+                Journal.log(self.__class__.__name__,
+                    "_init_contact_timelines",
+                    f"neither pos or vel task for contacts were found! Aborting.",
+                    LogType.EXCEP,
+                    throw_when_excep=True)
+            if (not self._zpos_task_found) and self._is_open_loop:
+                Journal.log(self.__class__.__name__,
+                    "_init_contact_timelines",
+                    f"Running in open loop, but no contact pos task found. Aborting.",
+                    LogType.EXCEP,
+                    throw_when_excep=True)
+            # NOTE(review): z pos is checked together with xy VEL (and z vel with
+            # xy POS) — presumably to forbid mixing position and velocity modes
+            # across axes; confirm this pairing is intended.
+            if self._zpos_task_found and self._xyvel_task_found:
+                Journal.log(self.__class__.__name__,
+                    "_init_contact_timelines",
+                    f"Both pos and vel task for contact {contact} found! This is not allowed, aborting.",
+                    LogType.EXCEP,
+                    throw_when_excep=True)
+            if self._zvel_task_found and self._xypos_task_found:
+                Journal.log(self.__class__.__name__,
+                    "_init_contact_timelines",
+                    f"Both pos and vel task for contact {contact} found! This is not allowed, aborting.",
+                    LogType.EXCEP,
+                    throw_when_excep=True)
+            if (not self._xypos_task_found) and (not self._xyvel_task_found):
+                Journal.log(self.__class__.__name__,
+                    "_init_contact_timelines",
+                    f"neither pos or vel task for contact {contact} xy were found! Will proceed without xy landing constraints.",
+                    LogType.WARN)
+            # if (not self._zvel_task_found) and (not self._is_open_loop):
+            #     Journal.log(self.__class__.__name__,
+            #         "_init_contact_timelines",
+            #         f"Running in closed loop, but contact vel task not found. Aborting",
+            #         LogType.EXCEP,
+            #         throw_when_excep=True)
+
+            self._ref_trjs[contact]=None
+            self._ref_vtrjs[contact]=None
+            self._touchdown_phases[contact]=None
+
+            if self._zpos_task_found: # we use pos trajectory
+                # 7 x n_nodes buffer; row 2 (z) is the swing height reference
+                self._ref_trjs[contact]=np.zeros(shape=[7, self.task_interface.prb.getNNodes()])
+                init_z_foot = self._fk_contacts[contact](q=self._q0)['ee_pos'].elements()[2]
+                if self._is_open_loop:
+                    self._ref_trjs[contact][2, :] = np.atleast_2d(init_z_foot)
+                else:
+                    self._ref_trjs[contact][2, :] = 0.0 # place foot at ground level initially ()
+
+                # z
+                self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'z_{contact}'),
+                    self._ref_trjs[contact][2, 0:1],
+                    nodes=list(range(0, flight_phase_short_duration)))
+
+                if self._xypos_task_found: # xy, we add a landing phase of unit duration to enforce landing pos costs
+
+                    self._touchdown_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration,
+                        f'touchdown_{contact}_short')
+
+                    self._touchdown_phases[contact].addItemReference(self.task_interface.getTask(f'xy_{contact}'),
+                        self._ref_trjs[contact][0:2, 0:1],
+                        nodes=list(range(0, short_stance_duration)))
+
+            else: # foot traj in velocity
+                # ref vel traj
+                self._ref_vtrjs[contact]=np.zeros(shape=[7, self.task_interface.prb.getNNodes()]) # allocate traj
+                # of max length eual to number of nodes
+                self._ref_vtrjs[contact][2, :] = np.atleast_2d(0)
+
+                # z
+                self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'vz_{contact}'),
+                    self._ref_vtrjs[contact][2, 0:1],
+                    nodes=list(range(0, flight_phase_short_duration)))
+
+                if self._xyvel_task_found: # xy (when in vel the xy vel is set on the whole flight phase)
+                    self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'vxy_{contact}'),
+                        self._ref_vtrjs[contact][0:2, 0:1],
+                        nodes=list(range(0, flight_phase_short_duration)))
+
+                if self._vertical_landing: # add touchdown phase for vertical landing
+                    self._touchdown_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration,
+                        f'touchdown_{contact}_short')
+
+            if self._vertical_landing and self._touchdown_phases[contact] is not None:
+                # penalize horizontal foot velocity at touchdown
+                v_xy=self._fkd_contacts[contact](q=self._model.q, qdot=self._model.v)['ee_vel_linear'][0:2]
+                vertical_landing=self.task_interface.prb.createResidual(f'{contact}_only_vert_v',
+                    self._landing_vert_weight * v_xy,
+                    nodes=[])
+                self._touchdown_phases[contact].addCost(vertical_landing, nodes=list(range(0, short_stance_duration)))
+
+            if self._keep_yaw_vert:
+                # keep ankle vertical over the whole horizon (can be useful with wheeled robots)
+                # NOTE(review): cost_ori is created but the addCost call below is
+                # commented out, so this residual is never scheduled — confirm.
+                c_ori = self._model.kd.fk(contact)(q=self._model.q)['ee_rot'][2, :]
+                cost_ori = self.task_interface.prb.createResidual(f'{contact}_ori', self._yaw_vertical_weight * (c_ori.T - np.array([0, 0, 1])))
+                # flight_phase.addCost(cost_ori, nodes=list(range(0, flight_duration+post_landing_stance)))
+
+            self._name_to_idx_map[contact]=j
+
+            j+=1
+
+        # current pos [c0, c1, ....], current length, nominal length, nom. apex, no. landing height, landing dx, landing dy (local world aligned)
+        self._flight_info_now=np.zeros(shape=(7*self.n_contacts))
+
+        self.update()
+
+    def _reset_contact_timelines(self):
+        """Restore default flight-info values for every contact and refill each
+        timeline with short stance phases until the horizon is full."""
+        for contact in self._model.cmap.keys():
+
+            idx=self._name_to_idx_map[contact]
+            # we follow same order as in shm for more efficient writing
+            # buffer layout (chunks of n_contacts):
+            # [pos | remaining dur | nominal dur | apex dh | end dh | land dx | land dy]
+            self._flight_info_now[idx]= -1.0 # pos [nodes]
+            self._flight_info_now[idx+1*self.n_contacts]= -1.0 # duration (remaining) [nodes]
+            self._flight_info_now[idx+2*self.n_contacts]=self._flight_duration_default # [nodes]
+            self._flight_info_now[idx+3*self.n_contacts]=self._step_height_default
+            self._flight_info_now[idx+4*self.n_contacts]=self._dh_default
+            self._flight_info_now[idx+5*self.n_contacts]=self._land_dx_default
+            self._flight_info_now[idx+6*self.n_contacts]=self._land_dy_default
+            # fill timeline with stances
+            contact_timeline=self._contact_timelines[contact]
+            contact_timeline.clear() # remove phases
+            short_stance_phase = contact_timeline.getRegisteredPhase(f'stance_{contact}_short')
+            while contact_timeline.getEmptyNodes() > 0:
+                contact_timeline.addPhase(short_stance_phase)
+
+        self.update()
+
+    def reset(self):
+        """Reset the task interface and re-initialize all contact timelines."""
+        # self.phase_manager.clear()
+        self.task_interface.reset()
+        self._reset_contact_timelines()
+
+    def set_f_reg(self,
+            contact_name,
+            scale: float = 4.0):
+        """Assign the force-regularization reference for one contact: the robot
+        weight split evenly over the contact's vertex forces and divided by scale.
+        """
+        f_refs=self._f_reg_ref[contact_name]
+        for force in f_refs:
+            # same reference for every vertex force of this contact
+            ref=self._total_weight/(scale*len(f_refs))
+            force.assign(ref)
+
+    # --- accessors for the per-contact flight info buffer ---
+    # self._flight_info_now layout (chunks of n_contacts):
+    # [pos | remaining dur | nominal dur | apex dh | end dh | land dx | land dy]
+
+    def set_flight_duration(self, contact_name, val: float):
+        # nominal flight duration [nodes]
+        self._flight_info_now[self._name_to_idx_map[contact_name]+2*self.n_contacts]=val
+
+    def get_flight_duration(self, contact_name):
+        return self._flight_info_now[self._name_to_idx_map[contact_name]+2*self.n_contacts]
+
+    def set_step_apexdh(self, contact_name, val: float):
+        # step apex height [m]
+        self._flight_info_now[self._name_to_idx_map[contact_name]+3*self.n_contacts]=val
+
+    def get_step_apexdh(self, contact_name):
+        return self._flight_info_now[self._name_to_idx_map[contact_name]+3*self.n_contacts]
+
+    def set_step_enddh(self, contact_name, val: float):
+        # landing height offset w.r.t. liftoff [m]
+        self._flight_info_now[self._name_to_idx_map[contact_name]+4*self.n_contacts]=val
+
+    def get_step_enddh(self, contact_name):
+        return self._flight_info_now[self._name_to_idx_map[contact_name]+4*self.n_contacts]
+
+    def get_step_landing_dx(self, contact_name):
+        return self._flight_info_now[self._name_to_idx_map[contact_name]+5*self.n_contacts]
+
+    def set_step_landing_dx(self, contact_name, val: float):
+        # landing x offset (horizontal frame) [m]
+        self._flight_info_now[self._name_to_idx_map[contact_name]+5*self.n_contacts]=val
+
+    def get_step_landing_dy(self, contact_name):
+        return self._flight_info_now[self._name_to_idx_map[contact_name]+6*self.n_contacts]
+
+    def set_step_landing_dy(self, contact_name, val: float):
+        # landing y offset (horizontal frame) [m]
+        self._flight_info_now[self._name_to_idx_map[contact_name]+6*self.n_contacts]=val
+
+    def add_stand(self, contact_name):
+        """Append one short stance phase at the end of the horizon, if room is left."""
+        # always add stand at the end of the horizon
+        timeline = self._contact_timelines[contact_name]
+        if timeline.getEmptyNodes() > 0:
+            timeline.addPhase(timeline.getRegisteredPhase(f'stance_{contact_name}_short'))
+
+ def add_flight(self, contact_name,
+ robot_q: np.ndarray):
+
+ timeline = self._contact_timelines[contact_name]
+
+ flights_on_horizon=self._contact_timelines[contact_name].getPhaseIdx(self._flight_phases[contact_name])
+
+ last_flight_idx=self._injection_node-1 # default to make things work
+ if not len(flights_on_horizon)==0: # some flight phases are there
+ last_flight_idx=flights_on_horizon[-1]+self._post_flight_stance
+
+ if last_flight_idx1:
+ Journal.log(self.__class__.__name__,
+ "add_flight",
+ f"Got flight duration {flight_duration_req} < 1!",
+ LogType.WARN,
+ throw_when_excep=True)
+
+ # process requests to ensure flight params are valid
+ # duration
+ if flight_duration_reqself._flight_duration_max:
+ flight_duration_req=self._flight_duration_max
+ # apex height
+ if flight_apex_reqself._step_height_max:
+ flight_apex_req=self._step_height_max
+ # landing height
+ if flight_enddh_reqself._dh_max:
+ flight_enddh_req=self._dh_max
+ # landing dx
+ if flight_land_dx_reqself._land_dx_max:
+ flight_land_dx_req=self._land_dx_max
+ # landing dy
+ if flight_land_dy_reqself._land_dy_max:
+ flight_land_dy_req=self._land_dy_max
+
+ land_dx_w = flight_land_dx_req
+ land_dy_w = flight_land_dy_req
+ if self._xypos_task_found or self._xyvel_task_found:
+ # landing dx/dy are specified in horizontal frame; rotate into world aligned frame
+ q_base = robot_q[3:7]
+ if q_base.ndim == 1:
+ q_base = q_base.reshape(-1, 1)
+ q_w = q_base[3, 0]
+ q_x = q_base[0, 0]
+ q_y = q_base[1, 0]
+ q_z = q_base[2, 0]
+ r11 = 1 - 2 * (q_y * q_y + q_z * q_z)
+ r21 = 2 * (q_x * q_y + q_z * q_w)
+ norm = np.hypot(r11, r21)
+ if norm > 0.0:
+ cos_yaw = r11 / norm
+ sin_yaw = r21 / norm
+ else:
+ cos_yaw = 1.0
+ sin_yaw = 0.
+
+ land_dx_w = flight_land_dx_req * cos_yaw - flight_land_dy_req * sin_yaw
+ land_dy_w = flight_land_dx_req * sin_yaw + flight_land_dy_req * cos_yaw
+
+ if self._ref_vtrjs[contact_name] is not None and \
+ self._ref_trjs[contact_name] is not None: # only allow one mode (pos/velocity traj)
+ Journal.log(self.__class__.__name__,
+ "add_flight",
+ f"Both pos and vel traj for contact {contact_name} are not None! This is not allowed, aborting.",
+ LogType.EXCEP,
+ throw_when_excep=True)
+
+ # inject pos traj if pos mode
+ if self._ref_trjs[contact_name] is not None:
+ # recompute trajectory online (just needed if using pos traj)
+ foot_pos=self._fk_contacts[contact_name](q=robot_q)['ee_pos'].elements()
+ starting_pos=foot_pos[2] # compute foot traj (local world aligned)
+ starting_x_pos=foot_pos[0]
+ starting_y_pos=foot_pos[1]
+ # starting_pos=0.0
+ self._ref_trjs[contact_name][2, 0:flight_duration_req]=np.atleast_2d(self._tg.from_derivatives(
+ flight_duration_req,
+ p_start=starting_pos,
+ p_goal=starting_pos+flight_enddh_req,
+ clearance=flight_apex_req,
+ derivatives=self._traj_der,
+ second_der=self._traj_second_der,
+ third_der=self._third_traj_der)
+ )
+ if self._xypos_task_found: # we use _ref_trjs to write xy pos references
+ self._ref_trjs[contact_name][0, -1]=starting_x_pos+land_dx_w
+ self._ref_trjs[contact_name][1, -1]=starting_y_pos+land_dy_w
+
+ for i in range(flight_duration_req):
+ res, phase_token_flight=timeline.addPhase(self._flight_phases[contact_name],
+ pos=self._injection_node+i,
+ absolute_position=True)
+ phase_token_flight.setItemReference(f'z_{contact_name}',
+ self._ref_trjs[contact_name][:, i])
+
+ if self._touchdown_phases[contact_name] is not None:
+ # add touchdown phase after flight
+ res, phase_token_touchdown=timeline.addPhase(self._touchdown_phases[contact_name],
+ pos=self._injection_node+flight_duration_req,
+ absolute_position=True)
+ if self._xypos_task_found:
+ phase_token_touchdown.setItemReference(f'xy_{contact_name}',
+ self._ref_trjs[contact_name][:, -1])
+
+ # inject vel traj if vel mode
+ if self._ref_vtrjs[contact_name] is not None:
+ self._ref_vtrjs[contact_name][2, 0:flight_duration_req]=np.atleast_2d(self._tg.derivative_of_trajectory(
+ flight_duration_req,
+ p_start=0.0,
+ p_goal=flight_enddh_req,
+ clearance=flight_apex_req,
+ derivatives=self._traj_der,
+ second_der=self._traj_second_der,
+ third_der=self._third_traj_der))
+ if self._xyvel_task_found: # compute vel reference using problem dt and flight length
+ flight_duration_sec=float(flight_duration_req)*self._dt
+ self._ref_vtrjs[contact_name][0, 0:flight_duration_req]=land_dx_w/flight_duration_sec
+ self._ref_vtrjs[contact_name][1, 0:flight_duration_req]=land_dy_w/flight_duration_sec
+
+ for i in range(flight_duration_req):
+ res, phase_token=timeline.addPhase(self._flight_phases[contact_name],
+ pos=self._injection_node+i,
+ absolute_position=True)
+ phase_token.setItemReference(f'vz_{contact_name}',
+ self._ref_vtrjs[contact_name][2:3, i:i+1])
+ if self._touchdown_phases[contact_name] is not None:
+ # add touchdown phase for forcing vertical landing
+ res, phase_token=timeline.addPhase(self._touchdown_phases[contact_name],
+ pos=self._injection_node+flight_duration_req,
+ absolute_position=True)
+
+ if timeline.getEmptyNodes() > 0: # fill empty nodes at the end of the horizon, if any, with stance
+ timeline.addPhase(timeline.getRegisteredPhase(f'stance_{contact_name}_short'))
+
+    def update(self):
+        """Propagate/shift all contact timelines through the phase manager."""
+        self._phase_manager.update()
+
+    def update_flight_info(self, timeline_name):
+        """Refresh flight position/duration info for one contact from its timeline.
+
+        Returns:
+            True when at least one flight phase is on the horizon (info written),
+            False otherwise (buffer left untouched).
+        """
+        # get current position and remaining duration of flight phases over the horizon for a single contact
+
+        # phase indexes over timeline
+        phase_idxs=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name])
+
+        if not len(phase_idxs)==0: # at least one flight phase on horizon -> read info from timeline
+
+            # all active phases on timeline
+            active_phases=self._contact_timelines[timeline_name].getActivePhases()
+
+            phase_idx_start=phase_idxs[0]
+            # active_nodes_start=active_phases[phase_idx_start].getActiveNodes()
+            pos_start=active_phases[phase_idx_start].getPosition()
+            # n_nodes_start=active_phases[phase_idx_start].getNNodes()
+
+            phase_idx_last=phase_idxs[-1] # just get info for last phase on the horizon
+            # active_nodes_last=active_phases[phase_idx_last].getActiveNodes()
+            pos_last=active_phases[phase_idx_last].getPosition()
+            # n_nodes_last=active_phases[phase_idx_last].getNNodes()
+
+            # write to info
+            # NOTE(review): the "duration" written here is pos_last - pos_start,
+            # i.e. the span between first and last flight phase start nodes —
+            # confirm this is intended when multiple separate flights coexist.
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+0*self.n_contacts]=pos_last
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+1*self.n_contacts]=pos_last - pos_start
+
+            return True
+
+        return False
+
+    def get_flight_info(self, timeline_name):
+        """Return (pos, remaining dur, nominal dur, apex dh, end dh, land dx, land dy) for one contact."""
+        return (self._flight_info_now[self._name_to_idx_map[timeline_name]+0*self.n_contacts],
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+1*self.n_contacts],
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+2*self.n_contacts],
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+3*self.n_contacts],
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+4*self.n_contacts],
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+5*self.n_contacts],
+            self._flight_info_now[self._name_to_idx_map[timeline_name]+6*self.n_contacts])
+
+    def get_flight_info_all(self):
+        """Return the raw flight-info buffer (shape (7*n_contacts,))."""
+        return self._flight_info_now
+
+    def set_ref_pos(self,
+            timeline_name:str,
+            ref_height: np.array = None,
+            threshold: float = 0.05):
+        """Drive one contact from an external height reference: inject a flight
+        when ref_height exceeds threshold, otherwise keep standing.
+        """
+        if ref_height is not None:
+            self._ref_trjs[timeline_name][2, :]=ref_height
+            if ref_height>threshold:
+                # FIXME(review): add_flight is defined as add_flight(contact_name,
+                # robot_q) — this keyword call would raise TypeError; confirm the
+                # intended signature before using this method.
+                self.add_flight(timeline_name=timeline_name)
+                this_flight_token_idx=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name])[-1]
+                active_phases=self._contact_timelines[timeline_name].getActivePhases()
+                active_phases[this_flight_token_idx].setItemReference(f'z_{timeline_name}',
+                    self._ref_trjs[timeline_name])
+            else:
+                # FIXME(review): add_stand's parameter is named contact_name, so
+                # the timeline_name keyword would raise TypeError — confirm.
+                self.add_stand(timeline_name=timeline_name)
+
+    def set_force_feedback(self,
+            timeline_name: str,
+            force_norm: float):
+        """Incomplete stub: apparently meant to react to a measured contact force norm.
+
+        NOTE(review): currently has no effect — force_norm is unused, the trailing
+        bare `first_flight` expression does nothing, and contact_tokens queries
+        getPhaseIdx on a phase object (self._contact_phases[...]) rather than on
+        the timeline; left untouched pending clarification.
+        """
+        flight_tokens=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name])
+        contact_tokens=self._contact_phases[timeline_name].getPhaseIdx(self._contact_phases[timeline_name])
+        if not len(flight_tokens)==0:
+            first_flight=flight_tokens[0]
+            first_flight
+
+    def check_horizon_full(self,
+            timeline_name):
+        """Log an exception (throwing) if the given timeline still has empty nodes."""
+        timeline = self._contact_timelines[timeline_name]
+        if timeline.getEmptyNodes() > 0:
+            error = f"Empty nodes detected over the horizon for timeline {timeline}! Make sure to fill the whole horizon with valid phases!!"
+            Journal.log(self.__class__.__name__,
+                "check_horizon_full",
+                error,
+                LogType.EXCEP,
+                throw_when_excep = True)
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports.py
new file mode 100644
index 0000000000000000000000000000000000000000..991636dd18679c11050f97d3c47dd7df2d97eb5e
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports.py
@@ -0,0 +1,18 @@
+# robot modeling and automatic differentiation
+import casadi_kin_dyn.py3casadi_kin_dyn as casadi_kin_dyn
+import casadi as cs
+
+# horizon stuff
+import horizon.utils.kin_dyn as kd
+from horizon.problem import Problem
+from horizon.rhc.model_description import FullModelInverseDynamics
+from horizon.rhc.taskInterface import TaskInterface
+from horizon.rhc.tasks.interactionTask import VertexContact
+from horizon.rhc.tasks.contactTask import ContactTask
+from horizon.utils import trajectoryGenerator, utils
+
+# phase managing
+import phase_manager.pymanager as pymanager
+import phase_manager.pyphase as pyphase
+import phase_manager.pytimeline as pytimeline
+
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports_glob.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports_glob.py
new file mode 100644
index 0000000000000000000000000000000000000000..679e7d460ab545b0bd2f741904b5ac8af893f897
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports_glob.py
@@ -0,0 +1,28 @@
+"""
+Dynamically import all necessary Horizon and related dependencies.
+This function is intended to be used within the import_child_lib method
+of a class, enabling the parent process to load all required libraries.
+"""
+def import_horizon_global():
+    """Import all Horizon / phase-manager dependencies into module globals.
+
+    Intended to be called inside a child process (e.g. from an import_child_lib
+    hook) so the names below become available at module scope after spawn/fork,
+    instead of being imported at file load time in the parent.
+    """
+    # Global imports to make modules accessible in child processes
+    global casadi_kin_dyn, cs, kd, Problem, FullModelInverseDynamics
+    global TaskInterface, VertexContact, ContactTask, trajectoryGenerator, utils
+    global pymanager, pyphase, pytimeline
+
+    # robot modeling and automatic differentiation
+    import casadi_kin_dyn.py3casadi_kin_dyn as casadi_kin_dyn
+    import casadi as cs
+
+    # horizon stuff
+    import horizon.utils.kin_dyn as kd
+    from horizon.problem import Problem
+    from horizon.rhc.model_description import FullModelInverseDynamics
+    from horizon.rhc.taskInterface import TaskInterface
+    from horizon.rhc.tasks.interactionTask import VertexContact
+    from horizon.rhc.tasks.contactTask import ContactTask
+    from horizon.utils import trajectoryGenerator, utils
+
+    # phase managing
+    import phase_manager.pymanager as pymanager
+    import phase_manager.pyphase as pyphase
+    import phase_manager.pytimeline as pytimeline
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cec4e610e97e3100c1468ee4e828fe2e64a28c4
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc.py
@@ -0,0 +1,1324 @@
+from mpc_hive.controllers.rhc import RHController
+
+from aug_mpc.controllers.rhc.horizon_based.horizon_imports import *
+
+from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc_refs import HybridQuadRhcRefs
+from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import Journal, LogType
+
+import numpy as np
+
+import os
+
+import time
+
+from typing import Dict, List
+
+class HybridQuadRhc(RHController):
+
+    def __init__(self,
+            srdf_path: str,
+            urdf_path: str,
+            config_path: str,
+            robot_name: str, # used for shared memory namespaces
+            codegen_dir: str,
+            n_nodes:float = 25,
+            injection_node:int = 10,
+            dt: float = 0.02,
+            max_solver_iter = 1, # defaults to rt-iteration
+            open_loop: bool = True,
+            close_loop_all: bool = False,
+            dtype = np.float32,
+            verbose = False,
+            debug = False,
+            refs_in_hor_frame = True,
+            timeout_ms: int = 60000,
+            custom_opts: Dict = {}):
+        """Hybrid quadruped receding-horizon controller built on Horizon.
+
+        Args:
+            srdf_path/urdf_path: robot description files (the urdf is read here).
+            config_path: Horizon YAML config used to build the problem.
+            robot_name: namespace used for shared memory.
+            codegen_dir: directory for solver code generation (created if missing).
+            n_nodes: number of horizon nodes.
+            injection_node: node where gait phases are injected.
+            dt: horizon discretization step [s].
+            max_solver_iter: max solver iterations (1 -> real-time iteration).
+            open_loop/close_loop_all: loop-closure flags; see the option
+                post-processing below for how they constrain custom_opts.
+            dtype: numpy dtype for internal buffers.
+            refs_in_hor_frame: whether refs are expressed in the horizontal frame.
+            timeout_ms: timeout forwarded to the base controller.
+            custom_opts: overrides merged over the default option dict below.
+        """
+        self._refs_in_hor_frame = refs_in_hor_frame
+
+        self._injection_node = injection_node
+
+        self._open_loop = open_loop
+        self._close_loop_all = close_loop_all
+
+        self._codegen_dir = codegen_dir
+        if not os.path.exists(self._codegen_dir):
+            os.makedirs(self._codegen_dir)
+        # else:
+        #     # Directory already exists, delete it and recreate
+        #     shutil.rmtree(self._codegen_dir)
+        #     os.makedirs(self._codegen_dir)
+
+        self.step_counter = 0
+        self.sol_counter = 0
+
+        self.max_solver_iter = max_solver_iter
+
+        # profiling timers (updated elsewhere during the control loop)
+        self._timer_start = time.perf_counter()
+        self._prb_update_time = time.perf_counter()
+        self._phase_shift_time = time.perf_counter()
+        self._task_ref_update_time = time.perf_counter()
+        self._rti_time = time.perf_counter()
+
+        self.robot_name = robot_name
+
+        self.config_path = config_path
+
+        self.urdf_path = urdf_path
+        # read urdf and srdf files
+        with open(self.urdf_path, 'r') as file:
+            self.urdf = file.read()
+        # neutral floating-base pose [x y z qx qy qz qw]
+        self._base_init = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0])
+
+        self._c_timelines = dict()
+        self._f_reg_timelines = dict()
+
+        # default options; overridden by custom_opts right below
+        self._custom_opts={"replace_continuous_joints": False,
+            "use_force_feedback": False,
+            "lin_a_feedback": False,
+            "is_open_loop": self._open_loop, # fully open (just for db)
+            "fully_closed": False, # closed loop with full feedback (just for db)
+            "closed_partial": False, # closed loop with partial feedback
+            "adaptive_is": True, # closed loop with adaptation
+            "estimate_v_root": False, # when adaptive_is or closed_partial, estimate vbase
+            "alpha_from_outside": False, # alpha set ext. from shared memory
+            "alpha_half": 1.0,
+            "only_vel_wheels": True, # whether wheels (if present) are just vel controlled
+            "use_jnt_v_feedback": False
+            }
+
+        self._custom_opts.update(custom_opts)
+
+        self._alpha_half=self._custom_opts["alpha_half"]
+
+        # enforce mutual exclusivity between the loop-closure modes
+        # (priority: fully_closed > closed_partial > adaptive_is)
+        if self._open_loop:
+            self._custom_opts["fully_closed"]=False
+            self._custom_opts["adaptive_is"]=False
+            self._custom_opts["closed_partial"]=False
+        else:
+            self._custom_opts["is_open_loop"]=False
+            if self._custom_opts["fully_closed"]:
+                self._custom_opts["adaptive_is"]=False
+                self._custom_opts["closed_partial"]=False
+                self._custom_opts["lin_a_feedback"]=False
+            if self._custom_opts["closed_partial"]:
+                self._custom_opts["adaptive_is"]=False
+                self._custom_opts["fully_closed"]=False
+                self._custom_opts["lin_a_feedback"]=False
+            if self._custom_opts["adaptive_is"]:
+                self._custom_opts["closed_partial"]=False
+                self._custom_opts["fully_closed"]=False
+
+        super().__init__(srdf_path=srdf_path,
+            n_nodes=n_nodes,
+            dt=dt,
+            namespace=self.robot_name,
+            dtype=dtype,
+            verbose=verbose,
+            debug=debug,
+            timeout_ms=timeout_ms)
+
+        self._rhc_fpaths.append(self.config_path)
+
+    def _config_override(self):
+        """Hook for subclasses to adjust config (e.g. self.config_path) before problem init; no-op here."""
+        pass
+
+    def _post_problem_init(self):
+        """Allocate solver diagnostics buffers and feedback-blending coefficients
+        once the horizon problem has been built."""
+        self.rhc_costs={}
+        self.rhc_constr={}
+
+        self._fail_idx_scale=0.0
+        # sliding window (one horizon long) for the explosion/failure index
+        self._expl_idx_window_size=int(1*self._n_nodes)
+        self._explosion_idx_buffer=np.zeros((1,self._expl_idx_window_size))
+        self._expl_idx_counter=0
+        self._expl_idx_buffer_counter=0
+
+        # node used as the "predicted" state (last node of the horizon)
+        self._pred_node_idx=self._n_nodes-1
+
+        self._nq=self.nq()
+        self._nq_jnts=self._nq-7 # assuming floating base
+        self._nv=self.nv()
+        self._nv_jnts=self._nv-6
+
+        # per-component blending coefficients between measured and predicted state
+        self._alphas_q_root=np.zeros((7, 1), dtype=self._dtype)
+        self._alphas_q_jnts=np.zeros((self._nq_jnts, 1), dtype=self._dtype)
+        self._alphas_twist_root=np.zeros((6, 1), dtype=self._dtype)
+        self._alphas_v_jnts=np.zeros((self._nv_jnts, 1), dtype=self._dtype)
+        self._alphas_a=np.zeros((self._nv, 1), dtype=self._dtype)
+        self._alphas_q_root[:, :]=1.0 # default to all open
+        self._alphas_q_jnts[:, :]=1.0
+        self._alphas_twist_root[:, :]=1.0
+        self._alphas_v_jnts[:, :]=1.0
+        self._alphas_a[:, :]=1.0
+
+ def _init_problem(self,
+ fixed_jnt_patterns: List[str] = None,
+ wheels_patterns: List[str] = None,
+ foot_linkname: str = None,
+ flight_duration: int = 10,
+ post_flight_stance: int = 3,
+ step_height: float = 0.12,
+ keep_yaw_vert: bool = False,
+ yaw_vertical_weight: float = 2.0,
+ vertical_landing: bool = False,
+ vertical_land_weight: float = 1.0,
+ phase_force_reg: float = 1e-2,
+ vel_bounds_weight: float = 1.0):
+
+ self._fixed_jnt_patterns=fixed_jnt_patterns
+
+ self._config_override()
+
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f" Will use horizon config file at {self.config_path}",
+ LogType.INFO,
+ throw_when_excep=True)
+
+ self._vel_bounds_weight=vel_bounds_weight
+ self._phase_force_reg=phase_force_reg
+ self._yaw_vertical_weight=yaw_vertical_weight
+ self._vertical_land_weight=vertical_land_weight
+ # overrides parent
+ self._prb = Problem(self._n_intervals,
+ receding=True,
+ casadi_type=cs.SX)
+ self._prb.setDt(self._dt)
+
+ if "replace_continuous_joints" in self._custom_opts:
+ # continous joints are parametrized in So2
+ if self._custom_opts["replace_continuous_joints"]:
+ self.urdf = self.urdf.replace('continuous', 'revolute')
+ else:
+ self.urdf = self.urdf.replace('continuous', 'revolute')
+
+ self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf) # used for getting joint names
+ self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names())
+
+ self._init_robot_homer()
+
+ # handle fixed joints
+ fixed_joint_map={}
+ if self._fixed_jnt_patterns is not None:
+ for jnt_name in self._get_robot_jnt_names():
+ for fixed_jnt_pattern in self._fixed_jnt_patterns:
+ if fixed_jnt_pattern in jnt_name:
+ fixed_joint_map.update({f"{jnt_name}":
+ self._homer.get_homing_val(jnt_name=jnt_name)})
+ break # do not search for other pattern matches
+
+ if not len(fixed_joint_map)==0: # we need to recreate kin dyn and homers
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f"Will fix following joints: \n{str(fixed_joint_map)}",
+ LogType.INFO,
+ throw_when_excep=True)
+ # with the fixed joint map
+ self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf,fixed_joints=fixed_joint_map)
+ # assign again controlled joints names
+ self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names())
+ # updated robot homer for controlled joints
+ self._init_robot_homer()
+
+ # handle continuous joints (need to change homing and retrieve
+ # cont jnts indexes) and homing
+ self._continuous_joints=self._get_continuous_jnt_names()
+ # reduced
+ self._continuous_joints_idxs=[]
+ self._continuous_joints_idxs_cos=[]
+ self._continuous_joints_idxs_sin=[]
+ self._continuous_joints_idxs_red=[]
+ self._rev_joints_idxs=[]
+ self._rev_joints_idxs_red=[]
+ # qfull
+ self._continuous_joints_idxs_qfull=[]
+ self._continuous_joints_idxs_cos_qfull=[]
+ self._continuous_joints_idxs_sin_qfull=[]
+ self._continuous_joints_idxs_red_qfull=[]
+ self._rev_joints_idxs_qfull=[]
+ self._rev_joints_idxs_red_qfull=[]
+ jnt_homing=[""]*(len(self._homer.get_homing())+len(self._continuous_joints))
+ jnt_names=self._get_robot_jnt_names()
+ for i in range(len(jnt_names)):
+ jnt=jnt_names[i]
+ index=self._get_jnt_id(jnt)# accounting for floating joint
+ homing_idx=index-7 # homing is only for actuated joints
+ homing_value=self._homer.get_homing_val(jnt)
+ if jnt in self._continuous_joints:
+ jnt_homing[homing_idx]=np.cos(homing_value).item()
+ jnt_homing[homing_idx+1]=np.sin(homing_value).item()
+ # just actuated joints
+ self._continuous_joints_idxs.append(homing_idx) # cos
+ self._continuous_joints_idxs.append(homing_idx+1) # sin
+ self._continuous_joints_idxs_cos.append(homing_idx)
+ self._continuous_joints_idxs_sin.append(homing_idx+1)
+ self._continuous_joints_idxs_red.append(i)
+ # q full
+ self._continuous_joints_idxs_qfull.append(index) # cos
+ self._continuous_joints_idxs_qfull.append(index+1) # sin
+ self._continuous_joints_idxs_cos_qfull.append(index)
+ self._continuous_joints_idxs_sin_qfull.append(index+1)
+ self._continuous_joints_idxs_red_qfull.append(i+7)
+ else:
+ jnt_homing[homing_idx]=homing_value
+ # just actuated joints
+ self._rev_joints_idxs.append(homing_idx)
+ self._rev_joints_idxs_red.append(i)
+ # q full
+ self._rev_joints_idxs_qfull.append(index)
+ self._rev_joints_idxs_red_qfull.append(i+7)
+
+ self._jnts_q_reduced=None
+ if not len(self._continuous_joints)==0:
+ cont_joints=", ".join(self._continuous_joints)
+
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f"The following continuous joints were found: \n{cont_joints}",
+ LogType.INFO,
+ throw_when_excep=True)
+ # preallocating data
+ self._jnts_q_reduced=np.zeros((1,self.nv()-6),dtype=self._dtype)
+ self._jnts_q_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype)
+ self._full_q_reduced=np.zeros((7+len(jnt_names), self._n_nodes),dtype=self._dtype)
+ self._jnts_q_delta_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype)
+ else:
+ self._custom_opts["replace_continuous_joints"]=True
+ Journal.log(self.__class__.__name__,
+ "_init_problem",
+ f"No continuous joints were found.",
+ LogType.INFO,
+ throw_when_excep=True)
+
+ # retrieve wheels indexes (not considering continuous joints,
+ # ok just for v, eff, etc..)
+ self._wheel_patterns=wheels_patterns
+ self._wheels_idxs_v=self._get_wheels_jnt_v_idxs(wheel_patterns=self._wheel_patterns)
+ self._f0 = [0, 0, self._kin_dyn.mass()/4*9.81]
+
+ # we can create an init for the base
+ init = self._base_init.tolist() + jnt_homing
+
+ if foot_linkname is not None:
+ FK = self._kin_dyn.fk(foot_linkname) # just to get robot reference height
+ ground_level = FK(q=init)['ee_pos']
+ self._base_init[2] = -ground_level[2] # override init
+
+ self._model = FullModelInverseDynamics(problem=self._prb,
+ kd=self._kin_dyn,
+ q_init=self._homer.get_homing_map(),
+ base_init=self._base_init)
+
+ self._ti = TaskInterface(prb=self._prb,
+ model=self._model,
+ max_solver_iter=self.max_solver_iter,
+ debug = self._debug,
+ verbose = self._verbose,
+ codegen_workdir = self._codegen_dir)
+ self._ti.setTaskFromYaml(self.config_path)
+
+ # setting initial base pos ref if exists
+ base_pos = self._ti.getTask('base_height')
+ if base_pos is not None:
+ base_pos.setRef(np.atleast_2d(self._base_init).T)
+
+ self._pm = pymanager.PhaseManager(self._n_nodes, debug=False) # intervals or nodes?????
+
+ self._gm = GaitManager(self._ti,
+ self._pm,
+ self._injection_node,
+ keep_yaw_vert=keep_yaw_vert,
+ yaw_vertical_weight=self._yaw_vertical_weight,
+ vertical_landing=vertical_landing,
+ landing_vert_weight=self._vertical_land_weight,
+ phase_force_reg=self._phase_force_reg,
+ custom_opts=self._custom_opts,
+ flight_duration=flight_duration,
+ post_flight_stance=post_flight_stance,
+ step_height=step_height,
+ dh=0.0)
+
+ self._ti.model.q.setBounds(self._ti.model.q0, self._ti.model.q0, nodes=0)
+ self._ti.model.v.setBounds(self._ti.model.v0, self._ti.model.v0, nodes=0)
+ self._ti.model.q.setInitialGuess(self._ti.model.q0)
+ self._ti.model.v.setInitialGuess(self._ti.model.v0)
+ for _, cforces in self._ti.model.cmap.items():
+ n_contact_f=len(cforces)
+ for c in cforces:
+ c.setInitialGuess(np.array(self._f0)/n_contact_f)
+
+ vel_lims = self._model.kd.velocityLimits()
+ import horizon.utils as utils
+ self._prb.createResidual('vel_lb_barrier', self._vel_bounds_weight*utils.utils.barrier(vel_lims[7:] - self._model.v[7:]))
+ self._prb.createResidual('vel_ub_barrier', self._vel_bounds_weight*utils.utils.barrier1(-1 * vel_lims[7:] - self._model.v[7:]))
+
+ self._meas_lin_a_par=None
+ # if self._custom_opts["lin_a_feedback"]:
+ # # acceleration feedback on first node
+ # self._meas_lin_a_par=self._prb.createParameter(name="lin_a_feedback",
+ # dim=3, nodes=0)
+ # base_lin_a_prb=self._prb.getInput().getVars()[0:3]
+ # self._prb.createConstraint('lin_acceleration_feedback', base_lin_a_prb - self._meas_lin_a_par,
+ # nodes=[0])
+
+ # if not self._open_loop:
+ # # we create a residual cost to be used as an attractor to the measured state on the first node
+ # # hard constraints injecting meas. states are pure EVIL!
+ # prb_state=self._prb.getState()
+ # full_state=prb_state.getVars()
+ # state_dim=prb_state.getBounds()[0].shape[0]
+ # meas_state=self._prb.createParameter(name="measured_state",
+ # dim=state_dim, nodes=0)
+ # self._prb.createResidual('meas_state_attractor', meas_state_attractor_weight * (full_state - meas_state),
+ # nodes=[0])
+
+ self._ti.finalize()
+ self._ti.bootstrap()
+
+ self._ti.init_inv_dyn_for_res() # we initialize some objects for sol. postprocessing purposes
+ self._ti.load_initial_guess()
+
+ self.n_dofs = self._get_ndofs() # after loading the URDF and creating the controller we
+ # know n_dofs -> we assign it (by default = None)
+
+ self.n_contacts = len(self._model.cmap.keys())
+
+ # remove variables bounds (before they were necessary to have a good
+ # quality bootstrap solution)
+ self._q_inf=np.zeros((self.nq(), 1))
+ self._q_inf[:, :]=np.inf
+ self._v_inf=np.zeros((self.nv(), 1))
+ self._v_inf[:, :]=np.inf
+ self._ti.model.q.setBounds(-self._q_inf, self._q_inf, nodes=0)
+ self._ti.model.v.setBounds(-self._v_inf, self._v_inf, nodes=0)
+
+ # self.horizon_anal = analyzer.ProblemAnalyzer(self._prb)
+
+ def get_file_paths(self):
+ # can be overriden by child
+ paths = super().get_file_paths()
+ return paths
+
+ def _get_quat_remap(self):
+ # overrides parent
+ return [1, 2, 3, 0] # mapping from robot quat. to Horizon's quaternion convention
+
    def _zmp(self, model):
        """Build a CasADi function evaluating the zero-moment point (ZMP).

        The ZMP is the CoM xy-position offset by the force-weighted mean of the
        contact-to-CoM xy offsets, where each contact is weighted by the
        vertical (z) component of its force.

        Parameters:
            model: Horizon model exposing nq, nv, kd (kinodynamics object) and
                fmap (ordered map: contact frame name -> force variable).

        Returns:
            cs.Function 'zmp' mapping (q, v, a, f_0, ..., f_{n-1}) -> 2x1 zmp,
            with force inputs ordered as in model.fmap.
        """
        num = cs.SX([0, 0])
        den = cs.SX([0])
        pos_contact = dict()
        force_val = dict()

        # symbolic state/input placeholders
        q = cs.SX.sym('q', model.nq)
        v = cs.SX.sym('v', model.nv)
        a = cs.SX.sym('a', model.nv)

        com = model.kd.centerOfMass()(q=q, v=v, a=a)['com']

        # assumed ground normal: only vertical force components contribute
        n = cs.SX([0, 0, 1])
        for c in model.fmap.keys():
            pos_contact[c] = model.kd.fk(c)(q=q)['ee_pos']
            force_val[c] = cs.SX.sym('force_val', 3)
            num += (pos_contact[c][0:2] - com[0:2]) * cs.dot(force_val[c], n)
            den += cs.dot(force_val[c], n)

        zmp = com[0:2] + (num / den)
        # input ordering matters: q, v, a, then forces in fmap order
        input_list = []
        input_list.append(q)
        input_list.append(v)
        input_list.append(a)

        for elem in force_val.values():
            input_list.append(elem)

        f = cs.Function('zmp', input_list, [zmp])
        return f
+
+ def _add_zmp(self):
+
+ input_zmp = []
+
+ input_zmp.append(self._model.q)
+ input_zmp.append(self._model.v)
+ input_zmp.append(self._model.a)
+
+ for f_var in self._model.fmap.values():
+ input_zmp.append(f_var)
+
+ c_mean = cs.SX([0, 0, 0])
+ for c_name, f_var in self._model.fmap.items():
+ fk_c_pos = self._kin_dyn.fk(c_name)(q=self._model.q)['ee_pos']
+ c_mean += fk_c_pos
+
+ c_mean /= len(self._model.cmap.keys())
+
+ zmp_nominal_weight = 10.
+ zmp_fun = self._zmp(self._model)(*input_zmp)
+
+ if 'wheel_joint_1' in self._model.kd.joint_names():
+ zmp_residual = self._prb.createIntermediateResidual('zmp', zmp_nominal_weight * (zmp_fun[0:2] - c_mean[0:2]))
+
+ def _quaternion_multiply(self,
+ q1, q2):
+ x1, y1, z1, w1 = q1
+ x2, y2, z2, w2 = q2
+ w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
+ x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
+ y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
+ z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
+ return np.array([x, y, z, w])
+
+ def _get_continuous_jnt_names(self):
+ import xml.etree.ElementTree as ET
+ root = ET.fromstring(self.urdf)
+ continuous_joints = []
+ for joint in root.findall('joint'):
+ joint_type = joint.get('type')
+ if joint_type == 'continuous':
+ joint_name = joint.get('name')
+ continuous_joints.append(joint_name)
+ return continuous_joints
+
+ def _get_wheels_jnt_v_idxs(self, wheel_patterns: List[str]):
+ jnt_names=self._get_robot_jnt_names()
+ wheels_idxs=[]
+ for i in range(len(jnt_names)):
+ jnt_name=jnt_names[i]
+ for wheel_pattern in wheel_patterns:
+ if wheel_pattern in jnt_name:
+ wheels_idxs.append(i)
+ break
+ return wheels_idxs
+
+ def _get_jnt_id(self, jnt_name):
+ return self._kin_dyn.joint_iq(jnt_name)
+
+ def _init_rhc_task_cmds(self):
+
+ rhc_refs = HybridQuadRhcRefs(gait_manager=self._gm,
+ robot_index_shm=self.controller_index,
+ robot_index_view=0, # when using optimize_mem the view if always of shape 1x...
+ namespace=self.namespace,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ use_force_feedback=self._custom_opts["use_force_feedback"],
+ optimize_mem=True)
+
+ rhc_refs.run()
+
+ rhc_refs.rob_refs.set_jnts_remapping(jnts_remapping=self._to_controller)
+ rhc_refs.rob_refs.set_q_remapping(q_remapping=self._get_quat_remap())
+
+ rhc_refs.set_default_refs(p_ref=np.atleast_2d(self._base_init)[:, 0:3],
+ q_ref=np.atleast_2d(self._base_init)[:, 3:7])
+
+ return rhc_refs
+
+ def get_vertex_fnames_from_ti(self):
+ tasks=self._ti.task_list
+ contact_f_names=[]
+ for task in tasks:
+ if isinstance(task, ContactTask):
+ interaction_task=task.dynamics_tasks[0]
+ contact_f_names.append(interaction_task.vertex_frames[0])
+ return contact_f_names
+
+ def _get_contact_names(self):
+ # should get contact names from vertex frames
+ # list(self._ti.model.cmap.keys())
+ return self.get_vertex_fnames_from_ti()
+
+ def _get_robot_jnt_names(self):
+
+ joints_names = self._kin_dyn.joint_names()
+ to_be_removed = ["universe",
+ "reference",
+ "world",
+ "floating",
+ "floating_base"]
+ for name in to_be_removed:
+ if name in joints_names:
+ joints_names.remove(name)
+
+ return joints_names
+
+ def _get_ndofs(self):
+ return len(self._model.joint_names)
+
+ def nq(self):
+ return self._kin_dyn.nq()
+
+ def nv(self):
+ return self._kin_dyn.nv()
+
+ def _get_robot_mass(self):
+
+ return self._kin_dyn.mass()
+
+ def _get_root_full_q_from_sol(self, node_idx=1):
+
+ root_q_full=self._ti.solution['q'][0:7, node_idx].reshape(1, 7).astype(self._dtype)
+
+ np.nan_to_num(root_q_full, nan=1e3, posinf=1e3, neginf=-1e3, copy=False)
+ np.clip(a=root_q_full, a_min=-1e3, a_max=1e3, out=root_q_full)
+
+ return root_q_full
+
+ def _get_full_q_from_sol(self, node_idx=1):
+
+ return self._ti.solution['q'][:, node_idx].reshape(1, -1).astype(self._dtype)
+
+ def _get_root_twist_from_sol(self, node_idx=1):
+ # provided in world frame
+ twist_base_local=self._get_v_from_sol()[0:6, node_idx].reshape(1, 6)
+ # if world_aligned:
+ # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4]
+ # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix()
+ # twist_base_local[:, 0:3] = r_base_rhc @ twist_base_local[0, 0:3]
+ # twist_base_local[:, 3:6] = r_base_rhc @ twist_base_local[0, 3:6]
+ return twist_base_local
+
+ def _get_root_a_from_sol(self, node_idx=0):
+ # provided in world frame
+ a_base_local=self._get_a_from_sol()[0:6, node_idx].reshape(1, 6)
+ # if world_aligned:
+ # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4]
+ # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix()
+ # a_base_local[:, 0:3] = r_base_rhc @ a_base_local[0, 0:3]
+ # a_base_local[:, 3:6] = r_base_rhc @ v[0, 3:6]
+ return a_base_local
+
    def _get_jnt_q_from_sol(self, node_idx=0,
            reduce: bool = True,
            clamp: bool = True):
        """Joint positions at the given horizon node, shaped (1, n).

        When continuous joints are parametrized in So2 (cos/sin pairs) and
        `reduce` is True, each pair is collapsed back to an angle via arctan2
        so the result matches the robot-side joint ordering. `clamp` only
        affects the non-reduced path (wrap with fmod(., 2*pi)); the reduced
        path always wraps the revolute joints.

        NOTE: the reduced path returns self._jnts_q_reduced, a preallocated
        buffer overwritten at every call — do not keep references to it.
        """
        full_jnts_q=self._ti.solution['q'][7:, node_idx:node_idx+1].reshape(1,-1).astype(self._dtype)

        # sanitize NaN/inf in place before using the values
        np.nan_to_num(full_jnts_q, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) # in place
        np.clip(a=full_jnts_q, a_max=1e3, a_min=-1e3, out=full_jnts_q) # in place

        if self._custom_opts["replace_continuous_joints"] or (not reduce):
            if clamp:
                return np.fmod(full_jnts_q, 2*np.pi)
            else:
                return full_jnts_q
        else:
            # one (cos, sin) row per continuous joint
            cos_sin=full_jnts_q[:,self._continuous_joints_idxs].reshape(-1,2)
            # copy rev joint vals
            self._jnts_q_reduced[:, self._rev_joints_idxs_red]=np.fmod(full_jnts_q[:, self._rev_joints_idxs], 2*np.pi).reshape(1, -1)
            # and continuous (cos/sin -> angle)
            self._jnts_q_reduced[:, self._continuous_joints_idxs_red]=np.arctan2(cos_sin[:, 1], cos_sin[:, 0]).reshape(1,-1)
            return self._jnts_q_reduced
+
+ def _get_jnt_v_from_sol(self, node_idx=1):
+
+ jnt_v_sol=self._get_v_from_sol()[6:, node_idx].reshape(1,
+ self._nv_jnts)
+ np.nan_to_num(jnt_v_sol, nan=1e5, posinf=1e5, neginf=-1e5, copy=False) # in place
+ # np.clip(a=jnt_v_sol, a_max=1e5, a_min=-1e5, out=jnt_v_sol) # in place
+
+ return jnt_v_sol
+
+ def _get_jnt_a_from_sol(self, node_idx=1):
+
+ return self._get_a_from_sol()[6:, node_idx].reshape(1,
+ self._nv_jnts)
+
+ def _get_jnt_eff_from_sol(self, node_idx=0):
+
+ efforts_on_node = self._ti.eval_efforts_on_node(node_idx=node_idx)
+
+ # if self._custom_opts["only_vel_wheels"]:
+
+ jnt_efforts=efforts_on_node[6:, 0]
+
+ if self._custom_opts["only_vel_wheels"] and self._wheels_idxs_v:
+ jnt_efforts[self._wheels_idxs_v]=0.0
+
+ return jnt_efforts.reshape(1,
+ self._nv_jnts).astype(self._dtype)
+
+ def _get_rhc_cost(self):
+
+ return self._ti.solution["opt_cost"]
+
+ def _get_rhc_constr_viol(self):
+
+ return self._ti.solution["residual_norm"]
+
+ def _get_rhc_nodes_cost(self):
+
+ cost = self._ti.solver_rti.getCostValOnNodes()
+ return cost.reshape((1, -1))
+
+ def _get_rhc_nodes_constr_viol(self):
+
+ constr_viol = self._ti.solver_rti.getConstrValOnNodes()
+ return constr_viol.reshape((1, -1))
+
+ def _get_rhc_niter_to_sol(self):
+
+ return self._ti.solution["n_iter2sol"]
+
    def _set_ig_bootstrap(self,
        q_state: np.ndarray = None,
        v_state: np.ndarray = None):
        """Build and assign the initial guess for a (re)bootstrap solve.

        The state guess repeats q_state over all nodes (broadcast over columns)
        with zero velocities; the input guess uses zero accelerations plus the
        nominal force self._f0 split evenly across each contact's vertex forces.

        NOTE(review): v_state is currently unused (velocities are explicitly
        zeroed), and q_state is dereferenced unconditionally despite the None
        default — confirm callers always pass it.

        Returns (xig, uig): the state and input initial-guess matrices.
        """
        xig = self._ti.solution['x_opt'].copy()
        uig = self._ti.solution['u_opt'].copy()

        # Normalize and keep quaternion in the same hemisphere as the previous
        # solution to avoid artificial 180-deg jumps in the bootstrap warm start.
        q_state_boot = q_state.copy()
        q_prev = xig[3:7, 0]
        q_now = q_state_boot[3:7, 0]

        q_now_norm = np.linalg.norm(q_now)
        if q_now_norm > 1e-9:
            q_state_boot[3:7, :] /= q_now_norm
        else:
            # degenerate measurement: fall back to the identity quaternion
            q_state_boot[3:7, :] = np.array([[0.0], [0.0], [0.0], [1.0]], dtype=self._dtype)

        q_prev_norm = np.linalg.norm(q_prev)
        if q_prev_norm > 1e-9:
            q_prev = q_prev / q_prev_norm

        q_now = q_state_boot[3:7, 0]
        # q and -q encode the same rotation: pick the sign closest to q_prev
        if np.dot(q_prev, q_now) < 0.0:
            q_state_boot[3:7, :] *= -1.0

        xig[0:self._nq, :] = q_state_boot
        xig[self._nq:self._nq + self._nv, :] = 0.0 # 0 velocity on first nodes
        uig[0:self._nv, :]=0.0 # 0 acceleration

        # assigning ig
        self._prb.getState().setInitialGuess(xig)
        self._prb.getInput().setInitialGuess(uig)
        # self._prb.getVariables("a").setInitialGuess(np.zeros((self._nv, 1), dtype=self._dtype))
        for _, cforces in self._ti.model.cmap.items():
            n_contact_f = len(cforces)
            if n_contact_f == 0:
                continue
            f_guess = np.array(self._f0, dtype=self._dtype) / n_contact_f
            for c in cforces:
                c.setInitialGuess(f_guess)

        # print("initial guesses")
        # print(self._nq)
        # print(self._nv)
        # print("q")
        # qig=self._ti.model.q.getInitialGuess()
        # print(qig.shape)
        # print(qig)
        # print("v")
        # print(self._ti.model.v.getInitialGuess())
        # print("a")
        # print(self._ti.model.a.getInitialGuess())
        # for _, cforces in self._ti.model.cmap.items():
        #     for c in cforces:
        #         print("force")
        #         print(c.getInitialGuess())

        return xig, uig
+
+ def _set_ig(self):
+
+ shift_num = -1 # shift data by one node
+
+ x_opt = self._ti.solution['x_opt']
+ u_opt = self._ti.solution['u_opt']
+
+ # building ig for state
+ xig = np.roll(x_opt,
+ shift_num, axis=1) # rolling state sol.
+ for i in range(abs(shift_num)):
+ # state on last node is copied to the elements
+ # which are "lost" during the shift operation
+ xig[:, -1 - i] = x_opt[:, -1]
+ # building ig for inputs
+ uig = np.roll(u_opt,
+ shift_num, axis=1) # rolling state sol.
+ for i in range(abs(shift_num)):
+ # state on last node is copied to the elements
+ # which are "lost" during the shift operation
+ uig[:, -1 - i] = u_opt[:, -1]
+
+ # assigning ig
+ self._prb.getState().setInitialGuess(xig)
+ self._prb.getInput().setInitialGuess(uig)
+
+ return xig, uig
+
+ def _update_open_loop(self,
+ bootstrap: bool = False):
+
+ q_state, v_state, a_state=self._set_is_open()
+
+ if not bootstrap:
+ self._set_ig()
+ else:
+ self._set_ig_bootstrap(q_state=q_state, v_state=v_state)
+
+ # robot_state=xig[:, 0]
+ # # open loop update:
+ # self._prb.setInitialState(x0=robot_state) # (xig has been shifted, so node 0
+ # # is node 1 in the last opt solution)
+
+ return q_state, v_state, a_state
+
+ def _update_closed_loop(self,
+ bootstrap: bool = False):
+
+ # set initial state
+ q_state=None
+ v_state=None
+ a_state=None
+ if self._custom_opts["adaptive_is"]:
+ # adaptive closed loop
+ q_state, v_state, a_state=self._set_is_adaptive()
+ elif self._custom_opts["fully_closed"]:
+ q_state, v_state, a_state=self._set_is_full()
+ elif self._custom_opts["closed_partial"]:
+ q_state, v_state, a_state=self._set_is_partial()
+ else:
+ Journal.log(self.__class__.__name__,
+ "_update_closed_loop",
+ "Neither adaptive_is, fully_closed, or closed_partial.",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ q_state, v_state, a_state=self._set_is()
+
+ # set initial guess for controller
+ if not bootstrap:
+ self._set_ig()
+ else:
+ self._set_ig_bootstrap(q_state=q_state, v_state=v_state)
+
+ return q_state, v_state, a_state
+
    def _set_is_open(self):
        """Open-loop initial condition: pin the node-0 state bounds to the
        previous solution's node-1 values (no measurements used).

        Returns (q_state, v_state, None): the state used as feedback.
        """
        # overriding states with rhc data
        q_full_root=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1)
        q_jnts=self._get_jnt_q_from_sol(node_idx=1, reduce=False).reshape(-1, 1)

        twist_root=self._get_root_twist_from_sol(node_idx=1).reshape(-1, 1)
        v_jnts=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_q_full_rhc=q[0:7] # full base pose (pos + quat)
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_twist_rhc=vel[0:6] # base twist (lin + ang)
        jnts_v_rhc=vel[6:] # jnts v

        self.rhc_refs.set_alpha(alpha=1.0) # fully open

        # pin node-0 state (equal lower/upper bounds)
        root_q_full_rhc.setBounds(lb=q_full_root,
            ub=q_full_root, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts,
            ub=q_jnts, nodes=0)
        root_twist_rhc.setBounds(lb=twist_root,
            ub=twist_root, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts,
            ub=v_jnts, nodes=0)

        # return state used for feedback
        q_state=np.concatenate((q_full_root, q_jnts),
            axis=0)
        v_state=np.concatenate((twist_root, v_jnts),
            axis=0)

        return (q_state, v_state, None)
+
    def _set_is_full(self):
        """Fully closed-loop initial condition: pin the whole node-0 state
        (base pose, joint positions, base twist, joint velocities — and base
        linear acceleration if enabled) to measurements from shared memory.

        Returns (q_state, v_state, a_state): the measured state used as
        feedback.
        """
        # measurements
        q_full_root = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1)

        if (not len(self._continuous_joints)==0): # expand measured continuous joints to So2 (cos/sin)
            self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:]
            self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos
            self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin
            q_jnts=self._jnts_q_expanded.reshape(-1,1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_full_q_rhc=q[0:7] # full base pose (pos + quat)
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_v_rhc=vel[0:3] # lin v.
        root_omega_rhc=vel[3:6] # omega
        jnts_v_rhc=vel[6:] # jnts v
        acc=self._prb.getVariables("a")
        lin_a_prb=acc[0:3] # lin acc

        self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive

        # pin node-0 state to measurements (equal lower/upper bounds)
        root_full_q_rhc.setBounds(lb=q_full_root,
            ub=q_full_root, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts,
            ub=q_jnts, nodes=0)
        root_v_rhc.setBounds(lb=v_root,
            ub=v_root, nodes=0)
        root_omega_rhc.setBounds(lb=omega,
            ub=omega, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts,
            ub=v_jnts, nodes=0)
        if self._custom_opts["lin_a_feedback"]:
            # feed the measured base linear acceleration on the first node
            lin_a_prb.setBounds(lb=a_root[0:3, :],
                ub=a_root[0:3, :],
                nodes=0)

        # return state used for feedback
        q_state=np.concatenate((q_full_root, q_jnts),
            axis=0)
        v_state=np.concatenate((v_root, omega, v_jnts),
            axis=0)
        a_state=np.concatenate((a_root, a_jnts),
            axis=0)

        return (q_state, v_state, a_state)
+
    def _set_is_partial(self):
        """Partially closed-loop initial condition: base position is always
        taken open-loop from the previous solution; base linear velocity and
        joint velocities come from either measurements or the solution,
        depending on custom options; the rest is measured.

        Returns (q_state, v_state, a_state): the state used as feedback.
        """
        # measurements
        p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1)
        q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1)

        if (not len(self._continuous_joints)==0): # expand measured continuous joints to So2 (cos/sin)
            self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:]
            self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos
            self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin
            q_jnts=self._jnts_q_expanded.reshape(-1,1)

        # overriding states with rhc data (-> all overridden states are open loop)
        root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1)
        root_p_from_rhc=root_q_full_from_rhc[0:3, :]
        p_root[:, :]=root_p_from_rhc # position is always open loop

        if not self._custom_opts["estimate_v_root"]:
            v_root[:, :]=self._get_root_twist_from_sol(node_idx=1)[:, 0:3].reshape(-1, 1)
        # override v jnts with the ones from controller
        if not self._custom_opts["use_jnt_v_feedback"]:
            v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1)
        # v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1)
        # root_twist_from_rhc=self._get_root_twist_from_sol(node_idx=1)
        # root_v_from_rhc=root_twist_from_rhc[:, 0:3].reshape(-1, 1)
        # root_omega_from_rhc=root_twist_from_rhc[:, 3:6].reshape(-1, 1)
        # jnt_q_from_rhc=self._get_jnt_q_from_sol(node_idx=1,reduce=False,clamp=False).reshape(-1, 1)
        # jnt_v_from_rhc=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_p_rhc=q[0:3] # root p
        root_q_rhc=q[3:7] # root orientation
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_v_rhc=vel[0:3] # lin v.
        root_omega_rhc=vel[3:6] # omega
        jnts_v_rhc=vel[6:] # jnts v
        acc=self._prb.getVariables("a")
        lin_a_prb=acc[0:3] # lin acc

        self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive

        # pin node-0 state (equal lower/upper bounds)
        root_p_rhc.setBounds(lb=p_root,
            ub=p_root, nodes=0)
        root_q_rhc.setBounds(lb=q_root,
            ub=q_root, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts,
            ub=q_jnts, nodes=0)

        if self._custom_opts["estimate_v_root"]:
            root_v_rhc.setBounds(lb=-self._v_inf[0:3],
                ub=self._v_inf[0:3], nodes=0) # leaving lin v of the base free (estimated from constraints)
        else: # get it from controller
            root_v_rhc.setBounds(lb=v_root,
                ub=v_root, nodes=0)
        root_omega_rhc.setBounds(lb=omega,
            ub=omega, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts,
            ub=v_jnts, nodes=0)
        if self._custom_opts["lin_a_feedback"]:
            # feed the measured base linear acceleration on the first node
            lin_a_prb.setBounds(lb=a_root[0:3, :],
                ub=a_root[0:3, :],
                nodes=0)

        # return state used for feedback
        q_state=np.concatenate((p_root, q_root, q_jnts),
            axis=0)
        v_state=np.concatenate((v_root, omega, v_jnts),
            axis=0)
        a_state=np.concatenate((a_root, a_jnts),
            axis=0)

        return (q_state, v_state, a_state)
+
    def _set_is_adaptive(self):
        """Adaptive closed-loop initial condition: blend measurements with the
        controller's own prediction using a factor alpha in [0, 1].

        alpha = 0 -> fully closed loop (pure measurements); alpha = 1 -> fully
        open loop. alpha either comes from outside (shared refs) or is
        auto-tuned via a tanh of the max prediction defect. Base position and
        continuous-joint positions are always open loop.

        Returns (q_state, v_state, a_state): the (measured) state used as
        feedback.
        """
        # measurements
        p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1)
        q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_p_rhc=q[0:3] # root p
        root_q_rhc=q[3:7] # root orientation
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_v_rhc=vel[0:3] # lin v.
        root_omega_rhc=vel[3:6] # omega
        jnts_v_rhc=vel[6:] # jnts v
        acc=self._prb.getVariables("a")
        lin_a_prb=acc[0:3] # lin acc

        # getting prediction defects (predicted - measured deltas)
        root_q_delta=self.rhc_pred_delta.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        jnt_q_delta=self.rhc_pred_delta.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        jnt_v_delta=self.rhc_pred_delta.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root_delta = self.rhc_pred_delta.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega_root_delta = self.rhc_pred_delta.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root_delta = self.rhc_pred_delta.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        # close state on known quantities, estimate some (e.g. lin vel) and
        # go open loop if things start to explode
        alpha_now=1.0
        delta=0.0
        if self._custom_opts["alpha_from_outside"]:
            alpha_now=self.rhc_refs.get_alpha()
        else: # "autotuned" alpha
            if self._custom_opts["estimate_v_root"]: # compute delta based on jnt v (since we use meas.)
                delta=np.max(np.abs(jnt_v_delta))
            else:
                delta=np.max(np.abs(omega_root_delta))
            # fail_idx=self._get_failure_index()
            # fail_idx=self._get_explosion_idx()/self._fail_idx_thresh
            # smooth 0->1 transition centered at delta == self._alpha_half
            alpha_now=(np.tanh(2*self._alpha_half*(delta-self._alpha_half))+1)/2.0

        bound_relaxation=-np.log(1-alpha_now+1e-16) # [~0, ~inf] if alpha_now [0, 1]
        self.rhc_refs.set_alpha(alpha=alpha_now) # also writes on shared mem for db
        self.rhc_refs.set_bound_relax(bound_relax=bound_relaxation) # also writes on shared mem for db

        self._alphas_q_root[:]=alpha_now # for now single alpha for everything
        self._alphas_q_jnts[:]=alpha_now
        self._alphas_twist_root[:]=alpha_now
        self._alphas_v_jnts[:]=alpha_now
        self._alphas_a[:]=alpha_now
        if not self._custom_opts["estimate_v_root"]:
            self._alphas_twist_root[0:3]=1.0 # open
            self._alphas_v_jnts[:]=1.0 # open

        # position is always open loop
        root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1)
        root_p_from_rhc=root_q_full_from_rhc[0:3, :]
        p_root[:, :]=root_p_from_rhc

        # expanding measured q if continuous joints are present
        if (not len(self._continuous_joints)==0): # expand measured continuous joints to So2 (cos/sin)
            self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:]
            self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos
            self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin

            # continuous joint position is always open loop, but we need a delta vector of matching dimension
            q_jnts_from_rhc=self._get_jnt_q_from_sol(node_idx=1).reshape(-1, 1)

            self._jnts_q_delta_expanded[self._rev_joints_idxs, :]=jnt_q_delta[self._rev_joints_idxs_red ,:]

            self._jnts_q_delta_expanded[self._continuous_joints_idxs_cos, :]=\
                np.cos(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \
                np.cos(q_jnts[self._continuous_joints_idxs_red, :])
            self._jnts_q_delta_expanded[self._continuous_joints_idxs_sin, :]=\
                np.sin(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \
                np.sin(q_jnts[self._continuous_joints_idxs_red, :])

            q_jnts=self._jnts_q_expanded.reshape(-1,1) # overwriting with expanded jnts
            jnt_q_delta=self._jnts_q_delta_expanded.reshape(-1, 1) # overwriting with expanded jnts

            self._alphas_q_jnts[self._continuous_joints_idxs_cos, :]=1.0 # open loop
            self._alphas_q_jnts[self._continuous_joints_idxs_sin, :]=1.0 # open loop

            # self._alphas_v_jnts[self._continuous_joints_idxs_red, :]=0.0 # open loop

        # pin node-0 state to measurement + alpha * defect (equal lb/ub)
        root_p_rhc.setBounds(lb=p_root,
            ub=p_root, nodes=0)
        root_q_rhc.setBounds(lb=q_root+self._alphas_q_root[3:7]*root_q_delta,
            ub=q_root+self._alphas_q_root[3:7]*root_q_delta, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts+self._alphas_q_jnts*jnt_q_delta,
            ub=q_jnts+self._alphas_q_jnts*jnt_q_delta, nodes=0)
        if self._custom_opts["estimate_v_root"]:
            root_v_rhc.setBounds(lb=-self._v_inf[0:3],
                ub=self._v_inf[0:3], nodes=0)
        else:
            root_v_rhc.setBounds(lb=v_root+self._alphas_twist_root[0:3, :]*v_root_delta,
                ub=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, nodes=0)
        root_omega_rhc.setBounds(lb=omega+self._alphas_twist_root[3:6, :]*omega_root_delta,
            ub=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts+self._alphas_v_jnts*jnt_v_delta,
            ub=v_jnts+self._alphas_v_jnts*jnt_v_delta, nodes=0)
        if self._custom_opts["lin_a_feedback"]:
            lin_a_prb.setBounds(lb=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :],
                ub=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :],
                nodes=0)

        # return state used for feedback
        q_state=np.concatenate((p_root, q_root, q_jnts),
            axis=0)
        v_state=np.concatenate((v_root, omega, v_jnts),
            axis=0)
        a_state=np.concatenate((a_root, a_jnts),
            axis=0)

        return (q_state, v_state, a_state)
+
+ def _solve(self):
+
+ if self._debug:
+ return self._db_solve(bootstrap=False)
+ else:
+ return self._min_solve(bootstrap=False)
+
+ def _bootstrap(self):
+
+ if self._debug:
+ return self._db_solve(bootstrap=True)
+ else:
+ return self._min_solve(bootstrap=True)
+
    def _min_solve(self, bootstrap: bool = False):
        """Minimal (no-debug) controller update.

        Updates the node-0 state and warm start (open or closed loop), shifts
        the phase manager, steps the references, then runs one RTI step (or a
        full bootstrap solve).

        Returns True on success (no RHC failure detected), False otherwise;
        any exception raised by the solver is deliberately swallowed and
        reported as a failure.
        """
        robot_qstate=None
        robot_vstate=None
        robot_astate=None
        if self._open_loop:
            robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and
            # initial conditions using data from the solution itself
        else:
            robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and
            # initial conditions using robot measurements

        self._pm.shift() # shifts phases of one dt
        if self._refs_in_hor_frame:
            # q_base=self.robot_state.root_state.get(data_type="q",
            #     robot_idxs=self.controller_index).reshape(-1, 1)
            # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1)
            # using internal base pose from rhc. in case of closed loop, it will be the meas state
            force_norm=None
            if self._custom_opts["use_force_feedback"]:
                # per-contact force norms from measured contact wrenches
                contact_forces=self.robot_state.contact_wrenches.get(data_type="f",
                    robot_idxs=self.controller_index_np,
                    contact_name=None).reshape(self.n_contacts,3)
                force_norm=np.linalg.norm(contact_forces, axis=1)
            self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate,
                force_norm=force_norm)
        else:
            self.rhc_refs.step()

        try:
            if not bootstrap:
                converged = self._ti.rti() # RTI step
            else:
                converged = self._ti.bootstrap() # full solve (to convergence)
            self.sol_counter = self.sol_counter + 1
            return not self._check_rhc_failure()
        except Exception as e: # fail in case of exceptions (deliberate best-effort)
            return False
+
+ def _db_solve(self, bootstrap: bool = False):
+
+ self._timer_start = time.perf_counter()
+
+ robot_qstate=None
+ robot_vstate=None
+ robot_astate=None
+ if self._open_loop:
+ robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and
+ # initial conditions using data from the solution itself
+ else:
+ robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and
+ # initial conditions using robot measurements
+
+ self._prb_update_time = time.perf_counter()
+ self._pm.shift() # shifts phases of one dt
+ self._phase_shift_time = time.perf_counter()
+
+ if self._refs_in_hor_frame:
+ # q_base=self.robot_state.root_state.get(data_type="q",
+ # robot_idxs=self.controller_index).reshape(-1, 1)
+ # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1)
+ # using internal base pose from rhc. in case of closed loop, it will be the meas state
+ force_norm=None
+ if self._custom_opts["use_force_feedback"]:
+ contact_forces=self.robot_state.contact_wrenches.get(data_type="f",
+ robot_idxs=self.controller_index_np,
+ contact_name=None).reshape(self.n_contacts,3)
+ force_norm=np.linalg.norm(contact_forces, axis=1)
+ self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate,
+ force_norm=force_norm)
+ else:
+ self.rhc_refs.step()
+
+ self._task_ref_update_time = time.perf_counter()
+
+ try:
+ if not bootstrap:
+ converged = self._ti.rti() # RTI step
+ else:
+ converged = self._ti.bootstrap() # full solve bootstrap
+ self._rti_time = time.perf_counter()
+ self.sol_counter = self.sol_counter + 1
+ self._update_db_data()
+ return not self._check_rhc_failure()
+ except Exception as e: # fail in case of exceptions
+ if self._verbose:
+ solve_mode = "RTI" if not bootstrap else "Bootstrap"
+ exception = f"{solve_mode}() for controller {self.controller_index} failed" + \
+ f" with exception {type(e).__name__}"
+ Journal.log(self.__class__.__name__,
+ "solve",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = False)
+ self._update_db_data()
+ return False
+
    def _get_fail_idx(self):
        """Return a running average of the "explosion index" over a circular buffer.

        The latest index is written at the current write-head column; the head
        is reset to 0 every ``self._expl_idx_window_size`` samples (tracked via
        the global sample counter). The average is taken over the WHOLE buffer,
        so right after a reset it reflects whatever the buffer was filled with.

        NOTE(review): assumes the buffer has exactly ``_expl_idx_window_size``
        columns so the head never runs past the end — confirm where the buffer
        is allocated.
        """
        self._explosion_idx_buffer[:, self._expl_idx_buffer_counter]=self._get_explosion_idx()
        self._expl_idx_buffer_counter+=1
        self._expl_idx_counter+=1
        if self._expl_idx_counter%self._expl_idx_window_size==0:
            self._expl_idx_buffer_counter=0 # restart from 0

        running_avrg=np.mean(self._explosion_idx_buffer).item()

        return running_avrg
+
+ def _get_explosion_idx(self):
+ explosion_index = self._get_rhc_constr_viol() + self._get_rhc_cost()*self._fail_idx_scale
+ return explosion_index
+
+ def _update_db_data(self):
+
+ self._profiling_data_dict["problem_update_dt"] = self._prb_update_time - self._timer_start
+ self._profiling_data_dict["phases_shift_dt"] = self._phase_shift_time - self._prb_update_time
+ self._profiling_data_dict["task_ref_update"] = self._task_ref_update_time - self._phase_shift_time
+ self._profiling_data_dict["rti_solve_dt"] = self._rti_time - self._task_ref_update_time
+ self.rhc_costs.update(self._ti.solver_rti.getCostsValues())
+ self.rhc_constr.update(self._ti.solver_rti.getConstraintsValues())
+
+ def _reset(self):
+
+ # reset task interface (ig, solvers, etc..) +
+ # phase manager and sets bootstap as solution
+ self._gm.reset()
+ self._explosion_idx_buffer[:, :]=self._get_explosion_idx() # reset with data from reset solution
+ self._expl_idx_counter=0.0
+ self._expl_idx_buffer_counter=0
+
+ def _get_cost_info(self):
+
+ cost_dict = self._ti.solver_rti.getCostsValues()
+ cost_names = list(cost_dict.keys())
+ cost_dims = [1] * len(cost_names) # costs are always scalar
+ return cost_names, cost_dims
+
+ def _get_constr_info(self):
+
+ constr_dict = self._ti.solver_rti.getConstraintsValues()
+ constr_names = list(constr_dict.keys())
+ constr_dims = [-1] * len(constr_names)
+ i = 0
+ for constr in constr_dict:
+ constr_val = constr_dict[constr]
+ constr_shape = constr_val.shape
+ constr_dims[i] = constr_shape[0]
+ i+=1
+ return constr_names, constr_dims
+
    def _get_q_from_sol(self):
        """Configuration trajectory from the last solution, cast to controller dtype.

        If continuous joints were replaced in the model, the solver's q can be
        returned as-is. Otherwise each continuous joint is represented in the
        solver's q by a (cos, sin) pair, which is collapsed back to an angle
        via arctan2 into the pre-allocated reduced buffer.

        NOTE(review): assumes cos/sin entries of continuous joints are strictly
        interleaved (cos at even, sin at odd positions of
        ``_continuous_joints_idxs_qfull``) — confirm against the index setup.
        Returns the shared ``_full_q_reduced`` buffer (mutated in place).
        """
        full_q=self._ti.solution['q'].astype(self._dtype)
        if self._custom_opts["replace_continuous_joints"]:
            return full_q
        else:
            cont_jnts=full_q[self._continuous_joints_idxs_qfull, :]
            cos=cont_jnts[::2, :]
            sin=cont_jnts[1::2, :]
            # copy root
            self._full_q_reduced[0:7, :]=full_q[0:7, :]
            # copy rev joint vals
            self._full_q_reduced[self._rev_joints_idxs_red_qfull, :]=full_q[self._rev_joints_idxs_qfull, :]
            # and continuous
            angle=np.arctan2(sin, cos)
            self._full_q_reduced[self._continuous_joints_idxs_red_qfull, :]=angle
            return self._full_q_reduced
+
+ def _get_v_from_sol(self):
+ return self._ti.solution['v'].astype(self._dtype)
+
+ def _get_a_from_sol(self):
+ return self._ti.solution['a'].astype(self._dtype)
+
+ def _get_a_dot_from_sol(self):
+ return None
+
+ def _get_f_from_sol(self):
+ # to be overridden by child class
+ contact_names =self._get_contacts() # we use controller-side names
+ try:
+ data=[]
+ for key in contact_names:
+ contact_f=self._ti.solution["f_" + key].astype(self._dtype)
+ np.nan_to_num(contact_f, nan=1e6, posinf=1e6, neginf=-1e6, copy=False)
+ np.clip(a=contact_f, a_max=1e6, a_min=-1e6, out=contact_f)
+ data.append(contact_f)
+ return np.concatenate(data, axis=0)
+ except:
+ return None
+
+ def _get_f_dot_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_eff_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_cost_from_sol(self,
+ cost_name: str):
+ return self.rhc_costs[cost_name]
+
+ def _get_constr_from_sol(self,
+ constr_name: str):
+ return self.rhc_constr[constr_name]
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc_refs.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc_refs.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c751aa8a6e8d32c028f94d4224123863866a933
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc_refs.py
@@ -0,0 +1,381 @@
+from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager
+from aug_mpc.controllers.rhc.horizon_based.utils.math_utils import hor2w_frame
+
+from mpc_hive.utilities.shared_data.rhc_data import RhcRefs
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import LogType
+from EigenIPC.PyEigenIPC import Journal
+
+from typing import Union
+
+import numpy as np
+
+class HybridQuadRhcRefs(RhcRefs):
+
    def __init__(self,
                 gait_manager: GaitManager,
                 robot_index_shm: int,
                 robot_index_view: int,
                 namespace: str, # namespace used for shared mem
                 verbose: bool = True,
                 vlevel: VLevel = VLevel.V2,
                 safe: bool = True,
                 use_force_feedback: bool = False,
                 optimize_mem: bool = False):
        """Rhc-references handler bridging shared memory and the horizon tasks.

        Connects as a client to the shared rhc references and mirrors them
        into the task interface owned by the provided gait manager.

        Args:
            gait_manager: GaitManager owning the horizon task interface.
            robot_index_shm: row of this robot in the full shared-memory views.
            robot_index_view: row in the local (possibly memory-optimized) view.
            namespace: namespace used for shared mem.
            verbose: enable verbose logging.
            vlevel: shared-memory verbosity level.
            safe: forwarded to the shared-memory client.
            use_force_feedback: if True, step() expects contact-force norms.
            optimize_mem: if True, mirror only this controller's row locally.
        """
        self.robot_index = robot_index_shm
        self.robot_index_view = robot_index_view
        # 0-d numpy view index, used for numpy-mirror row addressing
        self.robot_index_np_view = np.array(self.robot_index_view)

        self._step_idx = 0
        self._print_frequency = 100

        self._verbose = verbose

        self._use_force_feedback=use_force_feedback

        if optimize_mem:
            super().__init__(
                is_server=False,
                with_gpu_mirror=False,
                namespace=namespace,
                safe=safe,
                verbose=verbose,
                vlevel=vlevel,
                optimize_mem=optimize_mem,
                n_robots=1, # we just need the row corresponding to this controller
                n_jnts=None, # got from server
                n_contacts=None # got from server
                )
        else:
            super().__init__(
                is_server=False,
                with_gpu_mirror=False,
                namespace=namespace,
                safe=safe,
                verbose=verbose,
                vlevel=vlevel)

        # validate only after super().__init__ so logging is available
        if not isinstance(gait_manager, GaitManager):
            exception = f"Provided gait_manager argument should be of GaitManager type!"
            Journal.log(self.__class__.__name__,
                "__init__",
                exception,
                LogType.EXCEP,
                throw_when_excep = True)

        self.gait_manager = gait_manager

        self.timeline_names = self.gait_manager.timeline_names

        # task interfaces from horizon for setting commands to rhc
        self._get_tasks()

        # default base refs used by reset(); quaternion defaults to identity
        self._p_ref_default=np.zeros((1, 3))
        self._q_ref_default=np.zeros((1, 4))
        self._q_ref_default[0, 0]=1
+
+ def _get_tasks(self):
+ # can be overridden by child
+ # cartesian tasks are in LOCAL_WORLD_ALIGNED (frame centered at distal link, oriented as WORLD)
+ self.base_lin_velxy = self.gait_manager.task_interface.getTask('base_lin_velxy')
+ self.base_lin_velz = self.gait_manager.task_interface.getTask('base_lin_velz')
+ self.base_omega = self.gait_manager.task_interface.getTask('base_omega')
+ self.base_height = self.gait_manager.task_interface.getTask('base_height')
+
+ def run(self):
+
+ super().run()
+ if not (self.robot_index < self.rob_refs.n_robots()):
+ exception = f"Provided \(0-based\) robot index {self.robot_index} exceeds number of " + \
+ " available robots {self.rob_refs.n_robots()}."
+ Journal.log(self.__class__.__name__,
+ "run",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ contact_names = list(self.gait_manager.task_interface.model.cmap.keys())
+ if not (self.n_contacts() == len(contact_names)):
+ exception = f"N of contacts within problem {len(contact_names)} does not match n of contacts {self.n_contacts()}"
+ Journal.log(self.__class__.__name__,
+ "run",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ # set some defaults from gait manager
+ for i in range(self.n_contacts()):
+ self.flight_settings_req.set(data=self.gait_manager.get_flight_duration(contact_name=contact_names[i]),
+ data_type="len_remain",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i)
+ self.flight_settings_req.set(data=self.gait_manager.get_step_apexdh(contact_name=contact_names[i]),
+ data_type="apex_dpos",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i)
+ self.flight_settings_req.set(data=self.gait_manager.get_step_enddh(contact_name=contact_names[i]),
+ data_type="end_dpos",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i)
+
+ self.flight_settings_req.synch_retry(row_index=self.robot_index,
+ col_index=0,
+ row_index_view=self.robot_index_view,
+ n_rows=1,
+ n_cols=self.flight_settings_req.n_cols,
+ read=False)
+
    def step(self, qstate: np.ndarray = None,
            vstate: np.ndarray = None,
            force_norm: np.ndarray = None):
        """Advance references by one control step.

        Pulls the latest refs/flags/flight settings from shared memory,
        updates the contact phase timelines and pushes the base refs to
        the horizon tasks.

        Args:
            qstate: full configuration used for phase insertion and whose
                [3:7] rows are read as the base quaternion.
            vstate: currently unread here (kept for interface symmetry).
            force_norm: per-contact force norms (currently unused; the
                force-feedback path below is commented out).

        Raises (via Journal): if called while the client is not running.
        """
        if self.is_running():

            # updates robot refs from shared mem
            self.rob_refs.synch_from_shared_mem(robot_idx=self.robot_index, robot_idx_view=self.robot_index_view)
            self.phase_id.synch_all(read=True, retry=True,
                            row_index=self.robot_index,
                            row_index_view=self.robot_index_view)
            self.contact_flags.synch_all(read=True, retry=True,
                            row_index=self.robot_index,
                            row_index_view=self.robot_index_view)
            self.flight_settings_req.synch_all(read=True, retry=True,
                            row_index=self.robot_index,
                            row_index_view=self.robot_index_view)
            self._set_contact_phases(q_full=qstate)

            # updated internal references with latest available ones
            q_base=qstate[3:7,0:1] # quaternion
            self._apply_refs_to_tasks(q_base=q_base)

            # if self._use_force_feedback:
            #     self._set_force_feedback(force_norm=force_norm)

            self._step_idx +=1

        else:
            exception = f"{self.__class__.__name__} is not running"
            Journal.log(self.__class__.__name__,
                "step",
                exception,
                LogType.EXCEP,
                throw_when_excep = True)
+
+ def _set_contact_phases(self,
+ q_full: np.ndarray):
+
+ # phase_id = self.phase_id.read_retry(row_index=self.robot_index,
+ # col_index=0)[0]
+
+ contact_flags_refs = self.contact_flags.get_numpy_mirror()[self.robot_index_np_view, :]
+ target_n_limbs_in_contact=np.sum(contact_flags_refs).item()
+ if target_n_limbs_in_contact==0:
+ target_n_limbs_in_contact=4
+
+ is_contact = contact_flags_refs.flatten().tolist()
+ n_contacts=len(is_contact)
+
+ for i in range(n_contacts): # loop through contact timelines
+ timeline_name = self.timeline_names[i]
+
+ self.gait_manager.set_f_reg(contact_name=timeline_name,
+ scale=target_n_limbs_in_contact)
+
+ if is_contact[i]==False: # release contact
+
+ # flight parameters requests are set only when inserting a flight phase
+ len_req_now=int(self.flight_settings_req.get(data_type="len_remain",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i).item())
+ apex_req_now=self.flight_settings_req.get(data_type="apex_dpos",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i).item()
+ end_req_now=self.flight_settings_req.get(data_type="end_dpos",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i).item()
+ landing_dx_req_now=self.flight_settings_req.get(data_type="land_dx",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i).item()
+ landing_dy_req_now=self.flight_settings_req.get(data_type="land_dy",
+ robot_idxs=self.robot_index_np_view,
+ contact_idx=i).item()
+
+ # set flight phase properties depending on last value on shared memory
+ self.gait_manager.set_flight_duration(contact_name=timeline_name,
+ val=len_req_now)
+ self.gait_manager.set_step_apexdh(contact_name=timeline_name,
+ val=apex_req_now)
+ self.gait_manager.set_step_enddh(contact_name=timeline_name,
+ val=end_req_now)
+ self.gait_manager.set_step_landing_dx(contact_name=timeline_name,
+ val=landing_dx_req_now)
+ self.gait_manager.set_step_landing_dy(contact_name=timeline_name,
+ val=landing_dy_req_now)
+ # insert flight phase over the horizon
+ self.gait_manager.add_flight(contact_name=timeline_name,
+ robot_q=q_full)
+
+ else: # contact phase
+ self.gait_manager.add_stand(contact_name=timeline_name)
+
+ at_least_one_flight=self.gait_manager.update_flight_info(timeline_name)
+ # flight_info=self.gait_manager.get_flight_info(timeline_name)
+
+ self.gait_manager.check_horizon_full(timeline_name=timeline_name)
+
+ # write flight info to shared mem for all contacts in one shot (we follow same order as in flight_info shm)
+ all_flight_info=self.gait_manager.get_flight_info_all()
+ flight_info_shared=self.flight_info.get_numpy_mirror()
+ flight_info_shared[self.robot_index_np_view, :]=all_flight_info
+ self.flight_info.synch_retry(row_index=self.robot_index,
+ col_index=0,
+ row_index_view=self.robot_index_np_view,
+ n_rows=1, n_cols=self.flight_info.n_cols,
+ read=False)
+
+ self.gait_manager.update()
+
+ def _apply_refs_to_tasks(self, q_base = None):
+ # overrides parent
+ if q_base is not None: # rhc refs are assumed to be specified in the so called "horizontal"
+ # frame, i.e. a vertical frame, with the x axis aligned with the projection of the base x axis
+ # onto the plane
+ root_pose = self.rob_refs.root_state.get(data_type = "q_full",
+ robot_idxs=self.robot_index_np_view).reshape(-1, 1) # this should also be
+ # rotated into the horizontal frame (however, for now only the z componet is used, so it's ok)
+
+ root_twist_ref = self.rob_refs.root_state.get(data_type="twist",
+ robot_idxs=self.robot_index_np_view).reshape(-1, 1)
+
+ root_twist_ref_h = root_twist_ref.copy()
+
+ hor2w_frame(root_twist_ref, q_base, root_twist_ref_h) # horizon works in local world aligned frame
+
+ if self.base_lin_velxy is not None:
+ self.base_lin_velxy.setRef(root_twist_ref_h[0:2, :])
+ if self.base_omega is not None:
+ self.base_omega.setRef(root_twist_ref_h[3:, :])
+ if self.base_lin_velz is not None:
+ self.base_lin_velz.setRef(root_twist_ref_h[2:3, :])
+ if self.base_height is not None:
+ self.base_height.setRef(root_pose)
+ else:
+ root_pose = self.rob_refs.root_state.get(data_type = "q_full",
+ robot_idxs=self.robot_index_np_view).reshape(-1, 1)
+ root_twist_ref = self.rob_refs.root_state.get(data_type="twist",
+ robot_idxs=self.robot_index_np_view).reshape(-1, 1)
+
+ if self.base_lin_velxy is not None:
+ self.base_lin_velxy.setRef(root_twist_ref[0:2, :])
+ if self.base_omega is not None:
+ self.base_omega.setRef(root_twist_ref[3:, :])
+ if self.base_lin_velz is not None:
+ self.base_lin_velz.setRef(root_twist_ref[2:3, :])
+ if self.base_height is not None:
+ self.base_height.setRef(root_pose)
+
+ # def _set_force_feedback(self,
+ # force_norm: np.ndarray = None):
+
+ # is_contact=force_norm>1.0
+
+ # for i in range(len(is_contact)):
+ # timeline_name = self._timeline_names[i]
+ # self.gait_manager.set_force_feedback(timeline_name=timeline_name,
+ # force_norm=force_norm[i])
+
+ # if not is_contact[i]:
+
+
+ def set_default_refs(self,
+ p_ref: np.ndarray,
+ q_ref: np.ndarray):
+
+ self._p_ref_default[:, :]=p_ref
+ self._q_ref_default[:, :]=q_ref
+
+ def set_alpha(self, alpha:float):
+ # set provided value
+ alpha_shared=self.alpha.get_numpy_mirror()
+ alpha_shared[self.robot_index_np_view, :] = alpha
+ self.alpha.synch_retry(row_index=self.robot_index, col_index=0,
+ row_index_view=self.robot_index_view,
+ n_rows=1, n_cols=self.alpha.n_cols,
+ read=False)
+
+ def get_alpha(self):
+ self.alpha.synch_retry(row_index=self.robot_index, col_index=0,
+ row_index_view=self.robot_index_view,
+ n_rows=1, n_cols=self.alpha.n_cols,
+ read=True)
+ alpha=self.alpha.get_numpy_mirror()[self.robot_index_np_view, :].item()
+ return alpha
+
+ def set_bound_relax(self, bound_relax:float):
+ # set provided value
+ bound_rel_shared=self.bound_rel.get_numpy_mirror()
+ bound_rel_shared[self.robot_index_np_view, :] = bound_relax
+ self.bound_rel.synch_retry(row_index=self.robot_index, col_index=0, n_rows=1,
+ row_index_view=self.robot_index_view,
+ n_cols=self.alpha.n_cols,
+ read=False)
+
    def reset(self):
        """Reset this robot's references to their defaults and publish them.

        Restores: all contacts engaged, custom phase id (-1), zeroed contact
        positions / flight info / alpha, and the default base pose with zero
        twist; then pushes every touched view back to shared memory.

        Raises (via Journal): if called before run().
        """
        if self.is_running():

            # resets shared mem
            contact_flags_current = self.contact_flags.get_numpy_mirror()
            phase_id_current = self.phase_id.get_numpy_mirror()
            contact_flags_current[self.robot_index_np_view, :] = np.full((1, self.n_contacts()), dtype=np.bool_, fill_value=True)
            phase_id_current[self.robot_index_np_view, :] = -1 # defaults to custom phase id

            contact_pos_current=self.rob_refs.contact_pos.get_numpy_mirror()
            contact_pos_current[self.robot_index_np_view, :] = 0.0

            flight_info_current=self.flight_info.get_numpy_mirror()
            flight_info_current[self.robot_index_np_view, :] = 0.0

            alpha=self.alpha.get_numpy_mirror()
            alpha[self.robot_index_np_view, :] = 0.0

            # defaults set via set_default_refs() (identity quaternion initially)
            self.rob_refs.root_state.set(data_type="p", data=self._p_ref_default, robot_idxs=self.robot_index_np_view)
            self.rob_refs.root_state.set(data_type="q", data=self._q_ref_default, robot_idxs=self.robot_index_np_view)
            self.rob_refs.root_state.set(data_type="twist", data=np.zeros((1, 6)), robot_idxs=self.robot_index_np_view)

            self.contact_flags.synch_retry(row_index=self.robot_index, col_index=0,
                    row_index_view=self.robot_index_view,
                    n_rows=1, n_cols=self.contact_flags.n_cols,
                    read=False)
            self.phase_id.synch_retry(row_index=self.robot_index, col_index=0,
                    row_index_view=self.robot_index_view,
                    n_rows=1, n_cols=self.phase_id.n_cols,
                    read=False)
            self.rob_refs.root_state.synch_retry(row_index=self.robot_index, col_index=0,
                    row_index_view=self.robot_index_view,
                    n_rows=1, n_cols=self.rob_refs.root_state.n_cols,
                    read=False)

            self.rob_refs.contact_pos.synch_retry(row_index=self.robot_index, col_index=0,
                    row_index_view=self.robot_index_view,
                    n_rows=1, n_cols=self.rob_refs.contact_pos.n_cols,
                    read=False)

            self.flight_info.synch_retry(row_index=self.robot_index,
                    col_index=0,
                    row_index_view=self.robot_index_view,
                    n_rows=1, n_cols=self.flight_info.n_cols,
                    read=False)

            # should also empty the timeline for stepping phases
            self._step_idx = 0

        else:
            exception = f"Cannot call reset() since run() was not called!"
            Journal.log(self.__class__.__name__,
                "reset",
                exception,
                LogType.EXCEP,
                throw_when_excep = True)
+
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/utils/__init__.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/utils/math_utils.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/utils/math_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d4173d5b51ee8c644a54008ca0a40f1b34d1841
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/horizon_based/utils/math_utils.py
@@ -0,0 +1,165 @@
+
+import numpy as np
+
def w2hor_frame(t_w: np.ndarray,
        q_b: np.ndarray,
        t_out: np.ndarray):
    """
    Transforms a twist vector expressed in WORLD frame to
    an "horizontal" frame (z aligned as world, x aligned as the projection
    of the x-axis of the base frame described by q_b). This is useful for specifying locomotion
    references in a "game"-like fashion.
    t_out will hold the result.

    Shapes: t_w/t_out are (6, n); q_b is (4, n) with (i, j, k, w) row order.
    """
    qi, qj, qk, qw = q_b[0, :], q_b[1, :], q_b[2, :], q_b[3, :]

    # first column of the base rotation matrix (base x-axis in world coords)
    r11 = 1 - 2 * (qj * qj + qk * qk)
    r21 = 2 * (qi * qj + qk * qw)

    # normalize its ground-plane projection -> cos/sin of the base yaw
    norm = np.sqrt(r11 * r11 + r21 * r21)
    cos_yaw = r11 / norm
    sin_yaw = r21 / norm

    # rotate linear and angular x/y components by -yaw; z stays unchanged
    for off in (0, 3):
        t_out[off, :] = cos_yaw * t_w[off, :] + sin_yaw * t_w[off + 1, :]
        t_out[off + 1, :] = -sin_yaw * t_w[off, :] + cos_yaw * t_w[off + 1, :]
        t_out[off + 2, :] = t_w[off + 2, :]
+
def hor2w_frame(t_h: np.ndarray,
        q_b: np.ndarray,
        t_out: np.ndarray):
    """
    Transforms a twist vector expressed in the "horizontal" frame to WORLD.
    t_out will hold the result (inverse of w2hor_frame).

    Shapes: t_h/t_out are (6, n); q_b is (4, n) with (i, j, k, w) row order.
    """
    qi, qj, qk, qw = q_b[0, :], q_b[1, :], q_b[2, :], q_b[3, :]

    # first column of the base rotation matrix (base x-axis in world coords)
    r11 = 1 - 2 * (qj * qj + qk * qk)
    r21 = 2 * (qi * qj + qk * qw)

    # normalize its ground-plane projection -> cos/sin of the base yaw
    norm = np.sqrt(r11 * r11 + r21 * r21)
    cos_yaw = r11 / norm
    sin_yaw = r21 / norm

    # rotate linear and angular x/y components by +yaw; z stays unchanged
    for off in (0, 3):
        t_out[off, :] = cos_yaw * t_h[off, :] - sin_yaw * t_h[off + 1, :]
        t_out[off + 1, :] = sin_yaw * t_h[off, :] + cos_yaw * t_h[off + 1, :]
        t_out[off + 2, :] = t_h[off + 2, :]
+
def base2world_frame(t_b: np.ndarray,
        q_b: np.ndarray,
        t_out: np.ndarray):
    """
    Transforms a twist expressed in the base frame to the WORLD frame,
    using the quaternion describing the base orientation w.r.t. world.
    The result is written into t_out.

    Shapes: t_b/t_out are (6, n); q_b is (4, n) with (i, j, k, w) row order.
    """
    qi, qj, qk, qw = q_b[0, :], q_b[1, :], q_b[2, :], q_b[3, :]

    # rows of the base->world rotation matrix from the quaternion
    row0 = (1 - 2 * (qj * qj + qk * qk), 2 * (qi * qj - qk * qw), 2 * (qi * qk + qj * qw))
    row1 = (2 * (qi * qj + qk * qw), 1 - 2 * (qi * qi + qk * qk), 2 * (qj * qk - qi * qw))
    row2 = (2 * (qi * qk - qj * qw), 2 * (qj * qk + qi * qw), 1 - 2 * (qi * qi + qj * qj))

    # apply R to the linear (rows 0:3) and angular (rows 3:6) parts
    for off in (0, 3):
        vx, vy, vz = t_b[off, :], t_b[off + 1, :], t_b[off + 2, :]
        t_out[off, :] = row0[0] * vx + row0[1] * vy + row0[2] * vz
        t_out[off + 1, :] = row1[0] * vx + row1[1] * vy + row1[2] * vz
        t_out[off + 2, :] = row2[0] * vx + row2[1] * vy + row2[2] * vz
+
def world2base_frame(t_w: np.ndarray,
        q_b: np.ndarray,
        t_out: np.ndarray):
    """
    Transforms a twist expressed in the WORLD frame to the base frame,
    using the quaternion describing the base orientation w.r.t. world
    (i.e. applies the transpose of the base->world rotation).
    The result is written into t_out.

    Shapes: t_w/t_out are (6, n); q_b is (4, n) with (i, j, k, w) row order.
    """
    qi, qj, qk, qw = q_b[0, :], q_b[1, :], q_b[2, :], q_b[3, :]

    # rows of the base->world rotation matrix from the quaternion
    row0 = (1 - 2 * (qj * qj + qk * qk), 2 * (qi * qj - qk * qw), 2 * (qi * qk + qj * qw))
    row1 = (2 * (qi * qj + qk * qw), 1 - 2 * (qi * qi + qk * qk), 2 * (qj * qk - qi * qw))
    row2 = (2 * (qi * qk - qj * qw), 2 * (qj * qk + qi * qw), 1 - 2 * (qi * qi + qj * qj))

    # apply R^T to the linear (rows 0:3) and angular (rows 3:6) parts
    for off in (0, 3):
        vx, vy, vz = t_w[off, :], t_w[off + 1, :], t_w[off + 2, :]
        t_out[off, :] = row0[0] * vx + row1[0] * vy + row2[0] * vz
        t_out[off + 1, :] = row0[1] * vx + row1[1] * vy + row2[1] * vz
        t_out[off + 2, :] = row0[2] * vx + row1[2] * vy + row2[2] * vz
+
+if __name__ == "__main__":
+
+ n_envs = 5000
+ t_b = np.random.rand(6, n_envs)
+
+ q_b = np.random.rand(4, n_envs)
+ q_b_norm = q_b / np.linalg.norm(q_b, axis=0, keepdims=True)
+
+ t_w = np.zeros_like(t_b) # To hold horizontal frame velocities
+ t_b_recovered = np.zeros_like(t_b) # To hold recovered world frame velocities
+ base2world_frame(t_b, q_b_norm, t_w)
+ world2base_frame(t_w, q_b_norm, t_b_recovered)
+ assert np.allclose(t_b, t_b_recovered, atol=1e-6), "Test failed: t_w_recovered does not match t_b"
+ print("Forward test passed: t_b_recovered matches t_b")
+
+ t_b2 = np.zeros_like(t_b) # To hold horizontal frame velocities
+ t_w_recovered = np.zeros_like(t_b)
+ world2base_frame(t_b, q_b_norm, t_b2)
+ base2world_frame(t_b2, q_b_norm, t_w_recovered)
+ assert np.allclose(t_b, t_w_recovered, atol=1e-6), "Test failed: t_w_recovered does not match t_b"
+ print("Backward test passed: t_w_recovered matches t_w")
+
+ # test transf. world-horizontal frame
+ v_h = np.zeros_like(t_b) # To hold horizontal frame velocities
+ v_recovered = np.zeros_like(t_b)
+ w2hor_frame(t_b, q_b_norm, v_h)
+ hor2w_frame(v_h, q_b_norm, v_recovered)
+ assert np.allclose(t_b, v_recovered, atol=1e-6), "Test failed: v_recovered does not match t_b"
+ print("horizontal forward frame test passed: matches ")
+
+ t_w = np.zeros_like(t_b) # To hold horizontal frame velocities
+ v_h_recovered = np.zeros_like(t_b)
+ hor2w_frame(t_b, q_b_norm, t_w)
+ w2hor_frame(t_w, q_b_norm, v_h_recovered)
+ assert np.allclose(t_b, v_h_recovered, atol=1e-6), "Test failed: v_h_recovered does not match t_b"
+ print("horizontal backward frame test passed: matches ")
+
+
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/hybrid_quad_client.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/hybrid_quad_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..757dc3aba72a500bbbf6c8fa21145f4ea63a2337
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/other/hybrid_quad_client.py
@@ -0,0 +1,95 @@
+from aug_mpc.controllers.rhc.augmpc_cluster_client import AugMpcClusterClient
+
+from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc import HybridQuadRhc
+from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds_horizon
+from aug_mpc.utils.sys_utils import PathsGetter
+
+from typing import List, Dict
+
+class HybridQuadrupedClusterClient(AugMpcClusterClient):
+
    def _import_aux_libs(self):
        """Import auxiliary libraries needed by the controllers.

        Extends the parent hook; Horizon and its dependencies are imported
        lazily here as globals — presumably so the import cost is paid in the
        process that actually runs the controllers (TODO confirm).
        """
        super()._import_aux_libs()
        # Import Horizon and related dependencies as global libs
        from aug_mpc.controllers.rhc.horizon_based.horizon_imports_glob import import_horizon_global
        import_horizon_global()
+
+ def __init__(self,
+ namespace: str,
+ urdf_xacro_path: str,
+ srdf_xacro_path: str,
+ cluster_size: int,
+ set_affinity: bool = False,
+ use_mp_fork: bool = False,
+ isolated_cores_only: bool = False,
+ core_ids_override_list: List[int] = None,
+ verbose: bool = False,
+ debug: bool = False,
+ open_loop: bool = True,
+ base_dump_dir: str = "/tmp",
+ timeout_ms: int = 60000,
+ codegen_override: str = None,
+ custom_opts: Dict = {}):
+
+ self._open_loop = open_loop
+
+ self._paths = PathsGetter()
+
+ self._codegen_dir_name = namespace
+
+ self._timeout_ms = timeout_ms
+
+ super().__init__(namespace = namespace,
+ urdf_xacro_path = urdf_xacro_path,
+ srdf_xacro_path = srdf_xacro_path,
+ cluster_size=cluster_size,
+ set_affinity = set_affinity,
+ use_mp_fork = use_mp_fork,
+ isolated_cores_only = isolated_cores_only,
+ core_ids_override_list = core_ids_override_list,
+ verbose = verbose,
+ debug = debug,
+ base_dump_dir=base_dump_dir,
+ codegen_override=codegen_override,
+ custom_opts=custom_opts)
+
+ self._n_nodes = 31 if not ("n_nodes" in self._custom_opts) else self._custom_opts["n_nodes"]
+ self._dt = 0.05 if not ("cluster_dt" in self._custom_opts) else self._custom_opts["cluster_dt"]
+
+ def _xrdf_cmds(self):
+ parts = self._urdf_path.split('/')
+ urdf_descr_root_path = '/'.join(parts[:-2])
+ cmds = get_xrdf_cmds_horizon(urdf_descr_root_path=urdf_descr_root_path)
+ return cmds
+
+ def _process_codegen_dir(self,idx:int):
+
+ codegen_dir = self.codegen_dir() + f"/{self._codegen_dir_name}Rhc{idx}"
+ codegen_dir_ovveride = self.codegen_dir_override()
+ if not (codegen_dir_ovveride=="" or \
+ codegen_dir_ovveride=="none" or \
+ codegen_dir_ovveride=="None" or \
+ (codegen_dir_ovveride is None)): # if overrde was provided
+ codegen_dir = f"{codegen_dir_ovveride}{idx}"# override
+
+ return codegen_dir
+
+ def _generate_controller(self,
+ idx: int):
+
+ codegen_dir=self._process_codegen_dir(idx=idx)
+
+ controller = HybridQuadRhc(
+ urdf_path=self._urdf_path,
+ srdf_path=self._srdf_path,
+ config_path = self._paths.CONFIGPATH,
+ robot_name=self._namespace,
+ codegen_dir=codegen_dir,
+ n_nodes=self._n_nodes,
+ dt=self._dt,
+ max_solver_iter = 1, # rti
+ open_loop = self._open_loop,
+ verbose = self._verbose,
+ debug = self._debug)
+
+ return controller
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rhc.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rhc.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9410261d4dca35164fbe8abe5ac12b166b3483d
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rhc.py
@@ -0,0 +1,1262 @@
+# Copyright (C) 2023 Andrea Patrizi (AndrePatri, andreapatrizi1b6e6@gmail.com)
+#
+# This file is part of MPCHive and distributed under the General Public License version 2 license.
+#
+# MPCHive is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# MPCHive is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with MPCHive. If not, see .
+#
+from abc import ABC, abstractmethod
+# from perf_sleep.pyperfsleep import PerfSleep
+# from mpc_hive.utilities.cpu_utils.core_utils import get_memory_usage
+
+import time
+
+import numpy as np
+
+from mpc_hive.utilities.shared_data.rhc_data import RobotState
+from mpc_hive.utilities.shared_data.rhc_data import RhcCmds, RhcPred, RhcPredDelta
+from mpc_hive.utilities.shared_data.rhc_data import RhcStatus
+from mpc_hive.utilities.shared_data.rhc_data import RhcInternal
+from mpc_hive.utilities.shared_data.cluster_profiling import RhcProfiling
+from mpc_hive.utilities.remote_triggering import RemoteTriggererClnt
+
+from mpc_hive.utilities.homing import RobotHomer
+
+from mpc_hive.utilities.math_utils import world2base_frame
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import Journal, LogType
+from EigenIPC.PyEigenIPCExt.wrappers.shared_data_view import SharedTWrapper
+from EigenIPC.PyEigenIPC import dtype
+
+from typing import List
+# from typing import TypeVar, Union
+
+import signal
+import os
+import inspect
+
+class RHController(ABC):
+
+ def __init__(self,
+ srdf_path: str,
+ n_nodes: int,
+ dt: float,
+ namespace: str, # shared mem namespace
+ dtype = np.float32,
+ verbose = False,
+ debug = False,
+ timeout_ms: int = 60000,
+ allow_less_jnts: bool = True):
+
+ signal.signal(signal.SIGINT, self._handle_sigint)
+
+ self._allow_less_jnts = allow_less_jnts # whether to allow less joints in rhc controller than the ones on the robot
+ # (e.g. some joints might not be desirable for control purposes)
+
+ self.namespace = namespace
+ self._dtype = dtype
+ self._verbose = verbose
+ self._debug = debug
+
+ # if not self._debug:
+ np.seterr(over='ignore') # ignoring overflows
+
+ self._n_nodes = n_nodes
+ self._dt = dt
+ self._n_intervals = self._n_nodes - 1
+ self._t_horizon = self._n_intervals * dt
+ self._set_rhc_pred_idx() # prection is by default written on last node
+ self._set_rhc_cmds_idx() # default to idx 2 (i.e. cmds to get to third node)
+ self.controller_index = None # will be assigned upon registration to a cluster
+ self.controller_index_np = None
+ self._robot_mass=1.0
+
+ self.srdf_path = srdf_path # using for parsing robot homing
+
+ self._registered = False
+ self._closed = False
+
+ self._allow_triggering_when_failed = True
+
+ self._profiling_data_dict = {}
+ self._profiling_data_dict["full_solve_dt"] = np.nan
+ self._profiling_data_dict["rti_solve_dt"] = np.nan
+ self._profiling_data_dict["problem_update_dt"] = np.nan
+ self._profiling_data_dict["phases_shift_dt"] = np.nan
+ self._profiling_data_dict["task_ref_update"] = np.nan
+
+ self.n_dofs = None
+ self.n_contacts = None
+
+ # shared mem
+ self.robot_state = None
+ self.rhc_status = None
+ self.rhc_internal = None
+ self.cluster_stats = None
+ self.robot_cmds = None
+ self.robot_pred = None
+ self.rhc_pred_delta = None
+ self.rhc_refs = None
+ self._remote_triggerer = None
+ self._remote_triggerer_timeout = timeout_ms # [ms]
+
+ # remote termination
+ self._remote_term = None
+ self._term_req_received = False
+
+ # jnt names
+ self._env_side_jnt_names = []
+ self._controller_side_jnt_names = []
+ self._got_jnt_names_from_controllers = False
+
+ # data maps
+ self._to_controller = []
+ self._quat_remap = [0, 1, 2, 3] # defaults to no remap (to be overridden)
+
+ self._got_contact_names = False
+
+ self._received_trigger = False # used for proper termination
+
+ self._n_resets = 0
+ self._n_fails = 0
+ self._fail_idx_thresh = 5e3
+ self._contact_f_thresh = 1e5
+
+ self._failed = False
+
+ self._start_time = time.perf_counter() # used for profiling when in debug mode
+
+ self._homer = None # robot homing manager
+ self._homer_env = None # used for setting homing to jnts not contained in rhc prb
+
+ self._class_name_base = f"{self.__class__.__name__}"
+ self._class_name = self._class_name_base # will append controller index upon registration
+
+ self._contact_force_base_loc_aux=np.zeros((1,3),dtype=self._dtype)
+ self._norm_grav_vector_w=np.zeros((1,3),dtype=self._dtype)
+ self._norm_grav_vector_w[:, 2]=-1.0
+ self._norm_grav_vector_base_loc=np.zeros((1,3),dtype=self._dtype)
+
+ self._init() # initialize controller
+
+ if not hasattr(self, '_rhc_fpaths'):
+ self._rhc_fpaths = []
+ self._rhc_fpaths.append(os.path.abspath(__file__))
+
+ def __init_subclass__(cls, **kwargs):
+ super().__init_subclass__(**kwargs)
+ # Get the file path of the class being initialized and append it to the list
+ if not hasattr(cls, '_rhc_fpaths'):
+ cls._rhc_fpaths = []
+ child_class_file_path = os.path.abspath(inspect.getfile(cls))
+ cls._rhc_fpaths.append(child_class_file_path)
+
+ def this_paths(self):
+ return self._rhc_fpaths
+
+ def __del__(self):
+ self._close()
+
+ def _handle_sigint(self, signum, frame):
+ if self._verbose:
+ Journal.log(self._class_name,
+ "_handle_sigint",
+ "SIGINT received",
+ LogType.WARN)
+ self._term_req_received = True
+
+ def _set_rhc_pred_idx(self):
+ # default to last node
+ self._pred_node_idx=self._n_nodes-1
+
+ def _set_rhc_cmds_idx(self):
+ # use index 2 by default to compensate for
+ # the inevitable action delay
+ # (get_state, trigger sol -> +dt -> apply sol )
+ # if we apply action from second node (idenx 1)
+ # that action will already be one dt old. We assume we were already
+ # applying the right action to get to the state of node idx 1 and get the
+ # cmds for reaching the state at idx 1
+ self._rhc_cmds_node_idx=2
+
+ def _close(self):
+ if not self._closed:
+ self._unregister_from_cluster()
+ if self.robot_cmds is not None:
+ self.robot_cmds.close()
+ if self.robot_pred is not None:
+ self.robot_pred.close()
+ if self.rhc_pred_delta is not None:
+ self.rhc_pred_delta.close()
+ if self.robot_state is not None:
+ self.robot_state.close()
+ if self.rhc_status is not None:
+ self.rhc_status.close()
+ if self.rhc_internal is not None:
+ self.rhc_internal.close()
+ if self.cluster_stats is not None:
+ self.cluster_stats.close()
+ if self._remote_triggerer is not None:
+ self._remote_triggerer.close()
+ if self._remote_term is not None:
+ self._remote_term.close()
+ self._closed = True
+
+ def close(self):
+ self._close()
+
+ def get_file_paths(self):
+ # can be overriden by child
+ base_paths = []
+ base_paths.append(self._this_path)
+ return base_paths
+
+ def init_rhc_task_cmds(self):
+
+ self.rhc_refs = self._init_rhc_task_cmds()
+ self.rhc_refs.reset()
+
+ def _init_states(self):
+
+ quat_remap = self._get_quat_remap()
+ self.robot_state = RobotState(namespace=self.namespace,
+ is_server=False,
+ q_remapping=quat_remap, # remapping from environment to controller
+ with_gpu_mirror=False,
+ with_torch_view=False,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ optimize_mem=True,
+ n_robots=1, # we just need the row corresponding to this controller
+ n_jnts=None, # got from server
+ n_contacts=None # got from server
+ )
+ self.robot_state.run()
+ self.robot_cmds = RhcCmds(namespace=self.namespace,
+ is_server=False,
+ q_remapping=quat_remap, # remapping from environment to controller
+ with_gpu_mirror=False,
+ with_torch_view=False,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ optimize_mem=True,
+ n_robots=1, # we just need the row corresponding to this controller
+ n_jnts=None, # got from server
+ n_contacts=None # got from server
+ )
+ self.robot_cmds.run()
+ self.robot_pred = RhcPred(namespace=self.namespace,
+ is_server=False,
+ q_remapping=quat_remap, # remapping from environment to controller
+ with_gpu_mirror=False,
+ with_torch_view=False,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ optimize_mem=True,
+ n_robots=1, # we just need the row corresponding to this controller
+ n_jnts=None, # got from server
+ n_contacts=None # got from server
+ )
+ self.robot_pred.run()
+ self.rhc_pred_delta = RhcPredDelta(namespace=self.namespace,
+ is_server=False,
+ q_remapping=quat_remap, # remapping from environment to controller
+ with_gpu_mirror=False,
+ with_torch_view=False,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ optimize_mem=True,
+ n_robots=1, # we just need the row corresponding to this controller
+ n_jnts=None, # got from server
+ n_contacts=None # got from server
+ )
+ self.rhc_pred_delta.run()
+
+ def _rhc(self, rti: bool = True):
+ if self._debug:
+ self._rhc_db(rti=rti)
+ else:
+ self._rhc_min(rti=rti)
+
+ def _rhc_db(self, rti: bool = True):
+ # rhc with debug data
+ self._start_time = time.perf_counter()
+
+ self.robot_state.synch_from_shared_mem(robot_idx=self.controller_index, robot_idx_view=self.controller_index_np) # updates robot state with
+ # latest data on shared mem
+
+ self._compute_pred_delta()
+
+ if not self.failed():
+ # we can solve only if not in failure state
+ if rti:
+ self._failed = not self._solve() # solve actual TO with RTI
+ else:
+ self._failed = not self._bootstrap() # full bootstrap solve
+ if (self._failed):
+ # perform failure procedure
+ self._on_failure()
+ else:
+ if not self._allow_triggering_when_failed:
+ Journal.log(self._class_name,
+ "solve",
+ f"Received solution req, but controller is in failure state. " + \
+ " You should have reset() the controller!",
+ LogType.EXCEP,
+ throw_when_excep = True)
+ else:
+ if self._verbose:
+ Journal.log(self._class_name,
+ "solve",
+ f"Received solution req, but controller is in failure state. No solution will be performed. " + \
+ " Use the reset() method to continue solving!",
+ LogType.WARN)
+
+ self._write_cmds_from_sol() # we update update the views of the cmds
+ # from the latest solution
+
+ # in debug, rhc internal state is streamed over
+ # shared mem.
+ self._update_rhc_internal()
+ self._profiling_data_dict["full_solve_dt"] = time.perf_counter() - self._start_time
+ self._update_profiling_data() # updates all profiling data
+ if self._verbose:
+ Journal.log(self._class_name,
+ "solve",
+ f"RHC full solve loop execution time -> " + str(self._profiling_data_dict["full_solve_dt"]),
+ LogType.INFO,
+ throw_when_excep = True)
+
+ def _rhc_min(self, rti: bool = True):
+
+ self.robot_state.synch_from_shared_mem(robot_idx=self.controller_index, robot_idx_view=self.controller_index_np) # updates robot state with
+ # latest data on shared mem
+
+ self._compute_pred_delta()
+
+ if not self.failed():
+ # we can solve only if not in failure state
+ if rti:
+ self._failed = not self._solve() # solve actual TO with RTI
+ else:
+ self._failed = not self._bootstrap() # full bootstrap solve
+ if (self._failed):
+ # perform failure procedure
+ self._on_failure()
+ else:
+ if not self._allow_triggering_when_failed:
+ Journal.log(self._class_name,
+ "solve",
+ f"Received solution req, but controller is in failure state. " + \
+ " You should have reset() the controller!",
+ LogType.EXCEP,
+ throw_when_excep = True)
+ else:
+ if self._verbose:
+ Journal.log(self._class_name,
+ "solve",
+ f"Received solution req, but controller is in failure state. No solution will be performed. " + \
+ " Use the reset() method to continue solving!",
+ LogType.WARN)
+
+ self._write_cmds_from_sol() # we update the views of the cmds
+ # from the latest solution even if failed
+
+ def solve_once(self):
+ # run a single iteration of the solve loop (used for pooling)
+ if self._term_req_received:
+ return False
+
+ if not self._remote_triggerer.wait(self._remote_triggerer_timeout):
+ Journal.log(self._class_name,
+ f"solve",
+ "Didn't receive any remote trigger req within timeout!",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ return False
+
+ self._received_trigger = True
+
+ if self.rhc_status.resets.read_retry(row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)[0]:
+ self.reset() # rhc is reset
+
+ if self.rhc_status.trigger.read_retry(row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)[0]:
+ rti_solve = self.rhc_status.rti_solve.read_retry(row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)[0]
+ self._rhc(rti=rti_solve) # run solution with requested solve mode
+ self.rhc_status.trigger.write_retry(False,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0) # allow next solution trigger
+
+ self._remote_triggerer.ack() # send ack signal to server
+ self._received_trigger = False
+
+ self._term_req_received = self._term_req_received or self._remote_term.read_retry(row_index=0,
+ col_index=0,
+ row_index_view=0)[0]
+
+ if self._term_req_received:
+ self.close()
+ return False
+
+ return True
+
+ def solve(self):
+
+ # run the solution loop and wait for trigger signals
+ # using cond. variables (efficient)
+ while True:
+ if not self.solve_once():
+ break
+
+ self.close() # is not stricly necessary
+
+ def reset(self):
+
+ if not self._closed:
+ self.reset_rhc_data()
+ self._failed = False # allow triggering
+ self._n_resets += 1
+ self.rhc_status.fails.write_retry(False,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.rhc_status.resets.write_retry(False,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+
+ def _create_jnt_maps(self):
+
+ # retrieve env-side joint names from shared mem
+ self._env_side_jnt_names = self.robot_state.jnt_names()
+ self._check_jnt_names_compatibility() # will raise exception if not self._allow_less_jnts
+ if not self._got_jnt_names_from_controllers:
+ exception = f"Cannot run the solve(). assign_env_side_jnt_names() was not called!"
+ Journal.log(self._class_name,
+ "_create_jnt_maps",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ # robot homer guarantees that _controller_side_jnt_names is at least contained in self._env_side_jnt_names (or equal)
+ self._to_controller = [self._env_side_jnt_names.index(element) for element in self._controller_side_jnt_names]
+ # set joint remappings for shared data from env data to controller
+ # all shared data is by convention specified according to the env (jnts are ordered that way)
+ # the remapping is used so that when data is returned, its a remapped view from env to controller,
+ # so that data can be assigned direclty from the rhc controller without any issues
+ self.robot_state.set_jnts_remapping(jnts_remapping=self._to_controller)
+ self.robot_cmds.set_jnts_remapping(jnts_remapping=self._to_controller)
+ self.robot_pred.set_jnts_remapping(jnts_remapping=self._to_controller)
+ self.rhc_pred_delta.set_jnts_remapping(jnts_remapping=self._to_controller)
+
+ return True
+
+ def reset_rhc_data(self):
+
+ self._reset() # custom reset (e.g. it should set the current solution to some
+ # default solution, like a bootstrap)
+
+ self.rhc_refs.reset() # reset rhc refs to default (has to be called after _reset)
+
+ self._write_cmds_from_sol() # use latest solution (e.g. from bootstrap if called before running
+ # the first solve) as default state
+
+ def failed(self):
+ return self._failed
+
+ def robot_mass(self):
+ return self._robot_mass
+
+ def _assign_cntrl_index(self, reg_state: np.ndarray):
+ state = reg_state.flatten() # ensure 1D tensor
+ free_spots = np.nonzero(~state.flatten())[0]
+ return free_spots[0].item() # just return the first free spot
+
+ def _register_to_cluster(self):
+
+ self.rhc_status = RhcStatus(is_server=False,
+ namespace=self.namespace,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ with_torch_view=False,
+ with_gpu_mirror=False,
+ optimize_mem=True,
+ cluster_size=1, # we just need the row corresponding to this controller
+ n_contacts=None, # we get this from server
+ n_nodes=None # we get this from server
+ )
+ self.rhc_status.run() # rhc status (reg. flags, failure, tot cost, tot cnstrl viol, etc...)
+
+ # acquire semaphores since we have to perform non-atomic operations
+ # on the whole memory views
+ self.rhc_status.registration.data_sem_acquire()
+ self.rhc_status.controllers_counter.data_sem_acquire()
+ self.rhc_status.controllers_counter.synch_all(retry = True,
+ read = True)
+
+ available_spots = self.rhc_status.cluster_size
+ # from here on all pre registration ops can be done
+
+ # incrementing cluster controllers counter
+ controllers_counter = self.rhc_status.controllers_counter.get_numpy_mirror()
+ if controllers_counter[0, 0] + 1 > available_spots: # no space left -> return
+ self.rhc_status.controllers_counter.data_sem_release()
+ self.rhc_status.registration.data_sem_release()
+ exception = "Cannot register to cluster. No space left " + \
+ f"({controllers_counter[0, 0]} controllers already registered)"
+ Journal.log(self._class_name,
+ "_register_to_cluster",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ # now we can register
+
+ # increment controllers counter
+ controllers_counter += 1
+ self.controller_index = controllers_counter.item() -1
+
+ # actually register to cluster
+ self.rhc_status.controllers_counter.synch_all(retry = True,
+ read = False) # writes to shared mem (just one for all controllers (i.e n_rows = 1))
+
+ # read current registration state
+ self.rhc_status.registration.synch_all(retry = True,
+ read = True,
+ row_index=self.controller_index,
+ row_index_view=0)
+ registrations = self.rhc_status.registration.get_numpy_mirror()
+ # self.controller_index = self._assign_cntrl_index(registrations)
+
+
+ self._class_name_base = self._class_name_base+str(self.controller_index)
+ # self.controller_index_np = np.array(self.controller_index)
+ self.controller_index_np = np.array(0) # given that we use optimize_mem, the shared mem copy has shape 1 x n_cols (we can write and read at [0, :])
+
+ registrations[self.controller_index_np, 0] = True
+ self.rhc_status.registration.synch_all(retry = True,
+ read = False,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+
+ # now all heavy stuff that would otherwise make the registration slow
+ self._remote_term = SharedTWrapper(namespace=self.namespace,
+ basename="RemoteTermination",
+ is_server=False,
+ verbose = self._verbose,
+ vlevel = VLevel.V2,
+ with_gpu_mirror=False,
+ with_torch_view=False,
+ dtype=dtype.Bool)
+ self._remote_term.run()
+
+ # other initializations
+
+ self.cluster_stats = RhcProfiling(is_server=False,
+ name=self.namespace,
+ verbose=self._verbose,
+ vlevel=VLevel.V2,
+ safe=True,
+ optimize_mem=True,
+ cluster_size=1 # we just need the row corresponding to this controller
+ ) # profiling data
+ self.cluster_stats.run()
+ self.cluster_stats.synch_info()
+
+ self._create_jnt_maps()
+ self.init_rhc_task_cmds() # initializes rhc interface to external commands (defined by child class)
+ self._consinstency_checks() # sanity checks
+
+ if self._homer is None:
+ self._init_robot_homer() # call this in case it wasn't called by child
+
+ self._robot_mass = self._get_robot_mass() # uses child class implemented method
+ self._contact_f_scale = self._get_robot_mass() * 9.81
+
+ # writing some static info about this controller
+ # self.rhc_status.rhc_static_info.synch_all(retry = True,
+ # read = True,
+ # row_index=self.controller_index,
+ # col_index=0) # first read current static info from other controllers (not necessary if optimize_mem==True)
+ self.rhc_status.rhc_static_info.set(data=np.array(self._dt),
+ data_type="dts",
+ rhc_idxs=self.controller_index_np,
+ gpu=False)
+ self.rhc_status.rhc_static_info.set(data=np.array(self._t_horizon),
+ data_type="horizons",
+ rhc_idxs=self.controller_index_np,
+ gpu=False)
+ self.rhc_status.rhc_static_info.set(data=np.array(self._n_nodes),
+ data_type="nnodes",
+ rhc_idxs=self.controller_index_np,
+ gpu=False)
+ # writing some static rhc info which is only available after problem init
+ self.rhc_status.rhc_static_info.set(data=np.array(len(self._get_contacts())),
+ data_type="ncontacts",
+ rhc_idxs=self.controller_index_np,
+ gpu=False)
+ self.rhc_status.rhc_static_info.set(data=np.array(self.robot_mass()),
+ data_type="robot_mass",
+ rhc_idxs=self.controller_index_np,
+ gpu=False)
+ self.rhc_status.rhc_static_info.set(data=np.array(self._pred_node_idx),
+ data_type="pred_node_idx",
+ rhc_idxs=self.controller_index_np,
+ gpu=False)
+
+ self.rhc_status.rhc_static_info.synch_retry(row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.rhc_status.rhc_static_info.n_cols,
+ read=False)
+
+ # we set homings also for joints which are not in the rhc homing map
+ # since this is usually required on server side
+
+ homing_full = self._homer_env.get_homing().reshape(1,
+ self.robot_cmds.n_jnts())
+
+ null_action = np.zeros((1, self.robot_cmds.n_jnts()),
+ dtype=self._dtype)
+
+ self.robot_cmds.jnts_state.set(data=homing_full, data_type="q",
+ robot_idxs=self.controller_index_np,
+ no_remap=True)
+ self.robot_cmds.jnts_state.set(data=null_action, data_type="v",
+ robot_idxs=self.controller_index_np,
+ no_remap=True)
+ self.robot_cmds.jnts_state.set(data=null_action, data_type="eff",
+ robot_idxs=self.controller_index_np,
+ no_remap=True)
+
+ # write all joints (including homing for env-only ones)
+ self.robot_cmds.jnts_state.synch_retry(row_index=self.controller_index, col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols,
+ read=False) # only write data corresponding to this controller
+
+ self.reset() # reset controller
+ self._n_resets=0
+
+ # for last we create the trigger client
+ self._remote_triggerer = RemoteTriggererClnt(namespace=self.namespace,
+ verbose=self._verbose,
+ vlevel=VLevel.V2) # remote triggering
+ self._remote_triggerer.run()
+
+ if self._debug:
+ # internal solution is published on shared mem
+ # we assume the user has made available the cost
+ # and constraint data at this point (e.g. through
+ # the solution of a bootstrap)
+ cost_data = self._get_cost_info()
+ constr_data = self._get_constr_info()
+ config = RhcInternal.Config(is_server=True,
+ enable_q= True,
+ enable_v=True,
+ enable_a=True,
+ enable_a_dot=False,
+ enable_f=True,
+ enable_f_dot=False,
+ enable_eff=False,
+ cost_names=cost_data[0],
+ cost_dims=cost_data[1],
+ constr_names=constr_data[0],
+ constr_dims=constr_data[1],
+ )
+ self.rhc_internal = RhcInternal(config=config,
+ namespace=self.namespace,
+ rhc_index = self.controller_index,
+ n_contacts=self.n_contacts,
+ n_jnts=self.n_dofs,
+ jnt_names=self._controller_side_jnt_names,
+ n_nodes=self._n_nodes,
+ verbose = self._verbose,
+ vlevel=VLevel.V2,
+ force_reconnection=True,
+ safe=True)
+ self.rhc_internal.run()
+
+ self._class_name = self._class_name + f"-{self.controller_index}"
+
+ Journal.log(self._class_name,
+ "_register_to_cluster",
+ f"controller registered",
+ LogType.STAT,
+ throw_when_excep = True)
+
+ # we can now release everything so that other controllers can register
+ self.rhc_status.controllers_counter.data_sem_release()
+ self.rhc_status.registration.data_sem_release()
+
+ self._registered = True
+
+ def _unregister_from_cluster(self):
+
+ if self._received_trigger:
+ # received interrupt during solution -->
+ # send ack signal to server anyway
+ self._remote_triggerer.ack()
+ if self._registered:
+ # acquire semaphores since we have to perform operations
+ # on the whole memory views
+ self.rhc_status.registration.data_sem_acquire()
+ self.rhc_status.controllers_counter.data_sem_acquire()
+ self.rhc_status.registration.write_retry(False,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self._deactivate()
+ # decrementing controllers counter
+ self.rhc_status.controllers_counter.synch_all(retry = True,
+ read = True)
+ controllers_counter = self.rhc_status.controllers_counter.get_numpy_mirror()
+ controllers_counter -= 1
+ self.rhc_status.controllers_counter.synch_all(retry = True,
+ read = False)
+ Journal.log(self._class_name,
+ "_unregister_from_cluster",
+ "Done",
+ LogType.STAT,
+ throw_when_excep = True)
+ # we can now release everything
+ self.rhc_status.registration.data_sem_release()
+ self.rhc_status.controllers_counter.data_sem_release()
+ self._registered = False
+
+ def _get_quat_remap(self):
+ # to be overridden by child class if necessary
+ return [0, 1, 2, 3]
+
+ def _consinstency_checks(self):
+
+ # check controller dt
+ server_side_cluster_dt = self.cluster_stats.get_info(info_name="cluster_dt")
+ if not (abs(server_side_cluster_dt - self._dt) < 1e-4):
+ exception = f"Trying to initialize a controller with control dt {self._dt}, which" + \
+ f"does not match the cluster control dt {server_side_cluster_dt}"
+ Journal.log(self._class_name,
+ "_consinstency_checks",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ # check contact names
+
+ server_side_contact_names = set(self.robot_state.contact_names())
+ control_side_contact_names = set(self._get_contacts())
+
+ if (not server_side_contact_names == control_side_contact_names) and self._verbose:
+ warn = f"Controller-side contact names do not match server-side names!" + \
+ f"\nServer: {self.robot_state.contact_names()}\n Controller: {self._get_contacts()}"
+ Journal.log(self._class_name,
+ "_consinstency_checks",
+ warn,
+ LogType.WARN,
+ throw_when_excep = True)
+ if not len(self.robot_state.contact_names()) == len(self._get_contacts()):
+ # at least, we need the n of contacts to match!
+ exception = f"Controller-side n contacts {self._get_contacts()} do not match " + \
+ f"server-side n contacts {len(self.robot_state.contact_names())}!"
+ Journal.log(self._class_name,
+ "_consinstency_checks",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ def _init(self):
+
+ stat = f"Trying to initialize RHC controller " + \
+ f"with dt: {self._dt} s, t_horizon: {self._t_horizon} s, n_intervals: {self._n_intervals}"
+ Journal.log(self._class_name,
+ "_init",
+ stat,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ self._init_states() # initializes shared mem. states
+
+ self._init_problem() # we call the child's initialization method for the actual problem
+ self._post_problem_init()
+
+ self._register_to_cluster() # registers the controller to the cluster
+
+ Journal.log(self._class_name,
+ "_init",
+ f"RHC controller initialized with cluster index {self.controller_index} on process {os.getpid()}",
+ LogType.STAT,
+ throw_when_excep = True)
+
+ def _deactivate(self):
+ # signal controller deactivation over shared mem
+ self.rhc_status.activation_state.write_retry(False,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ # also set cmds to homing for safety
+ # self.reset_rhc_data()
+
+ def _on_failure(self):
+
+ self.rhc_status.fails.write_retry(True,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self._deactivate()
+ self._n_fails += 1
+ self.rhc_status.controllers_fail_counter.write_retry(self._n_fails,
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+
+ def _init_robot_homer(self):
+ self._homer = RobotHomer(srdf_path=self.srdf_path,
+ jnt_names=self._controller_side_jnt_names,
+ verbose=self._verbose)
+
+ self._homer_env = RobotHomer(srdf_path=self.srdf_path,
+ jnt_names=self.robot_state.jnt_names(),
+ verbose=self._verbose)
+
+ def _update_profiling_data(self):
+
+ # updated debug data on shared memory
+ # with the latest info available
+ self.cluster_stats.solve_loop_dt.write_retry(self._profiling_data_dict["full_solve_dt"],
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.cluster_stats.rti_sol_time.write_retry(self._profiling_data_dict["rti_solve_dt"],
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.cluster_stats.prb_update_dt.write_retry(self._profiling_data_dict["problem_update_dt"],
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.cluster_stats.phase_shift_dt.write_retry(self._profiling_data_dict["phases_shift_dt"],
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.cluster_stats.task_ref_update_dt.write_retry(self._profiling_data_dict["task_ref_update"],
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+
+ def _write_cmds_from_sol(self):
+
+ # gets data from the solution and updates the view on the shared data
+
+ # cmds for robot
+ # jnts
+ self.robot_cmds.jnts_state.set(data=self._get_jnt_q_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="q", robot_idxs=self.controller_index_np)
+ self.robot_cmds.jnts_state.set(data=self._get_jnt_v_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="v", robot_idxs=self.controller_index_np)
+ self.robot_cmds.jnts_state.set(data=self._get_jnt_a_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="a", robot_idxs=self.controller_index_np)
+ self.robot_cmds.jnts_state.set(data=self._get_jnt_eff_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="eff", robot_idxs=self.controller_index_np)
+ # root
+ self.robot_cmds.root_state.set(data=self._get_root_full_q_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="q_full", robot_idxs=self.controller_index_np)
+ self.robot_cmds.root_state.set(data=self._get_root_twist_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="twist", robot_idxs=self.controller_index_np)
+ self.robot_cmds.root_state.set(data=self._get_root_a_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="a_full", robot_idxs=self.controller_index_np)
+ self.robot_cmds.root_state.set(data=self._get_norm_grav_vector_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="gn", robot_idxs=self.controller_index_np)
+ f_contact = self._get_f_from_sol()
+ if f_contact is not None:
+ contact_names = self.robot_state.contact_names()
+ node_idx_f_estimate=self._rhc_cmds_node_idx-1 # we always write the force to reach the desired state (prev node)
+ rhc_q_estimate=self._get_root_full_q_from_sol(node_idx=node_idx_f_estimate)[:, 3:7]
+ for i in range(len(contact_names)):
+ contact = contact_names[i]
+ contact_idx = i*3
+ contact_force_rhc_world=f_contact[contact_idx:(contact_idx+3), node_idx_f_estimate:node_idx_f_estimate+1].T
+ world2base_frame(v_w=contact_force_rhc_world,
+ q_b=rhc_q_estimate,
+ v_out=self._contact_force_base_loc_aux,
+ is_q_wijk=False # horizon q is ijkw
+ )
+ self.robot_cmds.contact_wrenches.set(data=self._contact_force_base_loc_aux,
+ data_type="f",
+ robot_idxs=self.controller_index_np,
+ contact_name=contact)
+
+ # prediction data from MPC horizon
+ self.robot_pred.jnts_state.set(data=self._get_jnt_q_from_sol(node_idx=self._pred_node_idx), data_type="q", robot_idxs=self.controller_index_np)
+ self.robot_pred.jnts_state.set(data=self._get_jnt_v_from_sol(node_idx=self._pred_node_idx), data_type="v", robot_idxs=self.controller_index_np)
+ self.robot_pred.jnts_state.set(data=self._get_jnt_a_from_sol(node_idx=self._pred_node_idx-1), data_type="a", robot_idxs=self.controller_index_np)
+ self.robot_pred.jnts_state.set(data=self._get_jnt_eff_from_sol(node_idx=self._pred_node_idx-1), data_type="eff", robot_idxs=self.controller_index_np)
+ self.robot_pred.root_state.set(data=self._get_root_full_q_from_sol(node_idx=self._pred_node_idx), data_type="q_full", robot_idxs=self.controller_index_np)
+ self.robot_pred.root_state.set(data=self._get_root_twist_from_sol(node_idx=self._pred_node_idx), data_type="twist", robot_idxs=self.controller_index_np)
+ self.robot_pred.root_state.set(data=self._get_root_a_from_sol(node_idx=self._pred_node_idx-1), data_type="a_full", robot_idxs=self.controller_index_np)
+ self.robot_pred.root_state.set(data=self._get_norm_grav_vector_from_sol(node_idx=self._pred_node_idx-1), data_type="gn", robot_idxs=self.controller_index_np)
+
+ # write robot commands
+ self.robot_cmds.jnts_state.synch_retry(row_index=self.controller_index, col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols,
+ read=False) # jnt state
+ self.robot_cmds.root_state.synch_retry(row_index=self.controller_index, col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.robot_cmds.root_state.n_cols,
+ read=False) # root state, in case it was written
+ self.robot_cmds.contact_wrenches.synch_retry(row_index=self.controller_index, col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.robot_cmds.contact_wrenches.n_cols,
+ read=False) # contact state
+
+ # write robot pred
+ self.robot_pred.jnts_state.synch_retry(row_index=self.controller_index, col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols,
+ read=False)
+ self.robot_pred.root_state.synch_retry(row_index=self.controller_index, col_index=0,
+ row_index_view=0,
+ n_rows=1, n_cols=self.robot_cmds.root_state.n_cols,
+ read=False)
+
+ # we also fill other data (cost, constr. violation, etc..)
+ self.rhc_status.rhc_cost.write_retry(self._get_rhc_cost(),
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.rhc_status.rhc_constr_viol.write_retry(self._get_rhc_constr_viol(),
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.rhc_status.rhc_n_iter.write_retry(self._get_rhc_niter_to_sol(),
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.rhc_status.rhc_nodes_cost.write_retry(data=self._get_rhc_nodes_cost(),
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.rhc_status.rhc_nodes_constr_viol.write_retry(data=self._get_rhc_nodes_constr_viol(),
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0)
+ self.rhc_status.rhc_fail_idx.write_retry(self._get_failure_index(),
+ row_index=self.controller_index,
+ col_index=0,
+ row_index_view=0) # write idx on shared mem
+
+ def _compute_pred_delta(self):
+
+ # measurements
+ q_full_root_meas = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np)
+ twist_root_meas = self.robot_state.root_state.get(data_type="twist", robot_idxs=self.controller_index_np)
+ a_root_meas = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np)
+ g_vec_root_meas = self.robot_state.root_state.get(data_type="gn", robot_idxs=self.controller_index_np)
+
+ q_jnts_meas = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np)
+ v_jnts_meas = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np)
+ a_jnts_meas = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np)
+ eff_jnts_meas = self.robot_state.jnts_state.get(data_type="eff", robot_idxs=self.controller_index_np)
+
+ # prediction from rhc
+ delta_root_q_full=self._get_root_full_q_from_sol(node_idx=1)-q_full_root_meas
+ delta_root_twist=self._get_root_twist_from_sol(node_idx=1)-twist_root_meas
+ delta_root_a=self._get_root_a_from_sol(node_idx=0)-a_root_meas
+ delta_g_vec=self._get_norm_grav_vector_from_sol(node_idx=0)-g_vec_root_meas
+
+ delta_jnts_q=self._get_jnt_q_from_sol(node_idx=1)-q_jnts_meas
+ delta_jnts_v=self._get_jnt_v_from_sol(node_idx=1)-v_jnts_meas
+ delta_jnts_a=self._get_jnt_a_from_sol(node_idx=0)-a_jnts_meas
+ delta_jnts_eff=self._get_jnt_eff_from_sol(node_idx=0)-eff_jnts_meas
+
+ # writing pred. errors
+ self.rhc_pred_delta.root_state.set(data=delta_root_q_full, data_type="q_full", robot_idxs=self.controller_index_np)
+ self.rhc_pred_delta.root_state.set(data=delta_root_twist,data_type="twist", robot_idxs=self.controller_index_np)
+ self.rhc_pred_delta.root_state.set(data=delta_root_a,data_type="a_full", robot_idxs=self.controller_index_np)
+ self.rhc_pred_delta.root_state.set(data=delta_g_vec, data_type="gn", robot_idxs=self.controller_index_np)
+
+ self.rhc_pred_delta.jnts_state.set(data=delta_jnts_q,data_type="q", robot_idxs=self.controller_index_np)
+ self.rhc_pred_delta.jnts_state.set(data=delta_jnts_v,data_type="v", robot_idxs=self.controller_index_np)
+ self.rhc_pred_delta.jnts_state.set(data=delta_jnts_a,data_type="a", robot_idxs=self.controller_index_np)
+ self.rhc_pred_delta.jnts_state.set(data=delta_jnts_eff, data_type="eff", robot_idxs=self.controller_index_np)
+
+ # write on shared memory
+ self.rhc_pred_delta.jnts_state.synch_retry(row_index=self.controller_index,
+ col_index=0,
+ n_rows=1,
+ row_index_view=0,
+ n_cols=self.robot_cmds.jnts_state.n_cols,
+ read=False) # jnt state
+ self.rhc_pred_delta.root_state.synch_retry(row_index=self.controller_index,
+ col_index=0,
+ n_rows=1,
+ row_index_view=0,
+ n_cols=self.robot_cmds.root_state.n_cols,
+ read=False) # root state
+
+ def _assign_controller_side_jnt_names(self,
+ jnt_names: List[str]):
+
+ self._controller_side_jnt_names = jnt_names
+ self._got_jnt_names_from_controllers = True
+
+ def _check_jnt_names_compatibility(self):
+
+ set_rhc = set(self._controller_side_jnt_names)
+ set_env = set(self._env_side_jnt_names)
+
+ if not set_rhc == set_env:
+ rhc_is_missing=set_env-set_rhc
+ env_is_missing=set_rhc-set_env
+
+ msg_type=LogType.WARN
+ message=""
+ if not len(rhc_is_missing)==0: # allowed
+ message = "\nSome env-side joint names are missing on rhc client-side!\n" + \
+ "RHC-SERVER-SIDE-> \n" + \
+ " ".join(self._env_side_jnt_names) + "\n" +\
+ "RHC-CLIENT-SIDE -> \n" + \
+ " ".join(self._controller_side_jnt_names) + "\n" \
+ "\MISSING -> \n" + \
+ " ".join(list(rhc_is_missing)) + "\n"
+ if not self._allow_less_jnts: # raise exception
+ msg_type=LogType.EXCEP
+
+ if not len(env_is_missing)==0: # not allowed
+ message = "\nSome rhc-side joint names are missing on rhc server-side!\n" + \
+ "RHC-SERVER-SIDE-> \n" + \
+ " ".join(self._env_side_jnt_names) + \
+ "RHC-CLIENT-SIDE -> \n" + \
+ " ".join(self._controller_side_jnt_names) + "\n" \
+ "\nmissing -> \n" + \
+ " ".join(list(env_is_missing))
+ msg_type=LogType.EXCEP
+
+ if msg_type==LogType.WARN and not self._verbose:
+ return
+
+ Journal.log(self._class_name,
+ "_check_jnt_names_compatibility",
+ message,
+ msg_type,
+ throw_when_excep = True)
+
+ def _get_cost_info(self):
+ # to be overridden by child class
+ return None, None
+
+ def _get_constr_info(self):
+ # to be overridden by child class
+ return None, None
+
+ def _get_fail_idx(self):
+ # to be overriden by parent
+ return 0.0
+
+ def _get_failure_index(self):
+ fail_idx=self._get_fail_idx()/self._fail_idx_thresh
+ if fail_idx>1.0:
+ fail_idx=1.0
+ return fail_idx
+
+ def _check_rhc_failure(self):
+ # we use fail idx viol to detect failures
+ idx = self._get_failure_index()
+ return idx>=1.0
+
+ def _update_rhc_internal(self):
+ # data which is not enabled in the config is not actually
+ # written so overhead is minimal for non-enabled data
+ self.rhc_internal.write_q(data= self._get_q_from_sol(),
+ retry=True)
+ self.rhc_internal.write_v(data= self._get_v_from_sol(),
+ retry=True)
+ self.rhc_internal.write_a(data= self._get_a_from_sol(),
+ retry=True)
+ self.rhc_internal.write_a_dot(data= self._get_a_dot_from_sol(),
+ retry=True)
+ self.rhc_internal.write_f(data= self._get_f_from_sol(),
+ retry=True)
+ self.rhc_internal.write_f_dot(data= self._get_f_dot_from_sol(),
+ retry=True)
+ self.rhc_internal.write_eff(data= self._get_eff_from_sol(),
+ retry=True)
+ for cost_idx in range(self.rhc_internal.config.n_costs):
+ # iterate over all costs and update all values
+ cost_name = self.rhc_internal.config.cost_names[cost_idx]
+ self.rhc_internal.write_cost(data= self._get_cost_from_sol(cost_name = cost_name),
+ cost_name = cost_name,
+ retry=True)
+ for constr_idx in range(self.rhc_internal.config.n_constr):
+ # iterate over all constraints and update all values
+ constr_name = self.rhc_internal.config.constr_names[constr_idx]
+ self.rhc_internal.write_constr(data= self._get_constr_from_sol(constr_name=constr_name),
+ constr_name = constr_name,
+ retry=True)
+
+ def _get_contacts(self):
+ contact_names = self._get_contact_names()
+ self._got_contact_names = True
+ return contact_names
+
+ def _get_q_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_v_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_a_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_a_dot_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_f_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_f_dot_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_eff_from_sol(self):
+ # to be overridden by child class
+ return None
+
+ def _get_cost_from_sol(self,
+ cost_name: str):
+ # to be overridden by child class
+ return None
+
+ def _get_constr_from_sol(self,
+ constr_name: str):
+ # to be overridden by child class
+ return None
+
+ @abstractmethod
+ def _reset(self):
+ pass
+
+ @abstractmethod
+ def _init_rhc_task_cmds(self):
+ pass
+
+ @abstractmethod
+ def _get_robot_jnt_names(self):
+ pass
+
+ @abstractmethod
+ def _get_contact_names(self):
+ pass
+
+ @abstractmethod
+ def _get_jnt_q_from_sol(self, node_idx=1) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_jnt_v_from_sol(self, node_idx=1) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_jnt_a_from_sol(self, node_idx=0) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_jnt_eff_from_sol(self, node_idx=0) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_root_full_q_from_sol(self, node_idx=1) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_full_q_from_sol(self, node_idx=1) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_root_twist_from_sol(self, node_idx=1) -> np.ndarray:
+ pass
+
+ @abstractmethod
+ def _get_root_a_from_sol(self, node_idx=0) -> np.ndarray:
+ pass
+
+ def _get_norm_grav_vector_from_sol(self, node_idx=1) -> np.ndarray:
+ rhc_q=self._get_root_full_q_from_sol(node_idx=node_idx)[:, 3:7]
+ world2base_frame(v_w=self._norm_grav_vector_w,q_b=rhc_q,v_out=self._norm_grav_vector_base_loc,
+ is_q_wijk=False)
+ return self._norm_grav_vector_base_loc
+
+ def _get_rhc_cost(self):
+ # to be overridden
+ return np.nan
+
+ def _get_rhc_constr_viol(self):
+ # to be overridden
+ return np.nan
+
+ def _get_rhc_nodes_cost(self):
+ # to be overridden
+ return np.zeros((1,self.rhc_status.n_nodes), dtype=self._dtype)
+
+ def _get_rhc_nodes_constr_viol(self):
+ # to be overridden
+ return np.zeros((1,self.rhc_status.n_nodes), dtype=self._dtype)
+
+ def _get_rhc_niter_to_sol(self) -> np.ndarray:
+ # to be overridden
+ return np.nan
+
+ @abstractmethod
+ def _update_open_loop(self):
+ # updates rhc controller
+ # using the internal state
+ pass
+
+ @abstractmethod
+ def _update_closed_loop(self):
+ # uses meas. from robot
+ pass
+
+ @abstractmethod
+ def _solve(self) -> bool:
+ pass
+
+ @abstractmethod
+ def _bootstrap(self) -> bool:
+ pass
+
+ @abstractmethod
+ def _get_ndofs(self):
+ pass
+
+ abstractmethod
+ def _get_robot_mass(self):
+ pass
+
+ @abstractmethod
+ def _init_problem(self):
+ # initialized horizon's TO problem
+ pass
+
+ @abstractmethod
+ def _post_problem_init(self):
+ pass
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/metadata.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9285c7f180c366e71c4a03bf77f55e6528f437ff
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/metadata.yaml
@@ -0,0 +1,68 @@
+rosbag2_bagfile_information:
+ version: 5
+ storage_identifier: sqlite3
+ duration:
+ nanoseconds: 89879882813
+ starting_time:
+ nanoseconds_since_epoch: 140136718
+ message_count: 133027
+ topics_with_message_count:
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_robot_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19003
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_robot_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19003
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_rhc_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19003
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_rhc_contacts
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19003
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_hl_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19004
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_rhc_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19005
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_rhc_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 19005
+ - topic_metadata:
+ name: /MPCViz_unitree_b2w_2026_03_28_11_09_54_ID_HandShake
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 1
+ compression_format: zstd
+ compression_mode: FILE
+ relative_file_paths:
+ - rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3.zstd
+ files:
+ - path: rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3
+ starting_time:
+ nanoseconds_since_epoch: 140136718
+ duration:
+ nanoseconds: 89879882813
+ message_count: 133027
\ No newline at end of file
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3 b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3
new file mode 100644
index 0000000000000000000000000000000000000000..662676248fff7fdfc237f952b0a7397e0618e60f
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8528347c6927e6eadcfbea4365ba2255a3ed3782b9d903dc4431d8b8e185c82c
+size 29442048
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3.zstd b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3.zstd
new file mode 100644
index 0000000000000000000000000000000000000000..6a0ad1a9f315490e12adfbf17ee01ea33647662e
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6/rosbag_unitree_b2w_2026_03_28_11_09_54_ID_2026-03-29_03-11-09_6_0.db3.zstd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ef90075d478fee30b0d8a14d46983d1c4402b5b7e763eb036eeaa25b7d30d20
+size 2011910
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/sac.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/sac.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd40a13287ccf6ccda694c218d114ede8d4b9d4b
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/sac.py
@@ -0,0 +1,680 @@
+import torch
+import torch.nn as nn
+from torch.distributions.normal import Normal
+import math
+
+from aug_mpc.utils.nn.normalization_utils import RunningNormalizer
+from aug_mpc.utils.nn.layer_utils import llayer_init
+
+from typing import List
+
+from EigenIPC.PyEigenIPC import LogType
+from EigenIPC.PyEigenIPC import Journal
+from EigenIPC.PyEigenIPC import VLevel
+
class SACAgent(nn.Module):
    """Soft Actor-Critic agent: actor + twin Q critics (with targets) + obs preprocessing.

    Observation preprocessing optionally applies a static rescaling (using the
    provided obs bounds) and/or a running normalizer. Critics are only built
    when training (or when load_qf is requested during eval).

    Fixes vs. original:
    - __init__ now initializes the obs normalizer BEFORE building the nets, so
      that torch_compile=True no longer hits a missing `obs_running_norm`
      attribute inside _build_nets().
    - load_state_dict uses re.search (re.match is anchored at the start and
      never matched keys like "obs_running_norm.<param>").
    """

    def __init__(self,
            obs_dim: int,
            actions_dim: int,
            obs_ub: List[float] = None,
            obs_lb: List[float] = None,
            actions_ub: List[float] = None,
            actions_lb: List[float] = None,
            rescale_obs: bool = False,
            norm_obs: bool = True,
            use_action_rescale_for_critic: bool = True,
            device: str = "cuda",
            dtype = torch.float32,
            is_eval: bool = False,
            load_qf: bool = False,
            epsilon: float = 1e-8,
            debug: bool = False,
            compression_ratio: float = -1.0, # > 0; if [0, 1] compression, >1 "expansion"
            layer_width_actor: int = 256,
            n_hidden_layers_actor: int = 2,
            layer_width_critic: int = 512,
            n_hidden_layers_critic: int = 4,
            torch_compile: bool = False,
            add_weight_norm: bool = False,
            add_layer_norm: bool = False,
            add_batch_norm: bool = False):

        super().__init__()

        self._use_torch_compile = torch_compile

        self._layer_width_actor = layer_width_actor
        self._layer_width_critic = layer_width_critic
        self._n_hidden_layers_actor = n_hidden_layers_actor
        self._n_hidden_layers_critic = n_hidden_layers_critic

        self._obs_dim = obs_dim
        self._actions_dim = actions_dim
        self._actions_ub = actions_ub
        self._actions_lb = actions_lb

        self._add_weight_norm = add_weight_norm
        self._add_layer_norm = add_layer_norm
        self._add_batch_norm = add_batch_norm

        self._is_eval = is_eval
        self._load_qf = load_qf

        self._epsilon = epsilon

        if compression_ratio > 0.0:
            # derive layer widths from input dims instead of the explicit widths
            self._layer_width_actor = int(compression_ratio*obs_dim)
            self._layer_width_critic = int(compression_ratio*(obs_dim+actions_dim))

        self._normalize_obs = norm_obs
        self._rescale_obs = rescale_obs
        if self._rescale_obs and self._normalize_obs:
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Both running normalization and obs rescaling is enabled. Was this intentional?",
                LogType.WARN,
                throw_when_excep = True)

        self._use_action_rescale_for_critic = use_action_rescale_for_critic

        self._rescaling_epsi = 1e-9 # avoids div-by-zero for degenerate (ub == lb) dims

        self._debug = debug

        self._torch_device = device
        self._torch_dtype = dtype

        # obs scale and bias (defaults to the [-1, 1] box when bounds are omitted)
        if obs_ub is None:
            obs_ub = [1] * obs_dim
        if obs_lb is None:
            obs_lb = [-1] * obs_dim
        if (len(obs_ub) != obs_dim):
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Observations ub list length should be equal to {obs_dim}, but got {len(obs_ub)}",
                LogType.EXCEP,
                throw_when_excep = True)
        if (len(obs_lb) != obs_dim):
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Observations lb list length should be equal to {obs_dim}, but got {len(obs_lb)}",
                LogType.EXCEP,
                throw_when_excep = True)

        self._obs_ub = torch.tensor(obs_ub, dtype=self._torch_dtype,
                device=self._torch_device)
        self._obs_lb = torch.tensor(obs_lb, dtype=self._torch_dtype,
                device=self._torch_device)
        obs_scale = torch.full((obs_dim, ),
                fill_value=0.0,
                dtype=self._torch_dtype,
                device=self._torch_device)
        obs_scale[:] = (self._obs_ub-self._obs_lb)/2.0
        self.register_buffer(
            "obs_scale", obs_scale
        )
        obs_bias = torch.full((obs_dim, ),
                fill_value=0.0,
                dtype=self._torch_dtype,
                device=self._torch_device)
        obs_bias[:] = (self._obs_ub+self._obs_lb)/2.0
        self.register_buffer(
            "obs_bias", obs_bias)

        # FIX: normalizer must exist before _build_nets(), which may torch.compile it
        self._init_obs_norm()

        self._build_nets()

        msg=f"Created SAC agent with actor [{self._layer_width_actor}, {self._n_hidden_layers_actor}]\
            and critic [{self._layer_width_critic}, {self._n_hidden_layers_critic}] sizes.\
            \n Running obs normalizer: {type(self.obs_running_norm)} \
            \n Batch normalization: {self._add_batch_norm} \
            \n Layer normalization: {self._add_layer_norm} \
            \n Weight normalization: {self._add_weight_norm} \
            \n Critic input actions are descaled: {self._use_action_rescale_for_critic}"
        Journal.log(self.__class__.__name__,
            "__init__",
            msg,
            LogType.INFO)

    def _init_obs_norm(self):
        """Create the running obs normalizer (or None when normalization is disabled)."""
        self.obs_running_norm = None
        if self._normalize_obs:
            self.obs_running_norm = RunningNormalizer((self._obs_dim,),
                epsilon=self._epsilon,
                device=self._torch_device, dtype=self._torch_dtype,
                freeze_stats=True, # always start with freezed stats
                debug=self._debug)
            self.obs_running_norm.type(self._torch_dtype) # ensuring correct dtype for whole module

    def _build_nets(self):
        """(Re)build actor and, when training (or load_qf), the twin critics and their targets."""
        if self._add_weight_norm:
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Will use weight normalization reparametrization\n",
                LogType.INFO)

        self.actor = None
        self.qf1 = None
        self.qf2 = None
        self.qf1_target = None
        self.qf2_target = None

        self.actor = Actor(obs_dim=self._obs_dim,
            actions_dim=self._actions_dim,
            actions_ub=self._actions_ub,
            actions_lb=self._actions_lb,
            device=self._torch_device,
            dtype=self._torch_dtype,
            layer_width=self._layer_width_actor,
            n_hidden_layers=self._n_hidden_layers_actor,
            add_weight_norm=self._add_weight_norm,
            add_layer_norm=self._add_layer_norm,
            add_batch_norm=self._add_batch_norm,
            )

        if (not self._is_eval) or self._load_qf: # just needed for training or during eval
            # for debug, if enabled
            self.qf1 = CriticQ(obs_dim=self._obs_dim,
                actions_dim=self._actions_dim,
                device=self._torch_device,
                dtype=self._torch_dtype,
                layer_width=self._layer_width_critic,
                n_hidden_layers=self._n_hidden_layers_critic,
                add_weight_norm=self._add_weight_norm,
                add_layer_norm=self._add_layer_norm,
                add_batch_norm=self._add_batch_norm)
            self.qf1_target = CriticQ(obs_dim=self._obs_dim,
                actions_dim=self._actions_dim,
                device=self._torch_device,
                dtype=self._torch_dtype,
                layer_width=self._layer_width_critic,
                n_hidden_layers=self._n_hidden_layers_critic,
                add_weight_norm=self._add_weight_norm,
                add_layer_norm=self._add_layer_norm,
                add_batch_norm=self._add_batch_norm)

            self.qf2 = CriticQ(obs_dim=self._obs_dim,
                actions_dim=self._actions_dim,
                device=self._torch_device,
                dtype=self._torch_dtype,
                layer_width=self._layer_width_critic,
                n_hidden_layers=self._n_hidden_layers_critic,
                add_weight_norm=self._add_weight_norm,
                add_layer_norm=self._add_layer_norm,
                add_batch_norm=self._add_batch_norm)
            self.qf2_target = CriticQ(obs_dim=self._obs_dim,
                actions_dim=self._actions_dim,
                device=self._torch_device,
                dtype=self._torch_dtype,
                layer_width=self._layer_width_critic,
                n_hidden_layers=self._n_hidden_layers_critic,
                add_weight_norm=self._add_weight_norm,
                add_layer_norm=self._add_layer_norm,
                add_batch_norm=self._add_batch_norm)

            # targets start as exact copies of the online critics
            self.qf1_target.load_state_dict(self.qf1.state_dict())
            self.qf2_target.load_state_dict(self.qf2.state_dict())

        if self._use_torch_compile:
            # FIX: obs_running_norm is None when norm_obs=False (and did not even
            # exist yet in the original call order) — only compile it when present
            if getattr(self, "obs_running_norm", None) is not None:
                self.obs_running_norm = torch.compile(self.obs_running_norm)
            self.actor = torch.compile(self.actor)
            if (not self._is_eval) or self._load_qf:
                self.qf1 = torch.compile(self.qf1)
                self.qf2 = torch.compile(self.qf2)
                self.qf1_target = torch.compile(self.qf1_target)
                self.qf2_target = torch.compile(self.qf2_target)

    def reset(self, reset_stats: bool = False):
        """Re-create all networks from scratch.

        We should just reinitialize the parameters, but for easiness the
        networks are recreated wholesale.

        Args:
            reset_stats: when True, the running obs-normalization statistics
                are also reinitialized (done before rebuilding the nets so a
                torch.compile-enabled agent compiles the fresh normalizer).
        """
        # force deallocation of objects
        import gc
        del self.actor
        del self.qf1
        del self.qf2
        del self.qf1_target
        del self.qf2_target
        gc.collect()

        if reset_stats: # we also reinitialize obs norm
            self._init_obs_norm()

        self._build_nets()

    def layer_width_actor(self):
        """Width of the actor's hidden layers."""
        return self._layer_width_actor

    def n_hidden_layers_actor(self):
        """Number of hidden layers in the actor."""
        return self._n_hidden_layers_actor

    def layer_width_critic(self):
        """Width of the critics' hidden layers."""
        return self._layer_width_critic

    def n_hidden_layers_critic(self):
        """Number of hidden layers in the critics."""
        return self._n_hidden_layers_critic

    def get_impl_path(self):
        """Absolute path of this implementation file (useful for experiment bookkeeping)."""
        import os
        return os.path.abspath(__file__)

    def update_obs_bnorm(self, x):
        """Manually fold a batch of observations into the (otherwise frozen) normalizer stats."""
        self.obs_running_norm.unfreeze()
        self.obs_running_norm.manual_stat_update(x)
        self.obs_running_norm.freeze()

    def _obs_scaling_layer(self, x):
        """Statically rescale obs using the configured bounds (bias then scale)."""
        x = (x-self.obs_bias)
        x = x/(self.obs_scale+self._rescaling_epsi)
        return x

    def _preprocess_obs(self, x):
        """Apply (optional) static rescaling, then (optional) running normalization."""
        if self._rescale_obs:
            x = self._obs_scaling_layer(x)
        if self.obs_running_norm is not None:
            x = self.obs_running_norm(x)
        return x

    def _preprocess_actions(self, a):
        """Optionally map env-scale actions back to the actor's canonical [-1, 1] range."""
        if self._use_action_rescale_for_critic:
            a = self.actor.remove_scaling(a=a) # rescale to be in range [-1, 1]
        return a

    def get_action(self, x):
        """Sample an action (plus log-prob and mean, per Actor.get_action) for obs x."""
        x = self._preprocess_obs(x)
        return self.actor.get_action(x)

    def get_qf1_val(self, x, a):
        """Q1(s, a) with preprocessed obs and actions."""
        x = self._preprocess_obs(x)
        a = self._preprocess_actions(a)
        return self.qf1(x, a)

    def get_qf2_val(self, x, a):
        """Q2(s, a) with preprocessed obs and actions."""
        x = self._preprocess_obs(x)
        a = self._preprocess_actions(a)
        return self.qf2(x, a)

    def get_qf1t_val(self, x, a):
        """Target-Q1(s, a) with preprocessed obs and actions."""
        x = self._preprocess_obs(x)
        a = self._preprocess_actions(a)
        return self.qf1_target(x, a)

    def get_qf2t_val(self, x, a):
        """Target-Q2(s, a) with preprocessed obs and actions."""
        x = self._preprocess_obs(x)
        a = self._preprocess_actions(a)
        return self.qf2_target(x, a)

    def load_state_dict(self, param_dict):
        """Non-strict load: raise on missing keys, warn on unexpected ones.

        Also raises when unexpected keys belong to a running normalizer, which
        indicates a norm_obs mismatch between checkpoint and current config.
        """
        missing, unexpected = super().load_state_dict(param_dict,
                        strict=False)
        if not len(missing) == 0:
            Journal.log(self.__class__.__name__,
                "load_state_dict",
                f"These parameters are missing from the provided state dictionary: {str(missing)}\n",
                LogType.EXCEP,
                throw_when_excep = True)
        if not len(unexpected) == 0:
            Journal.log(self.__class__.__name__,
                "load_state_dict",
                f"These parameters present in the provided state dictionary are not needed: {str(unexpected)}\n",
                LogType.WARN)

        # sanity check on running normalizer
        import re
        running_norm_pattern = r"running_norm"
        error = f"Found some keys in model state dictionary associated with a running normalizer. Are you running the agent with norm_obs=True?\n"
        # FIX: re.search, not re.match — keys look like "obs_running_norm.<param>",
        # so an anchored match never fired
        if any(re.search(running_norm_pattern, key) for key in unexpected):
            Journal.log(self.__class__.__name__,
                "load_state_dict",
                error,
                LogType.EXCEP,
                throw_when_excep=True)
+
class CriticQ(nn.Module):
    """Q-value network: maps a concatenated (obs, action) vector to a scalar Q estimate.

    Architecture: one fully-connected input layer of the same width as the
    input, n_hidden_layers hidden layers of layer_width, LeakyReLU activations
    throughout, and a single-output head initialized with small uniform weights
    and a slightly negative bias.
    """

    def __init__(self,
            obs_dim: int,
            actions_dim: int,
            device: str = "cuda",
            dtype = torch.float32,
            layer_width: int = 512,
            n_hidden_layers: int = 4,
            add_weight_norm: bool = False,
            add_layer_norm: bool = False,
            add_batch_norm: bool = False):

        super().__init__()

        self._lrelu_slope = 0.01

        self._torch_device = device
        self._torch_dtype = dtype

        self._obs_dim = obs_dim
        self._actions_dim = actions_dim
        self._q_net_dim = self._obs_dim + self._actions_dim

        # first layer is fully connected with the same width as the input
        self._first_hidden_layer_width = self._q_net_dim

        init_type = "kaiming_uniform" # maintains the variance of activations throughout the network
        nonlinearity = "leaky_relu" # suited for kaiming

        def activation():
            return nn.LeakyReLU(negative_slope=self._lrelu_slope)

        def dense(in_features, out_features):
            # shared builder for the input/hidden linear layers
            return llayer_init(
                layer=nn.Linear(in_features, out_features),
                init_type=init_type,
                nonlinearity=nonlinearity,
                a_leaky_relu=self._lrelu_slope,
                device=self._torch_device,
                dtype=self._torch_dtype,
                add_weight_norm=add_weight_norm,
                add_layer_norm=add_layer_norm,
                add_batch_norm=add_batch_norm,
                uniform_biases=False, # constant bias init
                bias_const=0.0
            )

        # input layer
        modules = dense(self._q_net_dim, self._first_hidden_layer_width)
        modules.append(activation())

        # hidden layers
        modules += dense(self._first_hidden_layer_width, layer_width)
        modules.append(activation())
        for _ in range(n_hidden_layers - 1):
            modules += dense(layer_width, layer_width)
            modules.append(activation())

        # output head: small uniform weights, slightly negative constant bias
        # to discourage initial Q overestimation
        modules += llayer_init(
            layer=nn.Linear(layer_width, 1),
            init_type="uniform",
            uniform_biases=False, # constant biases
            bias_const=-0.1, # negative to prevent overestimation
            scale_weight=1e-2,
            device=self._torch_device,
            dtype=self._torch_dtype,
            add_weight_norm=False,
            add_layer_norm=False,
            add_batch_norm=False
        )

        # assemble the full sequential network
        self._q_net = nn.Sequential(*modules)
        self._q_net.to(self._torch_device).type(self._torch_dtype)

        print("Critic architecture")
        print(self._q_net)

    def get_n_params(self):
        """Total number of parameters in the critic."""
        return sum(p.numel() for p in self.parameters())

    def forward(self, x, a):
        """Evaluate Q(s, a): concatenate obs and action along the feature dim and run the MLP."""
        sa = torch.cat([x, a], dim=1)
        return self._q_net(sa)
+
class Actor(nn.Module):
    """Squashed-Gaussian policy network (SAC-style actor).

    A shared fully-connected feature extractor feeds two output heads:
    one for the action mean and one for the (bounded) log standard
    deviation. Sampled actions are squashed with tanh and affinely
    rescaled into [actions_lb, actions_ub].
    """

    def __init__(self,
            obs_dim: int,
            actions_dim: int,
            actions_ub: List[float] = None,
            actions_lb: List[float] = None,
            device: str = "cuda",
            dtype = torch.float32,
            layer_width: int = 256,
            n_hidden_layers: int = 2,
            add_weight_norm: bool = False,
            add_layer_norm: bool = False,
            add_batch_norm: bool = False):
        # obs_dim/actions_dim: sizes of observation and action vectors.
        # actions_ub/actions_lb: per-action bounds; default to [-1, 1] each.
        # layer_width/n_hidden_layers: hidden feature-extractor geometry.
        # add_*_norm: optional normalization layers added by llayer_init.

        super().__init__()

        self._lrelu_slope=0.01

        self._torch_device = device
        self._torch_dtype = dtype

        self._obs_dim = obs_dim
        self._actions_dim = actions_dim

        self._first_hidden_layer_width=self._obs_dim # first layer fully connected and of same dim as the observation

        # Action scale and bias (affine map from tanh output in [-1, 1] to [lb, ub])
        if actions_ub is None:
            actions_ub = [1] * actions_dim
        if actions_lb is None:
            actions_lb = [-1] * actions_dim
        if (len(actions_ub) != actions_dim):
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Actions ub list length should be equal to {actions_dim}, but got {len(actions_ub)}",
                LogType.EXCEP,
                throw_when_excep = True)
        if (len(actions_lb) != actions_dim):
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Actions lb list length should be equal to {actions_dim}, but got {len(actions_lb)}",
                LogType.EXCEP,
                throw_when_excep = True)

        self._actions_ub = torch.tensor(actions_ub, dtype=self._torch_dtype,
                            device=self._torch_device)
        self._actions_lb = torch.tensor(actions_lb, dtype=self._torch_dtype,
                            device=self._torch_device)
        # registered as buffers so they move with the module and are saved
        # in its state dict, but are not trained
        action_scale = torch.full((actions_dim, ),
                        fill_value=0.0,
                        dtype=self._torch_dtype,
                        device=self._torch_device)
        action_scale[:] = (self._actions_ub-self._actions_lb)/2.0
        self.register_buffer(
            "action_scale", action_scale
        )
        actions_bias = torch.full((actions_dim, ),
                        fill_value=0.0,
                        dtype=self._torch_dtype,
                        device=self._torch_device)
        actions_bias[:] = (self._actions_ub+self._actions_lb)/2.0
        self.register_buffer(
            "action_bias", actions_bias)

        # Network configuration: log-std is squashed into this range in forward()
        self.LOG_STD_MAX = 2
        self.LOG_STD_MIN = -5

        # Input layer followed by hidden layers
        layers=llayer_init(nn.Linear(self._obs_dim, self._first_hidden_layer_width),
            init_type="kaiming_uniform",
            nonlinearity="leaky_relu",
            a_leaky_relu=self._lrelu_slope,
            device=self._torch_device,
            dtype=self._torch_dtype,
            add_weight_norm=add_weight_norm,
            add_layer_norm=add_layer_norm,
            add_batch_norm=add_batch_norm,
            uniform_biases=False, # constant bias init
            bias_const=0.0
            )
        layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)])

        # Hidden layers
        layers.extend(
            llayer_init(nn.Linear(self._first_hidden_layer_width, layer_width),
                init_type="kaiming_uniform",
                nonlinearity="leaky_relu",
                a_leaky_relu=self._lrelu_slope,
                device=self._torch_device,
                dtype=self._torch_dtype,
                add_weight_norm=add_weight_norm,
                add_layer_norm=add_layer_norm,
                add_batch_norm=add_batch_norm,
                uniform_biases=False, # constant bias init
                bias_const=0.0)
            )
        layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)])

        for _ in range(n_hidden_layers - 1):
            layers.extend(
                llayer_init(nn.Linear(layer_width, layer_width),
                    init_type="kaiming_uniform",
                    nonlinearity="leaky_relu",
                    a_leaky_relu=self._lrelu_slope,
                    device=self._torch_device,
                    dtype=self._torch_dtype,
                    add_weight_norm=add_weight_norm,
                    add_layer_norm=add_layer_norm,
                    add_batch_norm=add_batch_norm,
                    uniform_biases=False, # constant bias init
                    bias_const=0.0)
                )
            layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)])

        # Sequential layers for the feature extractor
        self._fc12 = nn.Sequential(*layers)

        # Mean and log_std output heads (small uniform init for near-zero initial outputs)
        out_fc_mean=llayer_init(nn.Linear(layer_width, self._actions_dim),
            init_type="uniform",
            uniform_biases=False, # constant bias init
            bias_const=0.0,
            scale_weight=1e-3, # scaling (output layer)
            scale_bias=1.0,
            device=self._torch_device,
            dtype=self._torch_dtype,
            add_weight_norm=False,
            add_layer_norm=False,
            add_batch_norm=False
            )
        self.fc_mean = nn.Sequential(*out_fc_mean)
        out_fc_logstd= llayer_init(nn.Linear(layer_width, self._actions_dim),
            init_type="uniform",
            uniform_biases=False,
            bias_const=math.log(0.5), # bias so that initial std is roughly 0.5
            scale_weight=1e-3, # scaling (output layer)
            scale_bias=1.0,
            device=self._torch_device,
            dtype=self._torch_dtype,
            add_weight_norm=False,
            add_layer_norm=False,
            add_batch_norm=False,
            )
        self.fc_logstd = nn.Sequential(*out_fc_logstd)

        # Move all components to the specified device and dtype
        self._fc12.to(device=self._torch_device, dtype=self._torch_dtype)
        self.fc_mean.to(device=self._torch_device, dtype=self._torch_dtype)
        self.fc_logstd.to(device=self._torch_device, dtype=self._torch_dtype)

        print("Actor architecture")
        print(self._fc12)
        print(self.fc_mean)
        print(self.fc_logstd)

    def get_n_params(self):
        """Return the total number of parameters in the actor."""
        return sum(p.numel() for p in self.parameters())

    def forward(self, x):
        """Compute the pre-squash Gaussian parameters for observations ``x``.

        Returns (mean, log_std), with log_std smoothly clamped into
        [LOG_STD_MIN, LOG_STD_MAX] via tanh rescaling.
        """
        x = self._fc12(x)
        mean = self.fc_mean(x)
        log_std = self.fc_logstd(x)
        log_std = torch.tanh(log_std)
        log_std = self.LOG_STD_MIN + 0.5 * (self.LOG_STD_MAX - self.LOG_STD_MIN) * (log_std + 1) # From SpinUp / Denis Yarats
        return mean, log_std

    def get_action(self, x):
        """Sample a squashed, rescaled action for observations ``x``.

        Returns:
            action: tanh-squashed sample rescaled to the action bounds.
            (log_prob_sum, log_prob_vec): summed and per-dimension log
                probabilities with the tanh/scale change-of-variables
                correction applied.
            mean: deterministic (squashed, rescaled) action mean.
        """
        mean, log_std = self(x)
        std = log_std.exp()
        normal = torch.distributions.Normal(mean, std)
        x_t = normal.rsample() # Reparameterization trick (for SAC we need the action
        # to be differentiable since we use Q nets. Using sample() would break the
        # comp. graph and not allow gradients to flow)
        y_t = torch.tanh(x_t)
        action = y_t * self.action_scale + self.action_bias
        log_prob_vec = normal.log_prob(x_t) # per-dimension log prob before tanh
        log_prob_vec = log_prob_vec - torch.log(self.action_scale * (1 - y_t.pow(2)) + 1e-6) # tanh Jacobian + scaling
        log_prob_sum = log_prob_vec.sum(1, keepdim=True)
        mean = torch.tanh(mean) * self.action_scale + self.action_bias
        return action, (log_prob_sum, log_prob_vec), mean

    def remove_scaling(self, a):
        """Map an action from the env's bounds back to the tanh range [-1, 1]."""
        return (a - self.action_bias)/self.action_scale
+
+if __name__ == "__main__":
+ device = "cpu" # or "cpu"
+ import time
+ obs_dim = 273
+ agent = SACAgent(
+ obs_dim=obs_dim,
+ actions_dim=10,
+ actions_lb=None,
+ actions_ub=None,
+ obs_lb=None,
+ obs_ub=None,
+ rescale_obs=False,
+ norm_obs=True,
+ use_action_rescale_for_critic=True,
+ is_eval=True,
+ compression_ratio=0.6,
+ layer_width_actor=128,
+ layer_width_critic=256,
+ n_hidden_layers_actor=3,
+ n_hidden_layers_critic=3,
+ device=device,
+ dtype=torch.float32,
+ add_weight_norm=True,
+ add_layer_norm=False,
+ add_batch_norm=False
+ )
+
+ n_samples = 10000
+ random_obs = torch.rand((1, obs_dim), dtype=torch.float32, device=device)
+
+ if device == "cuda":
+ torch.cuda.synchronize()
+ start = time.time()
+
+ for i in range(n_samples):
+ actions, _, mean = agent.get_action(x=random_obs)
+ actions = actions.detach()
+ actions[:, :] = mean.detach()
+
+ if device == "cuda":
+ torch.cuda.synchronize()
+ end = time.time()
+
+ avrg_eval_time = (end - start) / n_samples
+ print(f"Average policy evaluation time on {device}: {avrg_eval_time:.6f} s")
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/sactor_critic_algo.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/sactor_critic_algo.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a578cd8bbb4404b39ac47ebd3f178728bbef08f
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/sactor_critic_algo.py
@@ -0,0 +1,2397 @@
+from aug_mpc.agents.sactor_critic.sac import SACAgent
+from aug_mpc.agents.dummies.dummy import DummyAgent
+
+from aug_mpc.utils.shared_data.algo_infos import SharedRLAlgorithmInfo, QfVal, QfTrgt
+from aug_mpc.utils.shared_data.training_env import SubReturns, TotReturns
+from aug_mpc.utils.model_bundle import write_bundle_manifest
+from aug_mpc.utils.nn.rnd import RNDFull
+
+import torch
+import torch.optim as optim
+import torch.nn as nn
+
+import random
+import math
+from typing import Dict
+
+import os
+import shutil
+
+import time
+
+import wandb
+import h5py
+import numpy as np
+
+from EigenIPC.PyEigenIPC import LogType
+from EigenIPC.PyEigenIPC import Journal
+from EigenIPC.PyEigenIPC import VLevel
+
+from abc import ABC, abstractmethod
+
+class SActorCriticAlgoBase(ABC):
+
+ # base class for actor-critic RL algorithms
+
def __init__(self,
             env,
             debug = False,
             remote_db = False,
             seed: int = 1):
    """Store construction options and derive env-dependent parameters.

    Args:
        env: vectorized training environment handle.
        debug: enable debug data collection/logging.
        remote_db: mirror debug data to a remote dashboard (wandb).
        seed: RNG seed recorded for the run.
    """
    # core handles
    self._env = env
    self._seed = seed
    self._debug = debug
    self._remote_db = remote_db

    # mode flags (refined later in setup())
    self._eval = False
    self._det_eval = True
    self._full_env_db = False

    # populated during setup()
    self._agent = None
    self._writer = None
    self._run_name = None
    self._drop_dir = None
    self._dbinfo_drop_fname = None
    self._model_path = None

    # debug-data dictionaries
    self._policy_update_db_data_dict = {}
    self._custom_env_data_db_dict = {}
    self._rnd_db_data_dict = {}
    self._hyperparameters = {}
    self._wandb_d = {}

    # read static parameters from the environment
    self._get_params_from_env()

    self._torch_device = torch.device("cpu")  # defaults to cpu

    self._setup_done = False
    self._verbose = False
    self._is_done = False
    self._shared_algo_data = None

    self._this_child_path = None
    self._this_basepath = os.path.abspath(__file__)
+
def __del__(self):
    # best-effort cleanup on garbage collection; done() is expected to be
    # idempotent since it may also be called explicitly
    self.done()
+
def _get_params_from_env(self):
    """Cache the environment's static parameters on this object.

    Also initializes the default env-selection masks: every env is a
    debug env, no env is an exploration env, and demo envs come from the
    environment itself.
    """
    env = self._env
    self._env_name = env.name()
    self._episodic_reward_metrics = env.ep_rewards_metrics()
    self._use_gpu = env.using_gpu()
    self._dtype = env.dtype()
    self._env_opts = env.env_opts()
    self._num_envs = env.n_envs()
    self._obs_dim = env.obs_dim()
    self._actions_dim = env.actions_dim()
    self._episode_timeout_lb, self._episode_timeout_ub = env.episode_timeout_bounds()
    self._task_rand_timeout_lb, self._task_rand_timeout_ub = env.task_rand_timeout_bounds()
    self._env_n_action_reps = env.n_action_reps()

    # split action space into continuous/discrete index sets
    self._is_continuous_actions_bool = env.is_action_continuous()
    self._is_continuous_actions = torch.where(self._is_continuous_actions_bool)[0]
    self._is_discrete_actions_bool = env.is_action_discrete()
    self._is_discrete_actions = torch.where(self._is_discrete_actions_bool)[0]

    # debug-env selection: default to all envs
    self._db_env_selector = torch.arange(0, self._num_envs, dtype=torch.int)
    self._db_env_selector_bool = torch.ones((self._num_envs, ), dtype=torch.bool, device="cpu")
    # exploration envs: default to none
    self._expl_env_selector = None
    self._expl_env_selector_bool = torch.zeros((self._num_envs, ), dtype=torch.bool, device="cpu")
    self._pert_counter = 0.0
    # demo envs
    self._demo_stop_thresh = None  # performance metric above which demo envs are deactivated
    # (can be overridden through the provided options)
    self._demo_env_selector = env.demo_env_idxs()
    self._demo_env_selector_bool = env.demo_env_idxs(get_bool=True)
+
def learn(self):
    """Run one outer training step.

    Collects ``_collection_freq`` vectorized transitions, optionally
    refreshes batch-norm statistics, performs ``_update_freq`` policy
    updates, optionally computes validation losses, logs via
    ``_post_step`` and finally handles periodic agent resets.

    Returns False when transition collection signals termination,
    otherwise ``not self.is_done()``.
    """
    if not self._setup_done:
        self._should_have_called_setup()

    self._start_time = time.perf_counter()

    # experience collection
    with torch.no_grad(): # don't need grad computation here
        for i in range(self._collection_freq):
            if not self._collect_transition():
                return False
            self._vec_transition_counter+=1

    self._collection_t = time.perf_counter()

    # refresh batch-norm running stats every _bnorm_vecfreq vec steps
    if self._vec_transition_counter % self._bnorm_vecfreq == 0:
        with torch.no_grad(): # don't need grad computation here
            self._update_batch_norm(bsize=self._bnorm_bsize)

    # policy update
    self._policy_update_t_start = time.perf_counter()
    for i in range(self._update_freq):
        self._update_policy()
        self._update_counter+=1

    self._policy_update_t = time.perf_counter()

    with torch.no_grad():
        if self._validate and (self._vec_transition_counter % self._validation_db_vecstep_freq == 0):
            # validation
            self._update_validation_losses()
            self._validation_t = time.perf_counter()
        self._post_step()

    if self._use_period_resets:
        # periodic policy resets (to counteract primacy bias)
        if not self._adaptive_resets:
            # resets are active only inside the configured vec-step window
            self._periodic_resets_on=(self._vec_transition_counter >= self._reset_vecstep_start) and \
                (self._vec_transition_counter < self._reset_vecstep_end)

            if self._periodic_resets_on and \
                (self._vec_transition_counter-self._reset_vecstep_start) % self._periodic_resets_vecfreq == 0:
                self._reset_agent()
        else: # trigger reset based on overfit metric
            if self._overfit_idx > self._overfit_idx_thresh:
                self._reset_agent()

    return not self.is_done()
+
def eval(self):
    """Run one evaluation step: collect a single vectorized transition.

    Returns False when collection signals termination, otherwise
    ``not self.is_done()``.
    """
    if not self._setup_done:
        self._should_have_called_setup()

    self._start_time = time.perf_counter()

    collected = self._collect_eval_transition()
    if not collected:
        return False
    self._vec_transition_counter += 1

    self._collection_t = time.perf_counter()
    self._post_step()

    return not self.is_done()
+
@abstractmethod
def _collect_transition(self)->bool:
    """Collect one vectorized training transition.

    Must return False to make learn() stop, True otherwise."""
    pass
+
@abstractmethod
def _collect_eval_transition(self)->bool:
    """Collect one vectorized evaluation transition.

    Must return False to make eval() stop, True otherwise."""
    pass
+
@abstractmethod
def _update_policy(self):
    """Perform one policy/critic update step (called _update_freq times per learn())."""
    pass
+
@abstractmethod
def _update_validation_losses(self):
    """Compute validation losses on the held-out validation buffer."""
    pass
+
def _update_overfit_idx(self, loss, val_loss):
    """Update the overfitting index with an exponential moving average.

    The instantaneous index is the relative gap between validation and
    training loss; it is blended into the running value with weight
    ``_overfit_idx_alpha``.
    """
    instantaneous = (val_loss - loss) / loss
    alpha = self._overfit_idx_alpha
    self._overfit_idx = alpha * instantaneous + (1 - alpha) * self._overfit_idx
+
def setup(self,
        run_name: str,
        ns: str,
        custom_args: Dict = {},
        verbose: bool = False,
        drop_dir_name: str = None,
        eval: bool = False,
        resume: bool = False,
        model_path: str = None,
        n_eval_timesteps: int = None,
        comment: str = "",
        dump_checkpoints: bool = False,
        norm_obs: bool = True,
        rescale_obs: bool = False):
    """Configure the algorithm before learn()/eval() can be called.

    Builds the agent (or a dummy one when actions are overridden),
    optionally loads a pretrained/checkpointed model, initializes
    parameters, buffers, optimizers, RND exploration nets, debug data
    and remote logging, and resets the environment.

    Args:
        run_name: human-readable run identifier.
        ns: namespace, only used for shared-memory resources.
        custom_args: option overrides merged into the hyperparameters.
        verbose: enable verbose logging.
        drop_dir_name: base directory where run artifacts are dumped.
        eval: run in evaluation mode (mutually exclusive with resume).
        resume: resume training from a checkpoint (requires model_path).
        model_path: path of the model to load (eval/resume).
        n_eval_timesteps: total timesteps when eval is True (required then).
        comment: free-text note attached to the remote-db run.
        dump_checkpoints: periodically dump model checkpoints.
        norm_obs: enable running observation normalization in the agent.
        rescale_obs: enable observation rescaling in the agent.
    """
    # NOTE(review): custom_args uses a mutable default ({}); it is only read
    # here, but a None default would avoid the shared-mutable-default pitfall.

    tot_tsteps=int(100e6)
    if "tot_tsteps" in custom_args:
        tot_tsteps=custom_args["tot_tsteps"]

    self._verbose = verbose

    self._ns=ns # only used for shared mem stuff

    self._dump_checkpoints = dump_checkpoints

    self._init_algo_shared_data(static_params=self._hyperparameters) # can only handle dicts with
    # numeric values

    if "full_env_db" in custom_args:
        self._full_env_db=custom_args["full_env_db"]

    self._eval = eval
    self._resume=resume
    if self._eval and self._resume:
        Journal.log(self.__class__.__name__,
            "setup",
            f"Cannot set both eval and resume to true. Exiting.",
            LogType.EXCEP,
            throw_when_excep = True)

    self._load_qf=False
    if self._eval:
        if "load_qf" in custom_args:
            self._load_qf=custom_args["load_qf"]
    if self._resume:
        self._load_qf=True # must load qf when resuming
        self._eval=False
    try:
        self._det_eval=custom_args["det_eval"]
    except:
        # keep the default deterministic-eval setting when not provided
        pass

    self._override_agent_actions=False
    if "override_agent_actions" in custom_args:
        self._override_agent_actions=custom_args["override_agent_actions"]

    if self._override_agent_actions: # force evaluation mode
        Journal.log(self.__class__.__name__,
            "setup",
            "will force evaluation mode since override_agent_actions was set to true",
            LogType.INFO,
            throw_when_excep = True)
        self._eval=True
        self._validate=False
        self._load_qf=False
        self._det_eval=False
        self._resume=False

    # unique run id: timestamp + run name
    self._run_name = run_name
    from datetime import datetime
    self._time_id = datetime.now().strftime('d%Y_%m_%d_h%H_m%M_s%S')
    self._unique_id = self._time_id + "-" + self._run_name

    self._hyperparameters["unique_run_id"]=self._unique_id
    self._hyperparameters.update(custom_args)

    self._torch_device = torch.device("cuda" if torch.cuda.is_available() and self._use_gpu else "cpu")

    # network geometry: from hyperparameters when provided, else defaults
    try:
        layer_width_actor=self._hyperparameters["actor_lwidth"]
        layer_width_critic=self._hyperparameters["critic_lwidth"]
        n_hidden_layers_actor=self._hyperparameters["actor_n_hlayers"]
        n_hidden_layers_critic=self._hyperparameters["critic_n_hlayers"]
    except:
        layer_width_actor=256
        layer_width_critic=512
        n_hidden_layers_actor=2
        n_hidden_layers_critic=4
        pass

    use_torch_compile=False
    add_weight_norm=False
    add_layer_norm=False
    add_batch_norm=False
    compression_ratio=-1.0
    if "use_torch_compile" in self._hyperparameters and \
        self._hyperparameters["use_torch_compile"]:
        use_torch_compile=True
    if "add_weight_norm" in self._hyperparameters and \
        self._hyperparameters["add_weight_norm"]:
        add_weight_norm=True
    if "add_layer_norm" in self._hyperparameters and \
        self._hyperparameters["add_layer_norm"]:
        add_layer_norm=True
    if "add_batch_norm" in self._hyperparameters and \
        self._hyperparameters["add_batch_norm"]:
        add_batch_norm=True
    if "compression_ratio" in self._hyperparameters:
        compression_ratio=self._hyperparameters["compression_ratio"]

    act_rescale_critic=False
    if "act_rescale_critic" in self._hyperparameters:
        act_rescale_critic=self._hyperparameters["act_rescale_critic"]
    if not self._override_agent_actions:
        self._agent = SACAgent(obs_dim=self._env.obs_dim(),
            obs_ub=self._env.get_obs_ub().flatten().tolist(),
            obs_lb=self._env.get_obs_lb().flatten().tolist(),
            actions_dim=self._env.actions_dim(),
            actions_ub=None, # agent will assume actions are properly normalized in [-1, 1] by the env
            actions_lb=None,
            rescale_obs=rescale_obs,
            norm_obs=norm_obs,
            use_action_rescale_for_critic=act_rescale_critic,
            compression_ratio=compression_ratio,
            device=self._torch_device,
            dtype=self._dtype,
            is_eval=self._eval,
            load_qf=self._load_qf,
            debug=self._debug,
            layer_width_actor=layer_width_actor,
            layer_width_critic=layer_width_critic,
            n_hidden_layers_actor=n_hidden_layers_actor,
            n_hidden_layers_critic=n_hidden_layers_critic,
            torch_compile=use_torch_compile,
            add_weight_norm=add_weight_norm,
            add_layer_norm=add_layer_norm,
            add_batch_norm=add_batch_norm)
    else: # we use a fake agent
        self._agent = DummyAgent(obs_dim=self._env.obs_dim(),
            actions_dim=self._env.actions_dim(),
            actions_ub=None,
            actions_lb=None,
            device=self._torch_device,
            dtype=self._dtype,
            debug=self._debug)

    # logging actual widths and layers in case they were overridden inside agent init
    self._hyperparameters["actor_lwidth_actual"]=self._agent.layer_width_actor()
    self._hyperparameters["actor_n_hlayers_actual"]=self._agent.n_hidden_layers_actor()
    self._hyperparameters["critic_lwidth_actual"]=self._agent.layer_width_critic()
    self._hyperparameters["critic_n_hlayers_actual"]=self._agent.n_hidden_layers_critic()

    # load model if necessary
    if self._eval and (not self._override_agent_actions): # load pretrained model
        if model_path is None:
            msg = f"No model path provided in eval mode! Was this intentional? \
                No jnt remapping will be available and a randomly init agent will be used."
            Journal.log(self.__class__.__name__,
                "setup",
                msg,
                LogType.WARN,
                throw_when_excep = True)
        if n_eval_timesteps is None:
            Journal.log(self.__class__.__name__,
                "setup",
                f"When eval is True, n_eval_timesteps should be provided!!",
                LogType.EXCEP,
                throw_when_excep = True)
        # everything is ok
        self._model_path = model_path
        if self._model_path is not None:
            self._load_model(self._model_path)

        # overwrite init params
        self._init_params(tot_tsteps=n_eval_timesteps,
            custom_args=custom_args)
    else:
        if self._resume:
            if model_path is None:
                msg = f"No model path provided in resume mode! Please provide a valid checkpoint path."
                Journal.log(self.__class__.__name__,
                    "setup",
                    msg,
                    LogType.EXCEP,
                    throw_when_excep = True)
            self._model_path = model_path
            if self._model_path is not None:
                self._load_model(self._model_path) # load model from checkpoint (including q functions and running normalizers)
        self._init_params(tot_tsteps=tot_tsteps,
            custom_args=custom_args)

    # adding additional db info
    self._hyperparameters["obs_names"]=self._env.obs_names()
    self._hyperparameters["action_names"]=self._env.action_names()
    self._hyperparameters["sub_reward_names"]=self._env.sub_rew_names()
    self._hyperparameters["sub_trunc_names"]=self._env.sub_trunc_names()
    self._hyperparameters["sub_term_names"]=self._env.sub_term_names()

    self._allow_expl_during_eval=False
    if "allow_expl_during_eval" in self._hyperparameters:
        self._allow_expl_during_eval=self._hyperparameters["allow_expl_during_eval"]

    # reset environment
    self._env.reset()
    if self._eval:
        self._env.switch_random_reset(on=False)

    # log the freshly initialized policy's entropy for sanity checking
    if self._debug and (not self._override_agent_actions):
        with torch.no_grad():
            init_obs = self._env.get_obs(clone=True)
            _, init_log_pi, _ = self._agent.get_action(init_obs)
            init_policy_entropy = (-init_log_pi[0]).mean().item()
            init_policy_entropy_per_action = init_policy_entropy / float(self._actions_dim)
            Journal.log(self.__class__.__name__,
                "setup",
                f"Initial policy entropy per action: {init_policy_entropy_per_action:.4f})",
                LogType.INFO,
                throw_when_excep = True)

    # create dump directory + copy important files for debug
    self._init_drop_dir(drop_dir_name)
    self._hyperparameters["drop_dir"]=self._drop_dir

    # add env options to hyperparameters
    self._hyperparameters.update(self._env_opts)

    if not self._eval:
        # training-only machinery: optimizers, buffers, alpha autotuning, RND
        self._init_agent_optimizers()

        self._init_replay_buffers() # only needed when training
        if self._validate:
            self._init_validation_buffers()

        if self._autotune:
            self._init_alpha_autotuning()

        if self._use_rnd:
            # random network distillation for exploration bonuses
            self._rnd_net = RNDFull(input_dim=self._rnd_indim, output_dim=self._rnd_outdim,
                layer_width=self._rnd_lwidth, n_hidden_layers=self._rnd_hlayers,
                device=self._torch_device,
                dtype=self._dtype,
                normalize=norm_obs # normalize if also used for SAC agent
                )
            self._rnd_optimizer = torch.optim.Adam(self._rnd_net.rnd_predictor_net.parameters(),
                lr=self._rnd_lr)

            # preallocated input/bonus buffers (avoid per-step allocations)
            self._rnd_input = torch.full(size=(self._batch_size, self._rnd_net.input_dim()),
                fill_value=0.0,
                dtype=self._dtype,
                device=self._torch_device,
                requires_grad=False)
            self._rnd_bnorm_input = torch.full(size=(self._bnorm_bsize, self._rnd_net.input_dim()),
                fill_value=0.0,
                dtype=self._dtype,
                device=self._torch_device,
                requires_grad=False)

            self._proc_exp_bonus_all = torch.full(size=(self._batch_size, 1),
                fill_value=0.0,
                dtype=self._dtype,
                device=self._torch_device,
                requires_grad=False)
            self._raw_exp_bonus_all = torch.full(size=(self._batch_size, 1),
                fill_value=0.0,
                dtype=self._dtype,
                device=self._torch_device,
                requires_grad=False)
            # if self._autotune_rnd_scale:
            #     self._reward_normalizer=RunningNormalizer((1,), epsilon=1e-8,
            #         device=self._torch_device, dtype=self._dtype,
            #         freeze_stats=False,
            #         debug=self._debug)

    self._init_dbdata()

    if (self._debug):
        if self._remote_db:
            # remote experiment tracking (wandb)
            job_type = "evaluation" if self._eval else "training"
            project="IBRIDO-ablations"
            wandb.init(
                project=project,
                group=self._run_name,
                name=self._unique_id,
                id=self._unique_id,
                job_type=job_type,
                # tags=None,
                notes=comment,
                resume="never", # do not allow runs with the same unique id
                mode="online", # "online", "offline" or "disabled"
                entity=None,
                sync_tensorboard=True,
                config=self._hyperparameters,
                monitor_gym=True,
                save_code=True,
                dir=self._drop_dir
            )
            wandb.watch((self._agent), log="all", log_freq=1000, log_graph=False)

    if "demo_stop_thresh" in self._hyperparameters:
        self._demo_stop_thresh=self._hyperparameters["demo_stop_thresh"]

    actions = self._env.get_actions()
    # buffers for sampling random actions (preallocated for efficiency)
    self._random_uniform = torch.full_like(actions, fill_value=0.0)
    self._random_normal = torch.full_like(self._random_uniform,fill_value=0.0)

    # shared-memory channel from which externally-provided actions are read
    self._actions_override=None
    if self._override_agent_actions:
        from aug_mpc.utils.shared_data.training_env import Actions
        self._actions_override = Actions(namespace=ns+"_override",
            n_envs=self._num_envs,
            action_dim=actions.shape[1],
            action_names=self._env.action_names(),
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=VLevel.V2,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)
        self._actions_override.run()

    self._start_time_tot = time.perf_counter()

    self._start_time = time.perf_counter()

    # buffer bookkeeping
    self._replay_bf_full = False
    self._validation_bf_full = False
    self._bpos=0
    self._bpos_val=0

    self._is_done = False
    self._setup_done = True
+
def is_done(self):
    """Whether the run has been terminated."""
    return self._is_done
+
def model_path(self):
    """Path of the model loaded for eval/resume, or None."""
    return self._model_path
+
+ def _init_params(self,
+ tot_tsteps: int,
+ custom_args: Dict = {}):
+
+ self._collection_freq=1
+ self._update_freq=4
+
+ self._replay_buffer_size_vec=10*self._task_rand_timeout_ub # cover at least a number of eps
+ self._replay_buffer_size = self._replay_buffer_size_vec*self._num_envs
+ if self._replay_buffer_size_vec < 0: # in case env did not properly define _task_rand_timeout_ub
+ self._replay_buffer_size = int(1e6)
+ self._replay_buffer_size_vec = self._replay_buffer_size//self._num_envs
+ self._replay_buffer_size=self._replay_buffer_size_vec*self._num_envs
+
+ self._batch_size = 16394
+
+ new_transitions_per_batch=self._collection_freq*self._num_envs/self._replay_buffer_size # assumes uniform sampling
+ self._utd_ratio=self._update_freq/(new_transitions_per_batch*self._batch_size)
+
+ self._lr_policy = 1e-3
+ self._lr_q = 5e-4
+
+ self._discount_factor = 0.99
+ if "discount_factor" in custom_args:
+ self._discount_factor=custom_args["discount_factor"]
+
+ self._smoothing_coeff = 0.01
+
+ self._policy_freq = 2
+ self._trgt_net_freq = 1
+ self._rnd_freq = 1
+
+ # exploration
+
+ # entropy regularization (separate "discrete" and "continuous" actions)
+ self._entropy_metric_high = 0.5
+ self._entropy_metric_low = 0.0
+
+ # self._entropy_disc_start = -0.05
+ # self._entropy_disc_end = -0.5
+
+ # self._entropy_cont_start = -0.05
+ # self._entropy_cont_end = -2.0
+
+ self._entropy_disc_start = -0.3
+ self._entropy_disc_end = -0.3
+
+ self._entropy_cont_start = -0.5
+ self._entropy_cont_end = -0.5
+
+ # enable/disable entropy annealing (default: enabled)
+ self._anneal_entropy = False
+
+ self._trgt_avrg_entropy_per_action_disc = self._entropy_disc_start
+ self._trgt_avrg_entropy_per_action_cont = self._entropy_cont_start
+
+ self._disc_idxs = self._is_discrete_actions.clone().to(torch.long)
+ self._cont_idxs = self._is_continuous_actions.clone().to(torch.long)
+
+ self._target_entropy_disc = float(self._disc_idxs.numel()) * float(self._trgt_avrg_entropy_per_action_disc)
+ self._target_entropy_cont = float(self._cont_idxs.numel()) * float(self._trgt_avrg_entropy_per_action_cont)
+ self._target_entropy = self._target_entropy_disc + self._target_entropy_cont
+ self._trgt_avrg_entropy_per_action = self._target_entropy / float(max(self._actions_dim, 1))
+ self._hyperparameters["anneal_entropy"] = self._anneal_entropy
+
+ self._autotune = True
+ self._alpha_disc = 0.2 # initial values
+ self._alpha_cont = 0.2
+ self._alpha = 0.5*(self._alpha_disc + self._alpha_cont)
+ self._log_alpha_disc = math.log(self._alpha_disc)
+ self._log_alpha_cont = math.log(self._alpha_cont)
+ self._a_optimizer_disc = None
+ self._a_optimizer_cont = None
+
+ # random expl ens
+ self._expl_envs_perc=0.0 # [0, 1]
+ if "expl_envs_perc" in custom_args:
+ self._expl_envs_perc=custom_args["expl_envs_perc"]
+ self._n_expl_envs = int(self._num_envs*self._expl_envs_perc) # n of random envs on which noisy actions will be applied
+ self._noise_freq_vec = 100 # substeps
+ self._noise_duration_vec = 5 # should be less than _noise_freq
+ # correct with env substepping
+ self._noise_freq_vec=self._noise_freq_vec//self._env_n_action_reps
+ self._noise_duration_vec=self._noise_duration_vec//self._env_n_action_reps
+
+ self._continuous_act_expl_noise_std=0.3 # wrt actions scale
+ self._discrete_act_expl_noise_std=1.2 # setting it a bit > 1 helps in ensuring discr. actions range is explored
+
+ # rnd
+ self._use_rnd=True
+ self._rnd_net=None
+ self._rnd_optimizer = None
+ self._rnd_lr = 1e-3
+ if "use_rnd" in custom_args and (not self._eval):
+ self._use_rnd=custom_args["use_rnd"]
+ self._rnd_weight=1.0
+ self._alpha_rnd=0.0
+ self._novelty_scaler=None
+ if self._use_rnd:
+ from adarl.utils.NoveltyScaler import NoveltyScaler
+
+ self._novelty_scaler=NoveltyScaler(th_device=self._torch_device,
+ bonus_weight=self._rnd_weight,
+ avg_alpha=self._alpha_rnd)
+
+ self._rnd_lwidth=512
+ self._rnd_hlayers=3
+ self._rnd_outdim=16
+ self._rnd_indim=self._obs_dim+self._actions_dim
+
+ # batch normalization
+ self._bnorm_bsize = 4096
+ self._bnorm_vecfreq_nom = 5 # wrt vec steps
+ # make sure _bnorm_vecfreq is a multiple of _collection_freq
+ self._bnorm_vecfreq = (self._bnorm_vecfreq_nom//self._collection_freq)*self._collection_freq
+ if self._bnorm_vecfreq == 0:
+ self._bnorm_vecfreq=self._collection_freq
+ self._reward_normalizer=None
+
+ self._total_timesteps = int(tot_tsteps)
+ # self._total_timesteps = self._total_timesteps//self._env_n_action_reps # correct with n of action reps
+ self._total_timesteps_vec = self._total_timesteps // self._num_envs
+ self._total_steps = self._total_timesteps_vec//self._collection_freq
+ self._total_timesteps_vec = self._total_steps*self._collection_freq # correct to be a multiple of self._total_steps
+ self._total_timesteps = self._total_timesteps_vec*self._num_envs # actual n transitions
+
+ # self._warmstart_timesteps = int(5e3)
+ warmstart_length_single_env=min(self._episode_timeout_lb, self._episode_timeout_ub,
+ self._task_rand_timeout_lb, self._task_rand_timeout_ub)
+ self._warmstart_timesteps=warmstart_length_single_env*self._num_envs
+ if self._warmstart_timesteps < self._batch_size: # ensure we collect sufficient experience before
+ # starting training
+ self._warmstart_timesteps=4*self._batch_size
+ self._warmstart_vectimesteps = self._warmstart_timesteps//self._num_envs
+ # ensuring multiple of collection_freq
+ self._warmstart_timesteps = self._num_envs*self._warmstart_vectimesteps # actual
+
+ # period nets resets (for tackling the primacy bias)
+ self._use_period_resets=False
+ if "use_period_resets" in custom_args:
+ self._use_period_resets=custom_args["use_period_resets"]
+ self._adaptive_resets=True # trigger reset based on overfit metric
+ self._just_one_reset=False
+ self._periodic_resets_freq=int(4e6)
+ self._periodic_resets_start=int(1.5e6)
+ self._periodic_resets_end=int(0.8*self._total_timesteps)
+
+ self._periodic_resets_vecfreq=self._periodic_resets_freq//self._num_envs
+ self._periodic_resets_vecfreq = (self._periodic_resets_vecfreq//self._collection_freq)*self._collection_freq
+ self._reset_vecstep_start=self._periodic_resets_start//self._num_envs
+ self._reset_vecstep_end=self._periodic_resets_end//self._num_envs
+
+ if self._just_one_reset:
+ # we set the end as the fist reset + a fraction of the reset frequency (this way only one reset will happen)
+ self._reset_vecstep_end=int(self._reset_vecstep_start+0.8*self._periodic_resets_vecfreq)
+
+ self._periodic_resets_on=False
+
+ # debug
+ self._m_checkpoint_freq_nom = 1e6 # n totoal timesteps after which a checkpoint model is dumped
+ self._m_checkpoint_freq= self._m_checkpoint_freq_nom//self._num_envs
+
+ # expl envs
+ if self._n_expl_envs>0 and ((self._num_envs-self._n_expl_envs)>0): # log data only from envs which are not altered (e.g. by exploration noise)
+ # computing expl env selector
+ self._expl_env_selector = torch.randperm(self._num_envs, device="cpu")[:self._n_expl_envs]
+ self._expl_env_selector_bool[self._expl_env_selector]=True
+
+ # demo envs
+ if self._demo_env_selector_bool is None:
+ self._db_env_selector_bool[:]=~self._expl_env_selector_bool
+ else: # we log db data separately for env which are neither for demo nor for random exploration
+ self._demo_env_selector_bool=self._demo_env_selector_bool.cpu()
+ self._demo_env_selector=self._demo_env_selector.cpu()
+ self._db_env_selector_bool[:]=torch.logical_and(~self._expl_env_selector_bool, ~self._demo_env_selector_bool)
+
+ self._n_expl_envs = self._expl_env_selector_bool.count_nonzero()
+ self._num_db_envs = self._db_env_selector_bool.count_nonzero()
+
+ if not self._num_db_envs>0:
+ Journal.log(self.__class__.__name__,
+ "_init_params",
+ "No indipendent db env can be computed (check your demo and expl settings)! Will use all envs.",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ self._num_db_envs=self._num_envs
+ self._db_env_selector_bool[:]=True
+ self._db_env_selector=torch.nonzero(self._db_env_selector_bool).flatten()
+
+ self._transition_noise_freq=float(self._noise_duration_vec)/float(self._noise_freq_vec)
+ self._env_noise_freq=float(self._n_expl_envs)/float(self._num_envs)
+ self._noise_buff_freq=self._transition_noise_freq*self._env_noise_freq
+
+ self._db_vecstep_frequency = 32 # log db data every n (vectorized) SUB timesteps
+ self._db_vecstep_frequency=round(self._db_vecstep_frequency/self._env_n_action_reps) # correcting with actions reps
+ # correct db vecstep frequency to ensure it's a multiple of self._collection_freq
+ self._db_vecstep_frequency=(self._db_vecstep_frequency//self._collection_freq)*self._collection_freq
+ if self._db_vecstep_frequency == 0:
+ self._db_vecstep_frequency=self._collection_freq
+
+ self._env_db_checkpoints_vecfreq=150*self._db_vecstep_frequency # detailed db data from envs
+
+ self._validate=True
+ self._validation_collection_vecfreq=50 # add vec transitions to val buffer with some vec freq
+ self._validation_ratio=1.0/self._validation_collection_vecfreq # [0, 1], 0.1 10% size of training buffer
+ self._validation_buffer_size_vec = int(self._replay_buffer_size*self._validation_ratio)//self._num_envs
+ self._validation_buffer_size = self._validation_buffer_size_vec*self._num_envs
+ self._validation_batch_size = int(self._batch_size*self._validation_ratio)
+ self._validation_db_vecstep_freq=self._db_vecstep_frequency
+ if self._eval: # no need for validation transitions during evaluation
+ self._validate=False
+ self._overfit_idx=0.0
+ self._overfit_idx_alpha=0.03 # exponential MA
+ self._overfit_idx_thresh=2.0
+
+ self._n_policy_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq #TD3 delayed update
+ self._n_qf_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq # qf updated at each vec timesteps
+ self._n_tqf_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq//self._trgt_net_freq
+
+ self._exp_to_policy_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_policy_updates_to_be_done)
+ self._exp_to_qf_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_qf_updates_to_be_done)
+ self._exp_to_qft_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_tqf_updates_to_be_done)
+
+ self._db_data_size = round(self._total_timesteps_vec/self._db_vecstep_frequency)+self._db_vecstep_frequency
+
+ # write them to hyperparam dictionary for debugging
+ self._hyperparameters["n_envs"] = self._num_envs
+ self._hyperparameters["obs_dim"] = self._obs_dim
+ self._hyperparameters["actions_dim"] = self._actions_dim
+
+ self._hyperparameters["seed"] = self._seed
+ self._hyperparameters["using_gpu"] = self._use_gpu
+ self._hyperparameters["total_timesteps_vec"] = self._total_timesteps_vec
+
+ self._hyperparameters["collection_freq"]=self._collection_freq
+ self._hyperparameters["update_freq"]=self._update_freq
+ self._hyperparameters["total_steps"]=self._total_steps
+
+ self._hyperparameters["utd_ratio"] = self._utd_ratio
+
+ self._hyperparameters["n_policy_updates_when_done"] = self._n_policy_updates_to_be_done
+ self._hyperparameters["n_qf_updates_when_done"] = self._n_qf_updates_to_be_done
+ self._hyperparameters["n_tqf_updates_when_done"] = self._n_tqf_updates_to_be_done
+ self._hyperparameters["experience_to_policy_grad_steps_ratio"] = self._exp_to_policy_grad_ratio
+ self._hyperparameters["experience_to_quality_fun_grad_steps_ratio"] = self._exp_to_qf_grad_ratio
+ self._hyperparameters["experience_to_trgt_quality_fun_grad_steps_ratio"] = self._exp_to_qft_grad_ratio
+
+ self._hyperparameters["episodes timeout lb"] = self._episode_timeout_lb
+ self._hyperparameters["episodes timeout ub"] = self._episode_timeout_ub
+ self._hyperparameters["task rand timeout lb"] = self._task_rand_timeout_lb
+ self._hyperparameters["task rand timeout ub"] = self._task_rand_timeout_ub
+
+ self._hyperparameters["warmstart_timesteps"] = self._warmstart_timesteps
+ self._hyperparameters["warmstart_vectimesteps"] = self._warmstart_vectimesteps
+ self._hyperparameters["replay_buffer_size"] = self._replay_buffer_size
+ self._hyperparameters["batch_size"] = self._batch_size
+ self._hyperparameters["total_timesteps"] = self._total_timesteps
+ self._hyperparameters["lr_policy"] = self._lr_policy
+ self._hyperparameters["lr_q"] = self._lr_q
+ self._hyperparameters["discount_factor"] = self._discount_factor
+ self._hyperparameters["smoothing_coeff"] = self._smoothing_coeff
+ self._hyperparameters["policy_freq"] = self._policy_freq
+ self._hyperparameters["trgt_net_freq"] = self._trgt_net_freq
+ self._hyperparameters["autotune"] = self._autotune
+ self._hyperparameters["target_entropy"] = self._target_entropy
+ self._hyperparameters["target_entropy_disc"] = self._target_entropy_disc
+ self._hyperparameters["target_entropy_cont"] = self._target_entropy_cont
+ self._hyperparameters["disc_entropy_idxs"] = self._disc_idxs.tolist()
+ self._hyperparameters["cont_entropy_idxs"] = self._cont_idxs.tolist()
+ self._hyperparameters["log_alpha_disc"] = None if self._log_alpha_disc is None else self._log_alpha_disc
+ self._hyperparameters["log_alpha_cont"] = None if self._log_alpha_cont is None else self._log_alpha_cont
+ self._hyperparameters["alpha"] = self._alpha
+ self._hyperparameters["alpha_disc"] = self._alpha_disc
+ self._hyperparameters["alpha_cont"] = self._alpha_cont
+ self._hyperparameters["m_checkpoint_freq"] = self._m_checkpoint_freq
+ self._hyperparameters["db_vecstep_frequency"] = self._db_vecstep_frequency
+ self._hyperparameters["m_checkpoint_freq"] = self._m_checkpoint_freq
+
+ self._hyperparameters["use_period_resets"]= self._use_period_resets
+ self._hyperparameters["just_one_reset"]= self._just_one_reset
+ self._hyperparameters["period_resets_vecfreq"]= self._periodic_resets_vecfreq
+ self._hyperparameters["period_resets_vecstart"]= self._reset_vecstep_start
+ self._hyperparameters["period_resets_vecend"]= self._reset_vecstep_end
+ self._hyperparameters["period_resets_freq"]= self._periodic_resets_freq
+ self._hyperparameters["period_resets_start"]= self._periodic_resets_start
+ self._hyperparameters["period_resets_end"]= self._periodic_resets_end
+
+ self._hyperparameters["use_rnd"] = self._use_rnd
+ self._hyperparameters["rnd_lwidth"] = self._rnd_lwidth
+ self._hyperparameters["rnd_hlayers"] = self._rnd_hlayers
+ self._hyperparameters["rnd_outdim"] = self._rnd_outdim
+ self._hyperparameters["rnd_indim"] = self._rnd_indim
+
+ self._hyperparameters["n_db_envs"] = self._num_db_envs
+ self._hyperparameters["n_expl_envs"] = self._n_expl_envs
+ self._hyperparameters["noise_freq"] = self._noise_freq_vec
+ self._hyperparameters["noise_duration_vec"] = self._noise_duration_vec
+ self._hyperparameters["noise_buff_freq"] = self._noise_buff_freq
+ self._hyperparameters["continuous_act_expl_noise_std"] = self._continuous_act_expl_noise_std
+ self._hyperparameters["discrete_act_expl_noise_std"] = self._discrete_act_expl_noise_std
+
+ self._hyperparameters["n_demo_envs"] = self._env.n_demo_envs()
+
+ self._hyperparameters["bnorm_bsize"] = self._bnorm_bsize
+ self._hyperparameters["bnorm_vecfreq"] = self._bnorm_vecfreq
+
+ self._hyperparameters["validate"] = self._validate
+ self._hyperparameters["validation_ratio"] = self._validation_ratio
+ self._hyperparameters["validation_buffer_size_vec"] = self._validation_buffer_size_vec
+ self._hyperparameters["validation_buffer_size"] = self._validation_buffer_size
+ self._hyperparameters["validation_batch_size"] = self._validation_batch_size
+ self._hyperparameters["validation_collection_vecfreq"] = self._validation_collection_vecfreq
+
+ # small debug log
+ info = f"\nUsing \n" + \
+ f"total (vectorized) timesteps to be simulated {self._total_timesteps_vec}\n" + \
+ f"total timesteps to be simulated {self._total_timesteps}\n" + \
+ f"warmstart timesteps {self._warmstart_timesteps}\n" + \
+ f"training replay buffer size {self._replay_buffer_size}\n" + \
+ f"training replay buffer vec size {self._replay_buffer_size_vec}\n" + \
+ f"training batch size {self._batch_size}\n" + \
+ f"validation enabled {self._validate}\n" + \
+ f"validation buffer size {self._validation_buffer_size}\n" + \
+ f"validation buffer vec size {self._validation_buffer_size_vec}\n" + \
+ f"validation collection freq {self._validation_collection_vecfreq}\n" + \
+ f"validation update freq {self._validation_db_vecstep_freq}\n" + \
+ f"validation batch size {self._validation_batch_size}\n" + \
+ f"policy update freq {self._policy_freq}\n" + \
+ f"target networks freq {self._trgt_net_freq}\n" + \
+ f"episode timeout max steps {self._episode_timeout_ub}\n" + \
+ f"episode timeout min steps {self._episode_timeout_lb}\n" + \
+ f"task rand. max n steps {self._task_rand_timeout_ub}\n" + \
+ f"task rand. min n steps {self._task_rand_timeout_lb}\n" + \
+ f"number of action reps {self._env_n_action_reps}\n" + \
+ f"total policy updates to be performed: {self._n_policy_updates_to_be_done}\n" + \
+ f"total q fun updates to be performed: {self._n_qf_updates_to_be_done}\n" + \
+ f"total trgt q fun updates to be performed: {self._n_tqf_updates_to_be_done}\n" + \
+ f"experience to policy grad ratio: {self._exp_to_policy_grad_ratio}\n" + \
+ f"experience to q fun grad ratio: {self._exp_to_qf_grad_ratio}\n" + \
+ f"experience to trgt q fun grad ratio: {self._exp_to_qft_grad_ratio}\n" + \
+ f"amount of noisy transitions in replay buffer: {self._noise_buff_freq*100}% \n"
+ db_env_idxs=", ".join(map(str, self._db_env_selector.tolist()))
+ n_db_envs_str=f"db envs {self._num_db_envs}/{self._num_envs} \n"
+ info=info + n_db_envs_str + "Debug env. indexes are [" + db_env_idxs+"]\n"
+ if self._env.demo_env_idxs() is not None:
+ demo_idxs_str=", ".join(map(str, self._env.demo_env_idxs().tolist()))
+ n_demo_envs_str=f"demo envs {self._env.n_demo_envs()}/{self._num_envs} \n"
+ info=info + n_demo_envs_str + "Demo env. indexes are [" + demo_idxs_str+"]\n"
+ if self._expl_env_selector is not None:
+ random_expl_idxs=", ".join(map(str, self._expl_env_selector.tolist()))
+ n_expl_envs_str=f"expl envs {self._n_expl_envs}/{self._num_envs} \n"
+ info=info + n_expl_envs_str + "Random exploration env. indexes are [" + random_expl_idxs+"]\n"
+
+ Journal.log(self.__class__.__name__,
+ "_init_params",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+
+ # init counters
+ self._step_counter = 0
+ self._vec_transition_counter = 0
+ self._update_counter = 0
+ self._log_it_counter = 0
+
+ def _init_dbdata(self):
+
+ # initalize some debug data
+ self._collection_dt = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+ self._collection_t = -1.0
+
+ self._env_step_fps = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+ self._env_step_rt_factor = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ self._batch_norm_update_dt = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ self._policy_update_t_start = -1.0
+ self._policy_update_t = -1.0
+ self._policy_update_dt = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+ self._policy_update_fps = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ self._validation_t = -1.0
+ self._validation_dt = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ self._n_of_played_episodes = torch.full((self._db_data_size, 1),
+ dtype=torch.int32, fill_value=0, device="cpu")
+ self._n_timesteps_done = torch.full((self._db_data_size, 1),
+ dtype=torch.int32, fill_value=0, device="cpu")
+ self._n_policy_updates = torch.full((self._db_data_size, 1),
+ dtype=torch.int32, fill_value=0, device="cpu")
+ self._n_qfun_updates = torch.full((self._db_data_size, 1),
+ dtype=torch.int32, fill_value=0, device="cpu")
+ self._n_tqfun_updates = torch.full((self._db_data_size, 1),
+ dtype=torch.int32, fill_value=0, device="cpu")
+ self._elapsed_min = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ self._ep_tsteps_env_distribution = torch.full((self._db_data_size, self._num_db_envs, 1),
+ dtype=torch.int32, fill_value=-1, device="cpu")
+
+ self._reward_names = self._episodic_reward_metrics.reward_names()
+ self._reward_names_str = "[" + ', '.join(self._reward_names) + "]"
+ self._n_rewards = self._episodic_reward_metrics.n_rewards()
+
+ # db environments
+ self._tot_rew_max = torch.full((self._db_data_size, self._num_db_envs, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._tot_rew_avrg = torch.full((self._db_data_size, self._num_db_envs, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._tot_rew_min = torch.full((self._db_data_size, self._num_db_envs, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._tot_rew_max_over_envs = torch.full((self._db_data_size, 1, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._tot_rew_min_over_envs = torch.full((self._db_data_size, 1, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._tot_rew_avrg_over_envs = torch.full((self._db_data_size, 1, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._tot_rew_std_over_envs = torch.full((self._db_data_size, 1, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._sub_rew_max = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_avrg = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_min = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_max_over_envs = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_min_over_envs = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_avrg_over_envs = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_std_over_envs = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ # custom data from env # (log data just from db envs for simplicity)
+ self._custom_env_data = {}
+ db_data_names = list(self._env.custom_db_data.keys())
+ for dbdatan in db_data_names: # loop thorugh custom data
+
+ self._custom_env_data[dbdatan] = {}
+
+ max = self._env.custom_db_data[dbdatan].get_max(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1)
+ avrg = self._env.custom_db_data[dbdatan].get_avrg(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1)
+ min = self._env.custom_db_data[dbdatan].get_min(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1)
+ max_over_envs = self._env.custom_db_data[dbdatan].get_max_over_envs(env_selector=self._db_env_selector).reshape(1, -1)
+ min_over_envs = self._env.custom_db_data[dbdatan].get_min_over_envs(env_selector=self._db_env_selector).reshape(1, -1)
+ avrg_over_envs = self._env.custom_db_data[dbdatan].get_avrg_over_envs(env_selector=self._db_env_selector).reshape(1, -1)
+ std_over_envs = self._env.custom_db_data[dbdatan].get_std_over_envs(env_selector=self._db_env_selector).reshape(1, -1)
+
+ self._custom_env_data[dbdatan]["max"] =torch.full((self._db_data_size,
+ max.shape[0],
+ max.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._custom_env_data[dbdatan]["avrg"] =torch.full((self._db_data_size,
+ avrg.shape[0],
+ avrg.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._custom_env_data[dbdatan]["min"] =torch.full((self._db_data_size,
+ min.shape[0],
+ min.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._custom_env_data[dbdatan]["max_over_envs"] =torch.full((self._db_data_size,
+ max_over_envs.shape[0],
+ max_over_envs.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._custom_env_data[dbdatan]["min_over_envs"] =torch.full((self._db_data_size,
+ min_over_envs.shape[0],
+ min_over_envs.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._custom_env_data[dbdatan]["avrg_over_envs"] =torch.full((self._db_data_size,
+ avrg_over_envs.shape[0],
+ avrg_over_envs.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._custom_env_data[dbdatan]["std_over_envs"] =torch.full((self._db_data_size,
+ std_over_envs.shape[0],
+ std_over_envs.shape[1]),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ # exploration envs
+ if self._n_expl_envs > 0:
+ self._ep_tsteps_expl_env_distribution = torch.full((self._db_data_size, self._n_expl_envs, 1),
+ dtype=torch.int32, fill_value=-1, device="cpu")
+
+ # also log sub rewards metrics for exploration envs
+ self._sub_rew_max_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_avrg_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_min_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_max_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_min_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_avrg_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_std_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ # demo environments
+ self._demo_envs_active = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._demo_perf_metric = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ if self._env.demo_env_idxs() is not None:
+ n_demo_envs=self._env.demo_env_idxs().shape[0]
+
+ self._ep_tsteps_demo_env_distribution = torch.full((self._db_data_size, n_demo_envs, 1),
+ dtype=torch.int32, fill_value=-1, device="cpu")
+
+ # also log sub rewards metrics for exploration envs
+ self._sub_rew_max_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_avrg_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_min_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_max_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_min_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_avrg_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._sub_rew_std_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ # algorithm-specific db info
+ self._qf1_vals_mean = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf2_vals_mean = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._min_qft_vals_mean = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf1_vals_std = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf2_vals_std = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._min_qft_vals_std = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf1_vals_max = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf1_vals_min = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf2_vals_max = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf2_vals_min = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._qf1_loss = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf2_loss = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._actor_loss= torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alpha_loss = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alpha_loss_disc = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alpha_loss_cont = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ if self._validate: # add db data for validation losses
+ self._overfit_index = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf1_loss_validation = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._qf2_loss_validation = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._actor_loss_validation= torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alpha_loss_validation = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alpha_loss_disc_validation = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alpha_loss_cont_validation = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._alphas = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alphas_disc = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._alphas_cont = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._policy_entropy_mean=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_std=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_max=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_min=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_disc_mean=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_disc_std=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_disc_max=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_disc_min=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_cont_mean=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_cont_std=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_cont_max=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._policy_entropy_cont_min=torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._running_mean_obs=None
+ self._running_std_obs=None
+ if self._agent.obs_running_norm is not None and not self._eval:
+ # some db data for the agent
+ self._running_mean_obs = torch.full((self._db_data_size, self._env.obs_dim()),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+ self._running_std_obs = torch.full((self._db_data_size, self._env.obs_dim()),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ # RND
+ self._rnd_loss=None
+ if self._use_rnd:
+ self._rnd_loss = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._expl_bonus_raw_avrg = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._expl_bonus_raw_std = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ # self._expl_bonus_raw_min = torch.full((self._db_data_size, 1),
+ # dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ # self._expl_bonus_raw_max = torch.full((self._db_data_size, 1),
+ # dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._expl_bonus_proc_avrg = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ self._expl_bonus_proc_std = torch.full((self._db_data_size, 1),
+ dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ # self._expl_bonus_proc_min = torch.full((self._db_data_size, 1),
+ # dtype=torch.float32, fill_value=torch.nan, device="cpu")
+ # self._expl_bonus_proc_max = torch.full((self._db_data_size, 1),
+ # dtype=torch.float32, fill_value=torch.nan, device="cpu")
+
+ self._n_rnd_updates = torch.full((self._db_data_size, 1),
+ dtype=torch.int32, fill_value=0, device="cpu")
+ self._running_mean_rnd_input = None
+ self._running_std_rnd_input = None
+ if self._rnd_net.obs_running_norm is not None:
+ self._running_mean_rnd_input = torch.full((self._db_data_size, self._rnd_net.input_dim()),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+ self._running_std_rnd_input = torch.full((self._db_data_size, self._rnd_net.input_dim()),
+ dtype=torch.float32, fill_value=0.0, device="cpu")
+
+ def _init_agent_optimizers(self):
+ self._qf_optimizer = optim.Adam(list(self._agent.qf1.parameters()) + list(self._agent.qf2.parameters()),
+ lr=self._lr_q)
+ self._actor_optimizer = optim.Adam(list(self._agent.actor.parameters()),
+ lr=self._lr_policy)
+
+ def _init_alpha_autotuning(self):
+ self._log_alpha_disc = torch.full((1,), fill_value=math.log(self._alpha_disc), requires_grad=True, device=self._torch_device)
+ self._log_alpha_cont = torch.full((1,), fill_value=math.log(self._alpha_cont), requires_grad=True, device=self._torch_device)
+ self._alpha_disc = self._log_alpha_disc.exp().item()
+ self._alpha_cont = self._log_alpha_cont.exp().item()
+ self._alpha = 0.5*(self._alpha_disc + self._alpha_cont)
+ self._a_optimizer_disc = optim.Adam([self._log_alpha_disc], lr=self._lr_q)
+ self._a_optimizer_cont = optim.Adam([self._log_alpha_cont], lr=self._lr_q)
+
+ def _init_replay_buffers(self):
+
+ self._bpos = 0
+
+ self._obs = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._obs_dim),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._actions = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._actions_dim),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._rewards = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, 1),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._next_obs = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._obs_dim),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._next_terminal = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, 1),
+ fill_value=False,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+
+ def _init_validation_buffers(self):
+
+ self._bpos_val = 0
+
+ self._obs_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._obs_dim),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._actions_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._actions_dim),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._rewards_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, 1),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._next_obs_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._obs_dim),
+ fill_value=torch.nan,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+ self._next_terminal_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, 1),
+ fill_value=False,
+ dtype=self._dtype,
+ device=self._torch_device,
+ requires_grad=False)
+
+ def _save_model(self,
+ is_checkpoint: bool = False):
+
+ path = self._model_path
+ if is_checkpoint: # use iteration as id
+ path = path + "_checkpoint" + str(self._log_it_counter)
+ info = f"Saving model to {path}"
+ Journal.log(self.__class__.__name__,
+ "_save_model",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+ agent_state_dict=self._agent.state_dict()
+ if not self._eval: # training
+ # we log the joints which were observed during training
+ observed_joints=self._env.get_observed_joints()
+ if observed_joints is not None:
+ agent_state_dict["observed_jnts"]=self._env.get_observed_joints()
+
+ torch.save(agent_state_dict, path) # saves whole agent state
+ # torch.save(self._agent.parameters(), path) # only save agent parameters
+ info = f"Done."
+ Journal.log(self.__class__.__name__,
+ "_save_model",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+
+ def _dump_env_checkpoints(self):
+
+ path = self._env_db_checkpoints_fname+str(self._log_it_counter)
+
+ if path is not None:
+ info = f"Saving env db checkpoint data to {path}"
+ Journal.log(self.__class__.__name__,
+ "_dump_env_checkpoints",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+
+ with h5py.File(path+".hdf5", 'w') as hf:
+
+ for key, value in self._hyperparameters.items():
+ if value is None:
+ value = "None"
+ hf.attrs[key] = value
+
+ # full training envs
+ sub_rew_full=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._db_env_selector)
+ tot_rew_full=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._db_env_selector)
+
+ if self._n_expl_envs > 0:
+ sub_rew_full_expl=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._expl_env_selector)
+ tot_rew_full_expl=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._expl_env_selector)
+ if self._env.n_demo_envs() > 0:
+ sub_rew_full_demo=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._demo_env_selector)
+ tot_rew_full_demo=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._demo_env_selector)
+
+ ep_vec_freq=self._episodic_reward_metrics.ep_vec_freq() # assuming all db data was collected with the same ep_vec_freq
+
+ hf.attrs['sub_reward_names'] = self._reward_names
+ hf.attrs['log_iteration'] = self._log_it_counter
+ hf.attrs['n_timesteps_done'] = self._n_timesteps_done[self._log_it_counter]
+ hf.attrs['n_policy_updates'] = self._n_policy_updates[self._log_it_counter]
+ hf.attrs['elapsed_min'] = self._elapsed_min[self._log_it_counter]
+ hf.attrs['ep_vec_freq'] = ep_vec_freq
+ hf.attrs['n_expl_envs'] = self._n_expl_envs
+ hf.attrs['n_demo_envs'] = self._env.n_demo_envs()
+
+ # first dump custom db data names
+ db_data_names = list(self._env.custom_db_data.keys())
+ for db_dname in db_data_names:
+ episodic_data_names = self._env.custom_db_data[db_dname].data_names()
+ var_name = db_dname
+ hf.attrs[var_name+"_data_names"] = episodic_data_names
+
+ for ep_idx in range(ep_vec_freq): # create separate datasets for each episode
+ ep_prefix=f'ep_{ep_idx}_'
+
+ # rewards
+ hf.create_dataset(ep_prefix+'sub_rew',
+ data=sub_rew_full[ep_idx, :, :, :])
+ hf.create_dataset(ep_prefix+'tot_rew',
+ data=tot_rew_full[ep_idx, :, :, :])
+ if self._n_expl_envs > 0:
+ hf.create_dataset(ep_prefix+'sub_rew_expl',
+ data=sub_rew_full_expl[ep_idx, :, :, :])
+ hf.create_dataset(ep_prefix+'tot_rew_expl',
+ data=tot_rew_full_expl[ep_idx, :, :, :])
+ hf.create_dataset('expl_env_selector', data=self._expl_env_selector.cpu().numpy())
+ if self._env.n_demo_envs() > 0:
+ hf.create_dataset(ep_prefix+'sub_rew_demo',
+ data=sub_rew_full_demo)
+ hf.create_dataset(ep_prefix+'tot_rew_demo',
+ data=tot_rew_full_demo[ep_idx, :, :, :])
+ hf.create_dataset('demo_env_idxs', data=self._env.demo_env_idxs().cpu().numpy())
+
+ # dump all custom env data
+ db_data_names = list(self._env.custom_db_data.keys())
+ for db_dname in db_data_names:
+ episodic_data=self._env.custom_db_data[db_dname]
+ var_name = db_dname
+ hf.create_dataset(ep_prefix+var_name,
+ data=episodic_data.get_full_episodic_data(env_selector=self._db_env_selector)[ep_idx, :, :, :])
+ if self._n_expl_envs > 0:
+ hf.create_dataset(ep_prefix+var_name+"_expl",
+ data=episodic_data.get_full_episodic_data(env_selector=self._expl_env_selector)[ep_idx, :, :, :])
+ if self._env.n_demo_envs() > 0:
+ hf.create_dataset(ep_prefix+var_name+"_demo",
+ data=episodic_data.get_full_episodic_data(env_selector=self._demo_env_selector)[ep_idx, :, :, :])
+
+ Journal.log(self.__class__.__name__,
+ "_dump_env_checkpoints",
+ "done.",
+ LogType.INFO,
+ throw_when_excep = True)
+
+ def done(self):
+
+ if not self._is_done:
+
+ if not self._eval:
+ self._save_model()
+
+ self._dump_dbinfo_to_file()
+
+ if self._full_env_db:
+ self._dump_env_checkpoints()
+
+ if self._shared_algo_data is not None:
+ self._shared_algo_data.write(dyn_info_name=["is_done"],
+ val=[1.0])
+ self._shared_algo_data.close() # close shared memory
+
+ self._env.close()
+
+ self._is_done = True
+
+ def _dump_dbinfo_to_file(self):
+
+ info = f"Dumping debug info at {self._dbinfo_drop_fname}"
+ Journal.log(self.__class__.__name__,
+ "_dump_dbinfo_to_file",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+
+ with h5py.File(self._dbinfo_drop_fname+".hdf5", 'w') as hf:
+ n_valid = int(max(0, min(self._log_it_counter, self._db_data_size)))
+
+ def _slice_valid(arr):
+ if isinstance(arr, torch.Tensor):
+ return arr[:n_valid]
+ if isinstance(arr, np.ndarray):
+ return arr[:n_valid]
+ return arr
+
+ def _ds(name, arr):
+ data = _slice_valid(arr)
+ hf.create_dataset(name, data=data.numpy() if isinstance(data, torch.Tensor) else data)
+
+ # hf.create_dataset('numpy_data', data=numpy_data)
+ # Write dictionaries to HDF5 as attributes
+ for key, value in self._hyperparameters.items():
+ if value is None:
+ value = "None"
+ hf.attrs[key] = value
+
+ # rewards
+ hf.create_dataset('sub_reward_names', data=self._reward_names,
+ dtype='S40')
+ _ds('sub_rew_max', self._sub_rew_max)
+ _ds('sub_rew_avrg', self._sub_rew_avrg)
+ _ds('sub_rew_min', self._sub_rew_min)
+ _ds('sub_rew_max_over_envs', self._sub_rew_max_over_envs)
+ _ds('sub_rew_min_over_envs', self._sub_rew_min_over_envs)
+ _ds('sub_rew_avrg_over_envs', self._sub_rew_avrg_over_envs)
+ _ds('sub_rew_std_over_envs', self._sub_rew_std_over_envs)
+
+ _ds('tot_rew_max', self._tot_rew_max)
+ _ds('tot_rew_avrg', self._tot_rew_avrg)
+ _ds('tot_rew_min', self._tot_rew_min)
+ _ds('tot_rew_max_over_envs', self._tot_rew_max_over_envs)
+ _ds('tot_rew_min_over_envs', self._tot_rew_min_over_envs)
+ _ds('tot_rew_avrg_over_envs', self._tot_rew_avrg_over_envs)
+ _ds('tot_rew_std_over_envs', self._tot_rew_std_over_envs)
+
+ _ds('ep_tsteps_env_distr', self._ep_tsteps_env_distribution)
+
+ if self._n_expl_envs > 0:
+ # expl envs
+ _ds('sub_rew_max_expl', self._sub_rew_max_expl)
+ _ds('sub_rew_avrg_expl', self._sub_rew_avrg_expl)
+ _ds('sub_rew_min_expl', self._sub_rew_min_expl)
+ _ds('sub_rew_max_over_envs_expl', self._sub_rew_max_over_envs_expl)
+ _ds('sub_rew_min_over_envs_expl', self._sub_rew_min_over_envs_expl)
+ _ds('sub_rew_avrg_over_envs_expl', self._sub_rew_avrg_over_envs_expl)
+ _ds('sub_rew_std_over_envs_expl', self._sub_rew_std_over_envs_expl)
+
+ _ds('ep_timesteps_expl_env_distr', self._ep_tsteps_expl_env_distribution)
+
+ hf.create_dataset('expl_env_selector', data=self._expl_env_selector.numpy())
+
+ _ds('demo_envs_active', self._demo_envs_active)
+ _ds('demo_perf_metric', self._demo_perf_metric)
+
+ if self._env.n_demo_envs() > 0:
+ # demo envs
+ _ds('sub_rew_max_demo', self._sub_rew_max_demo)
+ _ds('sub_rew_avrg_demo', self._sub_rew_avrg_demo)
+ _ds('sub_rew_min_demo', self._sub_rew_min_demo)
+ _ds('sub_rew_max_over_envs_demo', self._sub_rew_max_over_envs_demo)
+ _ds('sub_rew_min_over_envs_demo', self._sub_rew_min_over_envs_demo)
+ _ds('sub_rew_avrg_over_envs_demo', self._sub_rew_avrg_over_envs_demo)
+ _ds('sub_rew_std_over_envs_demo', self._sub_rew_std_over_envs_demo)
+
+ _ds('ep_timesteps_demo_env_distr', self._ep_tsteps_demo_env_distribution)
+
+ hf.create_dataset('demo_env_idxs', data=self._env.demo_env_idxs().numpy())
+
+ # profiling data
+ _ds('env_step_fps', self._env_step_fps)
+ _ds('env_step_rt_factor', self._env_step_rt_factor)
+ _ds('collection_dt', self._collection_dt)
+ _ds('batch_norm_update_dt', self._batch_norm_update_dt)
+ _ds('policy_update_dt', self._policy_update_dt)
+ _ds('policy_update_fps', self._policy_update_fps)
+ _ds('validation_dt', self._validation_dt)
+
+ _ds('n_of_played_episodes', self._n_of_played_episodes)
+ _ds('n_timesteps_done', self._n_timesteps_done)
+ _ds('n_policy_updates', self._n_policy_updates)
+ _ds('n_qfun_updates', self._n_qfun_updates)
+ _ds('n_tqfun_updates', self._n_tqfun_updates)
+
+ _ds('elapsed_min', self._elapsed_min)
+
+ # algo data
+ _ds('qf1_vals_mean', self._qf1_vals_mean)
+ _ds('qf2_vals_mean', self._qf2_vals_mean)
+ _ds('qf1_vals_std', self._qf1_vals_std)
+ _ds('qf2_vals_std', self._qf2_vals_std)
+ _ds('qf1_vals_max', self._qf1_vals_max)
+ _ds('qf1_vals_min', self._qf1_vals_min)
+ _ds('qf2_vals_max', self._qf2_vals_max)
+ _ds('qf2_vals_min', self._qf1_vals_min)
+
+ _ds('min_qft_vals_mean', self._min_qft_vals_mean)
+ _ds('min_qft_vals_std', self._min_qft_vals_std)
+
+ _ds('qf1_loss', self._qf1_loss)
+ _ds('qf2_loss', self._qf2_loss)
+ _ds('actor_loss', self._actor_loss)
+ _ds('alpha_loss', self._alpha_loss)
+ _ds('alpha_loss_disc', self._alpha_loss_disc)
+ _ds('alpha_loss_cont', self._alpha_loss_cont)
+ if self._validate:
+ _ds('qf1_loss_validation', self._qf1_loss_validation)
+ _ds('qf2_loss_validation', self._qf2_loss_validation)
+ _ds('actor_loss_validation', self._actor_loss_validation)
+ _ds('alpha_loss_validation', self._alpha_loss_validation)
+ _ds('alpha_loss_disc_validation', self._alpha_loss_disc_validation)
+ _ds('alpha_loss_cont_validation', self._alpha_loss_cont_validation)
+ _ds('overfit_index', self._overfit_index)
+
+ _ds('alphas', self._alphas)
+ _ds('alphas_disc', self._alphas_disc)
+ _ds('alphas_cont', self._alphas_cont)
+
+ _ds('policy_entropy_mean', self._policy_entropy_mean)
+ _ds('policy_entropy_std', self._policy_entropy_std)
+ _ds('policy_entropy_max', self._policy_entropy_max)
+ _ds('policy_entropy_min', self._policy_entropy_min)
+ _ds('policy_entropy_disc_mean', self._policy_entropy_disc_mean)
+ _ds('policy_entropy_disc_std', self._policy_entropy_disc_std)
+ _ds('policy_entropy_disc_max', self._policy_entropy_disc_max)
+ _ds('policy_entropy_disc_min', self._policy_entropy_disc_min)
+ _ds('policy_entropy_cont_mean', self._policy_entropy_cont_mean)
+ _ds('policy_entropy_cont_std', self._policy_entropy_cont_std)
+ _ds('policy_entropy_cont_max', self._policy_entropy_cont_max)
+ _ds('policy_entropy_cont_min', self._policy_entropy_cont_min)
+ hf.create_dataset('target_entropy', data=self._target_entropy)
+ hf.create_dataset('target_entropy_disc', data=self._target_entropy_disc)
+ hf.create_dataset('target_entropy_cont', data=self._target_entropy_cont)
+
+ if self._use_rnd:
+ _ds('n_rnd_updates', self._n_rnd_updates)
+ _ds('expl_bonus_raw_avrg', self._expl_bonus_raw_avrg)
+ _ds('expl_bonus_raw_std', self._expl_bonus_raw_std)
+ _ds('expl_bonus_proc_avrg', self._expl_bonus_proc_avrg)
+ _ds('expl_bonus_proc_std', self._expl_bonus_proc_std)
+
+ if self._rnd_net.obs_running_norm is not None:
+ if self._running_mean_rnd_input is not None:
+ _ds('running_mean_rnd_input', self._running_mean_rnd_input)
+ if self._running_std_rnd_input is not None:
+ _ds('running_std_rnd_input', self._running_std_rnd_input)
+
+ # dump all custom env data
+ db_data_names = list(self._env.custom_db_data.keys())
+ for db_dname in db_data_names:
+ data=self._custom_env_data[db_dname]
+ subnames = list(data.keys())
+ for subname in subnames:
+ var_name = db_dname + "_" + subname
+ _ds(var_name, data[subname])
+
+ # other data
+ if self._agent.obs_running_norm is not None:
+ if self._running_mean_obs is not None:
+ _ds('running_mean_obs', self._running_mean_obs)
+ if self._running_std_obs is not None:
+ _ds('running_std_obs', self._running_std_obs)
+
+ info = f"done."
+ Journal.log(self.__class__.__name__,
+ "_dump_dbinfo_to_file",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+
+ def _load_model(self,
+ model_path: str):
+
+ info = f"Loading model at {model_path}"
+
+ Journal.log(self.__class__.__name__,
+ "_load_model",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+ model_dict=torch.load(model_path,
+ map_location=self._torch_device,
+ weights_only=False)
+
+ observed_joints=self._env.get_observed_joints()
+ if not ("observed_jnts" in model_dict):
+ Journal.log(self.__class__.__name__,
+ "_load_model",
+ "No observed joints key found in loaded model dictionary! Let's hope joints are ordered in the same way.",
+ LogType.WARN)
+ else:
+ required_joints=model_dict["observed_jnts"]
+ self._check_observed_joints(observed_joints,required_joints)
+
+ self._agent.load_state_dict(model_dict)
+
+ if self._eval:
+ self._switch_training_mode(False)
+
+ def _check_observed_joints(self,
+ observed_joints,
+ required_joints):
+
+ observed=set(observed_joints)
+ required=set(required_joints)
+
+ all_required_joints_avail = required.issubset(observed)
+ if not all_required_joints_avail:
+ missing=[item for item in required if item not in observed]
+ missing_str=', '.join(missing)
+ Journal.log(self.__class__.__name__,
+ "_check_observed_joints",
+ f"not all required joints are available. Missing {missing_str}",
+ LogType.EXCEP,
+ throw_when_excep = True)
+ exceeding=observed-required
+ if not len(exceeding)==0:
+ # do not support having more joints than the required
+ exc_jnts=" ".join(list(exceeding))
+ Journal.log(self.__class__.__name__,
+ "_check_observed_joints",
+ f"more than the required joints found in the observed joint: {exc_jnts}",
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ # here we are sure that required and observed sets match
+ self._to_agent_jnt_remap=None
+ if not required_joints==observed_joints:
+ Journal.log(self.__class__.__name__,
+ "_check_observed_joints",
+ f"required jnt obs from agent have different ordering from observed ones. Will compute a remapping.",
+ LogType.WARN,
+ throw_when_excep = True)
+ self._to_agent_jnt_remap = [observed_joints.index(element) for element in required_joints]
+
+ self._env.set_jnts_remapping(remapping= self._to_agent_jnt_remap)
+
    def drop_dir(self):
        # Return the root directory where this run drops all of its output
        # (model, db info, copied sources, env db checkpoints).
        return self._drop_dir
+
+ def _init_drop_dir(self,
+ drop_dir_name: str = None):
+
+ # main drop directory
+ if drop_dir_name is None:
+ # drop to current directory
+ self._drop_dir = "./" + f"{self.__class__.__name__}/" + self._run_name + "/" + self._unique_id
+ else:
+ self._drop_dir = drop_dir_name + "/" + f"{self.__class__.__name__}/" + self._run_name + "/" + self._unique_id
+ os.makedirs(self._drop_dir)
+
+ self._env_db_checkpoints_dropdir=None
+ self._env_db_checkpoints_fname=None
+ if self._full_env_db>0:
+ self._env_db_checkpoints_dropdir=self._drop_dir+"/env_db_checkpoints"
+ self._env_db_checkpoints_fname = self._env_db_checkpoints_dropdir + \
+ "/" + self._unique_id + "_env_db_checkpoint"
+ os.makedirs(self._env_db_checkpoints_dropdir)
+ # model
+ if not self._eval or (self._model_path is None):
+ self._model_path = self._drop_dir + "/" + self._unique_id + "_model"
+ else: # we copy the model under evaluation to the drop dir
+ shutil.copy(self._model_path, self._drop_dir)
+
+ # debug info
+ self._dbinfo_drop_fname = self._drop_dir + "/" + self._unique_id + "db_info" # extension added later
+
+ # other auxiliary db files
+ aux_drop_dir = self._drop_dir + "/other"
+ os.makedirs(aux_drop_dir)
+ filepaths = self._env.get_file_paths() # envs implementation
+ filepaths.append(self._this_basepath) # algorithm implementation
+ filepaths.append(self._this_child_path)
+ filepaths.append(self._agent.get_impl_path()) # agent implementation
+ for file in filepaths:
+ shutil.copy(file, self._drop_dir)
+ aux_dirs = self._env.get_aux_dir()
+ for aux_dir in aux_dirs:
+ shutil.copytree(aux_dir, aux_drop_dir, dirs_exist_ok=True)
+
+ write_bundle_manifest(
+ bundle_dir=self._drop_dir,
+ checkpoint_file=os.path.basename(self._model_path),
+ )
+
    def _get_performance_metric(self):
        # Hook meant to be overridden by subclasses: returns a scalar performance
        # metric (used in _post_step, e.g. to decide demo-env deactivation).
        # Base implementation returns a neutral 0.0.
        return 0.0
+
+ def _post_step(self):
+
+ self._collection_dt[self._log_it_counter] += \
+ (self._collection_t-self._start_time)
+ self._batch_norm_update_dt[self._log_it_counter] += \
+ (self._policy_update_t_start-self._collection_t)
+ self._policy_update_dt[self._log_it_counter] += \
+ (self._policy_update_t - self._policy_update_t_start)
+ if self._validate:
+ self._validation_dt[self._log_it_counter] += \
+ (self._validation_t - self._policy_update_t)
+
+ self._step_counter+=1 # counts algo steps
+
+ self._demo_envs_active[self._log_it_counter]=self._env.demo_active()
+ self._demo_perf_metric[self._log_it_counter]=self._get_performance_metric()
+ if self._env.n_demo_envs() > 0 and (self._demo_stop_thresh is not None):
+ # check if deactivation condition applies
+ self._env.switch_demo(active=self._demo_perf_metric[self._log_it_counter] 0:
+ self._ep_tsteps_expl_env_distribution[self._log_it_counter, :]=\
+ self._episodic_reward_metrics.step_counters(env_selector=self._expl_env_selector)*self._env_n_action_reps
+
+ self._sub_rew_max_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max(env_selector=self._expl_env_selector)
+ self._sub_rew_avrg_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg(env_selector=self._expl_env_selector)
+ self._sub_rew_min_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min(env_selector=self._expl_env_selector)
+ self._sub_rew_max_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max_over_envs(env_selector=self._expl_env_selector)
+ self._sub_rew_min_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min_over_envs(env_selector=self._expl_env_selector)
+ self._sub_rew_avrg_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg_over_envs(env_selector=self._expl_env_selector)
+ self._sub_rew_std_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_std_over_envs(env_selector=self._expl_env_selector)
+
+ # demo envs
+ if self._env.n_demo_envs() > 0 and self._env.demo_active():
+ # only log if demo envs are active (db data will remaing to nan if that case)
+ self._ep_tsteps_demo_env_distribution[self._log_it_counter, :]=\
+ self._episodic_reward_metrics.step_counters(env_selector=self._demo_env_selector)*self._env_n_action_reps
+
+ self._sub_rew_max_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max(env_selector=self._demo_env_selector)
+ self._sub_rew_avrg_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg(env_selector=self._demo_env_selector)
+ self._sub_rew_min_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min(env_selector=self._demo_env_selector)
+ self._sub_rew_max_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max_over_envs(env_selector=self._demo_env_selector)
+ self._sub_rew_min_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min_over_envs(env_selector=self._demo_env_selector)
+ self._sub_rew_avrg_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg_over_envs(env_selector=self._demo_env_selector)
+ self._sub_rew_std_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_std_over_envs(env_selector=self._demo_env_selector)
+
+ # other data
+ if self._agent.obs_running_norm is not None:
+ if self._running_mean_obs is not None:
+ self._running_mean_obs[self._log_it_counter, :] = self._agent.obs_running_norm.get_current_mean()
+ if self._running_std_obs is not None:
+ self._running_std_obs[self._log_it_counter, :] = self._agent.obs_running_norm.get_current_std()
+
+ if self._use_rnd:
+ if self._running_mean_rnd_input is not None:
+ self._running_mean_rnd_input[self._log_it_counter, :] = self._rnd_net.obs_running_norm.get_current_mean()
+ if self._running_std_rnd_input is not None:
+ self._running_std_rnd_input[self._log_it_counter, :] = self._rnd_net.obs_running_norm.get_current_std()
+
+ # write some episodic db info on shared mem
+ sub_returns=self._sub_returns.get_torch_mirror(gpu=False)
+ sub_returns[:, :]=self._episodic_reward_metrics.get_sub_rew_avrg()
+ tot_returns=self._tot_returns.get_torch_mirror(gpu=False)
+ tot_returns[:, :]=self._episodic_reward_metrics.get_tot_rew_avrg()
+ self._sub_returns.synch_all(read=False)
+ self._tot_returns.synch_all(read=False)
+
+ self._log_info()
+
+ self._log_it_counter+=1
+
+ if self._dump_checkpoints and \
+ (self._vec_transition_counter % self._m_checkpoint_freq == 0):
+ self._save_model(is_checkpoint=True)
+
+ if self._full_env_db and \
+ (self._vec_transition_counter % self._env_db_checkpoints_vecfreq == 0):
+ self._dump_env_checkpoints()
+
+ if self._vec_transition_counter == self._total_timesteps_vec:
+ self.done()
+
+ def _should_have_called_setup(self):
+
+ exception = f"setup() was not called!"
+
+ Journal.log(self.__class__.__name__,
+ "_should_have_called_setup",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ def _log_info(self):
+
+ if self._debug or self._verbose:
+ elapsed_h = self._elapsed_min[self._log_it_counter].item()/60.0
+ est_remaining_time_h = elapsed_h * 1/(self._vec_transition_counter) * (self._total_timesteps_vec-self._vec_transition_counter)
+ is_done=self._vec_transition_counter==self._total_timesteps_vec
+
+ actual_tsteps_with_updates=-1
+ experience_to_policy_grad_ratio=-1
+ experience_to_qfun_grad_ratio=-1
+ experience_to_tqfun_grad_ratio=-1
+ if not self._eval:
+ actual_tsteps_with_updates=(self._n_timesteps_done[self._log_it_counter].item()-self._warmstart_timesteps)
+ epsi=1e-6 # to avoid div by 0
+ experience_to_policy_grad_ratio=actual_tsteps_with_updates/(self._n_policy_updates[self._log_it_counter].item()-epsi)
+ experience_to_qfun_grad_ratio=actual_tsteps_with_updates/(self._n_qfun_updates[self._log_it_counter].item()-epsi)
+ experience_to_tqfun_grad_ratio=actual_tsteps_with_updates/(self._n_tqfun_updates[self._log_it_counter].item()-epsi)
+
+ if self._debug:
+
+ if self._remote_db:
+ # write general algo debug info to shared memory
+ info_names=self._shared_algo_data.dynamic_info.get()
+ info_data = [
+ self._n_timesteps_done[self._log_it_counter].item(),
+ self._n_policy_updates[self._log_it_counter].item(),
+ experience_to_policy_grad_ratio,
+ elapsed_h,
+ est_remaining_time_h,
+ self._env_step_fps[self._log_it_counter].item(),
+ self._env_step_rt_factor[self._log_it_counter].item(),
+ self._collection_dt[self._log_it_counter].item(),
+ self._policy_update_fps[self._log_it_counter].item(),
+ self._policy_update_dt[self._log_it_counter].item(),
+ is_done,
+ self._n_of_played_episodes[self._log_it_counter].item(),
+ self._batch_norm_update_dt[self._log_it_counter].item(),
+ ]
+ self._shared_algo_data.write(dyn_info_name=info_names,
+ val=info_data)
+
+ # write debug info to remote wandb server
+ db_data_names = list(self._env.custom_db_data.keys())
+ for dbdatan in db_data_names:
+ data = self._custom_env_data[dbdatan]
+ data_names = self._env.custom_db_data[dbdatan].data_names()
+
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_max":
+ wandb.Histogram(data["max"][self._log_it_counter, :, :].numpy())})
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_avrg":
+ wandb.Histogram(data["avrg"][self._log_it_counter, :, :].numpy())})
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_min":
+ wandb.Histogram(data["min"][self._log_it_counter, :, :].numpy())})
+
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_max_over_envs":
+ data["max_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))})
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_min_over_envs":
+ data["min_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))})
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_avrg_over_envs":
+ data["avrg_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))})
+ self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_std_over_envs":
+ data["std_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))})
+
+ self._wandb_d.update({'log_iteration' : self._log_it_counter})
+ self._wandb_d.update(dict(zip(info_names, info_data)))
+
+ # debug environments
+ self._wandb_d.update({'correlation_db/ep_timesteps_env_distr':
+ wandb.Histogram(self._ep_tsteps_env_distribution[self._log_it_counter, :, :].numpy())})
+
+ self._wandb_d.update({'tot_reward/tot_rew_max': wandb.Histogram(self._tot_rew_max[self._log_it_counter, :, :].numpy()),
+ 'tot_reward/tot_rew_avrg': wandb.Histogram(self._tot_rew_avrg[self._log_it_counter, :, :].numpy()),
+ 'tot_reward/tot_rew_min': wandb.Histogram(self._tot_rew_min[self._log_it_counter, :, :].numpy()),
+ 'tot_reward/tot_rew_max_over_envs': self._tot_rew_max_over_envs[self._log_it_counter, :, :].item(),
+ 'tot_reward/tot_rew_min_over_envs': self._tot_rew_min_over_envs[self._log_it_counter, :, :].item(),
+ 'tot_reward/tot_rew_avrg_over_envs': self._tot_rew_avrg_over_envs[self._log_it_counter, :, :].item(),
+ 'tot_reward/tot_rew_std_over_envs': self._tot_rew_std_over_envs[self._log_it_counter, :, :].item(),
+ })
+ # sub rewards from db envs
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_max":
+ wandb.Histogram(self._sub_rew_max.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_min":
+ wandb.Histogram(self._sub_rew_min.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_avrg":
+ wandb.Histogram(self._sub_rew_avrg.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+
+
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_max_over_envs":
+ self._sub_rew_max_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_min_over_envs":
+ self._sub_rew_min_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_avrg_over_envs":
+ self._sub_rew_avrg_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_std_over_envs":
+ self._sub_rew_std_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+
+ # exploration envs
+ if self._n_expl_envs > 0:
+ self._wandb_d.update({'correlation_db/ep_timesteps_expl_env_distr':
+ wandb.Histogram(self._ep_tsteps_expl_env_distribution[self._log_it_counter, :, :].numpy())})
+
+ # sub reward from expl envs
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_max_expl":
+ wandb.Histogram(self._sub_rew_max_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_avrg_expl":
+ wandb.Histogram(self._sub_rew_avrg_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_min_expl":
+ wandb.Histogram(self._sub_rew_min_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_max_over_envs_expl":
+ self._sub_rew_max_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_min_over_envs_expl":
+ self._sub_rew_min_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_avrg_over_envs_expl":
+ self._sub_rew_avrg_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_std_over_envs_expl":
+ self._sub_rew_std_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+
+ # demo envs (only log if active, otherwise we log nans which wandb doesn't like)
+ if self._env.n_demo_envs() > 0:
+ if self._env.demo_active():
+ # log hystograms only if there are no nan in the data
+ self._wandb_d.update({'correlation_db/ep_timesteps_demo_env_distr':
+ wandb.Histogram(self._ep_tsteps_demo_env_distribution[self._log_it_counter, :, :].numpy())})
+
+ # sub reward from expl envs
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_max_demo":
+ wandb.Histogram(self._sub_rew_max_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_avrg_demo":
+ wandb.Histogram(self._sub_rew_avrg_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_min_demo":
+ wandb.Histogram(self._sub_rew_min_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
+
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_max_over_envs_demo":
+ self._sub_rew_max_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_min_over_envs_demo":
+ self._sub_rew_min_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_avrg_over_envs_demo":
+ self._sub_rew_avrg_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+ self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_std_over_envs_demo":
+ self._sub_rew_std_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
+
+ if self._vec_transition_counter > (self._warmstart_vectimesteps-1):
+ # algo info
+ self._policy_update_db_data_dict.update({
+ "sac_q_info/qf1_vals_mean": self._qf1_vals_mean[self._log_it_counter, 0],
+ "sac_q_info/qf2_vals_mean": self._qf2_vals_mean[self._log_it_counter, 0],
+ "sac_q_info/min_qft_vals_mean": self._min_qft_vals_mean[self._log_it_counter, 0],
+ "sac_q_info/qf1_vals_std": self._qf1_vals_std[self._log_it_counter, 0],
+ "sac_q_info/qf2_vals_std": self._qf2_vals_std[self._log_it_counter, 0],
+ "sac_q_info/min_qft_vals_std": self._min_qft_vals_std[self._log_it_counter, 0],
+ "sac_q_info/qf1_vals_max": self._qf1_vals_max[self._log_it_counter, 0],
+ "sac_q_info/qf2_vals_max": self._qf2_vals_max[self._log_it_counter, 0],
+ "sac_q_info/qf1_vals_min": self._qf1_vals_min[self._log_it_counter, 0],
+ "sac_q_info/qf2_vals_min": self._qf2_vals_min[self._log_it_counter, 0],
+
+ "sac_actor_info/policy_entropy_mean": self._policy_entropy_mean[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_std": self._policy_entropy_std[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_max": self._policy_entropy_max[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_min": self._policy_entropy_min[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_disc_mean": self._policy_entropy_disc_mean[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_disc_std": self._policy_entropy_disc_std[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_disc_max": self._policy_entropy_disc_max[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_disc_min": self._policy_entropy_disc_min[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_cont_mean": self._policy_entropy_cont_mean[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_cont_std": self._policy_entropy_cont_std[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_cont_max": self._policy_entropy_cont_max[self._log_it_counter, 0],
+ "sac_actor_info/policy_entropy_cont_min": self._policy_entropy_cont_min[self._log_it_counter, 0],
+
+ "sac_q_info/qf1_loss": self._qf1_loss[self._log_it_counter, 0],
+ "sac_q_info/qf2_loss": self._qf2_loss[self._log_it_counter, 0],
+ "sac_actor_info/actor_loss": self._actor_loss[self._log_it_counter, 0]})
+ alpha_logs = {
+ "sac_alpha_info/alpha": self._alphas[self._log_it_counter, 0],
+ "sac_alpha_info/alpha_disc": self._alphas_disc[self._log_it_counter, 0],
+ "sac_alpha_info/alpha_cont": self._alphas_cont[self._log_it_counter, 0],
+ "sac_alpha_info/target_entropy": self._target_entropy,
+ "sac_alpha_info/target_entropy_disc": self._target_entropy_disc,
+ "sac_alpha_info/target_entropy_cont": self._target_entropy_cont
+ }
+ if self._autotune:
+ alpha_logs.update({
+ "sac_alpha_info/alpha_loss": self._alpha_loss[self._log_it_counter, 0],
+ "sac_alpha_info/alpha_loss_disc": self._alpha_loss_disc[self._log_it_counter, 0],
+ "sac_alpha_info/alpha_loss_cont": self._alpha_loss_cont[self._log_it_counter, 0],
+ })
+ self._policy_update_db_data_dict.update(alpha_logs)
+
+ if self._validate:
+ self._policy_update_db_data_dict.update({
+ "sac_q_info/qf1_loss_validation": self._qf1_loss_validation[self._log_it_counter, 0],
+ "sac_q_info/qf2_loss_validation": self._qf2_loss_validation[self._log_it_counter, 0],
+ "sac_q_info/overfit_index": self._overfit_index[self._log_it_counter, 0],
+ "sac_actor_info/actor_loss_validation": self._actor_loss_validation[self._log_it_counter, 0]})
+ if self._autotune:
+ self._policy_update_db_data_dict.update({
+ "sac_alpha_info/alpha_loss_validation": self._alpha_loss_validation[self._log_it_counter, 0],
+ "sac_alpha_info/alpha_loss_disc_validation": self._alpha_loss_disc_validation[self._log_it_counter, 0],
+ "sac_alpha_info/alpha_loss_cont_validation": self._alpha_loss_cont_validation[self._log_it_counter, 0]})
+
+ self._wandb_d.update(self._policy_update_db_data_dict)
+
+ if self._use_rnd:
+ self._rnd_db_data_dict.update({
+ "rnd_info/expl_bonus_raw_avrg": self._expl_bonus_raw_avrg[self._log_it_counter, 0],
+ "rnd_info/expl_bonus_raw_std": self._expl_bonus_raw_std[self._log_it_counter, 0],
+ "rnd_info/expl_bonus_proc_avrg": self._expl_bonus_proc_avrg[self._log_it_counter, 0],
+ "rnd_info/expl_bonus_proc_std": self._expl_bonus_proc_std[self._log_it_counter, 0],
+ "rnd_info/rnd_loss": self._rnd_loss[self._log_it_counter, 0],
+ })
+ self._wandb_d.update(self._rnd_db_data_dict)
+
+ if self._rnd_net.obs_running_norm is not None:
+ # adding info on running normalizer if used
+ if self._running_mean_rnd_input is not None:
+ self._wandb_d.update({f"rnd_info/running_mean_rhc_input": self._running_mean_rnd_input[self._log_it_counter, :]})
+ if self._running_std_rnd_input is not None:
+ self._wandb_d.update({f"rnd_info/running_std_rhc_input": self._running_std_rnd_input[self._log_it_counter, :]})
+
+ if self._agent.obs_running_norm is not None:
+ # adding info on running normalizer if used
+ if self._running_mean_obs is not None:
+ self._wandb_d.update({f"running_norm/mean": self._running_mean_obs[self._log_it_counter, :]})
+ if self._running_std_obs is not None:
+ self._wandb_d.update({f"running_norm/std": self._running_std_obs[self._log_it_counter, :]})
+
+ self._wandb_d.update(self._custom_env_data_db_dict)
+
+ wandb.log(self._wandb_d)
+
+ if self._verbose:
+
+ info =f"\nTotal n. timesteps simulated: {self._n_timesteps_done[self._log_it_counter].item()}/{self._total_timesteps}\n" + \
+ f"N. policy updates performed: {self._n_policy_updates[self._log_it_counter].item()}/{self._n_policy_updates_to_be_done}\n" + \
+ f"N. q fun updates performed: {self._n_qfun_updates[self._log_it_counter].item()}/{self._n_qf_updates_to_be_done}\n" + \
+ f"N. trgt q fun updates performed: {self._n_tqfun_updates[self._log_it_counter].item()}/{self._n_tqf_updates_to_be_done}\n" + \
+ f"experience to policy grad ratio: {experience_to_policy_grad_ratio}\n" + \
+ f"experience to q fun grad ratio: {experience_to_qfun_grad_ratio}\n" + \
+ f"experience to trgt q fun grad ratio: {experience_to_tqfun_grad_ratio}\n"+ \
+ f"Warmstart completed: {self._vec_transition_counter > self._warmstart_vectimesteps or self._eval} ; ({self._vec_transition_counter}/{self._warmstart_vectimesteps})\n" +\
+ f"Replay buffer full: {self._replay_bf_full}; current position {self._bpos}/{self._replay_buffer_size_vec}\n" +\
+ f"Validation buffer full: {self._validation_bf_full}; current position {self._bpos_val}/{self._validation_buffer_size_vec}\n" +\
+ f"Elapsed time: {self._elapsed_min[self._log_it_counter].item()/60.0} h\n" + \
+ f"Estimated remaining training time: " + \
+ f"{est_remaining_time_h} h\n" + \
+ f"Total reward episodic data --> \n" + \
+ f"max: {self._tot_rew_max_over_envs[self._log_it_counter, :, :].item()}\n" + \
+ f"avg: {self._tot_rew_avrg_over_envs[self._log_it_counter, :, :].item()}\n" + \
+ f"min: {self._tot_rew_min_over_envs[self._log_it_counter, :, :].item()}\n" + \
+ f"Episodic sub-rewards episodic data --> \nsub rewards names: {self._reward_names_str}\n" + \
+ f"max: {self._sub_rew_max_over_envs[self._log_it_counter, :]}\n" + \
+ f"min: {self._sub_rew_min_over_envs[self._log_it_counter, :]}\n" + \
+ f"avg: {self._sub_rew_avrg_over_envs[self._log_it_counter, :]}\n" + \
+ f"std: {self._sub_rew_std_over_envs[self._log_it_counter, :]}\n" + \
+ f"N. of episodes on which episodic rew stats are computed: {self._n_of_played_episodes[self._log_it_counter].item()}\n" + \
+ f"Current env. step sps: {self._env_step_fps[self._log_it_counter].item()}, time for experience collection {self._collection_dt[self._log_it_counter].item()} s\n" + \
+ f"Current env (sub-stepping) rt factor: {self._env_step_rt_factor[self._log_it_counter].item()}\n" + \
+ f"Current policy update fps: {self._policy_update_fps[self._log_it_counter].item()}, time for policy updates {self._policy_update_dt[self._log_it_counter].item()} s\n" + \
+ f"Time spent updating batch normalizations {self._batch_norm_update_dt[self._log_it_counter].item()} s\n" + \
+ f"Time spent for computing validation {self._validation_dt[self._log_it_counter].item()} s\n" + \
+ f"Demo envs are active: {self._demo_envs_active[self._log_it_counter].item()}. N demo envs if active {self._env.n_demo_envs()}\n" + \
+ f"Performance metric now: {self._demo_perf_metric[self._log_it_counter].item()}\n" + \
+ f"Entropy (disc): current {float(self._policy_entropy_disc_mean[self._log_it_counter, 0]):.4f}/{self._target_entropy_disc:.4f}\n" + \
+ f"Entropy (cont): current {float(self._policy_entropy_cont_mean[self._log_it_counter, 0]):.4f}/{self._target_entropy_cont:.4f}\n"
+ if self._use_rnd:
+ info = info + f"N. rnd updates performed: {self._n_rnd_updates[self._log_it_counter].item()}\n"
+
+ Journal.log(self.__class__.__name__,
+ "_post_step",
+ info,
+ LogType.INFO,
+ throw_when_excep = True)
+
+ def _add_experience(self,
+ obs: torch.Tensor, actions: torch.Tensor, rewards: torch.Tensor,
+ next_obs: torch.Tensor,
+ next_terminal: torch.Tensor) -> None:
+
+ if self._validate and \
+ (self._vec_transition_counter % self._validation_collection_vecfreq == 0):
+ # fill validation buffer
+
+ self._obs_val[self._bpos_val] = obs
+ self._next_obs_val[self._bpos_val] = next_obs
+ self._actions_val[self._bpos_val] = actions
+ self._rewards_val[self._bpos_val] = rewards
+ self._next_terminal_val[self._bpos_val] = next_terminal
+
+ self._bpos_val += 1
+ if self._bpos_val == self._validation_buffer_size_vec:
+ self._validation_bf_full = True
+ self._bpos_val = 0
+
+ else: # fill normal replay buffer
+ self._obs[self._bpos] = obs
+ self._next_obs[self._bpos] = next_obs
+ self._actions[self._bpos] = actions
+ self._rewards[self._bpos] = rewards
+ self._next_terminal[self._bpos] = next_terminal
+
+ self._bpos += 1
+ if self._bpos == self._replay_buffer_size_vec:
+ self._replay_bf_full = True
+ self._bpos = 0
+
+ def _sample(self, size: int = None):
+
+ if size is None:
+ size=self._batch_size
+
+ batched_obs = self._obs.view((-1, self._env.obs_dim()))
+ batched_next_obs = self._next_obs.view((-1, self._env.obs_dim()))
+ batched_actions = self._actions.view((-1, self._env.actions_dim()))
+ batched_rewards = self._rewards.view(-1)
+ batched_terminal = self._next_terminal.view(-1)
+
+ # sampling from the batched buffer
+ up_to = self._replay_buffer_size if self._replay_bf_full else self._bpos*self._num_envs
+ shuffled_buffer_idxs = torch.randint(0, up_to,
+ (size,))
+
+ sampled_obs = batched_obs[shuffled_buffer_idxs]
+ sampled_next_obs = batched_next_obs[shuffled_buffer_idxs]
+ sampled_actions = batched_actions[shuffled_buffer_idxs]
+ sampled_rewards = batched_rewards[shuffled_buffer_idxs]
+ sampled_terminal = batched_terminal[shuffled_buffer_idxs]
+
+ return sampled_obs, sampled_actions,\
+ sampled_next_obs,\
+ sampled_rewards, \
+ sampled_terminal
+
+ def _sample_validation(self, size: int = None):
+
+ if size is None:
+ size=self._validation_batch_size
+
+ batched_obs = self._obs_val.view((-1, self._env.obs_dim()))
+ batched_next_obs = self._next_obs_val.view((-1, self._env.obs_dim()))
+ batched_actions = self._actions_val.view((-1, self._env.actions_dim()))
+ batched_rewards = self._rewards_val.view(-1)
+ batched_terminal = self._next_terminal_val.view(-1)
+
+ # sampling from the batched buffer
+ up_to = self._validation_buffer_size if self._validation_bf_full else self._bpos_val*self._num_envs
+ shuffled_buffer_idxs = torch.randint(0, up_to,
+ (size,))
+
+ sampled_obs = batched_obs[shuffled_buffer_idxs]
+ sampled_next_obs = batched_next_obs[shuffled_buffer_idxs]
+ sampled_actions = batched_actions[shuffled_buffer_idxs]
+ sampled_rewards = batched_rewards[shuffled_buffer_idxs]
+ sampled_terminal = batched_terminal[shuffled_buffer_idxs]
+
+ return sampled_obs, sampled_actions,\
+ sampled_next_obs,\
+ sampled_rewards, \
+ sampled_terminal
+
+ def _sample_random_actions(self):
+
+ self._random_uniform.uniform_(-1,1)
+ random_actions = self._random_uniform
+
+ return random_actions
+
+ def _perturb_some_actions(self,
+ actions: torch.Tensor):
+
+ if self._is_continuous_actions_bool.any(): # if there are any continuous actions
+ self._perturb_actions(actions,
+ action_idxs=self._is_continuous_actions,
+ env_idxs=self._expl_env_selector.to(actions.device),
+ normal=True, # use normal for continuous
+ scaling=self._continuous_act_expl_noise_std)
+ if self._is_discrete_actions_bool.any(): # actions to be treated as discrete
+ self._perturb_actions(actions,
+ action_idxs=self._is_discrete_actions,
+ env_idxs=self._expl_env_selector.to(actions.device),
+ normal=False, # use uniform distr for discrete
+ scaling=self._discrete_act_expl_noise_std)
+ self._pert_counter+=1
+ if self._pert_counter >= self._noise_duration_vec:
+ self._pert_counter=0
+
+ def _perturb_actions(self,
+ actions: torch.Tensor,
+ action_idxs: torch.Tensor,
+ env_idxs: torch.Tensor,
+ normal: bool = True,
+ scaling: float = 1.0):
+ if normal: # gaussian
+ # not super efficient (in theory random_normal can be made smaller in size)
+ self._random_normal.normal_(mean=0, std=1)
+ noise=self._random_normal
+ else: # uniform
+ self._random_uniform.uniform_(-1,1)
+ noise=self._random_uniform
+
+ env_indices = env_idxs.reshape(-1,1) # Get indices of True environments
+ action_indices = action_idxs.reshape(1,-1) # Get indices of True actions
+ action_indices_flat=action_indices.flatten()
+
+ perturbation=noise[env_indices, action_indices]*scaling
+ perturbed_actions=actions[env_indices, action_indices]+perturbation
+ perturbed_actions.clamp_(-1.0, 1.0) # enforce normalized bounds
+
+ actions[env_indices, action_indices]=\
+ perturbed_actions
+
    def _update_batch_norm(self, bsize: int = None):
        """Update running normalization statistics from replay-buffer samples.

        Draws `bsize` random transitions (defaults to the training batch size)
        and feeds the sampled observations to the agent's observation
        normalizer and, when RND is enabled, the concatenated obs+action batch
        to the RND network's input normalizer.
        """

        if bsize is None:
            bsize=self._batch_size # same used for training

        # sample only from the portion of the buffer written so far
        up_to = self._replay_buffer_size if self._replay_bf_full else self._bpos*self._num_envs
        shuffled_buffer_idxs = torch.randint(0, up_to,
            (bsize,))

        # update obs normalization
        # (we should sample also next obs, but if most of the transitions are not terminal,
        # this is not an issue and is more efficient)
        if (self._agent.obs_running_norm is not None) and \
            (not self._eval):
            batched_obs = self._obs.view((-1, self._obs_dim))
            sampled_obs = batched_obs[shuffled_buffer_idxs]
            self._agent.update_obs_bnorm(x=sampled_obs)

        if self._use_rnd: # update running norm for RND
            batched_obs = self._obs.view((-1, self._obs_dim))
            batched_actions = self._actions.view((-1, self._actions_dim))
            sampled_obs = batched_obs[shuffled_buffer_idxs]
            sampled_actions = batched_actions[shuffled_buffer_idxs]
            # write concatenation directly into the preallocated RND input buffer
            torch.cat(tensors=(sampled_obs, sampled_actions), dim=1,
                out=self._rnd_bnorm_input)
            self._rnd_net.update_input_bnorm(x=self._rnd_bnorm_input)

        # update running norm on rewards also
        # if self._reward_normalizer is not None:
        #     batched_rew = self._rewards.view(-1)
        #     sampled_rew = batched_rew[shuffled_buffer_idxs]
        #     self._reward_normalizer.manual_stat_update(x=sampled_rew)
+
    def _reset_agent(self):
        """Re-initialize the agent and all of its optimizers from scratch.

        Not super efficient, but effective: brand new agent, brand new
        optimizers. Previous optimizer (and log-alpha) objects are explicitly
        deleted and garbage-collected to force deallocation before re-creation.
        """
        self._agent.reset()
        # forcing deallocation of previous optimizers
        import gc
        del self._qf_optimizer
        del self._actor_optimizer
        if self._autotune:
            del self._a_optimizer_disc
            del self._a_optimizer_cont
            del self._log_alpha_disc
            del self._log_alpha_cont
        gc.collect()
        self._init_agent_optimizers()
        if self._autotune: # also reinitialize alpha optimization
            self._init_alpha_autotuning()

        # NOTE(review): logging code reads self._overfit_index, while this
        # resets self._overfit_idx — verify this is not a typo for _overfit_index
        self._overfit_idx=0.0
+
+ def _switch_training_mode(self,
+ train: bool = True):
+
+ self._agent.train(train)
+
    def _init_algo_shared_data(self,
            static_params: Dict):
        """Create and start the shared-memory servers used to publish algorithm state.

        Spawns servers (under namespace `self._ns`) for: general algorithm info,
        Q-function values/targets, per-reward-component episodic returns and
        total episodic returns. All are created with force_reconnection=True so
        stale segments from a previous run are replaced.
        """

        self._shared_algo_data = SharedRLAlgorithmInfo(namespace=self._ns,
            is_server=True,
            static_params=static_params,
            verbose=self._verbose,
            vlevel=VLevel.V2,
            safe=False,
            force_reconnection=True)

        self._shared_algo_data.run()

        # write some initializations
        self._shared_algo_data.write(dyn_info_name=["is_done"],
            val=[0.0])

        # only written to if flags where enabled
        self._qf_vals=QfVal(namespace=self._ns,
            is_server=True,
            n_envs=self._num_envs,
            verbose=self._verbose,
            vlevel=VLevel.V2,
            safe=False,
            force_reconnection=True)
        self._qf_vals.run()
        self._qf_trgt=QfTrgt(namespace=self._ns,
            is_server=True,
            n_envs=self._num_envs,
            verbose=self._verbose,
            vlevel=VLevel.V2,
            safe=False,
            force_reconnection=True)
        self._qf_trgt.run()

        # episodic returns (one channel per sub-reward component)
        reward_names=self._episodic_reward_metrics.data_names()
        self._sub_returns=SubReturns(namespace=self._ns,
            is_server=True,
            n_envs=self._num_envs,
            n_rewards=len(reward_names),
            reward_names=reward_names,
            verbose=self._verbose,
            vlevel=VLevel.V2,
            safe=False,
            force_reconnection=True)
        self._sub_returns.run()

        self._tot_returns=TotReturns(namespace=self._ns,
            is_server=True,
            n_envs=self._num_envs,
            verbose=self._verbose,
            vlevel=VLevel.V2,
            safe=False,
            force_reconnection=True)
        self._tot_returns.run()
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/training_env_base.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/training_env_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1d82cd55b606a8fe783803efcb26726301c1693
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/training_env_base.py
@@ -0,0 +1,2002 @@
+import torch
+import math
+from aug_mpc.utils.math_utils import quaternion_to_angular_velocity, quaternion_difference
+
+from mpc_hive.utilities.shared_data.rhc_data import RobotState
+from mpc_hive.utilities.shared_data.rhc_data import RhcCmds, RhcPred
+from mpc_hive.utilities.shared_data.rhc_data import RhcRefs
+from mpc_hive.utilities.shared_data.rhc_data import RhcStatus
+from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo
+
+from aug_mpc.utils.shared_data.remote_stepping import RemoteStepperSrvr
+from aug_mpc.utils.shared_data.remote_stepping import RemoteResetSrvr
+from aug_mpc.utils.shared_data.remote_stepping import RemoteResetRequest
+
+from aug_mpc.utils.shared_data.agent_refs import AgentRefs
+from aug_mpc.utils.shared_data.training_env import SharedTrainingEnvInfo
+
+from aug_mpc.utils.shared_data.training_env import Observations, NextObservations
+from aug_mpc.utils.shared_data.training_env import TotRewards
+from aug_mpc.utils.shared_data.training_env import SubRewards
+from aug_mpc.utils.shared_data.training_env import Actions
+from aug_mpc.utils.shared_data.training_env import Terminations, SubTerminations
+from aug_mpc.utils.shared_data.training_env import Truncations, SubTruncations
+from aug_mpc.utils.shared_data.training_env import EpisodesCounter,TaskRandCounter,SafetyRandResetsCounter,RandomTruncCounter,SubStepAbsCounter
+
+from aug_mpc.utils.episodic_rewards import EpisodicRewards
+from aug_mpc.utils.episodic_data import EpisodicData
+from aug_mpc.utils.episodic_data import MemBuffer
+from aug_mpc.utils.signal_smoother import ExponentialSignalSmoother
+from aug_mpc.utils.math_utils import check_capsize
+
+from mpc_hive.utilities.math_utils_torch import world2base_frame
+
+from EigenIPC.PyEigenIPC import VLevel
+from EigenIPC.PyEigenIPC import LogType
+from EigenIPC.PyEigenIPC import Journal
+from EigenIPC.PyEigenIPC import StringTensorClient
+
+from perf_sleep.pyperfsleep import PerfSleep
+
+from abc import abstractmethod, ABC
+
+import os
+from typing import List, Dict
+
+class AugMPCTrainingEnvBase(ABC):
+
+ """Base class for a remote training environment tailored to Learning-based Receding Horizon Control"""
+
    def __init__(self,
            namespace: str,
            obs_dim: int,
            actions_dim: int,
            env_name: str = "",
            verbose: bool = False,
            vlevel: VLevel = VLevel.V1,
            debug: bool = True,
            use_gpu: bool = True,
            dtype: torch.dtype = torch.float32,
            override_agent_refs: bool = False,
            timeout_ms: int = 60000,
            env_opts: Dict = {}):
        """Set up the remote training environment and perform the initial handshake.

        Args:
            namespace: shared-memory namespace shared with the remote sim/RHC side.
            obs_dim: dimensionality of the observation vector.
            actions_dim: dimensionality of the action vector.
            env_name: human-readable environment name.
            verbose, vlevel: logging verbosity settings.
            debug: enables debug data dumping to shared memory.
            use_gpu: if True, tensors live on "cuda", otherwise "cpu".
            dtype: torch dtype used for env tensors.
            override_agent_refs: if True, agent references are externally overridden.
            timeout_ms: ack timeout for remote stepping/reset handshakes.
            env_opts: option dictionary, validated/completed in _process_env_opts.

        NOTE(review): `env_opts: Dict = {}` is a mutable default argument; it is
        only read (copied via update) below, but replacing it with None + an
        in-body default would be safer.
        """

        self._this_path = os.path.abspath(__file__)

        self.custom_db_data = None

        self._random_reset_active=False

        self._action_smoother_continuous=None
        self._action_smoother_discrete=None

        self._closed = False
        self._ready=False

        self._namespace = namespace
        self._with_gpu_mirror = True
        self._safe_shared_mem = False

        self._obs_dim = obs_dim
        self._actions_dim = actions_dim

        self._use_gpu = use_gpu
        if self._use_gpu:
            self._device="cuda"
        else:
            self._device="cpu"

        self._dtype = dtype

        self._verbose = verbose
        self._vlevel = vlevel

        self._is_debug = debug

        self._env_name = env_name

        self._override_agent_refs = override_agent_refs

        self._substep_dt=1.0 # dt [s] between each substep

        # copy caller options, then validate/normalize them
        self._env_opts={}
        self._env_opts.update(env_opts)
        self._process_env_opts()

        # shared-memory clients, attached later in _attach_to_shared_mem()
        self._robot_state = None
        self._rhc_cmds = None
        self._rhc_pred = None
        self._rhc_refs = None
        self._rhc_status = None

        self._remote_stepper = None
        self._remote_resetter = None
        self._remote_reset_req = None

        self._agent_refs = None

        self._n_envs = 0

        # per-env counters (episode timeout, task randomization, safety resets, random truncations)
        self._ep_timeout_counter = None
        self._task_rand_counter = None
        self._rand_safety_reset_counter = None
        self._rand_trunc_counter = None

        self._actions_map={} # to be used to hold info like action idxs
        self._obs_map={}

        # env data tensors, allocated by the _init_* calls below
        self._obs = None
        self._obs_ub = None
        self._obs_lb = None
        self._next_obs = None
        self._actions = None
        self._actual_actions = None
        self._actions_ub = None
        self._actions_lb = None
        self._tot_rewards = None
        self._sub_rewards = None
        self._sub_terminations = None
        self._sub_truncations = None
        self._terminations = None
        self._truncations = None
        self._act_mem_buffer = None

        self._episodic_rewards_metrics = None

        self._timeout = timeout_ms

        self._height_grid_size = None
        self._height_flat_dim = 0

        # order matters: shared memory must be attached before tensors are sized
        self._attach_to_shared_mem()

        self._init_obs(obs_dim)
        self._init_actions(actions_dim)
        self._init_rewards()
        self._init_terminations()
        self._init_truncations()
        self._init_custom_db_data()

        self._demo_setup() # setup for demo envs

        # to ensure maps are properly initialized
        _ = self._get_action_names()
        _ = self._get_obs_names()
        _ = self._get_sub_trunc_names()
        _ = self._get_sub_term_names()

        self._set_substep_rew()
        self._set_substep_obs()

        self._custom_post_init()

        # update actions scale and offset in case it was modified in _custom_post_init
        self._actions_scale = (self._actions_ub - self._actions_lb)/2.0
        self._actions_offset = (self._actions_ub + self._actions_lb)/2.0

        if self._env_opts["use_action_smoothing"]:
            self._init_action_smoothing()

        self._ready=self._init_step(reset_on_init=self._env_opts["reset_on_init"])
+
+ def _add_env_opt(self,
+ opts: Dict,
+ name: str,
+ default):
+
+ if not name in opts:
+ opts[name]=default
+
    def _process_env_opts(self, ):
        """Validate required env options and normalize them for the chosen action repeat.

        Checks presence/type of every mandatory option, merges simulator info
        read from the remote `SharedEnvInfo` segment, and rescales all
        step-count options from substeps to agent steps (dividing by
        `action_repeat`). Flags that are configured with non-positive
        frequencies are disabled.
        """

        self._check_for_env_opts("episode_timeout_lb", int)
        self._check_for_env_opts("episode_timeout_ub", int)
        self._check_for_env_opts("n_steps_task_rand_lb", int)
        self._check_for_env_opts("n_steps_task_rand_ub", int)
        self._check_for_env_opts("use_random_trunc", bool)
        self._check_for_env_opts("random_trunc_freq", int)
        self._check_for_env_opts("random_trunc_freq_delta", int)
        self._check_for_env_opts("use_random_safety_reset", bool)
        self._check_for_env_opts("random_reset_freq", int)

        self._check_for_env_opts("action_repeat", int)

        self._check_for_env_opts("n_preinit_steps", int)

        self._check_for_env_opts("demo_envs_perc", float)

        self._check_for_env_opts("vec_ep_freq_metrics_db", int)

        self._check_for_env_opts("srew_drescaling", bool)

        self._check_for_env_opts("use_action_history", bool)
        self._check_for_env_opts("actions_history_size", int)

        self._check_for_env_opts("use_action_smoothing", bool)
        self._check_for_env_opts("smoothing_horizon_c", float)
        self._check_for_env_opts("smoothing_horizon_d", float)

        self._check_for_env_opts("add_heightmap_obs", bool)

        self._check_for_env_opts("reset_on_init", bool)

        # parse action repeat opt + get some sim information
        if self._env_opts["action_repeat"] <=0:
            self._env_opts["action_repeat"] = 1 # sanitize: at least one substep per step
        self._action_repeat=self._env_opts["action_repeat"]
        # parse remote sim info (published by the simulator side)
        sim_info = {}
        sim_info_shared = SharedEnvInfo(namespace=self._namespace,
            is_server=False,
            safe=False,
            verbose=self._verbose,
            vlevel=self._vlevel)
        sim_info_shared.run()
        sim_info_keys = sim_info_shared.param_keys
        sim_info_data = sim_info_shared.get().flatten()
        for i in range(len(sim_info_keys)):
            sim_info[sim_info_keys[i]] = sim_info_data[i]
        if "substepping_dt" in sim_info_keys:
            self._substep_dt=sim_info["substepping_dt"]
        self._env_opts.update(sim_info)

        self._env_opts["substep_dt"]=self._substep_dt

        self._env_opts["override_agent_refs"]=self._override_agent_refs

        # convert substep counts to agent-step counts
        self._env_opts["episode_timeout_lb"] = round(self._env_opts["episode_timeout_lb"]/self._action_repeat)
        self._env_opts["episode_timeout_ub"] = round(self._env_opts["episode_timeout_ub"]/self._action_repeat)

        self._env_opts["n_steps_task_rand_lb"] = round(self._env_opts["n_steps_task_rand_lb"]/self._action_repeat)
        self._env_opts["n_steps_task_rand_ub"] = round(self._env_opts["n_steps_task_rand_ub"]/self._action_repeat)

        if self._env_opts["random_reset_freq"] <=0:
            self._env_opts["use_random_safety_reset"]=False
            self._env_opts["random_reset_freq"]=-1
        self._random_reset_active=self._env_opts["use_random_safety_reset"]

        self._env_opts["random_trunc_freq"] = round(self._env_opts["random_trunc_freq"]/self._action_repeat)
        self._env_opts["random_trunc_freq_delta"] = round(self._env_opts["random_trunc_freq_delta"]/self._action_repeat)

        if self._env_opts["random_trunc_freq"] <=0:
            self._env_opts["use_random_trunc"]=False
            self._env_opts["random_trunc_freq"]=-1

        # optional flag: dump full env db data
        self._full_db=False
        if "full_env_db" in self._env_opts:
            self._full_db=self._env_opts["full_env_db"]
+
+ def _check_for_env_opts(self,
+ name: str,
+ expected_type):
+ if not (name in self._env_opts):
+ Journal.log(self.__class__.__name__,
+ "_check_for_env_opts",
+ f"Required option {name} missing for env opts!",
+ LogType.EXCEP,
+ throw_when_excep=True)
+ if not isinstance(self._env_opts[name], expected_type):
+ Journal.log(self.__class__.__name__,
+ "_check_for_env_opts",
+ f"Option {name} in env opts is not of type {expected_type} (got {type(self._env_opts[name])})!",
+ LogType.EXCEP,
+ throw_when_excep=True)
+
+ def __del__(self):
+
+ self.close()
+
    def _demo_setup(self):
        """Select which environments act as demonstration envs.

        The number of demo envs is `demo_envs_perc * n_envs` (rounded); their
        indexes are drawn at random. Subclass-specific demo initialization is
        delegated to `_init_demo_envs`. When no demo envs result, the feature
        is left disabled (`_demo_envs_idxs` stays None).
        """

        self._demo_envs_idxs=None
        self._demo_envs_idxs_bool=None
        self._n_demo_envs=round(self._env_opts["demo_envs_perc"]*self._n_envs)
        self._add_demos=False
        if not self._n_demo_envs >0:
            Journal.log(self.__class__.__name__,
                "__init__",
                "will not use demo environments",
                LogType.INFO,
                throw_when_excep=False)
        else:
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Will run with {self._n_demo_envs} demonstration envs.",
                LogType.INFO)
            # random subset of env indexes + equivalent boolean mask
            self._demo_envs_idxs = torch.randperm(self._n_envs, device=self._device)[:self._n_demo_envs]
            self._demo_envs_idxs_bool=torch.full((self._n_envs, ), dtype=torch.bool, device=self._device,
                fill_value=False)
            self._demo_envs_idxs_bool[self._demo_envs_idxs]=True

            self._init_demo_envs() # custom logic

            demo_idxs_str=", ".join(map(str, self._demo_envs_idxs.tolist()))
            Journal.log(self.__class__.__name__,
                "__init__",
                f"Demo env. indexes are [{demo_idxs_str}]",
                LogType.INFO)
+
+ def env_opts(self):
+ return self._env_opts
+
+ def demo_env_idxs(self, get_bool: bool=False):
+ if get_bool:
+ return self._demo_envs_idxs_bool
+ else:
+ return self._demo_envs_idxs
+
+ def _init_demo_envs(self):
+ pass
+
+ def n_demo_envs(self):
+ return self._n_demo_envs
+
+ def demo_active(self):
+ return self._add_demos
+
+ def switch_demo(self, active: bool = False):
+ if self._demo_envs_idxs is not None:
+ self._add_demos=active
+ else:
+ Journal.log(self.__class__.__name__,
+ "switch_demo",
+ f"Cannot switch demostrations on. No demo envs available!",
+ LogType.EXCEP,
+ throw_when_excep=True)
+
+ def _get_this_file_path(self):
+ return self._this_path
+
+ def episode_timeout_bounds(self):
+ return self._env_opts["episode_timeout_lb"], self._env_opts["episode_timeout_ub"]
+
+ def task_rand_timeout_bounds(self):
+ return self._env_opts["n_steps_task_rand_lb"], self._env_opts["n_steps_task_rand_ub"]
+
+ def n_action_reps(self):
+ return self._action_repeat
+
+ def get_file_paths(self):
+ from aug_mpc.utils.sys_utils import PathsGetter
+ path_getter = PathsGetter()
+ base_paths = []
+ base_paths.append(self._get_this_file_path())
+ base_paths.append(path_getter.REMOTENVPATH)
+ for script_path in path_getter.SCRIPTSPATHS:
+ base_paths.append(script_path)
+
+ # rhc files
+ from EigenIPC.PyEigenIPC import StringTensorClient
+ from perf_sleep.pyperfsleep import PerfSleep
+ shared_rhc_shared_files = StringTensorClient(
+ basename="SharedRhcFilesDropDir",
+ name_space=self._namespace,
+ verbose=self._verbose,
+ vlevel=VLevel.V2)
+ shared_rhc_shared_files.run()
+ shared_rhc_files_vals=[""]*shared_rhc_shared_files.length()
+ while not shared_rhc_shared_files.read_vec(shared_rhc_files_vals, 0):
+ nsecs = 1000000000 # 1 sec
+ PerfSleep.thread_sleep(nsecs) # we just keep it alive
+ rhc_list=[]
+ for rhc_files in shared_rhc_files_vals:
+ file_list = rhc_files.split(", ")
+ rhc_list.extend(file_list)
+ rhc_list = list(set(rhc_list)) # removing duplicates
+ base_paths.extend(rhc_list)
+
+ # world interface files
+ get_world_interface_paths = self.get_world_interface_paths()
+ base_paths.extend(get_world_interface_paths)
+ return base_paths
+
    def get_world_interface_paths(self):
        """Read the world-interface file list published on shared memory.

        Blocks (sleeping 1 s between attempts) until the shared string tensor
        can be read, then returns the de-duplicated, order-preserving list of
        file paths (empty entries are skipped). The client is closed after use.
        """
        paths = []
        shared_world_iface_files = StringTensorClient(
            basename="SharedWorldInterfaceFilesDropDir",
            name_space=self._namespace,
            verbose=self._verbose,
            vlevel=VLevel.V2)
        shared_world_iface_files.run()
        world_iface_vals=[""]*shared_world_iface_files.length()
        while not shared_world_iface_files.read_vec(world_iface_vals, 0):
            nsecs = 1000000000 # 1 sec
            PerfSleep.thread_sleep(nsecs) # keep alive while waiting
        shared_world_iface_files.close()
        for files in world_iface_vals:
            if files == "":
                continue
            # entries are comma-separated path lists
            file_list = files.split(", ")
            for f in file_list:
                if f not in paths:
                    paths.append(f)
        return paths
+
+ def get_aux_dir(self):
+ empty_list = []
+ return empty_list
+
    def _init_step(self, reset_on_init: bool = True):
        """Run the initial remote handshake: activate controllers, reset and pre-step.

        Sequence: check/activate RHC controllers, one sim step + (optional)
        full reset, then `n_preinit_steps` dummy step/reset cycles so remote
        data is meaningfully initialized, and finally a local `reset()`.

        Returns:
            bool: True if every remote exchange succeeded.
        """

        self._check_controllers_registered(retry=True)
        self._activate_rhc_controllers()

        # just an auxiliary tensor (same shape/device as the terminations mirror)
        initial_reset_aux = self._terminations.get_torch_mirror(gpu=self._use_gpu).clone()
        initial_reset_aux[:, :] = reset_on_init # we reset all sim envs first
        init_step_ok=True
        init_step_ok=self._remote_sim_step() and init_step_ok
        if not init_step_ok:
            return False
        init_step_ok=self._remote_reset(reset_mask=initial_reset_aux) and init_step_ok
        if not init_step_ok:
            return False

        for i in range(self._env_opts["n_preinit_steps"]): # perform some
            # dummy remote env stepping to make sure to have meaningful
            # initializations (doesn't increment step counter)
            init_step_ok=self._remote_sim_step() and init_step_ok # 1 remote sim. step
            if not init_step_ok:
                return False
            init_step_ok=self._send_remote_reset_req() and init_step_ok # fake reset request
            if not init_step_ok:
                return False

        self.reset()

        return init_step_ok
+
    def _debug(self):
        """Dump the full env state (obs/actions/rewards/term/trunc) to shared memory.

        When running on GPU, all mirrors are first copied GPU->CPU with
        non_blocking transfers followed by a single cuda.synchronize() so the
        dumped data is consistent; the CPU views are then written to shared mem.
        """

        if self._use_gpu:
            # using non_blocking which is not safe when GPU->CPU
            self._obs.synch_mirror(from_gpu=True,non_blocking=True) # copy data from gpu to cpu view
            self._next_obs.synch_mirror(from_gpu=True,non_blocking=True)
            self._actions.synch_mirror(from_gpu=True,non_blocking=True)
            self._truncations.synch_mirror(from_gpu=True,non_blocking=True)
            self._sub_truncations.synch_mirror(from_gpu=True,non_blocking=True)
            self._terminations.synch_mirror(from_gpu=True,non_blocking=True)
            self._sub_terminations.synch_mirror(from_gpu=True,non_blocking=True)
            self._tot_rewards.synch_mirror(from_gpu=True,non_blocking=True)
            self._sub_rewards.synch_mirror(from_gpu=True,non_blocking=True)
            # if we want reliable db data then we should synchronize data streams
            torch.cuda.synchronize()

        # copy CPU view on shared memory
        self._obs.synch_all(read=False, retry=True)
        self._next_obs.synch_all(read=False, retry=True)
        self._actions.synch_all(read=False, retry=True)
        self._tot_rewards.synch_all(read=False, retry=True)
        self._sub_rewards.synch_all(read=False, retry=True)
        self._truncations.synch_all(read=False, retry = True)
        self._sub_truncations.synch_all(read=False, retry = True)
        self._terminations.synch_all(read=False, retry = True)
        self._sub_terminations.synch_all(read=False, retry = True)

        self._debug_agent_refs()
+
+ def _debug_agent_refs(self):
+ if self._use_gpu:
+ if not self._override_agent_refs:
+ self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False)
+ if not self._override_agent_refs:
+ self._agent_refs.rob_refs.root_state.synch_all(read=False, retry = True)
+
+ def _remote_sim_step(self):
+
+ self._remote_stepper.trigger() # triggers simulation + RHC
+ if not self._remote_stepper.wait_ack_from(1, self._timeout):
+ Journal.log(self.__class__.__name__,
+ "_remote_sim_step",
+ "Remote sim. env step ack not received within timeout",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ return False
+ return True
+
    def _remote_reset(self,
            reset_mask: torch.Tensor = None):
        """Send a (possibly empty) remote reset request and refresh local state.

        Args:
            reset_mask: boolean per-env mask; True entries are reset remotely.
                If None, only the "allow stepping" signal is sent and no env
                is reset.

        Returns:
            bool: True if the remote request was acknowledged.
        """

        reset_reqs = self._remote_reset_req.get_torch_mirror()
        if reset_mask is None: # just send the signal to allow stepping, but do not reset any of
            # the remote envs
            reset_reqs[:, :] = False
        else:
            reset_reqs[:, :] = reset_mask # remotely reset envs corresponding to
            # the mask (True--> to be reset)
        self._remote_reset_req.synch_all(read=False, retry=True) # write on shared buffer
        remote_reset_ok = self._send_remote_reset_req() # process remote request

        if reset_mask is not None:
            self._synch_state(gpu=self._use_gpu) # if some env was reset, we use _obs
            # to hold the states, including resets, while _next_obs will always hold the
            # state right after stepping the sim env
            # (could be a bit more efficient, since in theory we only need to read the envs
            # corresponding to the reset_mask)


        return remote_reset_ok
+
+ def _send_remote_reset_req(self):
+
+ self._remote_resetter.trigger()
+ if not self._remote_resetter.wait_ack_from(1, self._timeout): # remote reset completed
+ Journal.log(self.__class__.__name__,
+ "_post_step",
+ "Remote reset did not complete within the prescribed timeout!",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ return False
+ return True
+
    def step(self,
        action):
        """Advance the environment by one agent step (`action_repeat` sim substeps).

        Args:
            action: normalized actions in [-1, 1] (torch tensor, one row per env).

        Scales actions to the physical range, optionally overrides some with
        demonstrations and/or smooths them, applies them to the RHC
        controllers, then runs the substep loop: remote sim step, state synch,
        substep reward/obs accumulation. On the last substep the step-level
        obs and rewards are finalized; sub-rewards are summed (optionally
        rescaled) into the total reward.

        Returns:
            bool: True if every remote exchange in the step succeeded.
        """

        actions_norm = action.detach() # IMPORTANT: assumes actions are already normalized in [-1, 1]

        actions = self._actions.get_torch_mirror(gpu=self._use_gpu) # will hold agent actions (real range)

        # scale normalized actions to physical space before interfacing with controllers
        actions[:, :] = actions_norm*self._actions_scale + self._actions_offset

        self._override_actions_with_demo() # if necessary override some actions with expert demonstrations
        # (getting actions with get_actions will return the modified actions tensor)

        actions.clamp_(self._actions_lb, self._actions_ub) # just to be safe

        if self._act_mem_buffer is not None: # store norm actions in memory buffer
            self._act_mem_buffer.update(new_data=actions_norm)

        if self._env_opts["use_action_smoothing"]:
            self._apply_actions_smoothing() # smooth actions if enabled (the tensor returned by
            # get_actions does not contain smoothing and can be safely employed for experience collection)

        self._apply_actions_to_rhc() # apply last agent actions to rhc controller

        stepping_ok = True
        tot_rewards = self._tot_rewards.get_torch_mirror(gpu=self._use_gpu)
        sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)
        next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu)
        # accumulators must start from zero each agent step
        tot_rewards.zero_()
        sub_rewards.zero_()
        self._substep_rewards.zero_()
        next_obs.zero_() # necessary for substep obs

        for i in range(0, self._action_repeat):

            self._pre_substep() # custom logic @ substep freq

            stepping_ok = stepping_ok and self._check_controllers_registered(retry=False) # does not make sense to run training
            # if we lost some controllers
            stepping_ok = stepping_ok and self._remote_sim_step() # blocking,

            # no sim substepping is allowed to fail
            self._synch_state(gpu=self._use_gpu) # read state from shared mem (done in substeps also,
            # since substeps rewards will need updated substep obs)

            self._custom_post_substp_pre_rew() # custom substepping logic
            self._compute_substep_rewards()
            self._assemble_substep_rewards() # includes rewards clipping
            self._custom_post_substp_post_rew() # custom substepping logic

            # fill substep obs
            self._fill_substep_obs(self._substep_obs)
            self._assemble_substep_obs()
            if not i==(self._action_repeat-1):
                # sends reset signal to complete remote step sequence,
                # but does not reset any remote env
                stepping_ok = stepping_ok and self._remote_reset(reset_mask=None)
            else: # last substep

                self._fill_step_obs(next_obs) # update next obs
                self._clamp_obs(next_obs) # good practice
                obs = self._obs.get_torch_mirror(gpu=self._use_gpu)
                obs[:, :] = next_obs # start from next observation, unless reset (handled in post_step())

                self._compute_step_rewards() # implemented by child

                tot_rewards = self._tot_rewards.get_torch_mirror(gpu=self._use_gpu)
                sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)
                self._clamp_rewards(sub_rewards) # clamp all sub rewards

                tot_rewards[:, :] = torch.sum(sub_rewards, dim=1, keepdim=True)

                scale=1 # scale tot rew by the number of action repeats
                if self._env_opts["srew_drescaling"]: # scale rewards depending on the n of subrewards
                    scale*=sub_rewards.shape[1] # n. dims rescaling
                tot_rewards.mul_(1/scale)

            self._substep_abs_counter.increment() # @ substep freq

            if not stepping_ok:
                return False

        stepping_ok = stepping_ok and self._post_step() # post sub-stepping operations
        # (if action_repeat > 1, then just the db data at the last substep is logged)
        # also, if a reset of an env occurs, obs will hold the reset state

        return stepping_ok
+
+ def _post_step(self):
+
+ # first increment counters
+ self._ep_timeout_counter.increment() # episode timeout
+ self._task_rand_counter.increment() # task randomization
+ if self._rand_trunc_counter is not None: # random truncations (for removing temp. correlations)
+ self._rand_trunc_counter.increment()
+
+ # check truncation and termination conditions
+ self._check_truncations() # defined in child env
+ self._check_terminations()
+ terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu)
+ truncated = self._truncations.get_torch_mirror(gpu=self._use_gpu)
+ ignore_ep_end=None
+ if self._rand_trunc_counter is not None:
+ ignore_ep_end=self._rand_trunc_counter.time_limits_reached()
+ if self._use_gpu:
+ ignore_ep_end=ignore_ep_end.cuda()
+
+ truncated = torch.logical_or(truncated,
+ ignore_ep_end) # add truncation (sub truncations defined in child env
+ # remain untouched)
+
+ episode_finished = torch.logical_or(terminated,
+ truncated)
+ episode_finished_cpu = episode_finished.cpu()
+
+ if self._rand_safety_reset_counter is not None and self._random_reset_active:
+ self._rand_safety_reset_counter.increment(to_be_incremented=episode_finished_cpu.flatten())
+ # truncated[:,:] = torch.logical_or(truncated,
+ # self._rand_safety_reset_counter.time_limits_reached().cuda())
+
+ if self._act_mem_buffer is not None:
+ self._act_mem_buffer.reset(to_be_reset=episode_finished.flatten(),
+ init_data=self._normalize_actions(self.default_action))
+
+ if self._action_smoother_continuous is not None:
+ self._action_smoother_continuous.reset(to_be_reset=episode_finished.flatten(),
+ reset_val=self.default_action[:, self._is_continuous_actions])
+ if self._action_smoother_discrete is not None:
+ self._action_smoother_discrete.reset(to_be_reset=episode_finished.flatten(),
+ reset_val=self.default_action[:, ~self._is_continuous_actions])
+
+ # debug step if required (IMPORTANT: must be before remote reset so that we always db
+ # actual data from the step and not after reset)
+ if self._is_debug:
+ self._debug() # copies db data on shared memory
+ ignore_ep_end_cpu=ignore_ep_end if not self._use_gpu else ignore_ep_end.cpu()
+ self._update_custom_db_data(episode_finished=episode_finished_cpu,
+ ignore_ep_end=ignore_ep_end_cpu # ignore data if random trunc
+ )
+ self._episodic_rewards_metrics.update(rewards = self._sub_rewards.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished_cpu,
+ ignore_ep_end=ignore_ep_end_cpu # ignore data if random trunc
+ )
+
+ # remotely reset envs
+ to_be_reset=self._to_be_reset()
+ to_be_reset_custom=self._custom_reset()
+ if to_be_reset_custom is not None:
+ to_be_reset[:, :] = torch.logical_or(to_be_reset,to_be_reset_custom)
+ rm_reset_ok = self._remote_reset(reset_mask=to_be_reset)
+
+ self._custom_post_step(episode_finished=episode_finished) # any additional logic from child env
+ # here, before actual reset taskes place (at this point the state is the reset one)
+
+ # updating also prev pos and orientation in case some env was reset
+ self._prev_root_p_substep[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)
+ self._prev_root_q_substep[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
+
+ obs = self._obs.get_torch_mirror(gpu=self._use_gpu)
+ self._fill_step_obs(obs)
+ self._clamp_obs(obs)
+
+ # updating prev step quantities
+ self._prev_root_p_step[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)
+ self._prev_root_q_step[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
+
+ # synchronize and reset counters for finished episodes
+ self._ep_timeout_counter.reset(to_be_reset=episode_finished)
+ self._task_rand_counter.reset(to_be_reset=episode_finished)
+ self._substep_abs_counter.reset(to_be_reset=torch.logical_or(terminated,to_be_reset),
+ randomize_offsets=True # otherwise timers across envs would be strongly correlated
+ ) # reset only if resetting environment or if terminal
+
+ if self._rand_trunc_counter is not None:
+ # only reset when safety truncation was is triggered
+ self._rand_trunc_counter.reset(to_be_reset=self._rand_trunc_counter.time_limits_reached(),
+ randomize_limits=True, # we need to randomize otherwise the other counters will synchronize
+ # with the episode counters
+ randomize_offsets=False # always restart at 0
+ )
+ # safety reset counter is only when it reches its reset interval (just to keep
+ # the counter bounded)
+ if self._rand_safety_reset_counter is not None and self._random_reset_active:
+ self._rand_safety_reset_counter.reset(to_be_reset=self._rand_safety_reset_counter.time_limits_reached())
+
+ return rm_reset_ok
+
+ def _to_be_reset(self):
+ # always reset if a termination occurred or if there's a random safety reset
+ # request
+ terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu)
+ to_be_reset=terminated.clone()
+ if (self._rand_safety_reset_counter is not None) and self._random_reset_active:
+ to_be_reset=torch.logical_or(to_be_reset,
+ self._rand_safety_reset_counter.time_limits_reached())
+
+ return to_be_reset
+
+ def _custom_reset(self):
+ # can be overridden by child
+ return None
+
+ def _apply_actions_smoothing(self):
+
+ actions = self._actions.get_torch_mirror(gpu=self._use_gpu)
+ actual_actions=self.get_actual_actions() # will write smoothed actions here
+ if self._action_smoother_continuous is not None:
+ self._action_smoother_continuous.update(new_signal=
+ actions[:, self._is_continuous_actions])
+ actual_actions[:, self._is_continuous_actions]=self._action_smoother_continuous.get()
+ if self._action_smoother_discrete is not None:
+ self._action_smoother_discrete.update(new_signal=
+ actions[:, ~self._is_continuous_actions])
+ actual_actions[:, ~self._is_continuous_actions]=self._action_smoother_discrete.get()
+
+ def _update_custom_db_data(self,
+ episode_finished,
+ ignore_ep_end):
+
+ # update defaults
+ self.custom_db_data["RhcRefsFlag"].update(new_data=self._rhc_refs.contact_flags.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end) # before potentially resetting the flags, get data
+ self.custom_db_data["Actions"].update(new_data=self._actions.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end)
+ self.custom_db_data["Obs"].update(new_data=self._obs.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end)
+
+ self.custom_db_data["SubTerminations"].update(new_data=self._sub_terminations.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end)
+ self.custom_db_data["SubTruncations"].update(new_data=self._sub_truncations.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end)
+
+ self.custom_db_data["Terminations"].update(new_data=self._terminations.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end)
+ self.custom_db_data["Truncations"].update(new_data=self._truncations.get_torch_mirror(gpu=False),
+ ep_finished=episode_finished,
+ ignore_ep_end=ignore_ep_end)
+
+
+ self._get_custom_db_data(episode_finished=episode_finished, ignore_ep_end=ignore_ep_end)
+
+ def reset_custom_db_data(self, keep_track: bool = False):
+ # to be called periodically to reset custom db data stat. collection
+ for custom_db_data in self.custom_db_data.values():
+ custom_db_data.reset(keep_track=keep_track)
+
+ def _assemble_substep_rewards(self):
+ # by default assemble substep rewards by averaging
+ sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)
+
+ # average over substeps depending on scale
+ # sub_rewards[:, self._is_substep_rew] = sub_rewards[:, self._is_substep_rew] + \
+ # self._substep_rewards[:, self._is_substep_rew]/self._action_repeat
+ sub_rewards[:, self._is_substep_rew] += self._substep_rewards[:, self._is_substep_rew]/self._action_repeat
+
+ def _assemble_substep_obs(self):
+ next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu)
+ next_obs[:, self._is_substep_obs] += self._substep_obs[:, self._is_substep_obs]/self._action_repeat
+
+ def randomize_task_refs(self,
+ env_indxs: torch.Tensor = None):
+
+ if self._override_agent_refs:
+ self._override_refs(env_indxs=env_indxs)
+ else:
+ self._randomize_task_refs(env_indxs=env_indxs)
+
    def reset(self):
        """Full environment reset: randomize task refs, zero all shared
        buffers, restart counters/smoothers, re-read the state from shared
        memory and re-initialize both obs and next-obs. The ordering below is
        deliberate (refs before buffers, state synch before obs fill)."""

        self.randomize_task_refs(env_indxs=None) # randomize all refs across envs

        # zero all env-side shared buffers
        self._obs.reset()
        self._actions.reset()
        self._next_obs.reset()
        self._sub_rewards.reset()
        self._tot_rewards.reset()
        self._terminations.reset()
        self._sub_terminations.reset()
        self._truncations.reset()
        self._sub_truncations.reset()

        # restart counters; timeout offsets are randomized to decorrelate envs
        self._ep_timeout_counter.reset(randomize_offsets=True)
        self._task_rand_counter.reset()
        self._task_rand_counter.sync_counters(other_counter=self._ep_timeout_counter)
        if self._rand_safety_reset_counter is not None:
            self._rand_safety_reset_counter.reset()
        self._substep_abs_counter.reset()

        if self._act_mem_buffer is not None:
            self._act_mem_buffer.reset_all(init_data=self._normalize_actions(self.default_action))

        if self._action_smoother_continuous is not None:
            self._action_smoother_continuous.reset(reset_val=self.default_action[:, self._is_continuous_actions])
        if self._action_smoother_discrete is not None:
            self._action_smoother_discrete.reset(reset_val=self.default_action[:, ~self._is_continuous_actions])

        self._synch_state(gpu=self._use_gpu) # read obs from shared mem

        # just calling custom post step to ensure task refs are updated
        terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu)
        truncated = self._truncations.get_torch_mirror(gpu=self._use_gpu)
        episode_finished = torch.logical_or(terminated,
                            truncated)
        self._custom_post_step(episode_finished=episode_finished)

        obs = self._obs.get_torch_mirror(gpu=self._use_gpu)
        next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu)
        self._fill_step_obs(obs) # initialize observations
        self._clamp_obs(obs) # to avoid bad things
        self._fill_step_obs(next_obs) # and next obs
        self._clamp_obs(next_obs)

        self.reset_custom_db_data(keep_track=False)
        self._episodic_rewards_metrics.reset(keep_track=False)

        # initialize prev step/substep pose caches with the freshly-read state
        self._prev_root_p_step[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)
        self._prev_root_q_step[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
        self._prev_root_p_substep[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)
        self._prev_root_q_substep[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
+
+ def is_ready(self):
+ return self._ready
+
    def close(self):
        """Close every shared-memory client/server owned by the env.
        Idempotent: repeated calls after the first are no-ops."""

        if not self._closed:

            # close all shared mem. clients
            self._robot_state.close()
            self._rhc_cmds.close()
            self._rhc_pred.close()
            self._rhc_refs.close()
            self._rhc_status.close()

            self._remote_stepper.close()

            # counters
            self._ep_timeout_counter.close()
            self._task_rand_counter.close()
            if self._rand_safety_reset_counter is not None:
                self._rand_safety_reset_counter.close()

            # closing env.-specific shared data
            self._obs.close()
            self._next_obs.close()
            self._actions.close()
            if self._actual_actions is not None: # only exists when action smoothing is on
                self._actual_actions.close()
            self._sub_rewards.close()
            self._tot_rewards.close()

            self._terminations.close()
            self._sub_terminations.close()
            self._truncations.close()
            self._sub_truncations.close()

            self._closed = True
+
+ def get_obs(self, clone:bool=False):
+ if clone:
+ return self._obs.get_torch_mirror(gpu=self._use_gpu).detach().clone()
+ else:
+ return self._obs.get_torch_mirror(gpu=self._use_gpu).detach()
+
+ def get_next_obs(self, clone:bool=False):
+ if clone:
+ return self._next_obs.get_torch_mirror(gpu=self._use_gpu).detach().clone()
+ else:
+ return self._next_obs.get_torch_mirror(gpu=self._use_gpu).detach()
+
+ def get_actions(self, clone:bool=False, normalized: bool = False):
+ actions = self._actions.get_torch_mirror(gpu=self._use_gpu).detach()
+ if normalized:
+ normalized_actions = self._normalize_actions(actions)
+ return normalized_actions.clone() if clone else normalized_actions
+ return actions.clone() if clone else actions
+
+ def get_actual_actions(self, clone:bool=False, normalized: bool = False):
+ if self._env_opts["use_action_smoothing"]:
+ actions = self._actual_actions.get_torch_mirror(gpu=self._use_gpu).detach()
+ else: # actual action coincides with the one from the agent + possible modif.
+ actions = self.get_actions(clone=False, normalized=False)
+ if normalized:
+ normalized_actions = self._normalize_actions(actions)
+ return normalized_actions.clone() if clone else normalized_actions
+ return actions.clone() if clone else actions
+
+ def _normalize_actions(self, actions: torch.Tensor):
+ scale = torch.where(self._actions_scale == 0.0,
+ torch.ones_like(self._actions_scale),
+ self._actions_scale)
+ normalized = (actions - self._actions_offset)/scale
+ zero_scale_mask = torch.eq(self._actions_scale, 0.0).squeeze(0)
+ if torch.any(zero_scale_mask):
+ normalized[:, zero_scale_mask] = 0.0
+ return normalized
+
+ def get_rewards(self, clone:bool=False):
+ if clone:
+ return self._tot_rewards.get_torch_mirror(gpu=self._use_gpu).detach().clone()
+ else:
+ return self._tot_rewards.get_torch_mirror(gpu=self._use_gpu).detach()
+
+ def get_terminations(self, clone:bool=False):
+ if clone:
+ return self._terminations.get_torch_mirror(gpu=self._use_gpu).detach().clone()
+ else:
+ return self._terminations.get_torch_mirror(gpu=self._use_gpu).detach()
+
+ def get_truncations(self, clone:bool=False):
+ if clone:
+ return self._truncations.get_torch_mirror(gpu=self._use_gpu).detach().clone()
+ else:
+ return self._truncations.get_torch_mirror(gpu=self._use_gpu).detach()
+
+ def obs_dim(self):
+
+ return self._obs_dim
+
+ def actions_dim(self):
+
+ return self._actions_dim
+
+ def ep_rewards_metrics(self):
+
+ return self._episodic_rewards_metrics
+
+ def using_gpu(self):
+
+ return self._use_gpu
+
+ def name(self):
+
+ return self._env_name
+
+ def n_envs(self):
+
+ return self._n_envs
+
+ def dtype(self):
+
+ return self._dtype
+
+ def obs_names(self):
+ return self._get_obs_names()
+
+ def action_names(self):
+ return self._get_action_names()
+
+ def sub_rew_names(self):
+ return self._get_rewards_names()
+
+ def sub_term_names(self):
+ return self._get_sub_term_names()
+
+ def sub_trunc_names(self):
+ return self._get_sub_trunc_names()
+
+ def _get_obs_names(self):
+ # to be overridden by child class
+ return None
+
+ def get_robot_jnt_names(self):
+ return self._robot_state.jnt_names()
+
+ def _get_action_names(self):
+ # to be overridden by child class
+ return None
+
+ def _get_rewards_names(self):
+ # to be overridden by child class
+ return None
+
+ def _get_sub_term_names(self):
+ # to be overridden by child class
+ sub_term_names = []
+ sub_term_names.append("rhc_failure")
+ sub_term_names.append("robot_capsize")
+ sub_term_names.append("rhc_capsize")
+
+ return sub_term_names
+
+ def _get_sub_trunc_names(self):
+ # to be overridden by child class
+ sub_trunc_names = []
+ sub_trunc_names.append("ep_timeout")
+
+ return sub_trunc_names
+
+ def _get_custom_db_data(self, episode_finished):
+ # to be overridden by child class
+ pass
+
+ def set_observed_joints(self):
+ # ny default observe all joints available
+ return self._robot_state.jnt_names()
+
+ def _set_jnts_blacklist_pattern(self):
+ self._jnt_q_blacklist_patterns=[]
+
+ def get_observed_joints(self):
+ return self._observed_jnt_names
+
    def _init_obs(self, obs_dim: int):
        """Allocate observation bounds/scalings and the shared-memory servers
        for current and next observations.

        Args:
            obs_dim: per-env observation dimensionality; must match the length
                of the names returned by _get_obs_names() (checked below).
        """

        device = "cuda" if self._use_gpu else "cpu"

        # symmetric clipping thresholds applied to raw observations
        obs_threshold_default = 1e3
        self._obs_threshold_lb = -obs_threshold_default # used for clipping observations
        self._obs_threshold_ub = obs_threshold_default

        # nominal obs bounds (default [-1, 1]) and derived affine scalings
        self._obs_ub = torch.full((1, obs_dim), dtype=self._dtype, device=device,
            fill_value=1.0)
        self._obs_lb = torch.full((1, obs_dim), dtype=self._dtype, device=device,
            fill_value=-1.0)
        self._obs_scale = (self._obs_ub - self._obs_lb)/2.0
        self._obs_offset = (self._obs_ub + self._obs_lb)/2.0

        # sanity check: declared dim must match the names provided by the child
        if not self._obs_dim==len(self._get_obs_names()):
            error=f"obs dim {self._obs_dim} does not match obs names length {len(self._get_obs_names())}!!"
            Journal.log(self.__class__.__name__,
                "_init_obs",
                error,
                LogType.EXCEP,
                throw_when_excep = True)

        # shared-memory servers holding current and next observations
        self._obs = Observations(namespace=self._namespace,
            n_envs=self._n_envs,
            obs_dim=self._obs_dim,
            obs_names=self._get_obs_names(),
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)

        self._next_obs = NextObservations(namespace=self._namespace,
            n_envs=self._n_envs,
            obs_dim=self._obs_dim,
            obs_names=self._get_obs_names(),
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)

        self._obs.run()
        self._next_obs.run()

        # mask selecting which obs dims are accumulated at substep frequency
        self._is_substep_obs = torch.zeros((self.obs_dim(),), dtype=torch.bool, device=device)
        self._is_substep_obs.fill_(False) # default to all step obs

        # scratch buffer for substep observations (not super memory efficient)
        self._substep_obs=torch.full_like(self._obs.get_torch_mirror(gpu=self._use_gpu), fill_value=0.0)
+
    def _init_actions(self, actions_dim: int):
        """Allocate action bounds/scalings, the shared-memory action server,
        the default/safe action tensors and (optionally) the action history
        buffer.

        Args:
            actions_dim: per-env action dimensionality; must match the length
                of the names returned by _get_action_names() (checked below).
        """

        device = "cuda" if self._use_gpu else "cpu"
        # action scalings to be applied to agent's output (default range [-1, 1])
        self._actions_ub = torch.full((1, actions_dim), dtype=self._dtype, device=device,
            fill_value=1.0)
        self._actions_lb = torch.full((1, actions_dim), dtype=self._dtype, device=device,
            fill_value=-1.0)
        self._actions_scale = (self._actions_ub - self._actions_lb)/2.0
        self._actions_offset = (self._actions_ub + self._actions_lb)/2.0

        # sanity check: declared dim must match the names provided by the child
        if not self._actions_dim==len(self._get_action_names()):
            error=f"action dim {self._actions_dim} does not match action names length {len(self._get_action_names())}!!"
            Journal.log(self.__class__.__name__,
                "_init_actions",
                error,
                LogType.EXCEP,
                throw_when_excep = True)
        self._actions = Actions(namespace=self._namespace,
            n_envs=self._n_envs,
            action_dim=self._actions_dim,
            action_names=self._get_action_names(),
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)

        self._actions.run()

        # zero-filled templates used to (re)initialize actions and buffers
        self.default_action = torch.full_like(input=self.get_actions(),fill_value=0.0)
        self.safe_action = torch.full_like(input=self.get_actions(),fill_value=0.0)

        if self._env_opts["use_action_history"]:
            self._act_mem_buffer=MemBuffer(name="ActionMemBuf",
                data_tensor=self._actions.get_torch_mirror(),
                data_names=self._get_action_names(),
                debug=self._debug,
                horizon=self._env_opts["actions_history_size"],
                dtype=self._dtype,
                use_gpu=self._use_gpu)

        # default to all continuous actions (changes the way noise is added)
        self._is_continuous_actions=torch.full((actions_dim, ),
            dtype=torch.bool, device=device,
            fill_value=True)
+
    def _init_action_smoothing(self):
        """Create exponential smoothers for continuous and discrete action
        dims, plus a shared-memory buffer holding the actually-applied
        (smoothed) actions. update_dt is the agent decision period
        (substep dt times action repeats)."""

        continuous_actions=self.get_actions()[:, self._is_continuous_actions]
        discrete_actions=self.get_actions()[:, ~self._is_continuous_actions]
        self._action_smoother_continuous=ExponentialSignalSmoother(signal=continuous_actions,
            update_dt=self._substep_dt*self._action_repeat, # rate at which actions are decided by agent
            smoothing_horizon=self._env_opts["smoothing_horizon_c"],
            target_smoothing=0.5,
            debug=self._debug,
            dtype=self._dtype,
            use_gpu=self._use_gpu,
            name="ActionSmootherContinuous")
        self._action_smoother_discrete=ExponentialSignalSmoother(signal=discrete_actions,
            update_dt=self._substep_dt*self._action_repeat, # rate at which actions are decided by agent
            smoothing_horizon=self._env_opts["smoothing_horizon_d"],
            target_smoothing=0.5,
            debug=self._debug,
            dtype=self._dtype,
            use_gpu=self._use_gpu,
            name="ActionSmootherDiscrete")

        # we also need somewhere to keep the actual actions after smoothing
        self._actual_actions = Actions(namespace=self._namespace+"_actual",
            n_envs=self._n_envs,
            action_dim=self._actions_dim,
            action_names=self._get_action_names(),
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)
        self._actual_actions.run()
+
    def _init_rewards(self):
        """Allocate reward clipping thresholds, the shared-memory servers for
        sub and total rewards, the substep-reward scratch buffer and the
        episodic reward metrics collector."""

        # symmetric per-sub-reward clipping threshold
        reward_thresh_default = 1.0
        n_sub_rewards = len(self._get_rewards_names())
        device = "cuda" if self._use_gpu else "cpu"
        self._reward_thresh_lb = torch.full((1, n_sub_rewards), dtype=self._dtype, fill_value=-reward_thresh_default, device=device) # used for clipping rewards
        self._reward_thresh_ub = torch.full((1, n_sub_rewards), dtype=self._dtype, fill_value=reward_thresh_default, device=device)

        self._sub_rewards = SubRewards(namespace=self._namespace,
            n_envs=self._n_envs,
            n_rewards=n_sub_rewards,
            reward_names=self._get_rewards_names(),
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)

        self._tot_rewards = TotRewards(namespace=self._namespace,
            n_envs=self._n_envs,
            reward_names=["total_reward"],
            env_names=None,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=0.0)

        self._sub_rewards.run()
        self._tot_rewards.run()

        self._substep_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu).detach().clone()
        # used to hold substep rewards (not super mem. efficient)
        # mask selecting which sub-rewards are computed at substep frequency
        self._is_substep_rew = torch.zeros((self._substep_rewards.shape[1],),dtype=torch.bool,device=device)
        self._is_substep_rew.fill_(True) # default to all substep rewards

        self._episodic_rewards_metrics = EpisodicRewards(reward_tensor=self._sub_rewards.get_torch_mirror(),
            reward_names=self._get_rewards_names(),
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        self._episodic_rewards_metrics.set_constant_data_scaling(scaling=self._get_reward_scaling())
+
+ def _get_reward_scaling(self):
+ # to be overridden by child (default to no scaling)
+ return 1
+
+ def _max_ep_length(self):
+ #.should be overriden by child
+ return self._env_opts["episode_timeout_ub"]
+
    def _init_custom_db_data(self):
        """Register the default EpisodicData collectors (contact refs, actions,
        observations, sub/step terminations and truncations). Children add
        their own entries via _add_custom_db_data."""

        self.custom_db_data = {}
        # by default always log this contact data
        rhc_latest_contact_ref = self._rhc_refs.contact_flags.get_torch_mirror()
        contact_names = self._rhc_refs.rob_refs.contact_names()
        stepping_data = EpisodicData("RhcRefsFlag", rhc_latest_contact_ref, contact_names,
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        self._add_custom_db_data(db_data=stepping_data)

        # log also action data
        actions = self._actions.get_torch_mirror()
        action_names = self._get_action_names()
        action_data = EpisodicData("Actions", actions, action_names,
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        self._add_custom_db_data(db_data=action_data)

        # and observations
        observations = self._obs.get_torch_mirror()
        observations_names = self._get_obs_names()
        obs_data = EpisodicData("Obs", observations, observations_names,
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        self._add_custom_db_data(db_data=obs_data)

        # log sub-term and sub-truncations data
        t_scaling=1 # 1 so that we log an interpretable data in terms of why the episode finished
        data_scaling = torch.full((self._n_envs, 1),
            fill_value=t_scaling,
            dtype=torch.int32,device="cpu")
        sub_term = self._sub_terminations.get_torch_mirror()
        term = self._terminations.get_torch_mirror()
        sub_termination_names = self.sub_term_names()

        sub_term_data = EpisodicData("SubTerminations", sub_term, sub_termination_names,
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        sub_term_data.set_constant_data_scaling(enable=True,scaling=data_scaling)
        self._add_custom_db_data(db_data=sub_term_data)
        term_data = EpisodicData("Terminations", term, ["terminations"],
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        term_data.set_constant_data_scaling(enable=True,scaling=data_scaling)
        self._add_custom_db_data(db_data=term_data)

        sub_trunc = self._sub_truncations.get_torch_mirror()
        trunc = self._truncations.get_torch_mirror()
        sub_truncations_names = self.sub_trunc_names()
        sub_trunc_data = EpisodicData("SubTruncations", sub_trunc, sub_truncations_names,
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        sub_trunc_data.set_constant_data_scaling(enable=True,scaling=data_scaling)
        self._add_custom_db_data(db_data=sub_trunc_data)
        trunc_data = EpisodicData("Truncations", trunc, ["truncations"],
            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
            store_transitions=self._full_db,
            max_ep_duration=self._max_ep_length())
        trunc_data.set_constant_data_scaling(enable=True,scaling=data_scaling)
        self._add_custom_db_data(db_data=trunc_data)
+
+ def _add_custom_db_data(self, db_data: EpisodicData):
+ self.custom_db_data[db_data.name()] = db_data
+
    def _init_terminations(self):
        """Allocate the shared-memory servers for step terminations and
        per-condition sub-terminations, plus capsize-detection buffers."""

        # Boolean array indicating whether each environment episode has terminated after
        # the current step. An episode termination could occur based on predefined conditions
        # in the environment, such as reaching a goal or exceeding a time limit.

        self._terminations = Terminations(namespace=self._namespace,
            n_envs=self._n_envs,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=False)
        self._terminations.run()

        sub_t_names = self.sub_term_names()
        self._sub_terminations = SubTerminations(namespace=self._namespace,
            n_envs=self._n_envs,
            n_term=len(sub_t_names),
            term_names=sub_t_names,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=False)
        self._sub_terminations.run()

        # capsize flags for robot and RHC internal state
        device = "cuda" if self._use_gpu else "cpu"
        self._is_capsized=torch.zeros((self._n_envs,1),
            dtype=torch.bool, device=device)
        self._is_rhc_capsized=torch.zeros((self._n_envs,1),
            dtype=torch.bool, device=device)
        self._max_pitch_angle=60.0*math.pi/180.0 # 60 deg capsize threshold, in radians
+
    def _init_truncations(self):
        """Allocate the shared-memory servers for step truncations and
        per-condition sub-truncations."""

        self._truncations = Truncations(namespace=self._namespace,
            n_envs=self._n_envs,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=False)

        self._truncations.run()

        sub_trc_names = self.sub_trunc_names()
        self._sub_truncations = SubTruncations(namespace=self._namespace,
            n_envs=self._n_envs,
            n_trunc=len(sub_trc_names),
            truc_names=sub_trc_names,
            is_server=True,
            verbose=self._verbose,
            vlevel=self._vlevel,
            safe=True,
            force_reconnection=True,
            with_gpu_mirror=self._use_gpu,
            fill_value=False)
        self._sub_truncations.run()
+
+ def _update_jnt_blacklist(self):
+ device = "cuda" if self._use_gpu else "cpu"
+ all_available_jnts=self.get_observed_joints()
+ blacklist=[]
+ for i in range(len(all_available_jnts)):
+ for pattern in self._jnt_q_blacklist_patterns:
+ if pattern in all_available_jnts[i]:
+ # stop at first pattern match
+ blacklist.append(i)
+ break
+ if not len(blacklist)==0:
+ self._jnt_q_blacklist_idxs=torch.tensor(blacklist, dtype=torch.int, device=device)
+
+ def _attach_to_shared_mem(self):
+
+ # runs shared mem clients for getting observation and setting RHC commands
+
+ # remote stepping data
+ self._remote_stepper = RemoteStepperSrvr(namespace=self._namespace,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ force_reconnection=True)
+ self._remote_stepper.run()
+ self._remote_resetter = RemoteResetSrvr(namespace=self._namespace,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ force_reconnection=True)
+ self._remote_resetter.run()
+ self._remote_reset_req = RemoteResetRequest(namespace=self._namespace,
+ is_server=False,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ safe=True)
+ self._remote_reset_req.run()
+
+ self._jnts_remapping=None
+ self._jnt_q_blacklist_idxs=None
+
+ self._robot_state = RobotState(namespace=self._namespace,
+ is_server=False,
+ safe=self._safe_shared_mem,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ with_gpu_mirror=self._use_gpu,
+ with_torch_view=True,
+ enable_height_sensor=self._env_opts["add_heightmap_obs"])
+
+ self._rhc_cmds = RhcCmds(namespace=self._namespace,
+ is_server=False,
+ safe=self._safe_shared_mem,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ with_gpu_mirror=self._use_gpu,
+ with_torch_view=True)
+
+ self._rhc_pred = RhcPred(namespace=self._namespace,
+ is_server=False,
+ safe=self._safe_shared_mem,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ with_gpu_mirror=self._use_gpu,
+ with_torch_view=True)
+
+ self._rhc_refs = RhcRefs(namespace=self._namespace,
+ is_server=False,
+ safe=self._safe_shared_mem,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ with_gpu_mirror=self._use_gpu,
+ with_torch_view=True)
+
+ self._rhc_status = RhcStatus(namespace=self._namespace,
+ is_server=False,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ with_gpu_mirror=self._use_gpu,
+ with_torch_view=True)
+
+ self._robot_state.run()
+ self._n_envs = self._robot_state.n_robots()
+ self._n_jnts = self._robot_state.n_jnts()
+ self._n_contacts = self._robot_state.n_contacts() # we assume same n contacts for all rhcs for now
+
+ self._rhc_cmds.run()
+ self._rhc_pred.run()
+ self._rhc_refs.run()
+ self._rhc_status.run()
+ # we read rhc info now and just this time, since it's assumed to be static
+ self._check_controllers_registered(retry=True) # blocking
+ # (we need controllers to be connected to read meaningful data)
+
+ self._rhc_status.rhc_static_info.synch_all(read=True,retry=True)
+ if self._use_gpu:
+ self._rhc_status.rhc_static_info.synch_mirror(from_gpu=False,non_blocking=False)
+ rhc_horizons=self._rhc_status.rhc_static_info.get("horizons",gpu=self._use_gpu)
+ rhc_nnodes=self._rhc_status.rhc_static_info.get("nnodes",gpu=self._use_gpu)
+ rhc_dts=self._rhc_status.rhc_static_info.get("dts",gpu=self._use_gpu)
+
+ # height sensor metadata (client side)
+ if self._env_opts["add_heightmap_obs"]:
+ self._height_grid_size = self._robot_state.height_sensor.grid_size
+ self._height_flat_dim = self._robot_state.height_sensor.n_cols
+ rhc_ncontacts=self._rhc_status.rhc_static_info.get("ncontacts",gpu=self._use_gpu)
+ robot_mass=self._rhc_status.rhc_static_info.get("robot_mass",gpu=self._use_gpu)
+ pred_node_idxs_rhc=self._rhc_status.rhc_static_info.get("pred_node_idx",gpu=self._use_gpu)
+
+ self._n_nodes_rhc=torch.round(rhc_nnodes) # we assume nodes are static during an env lifetime
+ self._rhc_horizons=rhc_horizons
+ self._rhc_dts=rhc_dts
+ self._n_contacts_rhc=rhc_ncontacts
+ self._rhc_robot_masses=robot_mass
+ if (self._rhc_robot_masses == 0).any():
+ zero_indices = torch.nonzero(self._rhc_robot_masses == 0, as_tuple=True)
+ print(zero_indices) # This will print the indices of zero elements
+ Journal.log(self.__class__.__name__,
+ "_attach_to_shared_mem",
+ "Found at least one robot with 0 mass from RHC static info!!",
+ LogType.EXCEP,
+ throw_when_excep=True)
+
+ self._rhc_robot_weight=robot_mass*9.81
+ self._pred_node_idxs_rhc=pred_node_idxs_rhc
+ self._pred_horizon_rhc=self._pred_node_idxs_rhc*self._rhc_dts
+
+ # run server for agent commands
+ self._agent_refs = AgentRefs(namespace=self._namespace,
+ is_server=True,
+ n_robots=self._n_envs,
+ n_jnts=self._robot_state.n_jnts(),
+ n_contacts=self._robot_state.n_contacts(),
+ contact_names=self._robot_state.contact_names(),
+ q_remapping=None,
+ with_gpu_mirror=self._use_gpu,
+ force_reconnection=True,
+ safe=False,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ fill_value=0)
+ self._agent_refs.run()
+ q_init_agent_refs=torch.full_like(self._robot_state.root_state.get(data_type="q", gpu=self._use_gpu),fill_value=0.0)
+ q_init_agent_refs[:, 0]=1.0
+ self._agent_refs.rob_refs.root_state.set(data_type="q", data=q_init_agent_refs,
+ gpu=self._use_gpu)
+ if self._use_gpu:
+ self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=True)
+ self._agent_refs.rob_refs.root_state.synch_all(read=False, retry=True)
+ # episode steps counters (for detecting episode truncations for
+ # time limits)
+ self._ep_timeout_counter = EpisodesCounter(namespace=self._namespace,
+ n_envs=self._n_envs,
+ n_steps_lb=self._env_opts["episode_timeout_lb"],
+ n_steps_ub=self._env_opts["episode_timeout_ub"],
+ randomize_offsets_at_startup=True, # this has to be randomized
+ is_server=True,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ safe=True,
+ force_reconnection=True,
+ with_gpu_mirror=self._use_gpu,
+ debug=self._debug) # handles step counter through episodes and through envs
+ self._ep_timeout_counter.run()
+ self._task_rand_counter = TaskRandCounter(namespace=self._namespace,
+ n_envs=self._n_envs,
+ n_steps_lb=self._env_opts["n_steps_task_rand_lb"],
+ n_steps_ub=self._env_opts["n_steps_task_rand_ub"],
+ randomize_offsets_at_startup=False, # not necessary since it will be synched with the timeout counter
+ is_server=True,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ safe=True,
+ force_reconnection=True,
+ with_gpu_mirror=self._use_gpu,
+ debug=self._debug) # handles step counter through episodes and through envs
+ self._task_rand_counter.run()
+ self._task_rand_counter.sync_counters(other_counter=self._ep_timeout_counter)
+ if self._env_opts["use_random_trunc"]:
+ self._rand_trunc_counter=RandomTruncCounter(namespace=self._namespace,
+ n_envs=self._n_envs,
+ n_steps_lb=self._env_opts["random_trunc_freq"]-self._env_opts["random_trunc_freq_delta"],
+ n_steps_ub=self._env_opts["random_trunc_freq"],
+ randomize_offsets_at_startup=True,
+ is_server=True,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ safe=True,
+ force_reconnection=True,
+ with_gpu_mirror=self._use_gpu,
+ debug=False)
+ self._rand_trunc_counter.run()
+ # self._rand_trunc_counter.sync_counters(other_counter=self._ep_timeout_counter)
+ if self._env_opts["use_random_safety_reset"]:
+ self._rand_safety_reset_counter=SafetyRandResetsCounter(namespace=self._namespace,
+ n_envs=self._n_envs,
+ n_steps_lb=self._env_opts["random_reset_freq"],
+ n_steps_ub=self._env_opts["random_reset_freq"],
+ randomize_offsets_at_startup=True,
+ is_server=True,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ safe=True,
+ force_reconnection=True,
+ with_gpu_mirror=self._use_gpu,
+ debug=False)
+ self._rand_safety_reset_counter.run()
+ # self._rand_safety_reset_counter.sync_counters(other_counter=self._ep_timeout_counter)
+
+ # timer to track abs time in each env (reset logic to be implemented in child)
+ self._substep_abs_counter = SubStepAbsCounter(namespace=self._namespace,
+ n_envs=self._n_envs,
+ n_steps_lb=1e9,
+ n_steps_ub=1e9,
+ randomize_offsets_at_startup=True, # randomizing startup offsets
+ is_server=True,
+ verbose=self._verbose,
+ vlevel=self._vlevel,
+ safe=True,
+ force_reconnection=True,
+ with_gpu_mirror=self._use_gpu,
+ debug=self._debug)
+ self._substep_abs_counter.run()
+
+ # debug data servers
+ traing_env_param_dict = {}
+ traing_env_param_dict["use_gpu"] = self._use_gpu
+ traing_env_param_dict["debug"] = self._is_debug
+ traing_env_param_dict["n_preinit_steps"] = self._env_opts["n_preinit_steps"]
+ traing_env_param_dict["n_preinit_steps"] = self._n_envs
+
+ self._training_sim_info = SharedTrainingEnvInfo(namespace=self._namespace,
+ is_server=True,
+ training_env_params_dict=traing_env_param_dict,
+ safe=False,
+ force_reconnection=True,
+ verbose=self._verbose,
+ vlevel=self._vlevel)
+ self._training_sim_info.run()
+
+ self._observed_jnt_names=self.set_observed_joints()
+ self._set_jnts_blacklist_pattern()
+ self._update_jnt_blacklist()
+
+ self._prev_root_p_substep=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu).clone()
+ self._prev_root_q_substep=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu).clone()
+ self._prev_root_p_step=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu).clone()
+ self._prev_root_q_step=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu).clone()
+
+ def _activate_rhc_controllers(self):
+ self._rhc_status.activation_state.get_torch_mirror()[:, :] = True
+ self._rhc_status.activation_state.synch_all(read=False, retry=True) # activates all controllers
+
def _synch_state(self,
        gpu: bool = True):
    """Pull the full robot/RHC state from shared memory on the CPU side and,
    if requested, copy every freshly-read buffer to its GPU mirror.

    Args:
        gpu: when True, mirror all CPU-side data to the GPU and synchronize
            the CUDA streams before returning.
    """
    # read from shared memory on CPU
    # robot state
    self._robot_state.root_state.synch_all(read = True, retry = True)
    self._robot_state.jnts_state.synch_all(read = True, retry = True)
    # rhc cmds
    self._rhc_cmds.root_state.synch_all(read = True, retry = True)
    self._rhc_cmds.jnts_state.synch_all(read = True, retry = True)
    self._rhc_cmds.contact_wrenches.synch_all(read = True, retry = True)
    # rhc pred (only the root state prediction is consumed; the rest stays disabled)
    self._rhc_pred.root_state.synch_all(read = True, retry = True)
    # self._rhc_pred.jnts_state.synch_all(read = True, retry = True)
    # self._rhc_pred.contact_wrenches.synch_all(read = True, retry = True)
    # refs for root link and contacts
    self._rhc_refs.rob_refs.root_state.synch_all(read = True, retry = True)
    self._rhc_refs.contact_flags.synch_all(read = True, retry = True)
    self._rhc_refs.flight_info.synch_all(read = True, retry = True)
    self._rhc_refs.flight_settings_req.synch_all(read = True, retry = True)
    self._rhc_refs.rob_refs.contact_pos.synch_all(read = True, retry = True)
    # rhc cost
    self._rhc_status.rhc_cost.synch_all(read = True, retry = True)
    # rhc constr. violations
    self._rhc_status.rhc_constr_viol.synch_all(read = True, retry = True)
    # failure states
    self._rhc_status.fails.synch_all(read = True, retry = True)
    # tot cost and cnstr viol on nodes + step variable
    self._rhc_status.rhc_nodes_cost.synch_all(read = True, retry = True)
    self._rhc_status.rhc_nodes_constr_viol.synch_all(read = True, retry = True)
    self._rhc_status.rhc_fcn.synch_all(read = True, retry = True)
    self._rhc_status.rhc_fail_idx.synch_all(read = True, retry = True)
    if self._env_opts["add_heightmap_obs"]:
        self._robot_state.height_sensor.synch_all(read=True, retry=True)
    if gpu:
        # copies data to "mirror" on GPU --> we can do it non-blocking since
        # in this direction it should be safe
        self._robot_state.root_state.synch_mirror(from_gpu=False,non_blocking=True) # copies shared data on GPU
        self._robot_state.jnts_state.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_cmds.root_state.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_cmds.jnts_state.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_cmds.contact_wrenches.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_pred.root_state.synch_mirror(from_gpu=False,non_blocking=True)
        # self._rhc_pred.jnts_state.synch_mirror(from_gpu=False,non_blocking=True)
        # self._rhc_pred.contact_wrenches.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_refs.contact_flags.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_refs.rob_refs.contact_pos.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_refs.flight_info.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_refs.flight_settings_req.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.rhc_cost.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.rhc_constr_viol.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.fails.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.rhc_nodes_cost.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.rhc_nodes_constr_viol.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.rhc_fcn.synch_mirror(from_gpu=False,non_blocking=True)
        self._rhc_status.rhc_fail_idx.synch_mirror(from_gpu=False,non_blocking=True)
        if self._env_opts["add_heightmap_obs"]:
            self._robot_state.height_sensor.synch_mirror(from_gpu=False, non_blocking=True)
        torch.cuda.synchronize() # ensuring that all the streams on the GPU are completed \
        # before the CPU continues execution
+
+ def _override_refs(self,
+ env_indxs: torch.Tensor = None):
+
+ # just used for setting agent refs externally (i.e. from shared mem on CPU)
+ self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem
+ if self._use_gpu:
+ # copies latest refs to GPU
+ self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False)
+
+ def _clamp_obs(self,
+ obs: torch.Tensor):
+ if self._is_debug:
+ self._check_finite(obs, "observations", False)
+ torch.nan_to_num(input=obs, out=obs, nan=self._obs_threshold_ub,
+ posinf=self._obs_threshold_ub,
+ neginf=self._obs_threshold_lb) # prevent nans
+
+ obs.clamp_(self._obs_threshold_lb, self._obs_threshold_ub)
+
+ def _clamp_rewards(self,
+ rewards: torch.Tensor):
+ if self._is_debug:
+ self._check_finite(rewards, "rewards", False)
+ torch.nan_to_num(input=rewards, out=rewards, nan=0.0,
+ posinf=None,
+ neginf=None) # prevent nans
+ rewards.clamp_(self._reward_thresh_lb, self._reward_thresh_ub)
+
def get_actions_lb(self):
    """Lower bounds of the action space."""
    return self._actions_lb

def get_actions_ub(self):
    """Upper bounds of the action space."""
    return self._actions_ub

def get_actions_scale(self):
    """Per-component scaling applied to normalized actions."""
    return self._actions_scale

def get_actions_offset(self):
    """Per-component offset applied to normalized actions."""
    return self._actions_offset

def get_obs_lb(self):
    """Lower bounds of the observation space."""
    return self._obs_lb

def get_obs_ub(self):
    """Upper bounds of the observation space."""
    return self._obs_ub

def get_obs_scale(self):
    """Half-range of the observation bounds (recomputed and cached on each call)."""
    self._obs_scale = 0.5 * (self._obs_ub - self._obs_lb)
    return self._obs_scale

def get_obs_offset(self):
    """Mid-point of the observation bounds (recomputed and cached on each call)."""
    self._obs_offset = 0.5 * (self._obs_ub + self._obs_lb)
    return self._obs_offset
+
def switch_random_reset(self, on: bool = True):
    """Enable (default) or disable the random safety-reset mechanism."""
    self._random_reset_active = on
+
def set_jnts_remapping(self,
        remapping: List = None):
    """Apply a joint-order remapping to all joint-indexed views and refresh
    the observed-joint bookkeeping, including obs names on shared memory.

    Args:
        remapping: list of indices into the robot's joint-name list defining
            the new joint ordering; None clears the stored remapping (in that
            case nothing else is updated).
    """
    self._jnts_remapping=remapping
    if self._jnts_remapping is not None:
        # propagate the remapping to every joint-indexed data view
        self._robot_state.set_jnts_remapping(jnts_remapping=self._jnts_remapping)
        self._rhc_cmds.set_jnts_remapping(jnts_remapping=self._jnts_remapping)
        self._rhc_pred.set_jnts_remapping(jnts_remapping=self._jnts_remapping)
        # we need to also update the list of observed joints to match
        available_joints=self._robot_state.jnt_names()
        # the remapping ordering
        self._observed_jnt_names=[]
        for i in range(len(available_joints)):
            self._observed_jnt_names.append(available_joints[self._jnts_remapping[i]])

        self._update_jnt_blacklist()

        updated_obs_names=self._get_obs_names() # get updated obs names (should use get_observed_joints
        # internally, so that jnt names are updated)

        # also update jnt obs names on shared memory (in-place slice assignment
        # so the shared views keep pointing at the same underlying lists)
        names_old=self._obs.get_obs_names()
        names_old_next=self._next_obs.get_obs_names()
        names_old[:]=updated_obs_names
        names_old_next[:]=updated_obs_names
        self._obs.update_names()
        self._next_obs.update_names()

        # also update the names used by the episodic debug data, if present
        if "Obs" in self.custom_db_data:
            db_obs_names=self.custom_db_data["Obs"].data_names()
            db_obs_names[:]=updated_obs_names
+
+ def _check_finite(self,
+ tensor: torch.Tensor,
+ name: str,
+ throw: bool = False):
+ if not torch.isfinite(tensor).all().item():
+ exception = f"Found nonfinite elements in {name} tensor!!"
+ non_finite_idxs=torch.nonzero(~torch.isfinite(tensor))
+ n_nonf_elems=non_finite_idxs.shape[0]
+
+ if name=="observations":
+ for i in range(n_nonf_elems):
+ db_msg=f"{self.obs_names()[non_finite_idxs[i,1]]} (env. {non_finite_idxs[i,0]}):" + \
+ f" {tensor[non_finite_idxs[i,0],non_finite_idxs[i,1]].item()}"
+ print(db_msg)
+ if name=="rewards":
+ for i in range(n_nonf_elems):
+ db_msg=f"{self.sub_rew_names()[non_finite_idxs[i,1]]} (env. {non_finite_idxs[i,0]}):" + \
+ f" {tensor[non_finite_idxs[i,0],non_finite_idxs[i,1]].item()}"
+ print(db_msg)
+ print(tensor)
+ Journal.log(self.__class__.__name__,
+ "_check_finite",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = throw)
+
def _check_controllers_registered(self,
        retry: bool = False):
    """Check that exactly ``self._n_envs`` RHC controllers are connected.

    Args:
        retry: if True, block until all controllers are connected (polling
            the shared counter every 2 s) and then return True; if False,
            perform a single check and return False on mismatch, logging an
            error without raising.

    Returns:
        bool: True when the expected number of controllers is connected.
    """
    if retry:
        self._rhc_status.controllers_counter.synch_all(read=True, retry=True)
        n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item()
        while not (n_connected_controllers == self._n_envs):
            warn = f"Expected {self._n_envs} controllers to be connected during training, " + \
                f"but got {n_connected_controllers}. Will wait for all to be connected..."
            Journal.log(self.__class__.__name__,
                "_check_controllers_registered",
                warn,
                LogType.WARN,
                throw_when_excep = False)
            nsecs = int(2 * 1000000000)  # 2 s polling period
            PerfSleep.thread_sleep(nsecs)
            self._rhc_status.controllers_counter.synch_all(read=True, retry=True)
            n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item()
        info = f"All {n_connected_controllers} controllers connected!"
        Journal.log(self.__class__.__name__,
            "_check_controllers_registered",
            info,
            LogType.INFO,
            throw_when_excep = False)
        return True
    else:
        self._rhc_status.controllers_counter.synch_all(read=True, retry=True)
        n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item()
        if not (n_connected_controllers == self._n_envs):
            exception = f"Expected {self._n_envs} controllers to be connected during training, " + \
                f"but got {n_connected_controllers}. Aborting..."
            # logged as EXCEP but deliberately not raised (throw_when_excep=False):
            # the caller decides what to do with the False return
            Journal.log(self.__class__.__name__,
                "_check_controllers_registered",
                exception,
                LogType.EXCEP,
                throw_when_excep = False)
            return False
        return True
+
+ def _check_truncations(self):
+
+ self._check_sub_truncations()
+ sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu)
+ truncations = self._truncations.get_torch_mirror(gpu=self._use_gpu)
+ truncations[:, :] = torch.any(sub_truncations,dim=1,keepdim=True)
+
+ def _check_terminations(self):
+
+ self._check_sub_terminations()
+ sub_terminations = self._sub_terminations.get_torch_mirror(gpu=self._use_gpu)
+ terminations = self._terminations.get_torch_mirror(gpu=self._use_gpu)
+ terminations[:, :] = torch.any(sub_terminations,dim=1,keepdim=True)
+
+ def _check_sub_truncations(self):
+ # default behaviour-> to be overriden by child
+ sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu)
+ sub_truncations[:, 0:1]=self._ep_timeout_counter.time_limits_reached()
+
def _check_sub_terminations(self):
    """Default sub-terminations — children may override. Columns:
    0) RHC controller failure, 1) measured-base capsize, 2) MPC-predicted capsize."""
    gpu = self._use_gpu
    subs = self._sub_terminations.get_torch_mirror(gpu=gpu)
    q_meas = self._robot_state.root_state.get(data_type="q", gpu=gpu)
    q_pred = self._rhc_cmds.root_state.get(data_type="q", gpu=gpu)

    # terminate when either the real robot or the MPC prediction is capsized
    check_capsize(quat=q_meas, max_angle=self._max_pitch_angle,
        output_t=self._is_capsized)
    check_capsize(quat=q_pred, max_angle=self._max_pitch_angle,
        output_t=self._is_rhc_capsized)

    subs[:, 0:1] = self._rhc_status.fails.get_torch_mirror(gpu=gpu)
    subs[:, 1:2] = self._is_capsized
    subs[:, 2:3] = self._is_rhc_capsized
+
def is_action_continuous(self):
    # True when the env exposes a continuous action space
    return self._is_continuous_actions

def is_action_discrete(self):
    # NOTE(review): `~` is only a logical NOT if _is_continuous_actions is a
    # torch/numpy boolean; on a plain Python bool it yields -2/-1 (both
    # truthy) — confirm the attribute's type
    return ~self._is_continuous_actions
+
@abstractmethod
def _pre_substep(self):
    """Hook: called before every env substep (child must implement)."""
    pass

@abstractmethod
def _custom_post_step(self, episode_finished):
    """Hook: called after every full env step; receives per-env episode-finished flags."""
    pass

@abstractmethod
def _custom_post_substp_post_rew(self):
    """Hook: called after each substep, after reward computation."""
    pass

@abstractmethod
def _custom_post_substp_pre_rew(self):
    """Hook: called after each substep, before reward computation."""
    pass

@abstractmethod
def _apply_actions_to_rhc(self):
    """Hook: map the agent's actions onto RHC controller references/commands."""
    pass

def _override_actions_with_demo(self):
    # no-op by default; children running demonstration envs may override
    pass

@abstractmethod
def _compute_substep_rewards(self):
    """Hook: compute per-substep reward terms."""
    pass

@abstractmethod
def _set_substep_rew(self):
    """Hook: write the computed substep reward terms to their buffers."""
    pass

@abstractmethod
def _set_substep_obs(self):
    """Hook: write the substep observations to their buffers."""
    pass

@abstractmethod
def _compute_step_rewards(self):
    """Hook: compute per-step (post action-repeat) reward terms."""
    pass

@abstractmethod
def _fill_substep_obs(self,
        obs: torch.Tensor):
    """Hook: fill *obs* with the substep observation values."""
    pass

@abstractmethod
def _fill_step_obs(self,
        obs: torch.Tensor):
    """Hook: fill *obs* with the step observation values."""
    pass

@abstractmethod
def _randomize_task_refs(self,
        env_indxs: torch.Tensor = None):
    """Hook: randomize task references, optionally only for *env_indxs*."""
    pass

def _custom_post_init(self):
    # no-op by default; children may add extra initialization here
    pass
+
def _get_avrg_substep_root_twist(self,
        out: torch.Tensor,
        base_loc: bool = True):
    """Write into *out* the average root twist over the last substep, obtained by
    finite-differencing measured root pose; updates the prev-pose buffers.

    Args:
        out: (n_envs x 6) tensor receiving [v, omega].
        base_loc: if True the twist is expressed in the base frame, otherwise
            in the world frame.
    """
    # to be called at each substep
    robot_p_meas = self._robot_state.root_state.get(data_type="p", gpu=self._use_gpu)
    robot_q_meas = self._robot_state.root_state.get(data_type="q", gpu=self._use_gpu)

    # finite-difference average linear/angular velocity over the substep (world frame)
    root_v_avrg_w = (robot_p_meas - self._prev_root_p_substep) / self._substep_dt
    root_omega_avrg_w = quaternion_to_angular_velocity(
        q_diff=quaternion_difference(self._prev_root_q_substep, robot_q_meas),
        dt=self._substep_dt)
    twist_w = torch.cat((root_v_avrg_w,
        root_omega_avrg_w),
        dim=1)
    if not base_loc:
        self._prev_root_p_substep[:, :] = robot_p_meas
        self._prev_root_q_substep[:, :] = robot_q_meas
        out[:, :] = twist_w
        # BUG FIX: return here — previously execution fell through and
        # world2base_frame overwrote the world-frame result, making
        # base_loc=False behave exactly like base_loc=True
        return
    # rotate using the current (end-of-substep) orientation for consistency with other signals
    world2base_frame(t_w=twist_w, q_b=robot_q_meas, t_out=out)
    self._prev_root_p_substep[:, :] = robot_p_meas
    self._prev_root_q_substep[:, :] = robot_q_meas
+
def _get_avrg_step_root_twist(self,
        out: torch.Tensor,
        base_loc: bool = True):
    """Write into *out* the average root twist over one full env step
    (action_repeat substeps), finite-differenced against the prev-step pose.

    Args:
        out: (n_envs x 6) tensor receiving [v, omega].
        base_loc: if True the twist is expressed in the base frame, otherwise
            in the world frame.

    Note: unlike the substep variant, the prev-step pose buffers are not
    updated here — presumably done by the step loop; confirm against callers.
    """
    # to be called after substeps of actions repeats
    robot_p_meas = self._robot_state.root_state.get(data_type="p", gpu=self._use_gpu)
    robot_q_meas = self._robot_state.root_state.get(data_type="q", gpu=self._use_gpu)

    dt = self._substep_dt * self._action_repeat # accounting for frame skipping
    root_v_avrg_w = (robot_p_meas - self._prev_root_p_step) / dt
    root_omega_avrg_w = quaternion_to_angular_velocity(
        q_diff=quaternion_difference(self._prev_root_q_step, robot_q_meas),
        dt=dt)
    twist_w = torch.cat((root_v_avrg_w,
        root_omega_avrg_w),
        dim=1)
    if not base_loc:
        out[:, :] = twist_w
        # BUG FIX: return here — previously execution fell through and
        # world2base_frame overwrote the world-frame result
        return
    # rotate using the current (end-of-step) orientation for consistency with other signals
    world2base_frame(t_w=twist_w, q_b=robot_q_meas, t_out=out)
+
def _get_avrg_rhc_root_twist(self,
        out: torch.Tensor,
        base_loc: bool = True):
    """Write into *out* the average root twist predicted by the MPC over its
    prediction horizon (difference between predicted and current rhc root pose).

    Args:
        out: (n_envs x 6) tensor receiving [v, omega].
        base_loc: if True the twist is expressed in the rhc base frame (first
            node as reference), otherwise in the world frame.
    """
    rhc_root_p = self._rhc_cmds.root_state.get(data_type="p", gpu=self._use_gpu)
    rhc_root_q = self._rhc_cmds.root_state.get(data_type="q", gpu=self._use_gpu)
    rhc_root_p_pred = self._rhc_pred.root_state.get(data_type="p", gpu=self._use_gpu)
    rhc_root_q_pred = self._rhc_pred.root_state.get(data_type="q", gpu=self._use_gpu)

    rhc_root_v_avrg_rhc_w = (rhc_root_p_pred - rhc_root_p) / self._pred_horizon_rhc
    rhc_root_omega_avrg_rhc_w = quaternion_to_angular_velocity(
        q_diff=quaternion_difference(rhc_root_q, rhc_root_q_pred),
        dt=self._pred_horizon_rhc)

    rhc_pred_avrg_twist_rhc_w = torch.cat((rhc_root_v_avrg_rhc_w,
        rhc_root_omega_avrg_rhc_w),
        dim=1)
    if not base_loc:
        out[:, :] = rhc_pred_avrg_twist_rhc_w
        # BUG FIX: return here — previously execution fell through and
        # world2base_frame overwrote the world-frame result
        return
    # to rhc base frame (using first node as reference)
    world2base_frame(t_w=rhc_pred_avrg_twist_rhc_w, q_b=rhc_root_q, t_out=out)
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/twist_tracking_env.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/twist_tracking_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..adeb8da8fdb11895a2c306052abc80f3b9126c39
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/twist_tracking_env.py
@@ -0,0 +1,1395 @@
+from typing import Dict
+
+import os
+
+import torch
+
+from EigenIPC.PyEigenIPC import VLevel, LogType, Journal
+
+from mpc_hive.utilities.shared_data.rhc_data import RobotState, RhcStatus, RhcRefs
+from mpc_hive.utilities.math_utils_torch import world2base_frame, base2world_frame, w2hor_frame
+
+from aug_mpc.utils.sys_utils import PathsGetter
+from aug_mpc.utils.timers import PeriodicTimer
+from aug_mpc.utils.episodic_data import EpisodicData
+from aug_mpc.utils.signal_smoother import ExponentialSignalSmoother
+from aug_mpc.utils.math_utils import check_capsize
+from aug_mpc.training_envs.training_env_base import AugMPCTrainingEnvBase
+
+class TwistTrackingEnv(AugMPCTrainingEnvBase):
+ """Base AugMPC training env that tracks commanded twists by pushing velocity and contact targets into the RHC controller while handling locomotion rewards/resets."""
+
def __init__(self,
        namespace: str,
        actions_dim: int = 10,
        verbose: bool = False,
        vlevel: VLevel = VLevel.V1,
        use_gpu: bool = True,
        dtype: torch.dtype = torch.float32,
        debug: bool = True,
        override_agent_refs: bool = False,
        timeout_ms: int = 60000,
        env_opts: Dict = None):
    """Configure env options (with defaults), probe shared-memory metadata
    via temporary clients, compute the observation dimension and initialize
    the AugMPC training-env base.

    Args:
        namespace: shared-memory namespace shared with the RHC controllers.
        actions_dim: dimensionality of the agent's action space.
        verbose/vlevel/debug: logging controls forwarded to the base class.
        use_gpu: whether GPU mirrors of shared data are used.
        dtype: torch dtype for internal buffers.
        override_agent_refs: let external processes override agent refs.
        timeout_ms: shared-memory connection timeout.
        env_opts: option overrides; missing keys are filled with defaults.
    """
    # BUG FIX: default used to be a mutable dict ({}); env_opts is mutated
    # below, so the default instance would have been shared across all
    # constructions — use a None sentinel and build a fresh dict instead
    if env_opts is None:
        env_opts = {}

    env_name = "TwistTrackingEnv"

    self._add_env_opt(env_opts, "srew_drescaling",
        False)

    self._add_env_opt(env_opts, "step_thresh", 0.) # when step action < thresh, a step is requested

    # counters settings
    self._add_env_opt(env_opts, "single_task_ref_per_episode",
        True # if True, the task ref is constant over the episode (ie
        # episodes are truncated when task is changed)
        )
    self._add_env_opt(env_opts, "add_angvel_ref_rand", default=True) # randomize also angular vel ref (just z component)

    self._add_env_opt(env_opts, "episode_timeout_lb",
        1024)
    self._add_env_opt(env_opts, "episode_timeout_ub",
        1024)
    self._add_env_opt(env_opts, "n_steps_task_rand_lb",
        512)
    self._add_env_opt(env_opts, "n_steps_task_rand_ub",
        512)
    self._add_env_opt(env_opts, "use_random_safety_reset",
        True)
    self._add_env_opt(env_opts, "random_reset_freq",
        10) # a random reset once every n-episodes (per env)
    self._add_env_opt(env_opts, "use_random_trunc",
        True)
    self._add_env_opt(env_opts, "random_trunc_freq",
        env_opts["episode_timeout_ub"]*5) # to remove temporal correlations between envs
    self._add_env_opt(env_opts, "random_trunc_freq_delta",
        env_opts["episode_timeout_ub"]*2) # to randomize trunc frequency between envs

    if not env_opts["single_task_ref_per_episode"]:
        # rescale the reset frequency by the number of task randomizations per episode
        env_opts["random_reset_freq"]=int(env_opts["random_reset_freq"]/\
            (env_opts["episode_timeout_lb"]/float(env_opts["n_steps_task_rand_lb"])))

    self._add_env_opt(env_opts, "action_repeat", 1) # frame skipping (different agent action every action_repeat
    # env substeps)

    self._add_env_opt(env_opts, "n_preinit_steps", 1) # n steps of the controllers to properly initialize everything

    self._add_env_opt(env_opts, "vec_ep_freq_metrics_db", 1) # n eps over which debug metrics are reported
    self._add_env_opt(env_opts, "demo_envs_perc", 0.0)
    self._add_env_opt(env_opts, "max_cmd_v", 1.0) # maximum cmd v for lin v actions (single component)
    self._add_env_opt(env_opts, "max_cmd_omega", 0.5) # maximum cmd v for omega v actions (single component)

    # action smoothing
    self._add_env_opt(env_opts, "use_action_smoothing", False)
    self._add_env_opt(env_opts, "smoothing_horizon_c", 0.01)
    self._add_env_opt(env_opts, "smoothing_horizon_d", 0.03)

    # whether to smooth vel error signal
    self._add_env_opt(env_opts, "use_track_reward_smoother", False)
    self._add_env_opt(env_opts, "smoothing_horizon_vel_err", 0.08)
    self._add_env_opt(env_opts, "track_rew_smoother", None)

    # rewards
    self._reward_map={}
    self._reward_lb_map={}

    self._add_env_opt(env_opts, "reward_lb_default", -0.5)
    self._add_env_opt(env_opts, "reward_ub_default", 1e6)

    self._add_env_opt(env_opts, "task_error_reward_lb", -0.5)
    self._add_env_opt(env_opts, "CoT_reward_lb", -0.5)
    self._add_env_opt(env_opts, "power_reward_lb", -0.5)
    self._add_env_opt(env_opts, "action_rate_reward_lb", -0.5)
    self._add_env_opt(env_opts, "jnt_vel_reward_lb", -0.5)
    self._add_env_opt(env_opts, "rhc_avrg_vel_reward_lb", -0.5)

    self._add_env_opt(env_opts, "add_power_reward", False)
    self._add_env_opt(env_opts, "add_CoT_reward", True)
    self._add_env_opt(env_opts, "use_CoT_wrt_ref", False)
    self._add_env_opt(env_opts, "add_action_rate_reward", True)
    self._add_env_opt(env_opts, "add_jnt_v_reward", False)

    self._add_env_opt(env_opts, "use_rhc_avrg_vel_tracking", False)

    # task tracking
    self._add_env_opt(env_opts, "use_relative_error", default=False) # use relative vel error (wrt current task norm)
    self._add_env_opt(env_opts, "directional_tracking", default=True) # whether to compute tracking error based on reference direction
    # if env_opts["add_angvel_ref_rand"]:
    #     env_opts["directional_tracking"]=False

    self._add_env_opt(env_opts, "use_L1_norm", default=True) # whether to use L1 norm for the error (otherwise L2)
    self._add_env_opt(env_opts, "use_exp_track_rew", default=True) # whether to use a reward of the form A*e^(B*x),
    # otherwise A*(1-B*x)

    self._add_env_opt(env_opts, "use_fail_idx_weight", default=False)
    self._add_env_opt(env_opts, "task_track_offset_exp", default=1.0)
    self._add_env_opt(env_opts, "task_track_scale_exp", default=5.0)
    self._add_env_opt(env_opts, "task_track_offset", default=1.0)
    self._add_env_opt(env_opts, "task_track_scale", default=1.5)
    self._add_env_opt(env_opts, "task_track_front_weight", default=1.0)
    self._add_env_opt(env_opts, "task_track_lat_weight", default=0.05)
    self._add_env_opt(env_opts, "task_track_vert_weight", default=0.05)
    self._add_env_opt(env_opts, "task_track_omega_z_weight", default=1.0)
    self._add_env_opt(env_opts, "task_track_omega_x_weight", default=0.05)
    self._add_env_opt(env_opts, "task_track_omega_y_weight", default=0.05)
    # if env_opts["add_angvel_ref_rand"]:
    #     env_opts["task_track_omega_x_weight"]=0.0
    #     env_opts["task_track_omega_y_weight"]=0.0
    #     env_opts["task_track_omega_z_weight"]=1.0

    # task pred tracking
    self._add_env_opt(env_opts, "task_pred_track_offset", default=1.0)
    self._add_env_opt(env_opts, "task_pred_track_scale", default=3.0)

    # energy penalties
    self._add_env_opt(env_opts, "CoT_offset", default=0.1)
    self._add_env_opt(env_opts, "CoT_scale", default=0.1)
    self._add_env_opt(env_opts, "power_offset", default=0.1)
    self._add_env_opt(env_opts, "power_scale", default=8e-4)

    # action rate penalty
    self._add_env_opt(env_opts, "action_rate_offset", default=0.1)
    self._add_env_opt(env_opts, "action_rate_scale", default=2.0)
    self._add_env_opt(env_opts, "action_rate_rew_d_weight", default=0.1)
    self._add_env_opt(env_opts, "action_rate_rew_c_weight", default=1.0)

    # jnt vel penalty
    self._add_env_opt(env_opts, "jnt_vel_offset", default=0.1)
    self._add_env_opt(env_opts, "jnt_vel_scale", default=2.0)

    # terminations
    self._add_env_opt(env_opts, "add_term_mpc_capsize", default=False) # add termination based on mpc capsizing prediction

    # observations
    self._add_env_opt(env_opts, "rhc_fail_idx_scale", default=1.0)
    self._add_env_opt(env_opts, "use_action_history", default=True) # whether to add information on past actions to obs
    self._add_env_opt(env_opts, "add_prev_actions_stats_to_obs", default=False) # add actions std, mean + last action over a horizon to obs (if self._use_action_history True)
    self._add_env_opt(env_opts, "actions_history_size", default=3)

    self._add_env_opt(env_opts, "add_mpc_contact_f_to_obs", default=True) # add estimate vertical contact f to obs
    self._add_env_opt(env_opts, "add_fail_idx_to_obs", default=True) # we need to observe mpc failure idx to correlate it with terminations

    self._add_env_opt(env_opts, "use_linvel_from_rhc", default=True) # no lin vel meas available, we use est. from mpc
    self._add_env_opt(env_opts, "add_flight_info", default=True) # add feedback info on pos, remaining duration, length,
    # apex and landing height of flight phases
    self._add_env_opt(env_opts, "add_flight_settings", default=False) # add feedback info on current flight requests for mpc

    self._add_env_opt(env_opts, "use_prob_based_stepping", default=False) # interpret actions as stepping prob (never worked)

    self._add_env_opt(env_opts, "add_rhc_cmds_to_obs", default=True) # add the rhc cmds which are being applied now to the robot

    if "add_periodic_clock_to_obs" not in env_opts:
        # add a sin/cos clock to obs (useful if task is explicitly
        # time-dependent)
        self._add_env_opt(env_opts, "add_periodic_clock_to_obs", default=False)

    self._add_env_opt(env_opts, "add_heightmap_obs", default=False)

    # temporarily creating robot state client to get some data
    robot_state_tmp = RobotState(namespace=namespace,
        is_server=False,
        safe=False,
        verbose=verbose,
        vlevel=vlevel,
        with_gpu_mirror=False,
        with_torch_view=False,
        enable_height_sensor=env_opts["add_heightmap_obs"])
    robot_state_tmp.run()
    rhc_status_tmp = RhcStatus(is_server=False,
        namespace=namespace,
        verbose=verbose,
        vlevel=vlevel,
        with_torch_view=False,
        with_gpu_mirror=False)
    rhc_status_tmp.run()
    rhc_refs_tmp = RhcRefs(namespace=namespace,
        is_server=False,
        safe=False,
        verbose=verbose,
        vlevel=vlevel,
        with_gpu_mirror=False,
        with_torch_view=False)
    rhc_refs_tmp.run()
    n_jnts = robot_state_tmp.n_jnts()
    self._contact_names = robot_state_tmp.contact_names()
    self._n_contacts = len(self._contact_names)
    self._flight_info_size=rhc_refs_tmp.flight_info.n_cols
    self._flight_setting_size=rhc_refs_tmp.flight_settings_req.n_cols
    # height sensor metadata (if present)
    self._height_grid_size = None
    self._height_flat_dim = 0
    if env_opts["add_heightmap_obs"]:
        self._height_grid_size = robot_state_tmp.height_sensor.grid_size
        self._height_flat_dim = robot_state_tmp.height_sensor.n_cols

    robot_state_tmp.close()
    rhc_status_tmp.close()
    rhc_refs_tmp.close()

    # defining obs dimension
    obs_dim=3 # normalized gravity vector in base frame
    obs_dim+=6 # meas twist in base frame
    obs_dim+=2*n_jnts # joint pos + vel
    if env_opts["add_mpc_contact_f_to_obs"]:
        obs_dim+=3*self._n_contacts
    obs_dim+=6 # twist reference in base frame
    if env_opts["add_fail_idx_to_obs"]:
        obs_dim+=1 # rhc controller failure index
    if env_opts["add_term_mpc_capsize"]:
        obs_dim+=3 # gravity vec from mpc
    if env_opts["use_rhc_avrg_vel_tracking"]:
        obs_dim+=6 # mpc avrg twist
    if env_opts["add_flight_info"]: # contact pos, remaining duration, length, apex, landing height, landing dx, dy
        obs_dim+=self._flight_info_size
    if env_opts["add_flight_settings"]:
        obs_dim+=self._flight_setting_size
    if env_opts["add_rhc_cmds_to_obs"]:
        obs_dim+=3*n_jnts
    if env_opts["use_action_history"]:
        if env_opts["add_prev_actions_stats_to_obs"]:
            obs_dim+=3*actions_dim # previous agent actions statistics (mean, std + last action)
        else: # full action history
            obs_dim+=env_opts["actions_history_size"]*actions_dim
    if env_opts["use_action_smoothing"]:
        obs_dim+=actions_dim # it's better to also add the smoothed actions as obs
    if env_opts["add_periodic_clock_to_obs"]:
        obs_dim+=2
    if env_opts["add_heightmap_obs"]:
        obs_dim+=self._height_flat_dim
    # Agent task reference
    self._add_env_opt(env_opts, "use_pof0", default=True) # with some prob, references will be null
    self._add_env_opt(env_opts, "pof0_linvel", default=0.3) # [0, 1] prob of both linvel and omega refs being null(from bernoulli distr)
    self._add_env_opt(env_opts, "pof0_omega", default=0.3) # [0, 1] prob of both linvel and omega refs being null(from bernoulli distr)
    self._add_env_opt(env_opts, "max_linvel_ref", default=0.3) # m/s
    self._add_env_opt(env_opts, "max_angvel_ref", default=0.0) # rad/s
    if env_opts["add_angvel_ref_rand"]:
        env_opts["max_angvel_ref"]=0.4

    # ready to init base class
    self._this_child_path = os.path.abspath(__file__)
    AugMPCTrainingEnvBase.__init__(self,
        namespace=namespace,
        obs_dim=obs_dim,
        actions_dim=actions_dim,
        env_name=env_name,
        verbose=verbose,
        vlevel=vlevel,
        use_gpu=use_gpu,
        dtype=dtype,
        debug=debug,
        override_agent_refs=override_agent_refs,
        timeout_ms=timeout_ms,
        env_opts=env_opts)
+
    def _custom_post_init(self):
        """One-shot environment setup run after base-class construction.

        Allocates reusable tensors (base-frame unit vectors, twist-reference
        bounds), registers episodic debug-data streams, configures reward
        weights/clipping, action and observation bounds, and optional helpers
        (tracking-reward smoother, periodic clock). Assumes the base class has
        already populated `self._env_opts`, `self._agent_refs`, `self._rhc_cmds`
        and the obs/action bound tensors.
        """

        device = "cuda" if self._use_gpu else "cpu"

        self._update_jnt_blacklist() # update blacklist for joints

        # constant base-frame unit vectors (reuse to avoid per-call allocations)
        self._base_x_dir = torch.zeros((self._n_envs, 3), dtype=self._dtype, device=device)
        self._base_x_dir[:, 0] = 1.0
        self._base_y_dir = torch.zeros((self._n_envs, 3), dtype=self._dtype, device=device)
        self._base_y_dir[:, 1] = 1.0

        self._twist_ref_lb = torch.full((1, 6), dtype=self._dtype, device=device,
                            fill_value=-1.5)
        self._twist_ref_ub = torch.full((1, 6), dtype=self._dtype, device=device,
                            fill_value=1.5)

        # task reference parameters (world frame)
        # lin vel (planar only: vertical ref is pinned to zero)
        self._twist_ref_lb[0, 0] = -self._env_opts["max_linvel_ref"]
        self._twist_ref_lb[0, 1] = -self._env_opts["max_linvel_ref"]
        self._twist_ref_lb[0, 2] = 0.0
        self._twist_ref_ub[0, 0] = self._env_opts["max_linvel_ref"]
        self._twist_ref_ub[0, 1] = self._env_opts["max_linvel_ref"]
        self._twist_ref_ub[0, 2] = 0.0
        # angular vel (only yaw rate is randomized; roll/pitch refs stay zero)
        self._twist_ref_lb[0, 3] = 0.0
        self._twist_ref_lb[0, 4] = 0.0
        self._twist_ref_lb[0, 5] = -self._env_opts["max_angvel_ref"]
        self._twist_ref_ub[0, 3] = 0.0
        self._twist_ref_ub[0, 4] = 0.0
        self._twist_ref_ub[0, 5] = self._env_opts["max_angvel_ref"]

        # affine map from normalized [-1, 1] samples to [lb, ub]
        self._twist_ref_offset = (self._twist_ref_ub + self._twist_ref_lb)/2.0
        self._twist_ref_scale = (self._twist_ref_ub - self._twist_ref_lb)/2.0

        # adding some custom db info
        agent_twist_ref = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=False)
        agent_twist_ref_data = EpisodicData("AgentTwistRefs", agent_twist_ref,
                            ["v_x", "v_y", "v_z", "omega_x", "omega_y", "omega_z"],
                            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
                            store_transitions=self._full_db,
                            max_ep_duration=self._max_ep_length())
        rhc_fail_idx = EpisodicData("RhcFailIdx", self._rhc_fail_idx(gpu=False), ["rhc_fail_idx"],
                            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
                            store_transitions=self._full_db,
                            max_ep_duration=self._max_ep_length())

        # one (x, y, z) base-local force column triple per contact
        f_names=[]
        for contact in self._contact_names:
            f_names.append(f"fc_{contact}_x_base_loc")
            f_names.append(f"fc_{contact}_y_base_loc")
            f_names.append(f"fc_{contact}_z_base_loc")
        rhc_contact_f = EpisodicData("RhcContactForces",
                            self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=False),
                            f_names,
                            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
                            store_transitions=self._full_db,
                            max_ep_duration=self._max_ep_length())

        # CoT + drained mech power buffer, filled by _mech_pow/_cost_of_transport
        self._pow_db_data=torch.full(size=(self._n_envs,2),
                            dtype=self._dtype, device="cpu",
                            fill_value=-1.0)
        power_db = EpisodicData("Power",
                            self._pow_db_data,
                            ["CoT", "W"],
                            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
                            store_transitions=self._full_db,
                            max_ep_duration=self._max_ep_length())

        self._track_error_db=torch.full_like(agent_twist_ref, fill_value=0.0)
        # NOTE(review): agent_twist_ref is passed here while updates use
        # self._track_error_db (see _get_custom_db_data); presumably EpisodicData
        # only uses this arg for shape/dtype — confirm, else pass
        # self._track_error_db for consistency with the Power stream above
        task_err_db = EpisodicData("TrackingError",
                            agent_twist_ref,
                            ["e_vx", "e_vy", "e_vz", "e_omegax", "e_omegay", "e_omegaz"],
                            ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"],
                            store_transitions=self._full_db,
                            max_ep_duration=self._max_ep_length())

        self._add_custom_db_data(db_data=agent_twist_ref_data)
        self._add_custom_db_data(db_data=rhc_fail_idx)
        self._add_custom_db_data(db_data=rhc_contact_f)
        self._add_custom_db_data(db_data=power_db)
        self._add_custom_db_data(db_data=task_err_db)

        # rewards: weights for the measured-twist tracking error term
        self._task_err_weights = torch.full((1, 6), dtype=self._dtype, device=device,
                            fill_value=0.0)
        if self._env_opts["directional_tracking"]:
            self._task_err_weights[0, 0] = self._env_opts["task_track_front_weight"] # frontal
            self._task_err_weights[0, 1] = self._env_opts["task_track_lat_weight"] # lateral
            self._task_err_weights[0, 2] = self._env_opts["task_track_vert_weight"] # vertical
            self._task_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"]
            self._task_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"]
            self._task_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"]
        else:
            self._task_err_weights[0, 0] = self._env_opts["task_track_front_weight"]
            self._task_err_weights[0, 1] = self._env_opts["task_track_front_weight"]
            self._task_err_weights[0, 2] = 0.1*self._env_opts["task_track_front_weight"]
            self._task_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"]
            self._task_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"]
            self._task_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"]

        # weights for the MPC-predicted-twist tracking error term
        # (currently identical to the measured-twist weights above)
        self._task_pred_err_weights = torch.full((1, 6), dtype=self._dtype, device=device,
                            fill_value=0.0)
        if self._env_opts["directional_tracking"]:
            self._task_pred_err_weights[0, 0] = self._env_opts["task_track_front_weight"]
            self._task_pred_err_weights[0, 1] = self._env_opts["task_track_lat_weight"]
            self._task_pred_err_weights[0, 2] = self._env_opts["task_track_vert_weight"]
            self._task_pred_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"]
            self._task_pred_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"]
            self._task_pred_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"]
        else:
            self._task_pred_err_weights[0, 0] = self._env_opts["task_track_front_weight"]
            self._task_pred_err_weights[0, 1] = self._env_opts["task_track_front_weight"]
            self._task_pred_err_weights[0, 2] = 0.1*self._env_opts["task_track_front_weight"]
            self._task_pred_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"]
            self._task_pred_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"]
            self._task_pred_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"]

        # per-joint weights for the mech-power penalty (uniform for now)
        self._power_penalty_weights = torch.full((1, self._n_jnts), dtype=self._dtype, device=device,
                            fill_value=1.0)
        self._power_penalty_weights_sum = torch.sum(self._power_penalty_weights).item()
        subr_names=self._get_rewards_names() # initializes

        # reward clipping
        self._reward_thresh_lb[:, :] = self._env_opts["reward_lb_default"]
        self._reward_thresh_ub[:, :]= self._env_opts["reward_ub_default"]

        # per-reward lower-bound overrides from env opts
        for reward_name, env_opt_key in self._reward_lb_map.items():
            if reward_name in self._reward_map:
                self._reward_thresh_lb[:, self._reward_map[reward_name]] = self._env_opts[env_opt_key]

        # obs bounds
        self._obs_threshold_lb = -1e3 # used for clipping observations
        self._obs_threshold_ub = 1e3

        # actions
        if not self._env_opts["use_prob_based_stepping"]:
            # NOTE(review): hard-coded slice [6:10] for contact-flag actions,
            # while the bounds below use self._actions_map["contact_flag_start"]
            # — verify these stay in sync for this robot's action layout
            self._is_continuous_actions[6:10]=False

        v_cmd_max = self._env_opts["max_cmd_v"]
        omega_cmd_max = self._env_opts["max_cmd_omega"]
        self._actions_lb[:, 0:3] = -v_cmd_max
        self._actions_ub[:, 0:3] = v_cmd_max
        self._actions_lb[:, 3:6] = -omega_cmd_max # twist cmds
        self._actions_ub[:, 3:6] = omega_cmd_max
        if "contact_flag_start" in self._actions_map:
            idx=self._actions_map["contact_flag_start"]
            if self._env_opts["use_prob_based_stepping"]:
                self._actions_lb[:, idx:idx+self._n_contacts] = 0.0 # contact flags
                self._actions_ub[:, idx:idx+self._n_contacts] = 1.0
            else:
                self._actions_lb[:, idx:idx+self._n_contacts] = -1.0
                self._actions_ub[:, idx:idx+self._n_contacts] = 1.0

        self.default_action[:, :] = (self._actions_ub+self._actions_lb)/2.0
        # self.default_action[:, ~self._is_continuous_actions] = 1.0
        self.safe_action[:, :] = self.default_action
        if "contact_flag_start" in self._actions_map: # safe actions for contacts is 1 (keep contact)
            idx=self._actions_map["contact_flag_start"]
            self.safe_action[:, idx:idx+self._n_contacts] = 1.0

        # assign obs bounds (useful if not using automatic obs normalization)
        # each pattern is matched as a substring against obs names; first match wins
        obs_names=self._get_obs_names()
        obs_patterns=["gn",
            "linvel",
            "omega",
            "q_jnt",
            "v_jnt",
            "fc",
            "rhc_fail",
            "rhc_cmd_q",
            "rhc_cmd_v",
            "rhc_cmd_eff",
            "flight_pos"
            ]
        obs_ubs=[1.0,
            5*v_cmd_max,
            5*omega_cmd_max,
            2*torch.pi,
            30.0,
            2.0,
            1.0,
            2*torch.pi,
            30.0,
            200.0,
            self._n_nodes_rhc.mean().item()]
        obs_lbs=[-1.0,
            -5*v_cmd_max,
            -5*omega_cmd_max,
            -2*torch.pi,
            -30.0,
            -2.0,
            0.0,
            -2*torch.pi,
            -30.0,
            -200.0,
            0.0]
        obs_bounds = {name: (lb, ub) for name, lb, ub in zip(obs_patterns, obs_lbs, obs_ubs)}

        for i in range(len(obs_names)):
            obs_name=obs_names[i]
            for pattern in obs_patterns:
                if pattern in obs_name:
                    lb=obs_bounds[pattern][0]
                    ub=obs_bounds[pattern][1]
                    self._obs_lb[:, i]=lb
                    self._obs_ub[:, i]=ub
                    break

        # handle action memory buffer in obs
        if self._env_opts["use_action_history"]: # just history stats
            if self._env_opts["add_prev_actions_stats_to_obs"]:
                # NOTE(review): this i is a dead store — the generator
                # expressions below bind their own i
                i=0
                prev_actions_idx = next((i for i, s in enumerate(obs_names) if "_prev_act" in s), None)
                prev_actions_mean_idx=next((i for i, s in enumerate(obs_names) if "_avrg_act" in s), None)
                prev_actions_std_idx=next((i for i, s in enumerate(obs_names) if "_std_act" in s), None)

                # assume actions are always normalized in [-1, 1] by agent
                if prev_actions_idx is not None:
                    self._obs_lb[:, prev_actions_idx:prev_actions_idx+self.actions_dim()]=-1.0
                    self._obs_ub[:, prev_actions_idx:prev_actions_idx+self.actions_dim()]=1.0
                if prev_actions_mean_idx is not None:
                    self._obs_lb[:, prev_actions_mean_idx:prev_actions_mean_idx+self.actions_dim()]=-1.0
                    self._obs_ub[:, prev_actions_mean_idx:prev_actions_mean_idx+self.actions_dim()]=1.0
                if prev_actions_std_idx is not None:
                    self._obs_lb[:, prev_actions_std_idx:prev_actions_std_idx+self.actions_dim()]=0
                    self._obs_ub[:, prev_actions_std_idx:prev_actions_std_idx+self.actions_dim()]=1.0

            else: # full history
                # NOTE(review): dead store, same as above
                i=0
                first_action_mem_buffer_idx = next((i for i, s in enumerate(obs_names) if "_m1_act" in s), None)
                if first_action_mem_buffer_idx is not None:
                    action_idx_start_idx_counter=first_action_mem_buffer_idx
                    for j in range(self._env_opts["actions_history_size"]):
                        self._obs_lb[:, action_idx_start_idx_counter:action_idx_start_idx_counter+self.actions_dim()]=-1.0
                        self._obs_ub[:, action_idx_start_idx_counter:action_idx_start_idx_counter+self.actions_dim()]=1.0
                        action_idx_start_idx_counter+=self.actions_dim()

        # some aux data to avoid allocations at training runtime
        self._rhc_twist_cmd_rhc_world=self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu).detach().clone()
        self._rhc_twist_cmd_rhc_h=self._rhc_twist_cmd_rhc_world.detach().clone()
        self._agent_twist_ref_current_w=self._rhc_twist_cmd_rhc_world.detach().clone()
        self._agent_twist_ref_current_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone()
        self._substep_avrg_root_twist_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone()
        self._step_avrg_root_twist_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone()
        self._root_twist_avrg_rhc_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone()
        self._root_twist_avrg_rhc_base_loc_next=self._rhc_twist_cmd_rhc_world.detach().clone()

        # per-env random thresholds used by probability-based stepping
        self._random_thresh_contacts=torch.rand((self._n_envs,self._n_contacts), device=device)
        # aux data
        self._task_err_scaling = torch.zeros((self._n_envs, 1),dtype=self._dtype,device=device)

        # Bernoulli success probs (1 - pof0) for zeroing twist refs
        self._pof1_b_linvel= torch.full(size=(self._n_envs,1),dtype=self._dtype,device=device,fill_value=1-self._env_opts["pof0_linvel"])
        self._pof1_b_omega = torch.full(size=(self._n_envs,1),dtype=self._dtype,device=device,fill_value=1-self._env_opts["pof0_omega"])
        self._bernoulli_coeffs_linvel = self._pof1_b_linvel.clone()
        self._bernoulli_coeffs_linvel[:, :] = 1.0
        self._bernoulli_coeffs_omega = self._pof1_b_omega.clone()
        self._bernoulli_coeffs_omega[:, :] = 1.0

        # smoothing
        self._track_rew_smoother=None
        if self._env_opts["use_track_reward_smoother"]:
            sub_reward_proxy=self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)[:, 0:1]
            smoothing_dt=self._substep_dt
            if not self._is_substep_rew[self._reward_map["task_error"]]: # assuming first reward is tracking
                smoothing_dt=self._substep_dt*self._action_repeat
            self._track_rew_smoother=ExponentialSignalSmoother(
                name=self.__class__.__name__+"VelErrorSmoother",
                signal=sub_reward_proxy, # same dimension of vel error
                update_dt=smoothing_dt,
                smoothing_horizon=self._env_opts["smoothing_horizon_vel_err"],
                target_smoothing=0.5,
                debug=self._is_debug,
                dtype=self._dtype,
                use_gpu=self._use_gpu)

        # if we need the action rate, we also need the action history
        if self._env_opts["add_action_rate_reward"]:
            if not self._env_opts["use_action_history"]:
                # NOTE(review): this error message looks truncated
                # ("... but " with no continuation) — consider completing it
                Journal.log(self.__class__.__name__,
                    "_custom_post_init",
                    "add_action_rate_reward is True, but ",
                    LogType.EXCEP,
                    throw_when_excep=True)

            history_size=self._env_opts["actions_history_size"]
            if history_size < 2:
                Journal.log(self.__class__.__name__,
                    "_custom_post_init",
                    f"add_action_rate_reward requires actions history ({history_size}) to be >=2!",
                    LogType.EXCEP,
                    throw_when_excep=True)

        # add periodic timer if required
        self._periodic_clock=None
        if self._env_opts["add_periodic_clock_to_obs"]:
            self._add_env_opt(self._env_opts, "clock_period",
                default=int(1.5*self._action_repeat*self.task_rand_timeout_bounds()[1])) # correcting with n substeps
            # (we are using the _substep_abs_counter counter)
            self._periodic_clock=PeriodicTimer(counter=self._substep_abs_counter,
                                period=self._env_opts["clock_period"],
                                dtype=self._dtype,
                                device=self._device)
+
+ def get_file_paths(self):
+ paths=AugMPCTrainingEnvBase.get_file_paths(self)
+ paths.append(self._this_child_path)
+ return paths
+
+ def get_aux_dir(self):
+ aux_dirs = []
+ path_getter = PathsGetter()
+ aux_dirs.append(path_getter.RHCDIR)
+ return aux_dirs
+
+ def _get_reward_scaling(self):
+ if self._env_opts["single_task_ref_per_episode"]:
+ return self._env_opts["n_steps_task_rand_ub"]
+ else:
+ return self._env_opts["episode_timeout_ub"]
+
+ def _max_ep_length(self):
+ if self._env_opts["single_task_ref_per_episode"]:
+ return self._env_opts["n_steps_task_rand_ub"]
+ else:
+ return self._env_opts["episode_timeout_ub"]
+
+ def _check_sub_truncations(self):
+ # overrides parent
+ sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu)
+ sub_truncations[:, 0:1] = self._ep_timeout_counter.time_limits_reached()
+ if self._env_opts["single_task_ref_per_episode"]:
+ sub_truncations[:, 1:2] = self._task_rand_counter.time_limits_reached()
+
+ def _check_sub_terminations(self):
+ # default behaviour-> to be overriden by child
+ sub_terminations = self._sub_terminations.get_torch_mirror(gpu=self._use_gpu)
+
+ # terminate if mpc just failed
+ sub_terminations[:, 0:1] = self._rhc_status.fails.get_torch_mirror(gpu=self._use_gpu)
+
+ # check if robot is capsizing
+ robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
+ check_capsize(quat=robot_q_meas,max_angle=self._max_pitch_angle,
+ output_t=self._is_capsized)
+ sub_terminations[:, 1:2] = self._is_capsized
+
+ if self._env_opts["add_term_mpc_capsize"]:
+ # check if robot is about to capsize accordin to MPC
+ robot_q_pred = self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu)
+ check_capsize(quat=robot_q_pred,max_angle=self._max_pitch_angle,
+ output_t=self._is_rhc_capsized)
+ sub_terminations[:, 2:3] = self._is_rhc_capsized
+
+ def _custom_reset(self):
+ return None
+
+ def reset(self):
+ AugMPCTrainingEnvBase.reset(self)
+
+ def _pre_substep(self):
+ pass
+
+ def _custom_post_step(self,episode_finished):
+ # executed after checking truncations and terminations and remote env reset
+ if self._use_gpu:
+ time_to_rand_or_ep_finished = torch.logical_or(self._task_rand_counter.time_limits_reached().cuda(),episode_finished)
+ self.randomize_task_refs(env_indxs=time_to_rand_or_ep_finished.flatten())
+ else:
+ time_to_rand_or_ep_finished = torch.logical_or(self._task_rand_counter.time_limits_reached(),episode_finished)
+ self.randomize_task_refs(env_indxs=time_to_rand_or_ep_finished.flatten())
+ # task refs are randomized in world frame -> we rotate them in base local
+ # (not super efficient, we should do it just for the finished envs)
+ self._update_loc_twist_refs()
+
+ if self._track_rew_smoother is not None: # reset smoother
+ self._track_rew_smoother.reset_all(to_be_reset=episode_finished.flatten(),
+ value=0.0)
+
+ def _custom_post_substp_pre_rew(self):
+ self._update_loc_twist_refs()
+
+ def _custom_post_substp_post_rew(self):
+ pass
+
+ def _update_loc_twist_refs(self):
+ # get fresh robot orientation
+ if not self._override_agent_refs:
+ robot_q = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu)
+ # rotate agent ref from world to robot base
+ world2base_frame(t_w=self._agent_twist_ref_current_w, q_b=robot_q,
+ t_out=self._agent_twist_ref_current_base_loc)
+ # write it to agent refs tensors
+ self._agent_refs.rob_refs.root_state.set(data_type="twist", data=self._agent_twist_ref_current_base_loc,
+ gpu=self._use_gpu)
+
+ def _apply_actions_to_rhc(self):
+
+ self._set_rhc_refs()
+
+ self._write_rhc_refs()
+
    def _set_rhc_refs(self):
        """Fill the MPC reference tensors from the latest agent actions.

        Twist actions (components 0:6) are interpreted in the MPC base frame
        and rotated into the MPC horizontal frame before being written;
        contact-flag actions become boolean contact references, either
        probabilistically or via a fixed threshold. The actual shared-memory
        write happens afterwards in _write_rhc_refs.
        """

        action_to_be_applied = self.get_actual_actions() # see _get_action_names() to get
        # the meaning of each component of this tensor

        rhc_latest_twist_cmd = self._rhc_refs.rob_refs.root_state.get(data_type="twist", gpu=self._use_gpu)
        rhc_latest_contact_ref = self._rhc_refs.contact_flags.get_torch_mirror(gpu=self._use_gpu)
        # NOTE(review): rhc_latest_pos_ref is fetched but never used nor written
        # in this method — confirm it is intentional (it is still synched in
        # _write_rhc_refs)
        rhc_latest_pos_ref = self._rhc_refs.rob_refs.contact_pos.get(data_type="p_z", gpu=self._use_gpu)
        rhc_q=self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) # this is always
        # available

        # reference twist for MPC is assumed to always be specified in MPC's
        # horizontal frame, while agent actions are interpreted as in MPC's
        # base frame -> we need to rotate the actions into the horizontal frame
        base2world_frame(t_b=action_to_be_applied[:, 0:6],q_b=rhc_q,t_out=self._rhc_twist_cmd_rhc_world)
        w2hor_frame(t_w=self._rhc_twist_cmd_rhc_world,q_b=rhc_q,t_out=self._rhc_twist_cmd_rhc_h)

        rhc_latest_twist_cmd[:, 0:6] = self._rhc_twist_cmd_rhc_h

        # self._rhc_refs.rob_refs.root_state.set(data_type="p", data=rhc_latest_p_ref,
        #     gpu=self._use_gpu)
        self._rhc_refs.rob_refs.root_state.set(data_type="twist", data=rhc_latest_twist_cmd,
            gpu=self._use_gpu)

        # contact flags
        idx=self._actions_map["contact_flag_start"]
        if self._env_opts["use_prob_based_stepping"]:
            # encode actions as probs
            self._random_thresh_contacts.uniform_() # random values in-place between 0 and 1
            rhc_latest_contact_ref[:, :] = action_to_be_applied[:, idx:idx+self._n_contacts] >= self._random_thresh_contacts # keep contact with
            # probability action_to_be_applied[:, 6:10]
        else: # just use a threshold
            rhc_latest_contact_ref[:, :] = action_to_be_applied[:, idx:idx+self._n_contacts] > self._env_opts["step_thresh"]
        # actually apply actions to controller
+
+ def _write_rhc_refs(self):
+
+ if self._use_gpu:
+ # GPU->CPU --> we cannot use asynchronous data transfer since it's unsafe
+ self._rhc_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False) # write from gpu to cpu mirror
+ self._rhc_refs.contact_flags.synch_mirror(from_gpu=True,non_blocking=False)
+ self._rhc_refs.rob_refs.contact_pos.synch_mirror(from_gpu=True,non_blocking=False)
+
+ self._rhc_refs.rob_refs.root_state.synch_all(read=False, retry=True) # write mirror to shared mem
+ self._rhc_refs.contact_flags.synch_all(read=False, retry=True)
+ self._rhc_refs.rob_refs.contact_pos.synch_all(read=False, retry=True)
+
    def _override_refs(self,
            env_indxs: torch.Tensor = None):
        """Overwrite world-frame agent twist targets from externally-set shared refs.

        Reads the shared agent references from memory (syncing to GPU when
        needed) and copies the linear-velocity target and the yaw-rate target
        into self._agent_twist_ref_current_w.

        env_indxs: unused here — the copy is applied to all envs.
        """

        # runs at every post_step
        self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem
        if self._use_gpu:
            # copies latest refs to GPU
            self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False)

        agent_linvel_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="v",
            gpu=self._use_gpu)

        agent_yaw_omega_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="omega",
            gpu=self._use_gpu)

        # self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] + \
        #     agent_p_ref_current[:, 0:2]
        self._agent_twist_ref_current_w[:, 0:3]=agent_linvel_ref_current # set linvel target

        # only the z (yaw) component of the angular velocity ref is taken
        self._agent_twist_ref_current_w[:, 5:6]=agent_yaw_omega_ref_current[:, 2:3] # set yaw ang. vel target from shared mem
+
    def _fill_substep_obs(self,
            obs: torch.Tensor):
        """Fill the high-rate (per-substep) slice of the observation tensor.

        Writes linear velocity (measured, or the MPC estimate when
        use_linvel_from_rhc is set), measured angular velocity and measured
        joint velocities into `obs` at the column offsets from self._obs_map.
        """

        # measured stuff
        robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu)
        robot_jnt_v_meas = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu)

        if self._env_opts["use_linvel_from_rhc"]:
            # twist estimate from mpc
            robot_twist_rhc_base_loc_next = self._rhc_cmds.root_state.get(data_type="twist",gpu=self._use_gpu)
            obs[:, self._obs_map["linvel_meas"]:(self._obs_map["linvel_meas"]+3)] = robot_twist_rhc_base_loc_next[:, 0:3]
        else:
            obs[:, self._obs_map["linvel_meas"]:(self._obs_map["linvel_meas"]+3)] = robot_twist_meas_base_loc[:, 0:3]
        # angular velocity is always the measured one
        obs[:, self._obs_map["omega_meas"]:(self._obs_map["omega_meas"]+3)] = robot_twist_meas_base_loc[:, 3:6]

        obs[:, self._obs_map["v_jnt"]:(self._obs_map["v_jnt"]+self._n_jnts)] = robot_jnt_v_meas
+
    def _fill_step_obs(self,
            obs: torch.Tensor):
        """Fill the per-step slice of the observation tensor.

        Writes measured state (gravity direction, joint positions), agent twist
        references, and all optional observation groups enabled through
        self._env_opts, at the column offsets recorded in self._obs_map.
        """

        # measured stuff
        robot_gravity_norm_base_loc = self._robot_state.root_state.get(data_type="gn",gpu=self._use_gpu)
        # NOTE(review): robot_twist_meas_base_loc and robot_jnt_v_meas are
        # fetched but not used in this method — confirm they can be dropped
        robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu)
        robot_jnt_q_meas = self._robot_state.jnts_state.get(data_type="q",gpu=self._use_gpu)
        if self._jnt_q_blacklist_idxs is not None: # we don't want to read joint pos from blacklist
            robot_jnt_q_meas[:, self._jnt_q_blacklist_idxs]=0.0
        robot_jnt_v_meas = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu)

        # twist estimate from mpc
        robot_twist_rhc_base_loc_next = self._rhc_cmds.root_state.get(data_type="twist",gpu=self._use_gpu)
        # cmds for jnt imp to be applied next
        robot_jnt_q_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="q",gpu=self._use_gpu)
        robot_jnt_v_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="v",gpu=self._use_gpu)
        robot_jnt_eff_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="eff",gpu=self._use_gpu)

        flight_info_now = self._rhc_refs.flight_info.get(data_type="all",gpu=self._use_gpu)
        flight_settings_now = self._rhc_refs.flight_settings_req.get(data_type="all",gpu=self._use_gpu)

        # refs
        agent_twist_ref = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu)

        obs[:, self._obs_map["gn_base"]:(self._obs_map["gn_base"]+3)] = robot_gravity_norm_base_loc # norm. gravity vector in base frame

        obs[:, self._obs_map["q_jnt"]:(self._obs_map["q_jnt"]+self._n_jnts)] = robot_jnt_q_meas # meas jnt pos
        obs[:, self._obs_map["twist_ref"]:(self._obs_map["twist_ref"]+6)] = agent_twist_ref # high lev agent refs to be tracked

        if self._env_opts["add_mpc_contact_f_to_obs"]:
            n_forces=3*len(self._contact_names)
            obs[:, self._obs_map["contact_f_mpc"]:(self._obs_map["contact_f_mpc"]+n_forces)] = self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=self._use_gpu)
        if self._env_opts["add_fail_idx_to_obs"]:
            obs[:, self._obs_map["rhc_fail_idx"]:(self._obs_map["rhc_fail_idx"]+1)] = self._rhc_fail_idx(gpu=self._use_gpu)
        if self._env_opts["add_term_mpc_capsize"]:
            obs[:, self._obs_map["gn_base_mpc"]:(self._obs_map["gn_base_mpc"]+3)] = self._rhc_cmds.root_state.get(data_type="gn",gpu=self._use_gpu)
        if self._env_opts["use_rhc_avrg_vel_tracking"]:
            self._get_avrg_rhc_root_twist(out=self._root_twist_avrg_rhc_base_loc, base_loc=True)
            obs[:, self._obs_map["avrg_twist_mpc"]:(self._obs_map["avrg_twist_mpc"]+6)] = self._root_twist_avrg_rhc_base_loc
        if self._env_opts["add_flight_info"]:
            obs[:, self._obs_map["flight_info"]:(self._obs_map["flight_info"]+self._flight_info_size)] = flight_info_now
        if self._env_opts["add_flight_settings"]:
            obs[:, self._obs_map["flight_settings_req"]:(self._obs_map["flight_settings_req"]+self._flight_setting_size)] = \
                flight_settings_now

        if self._env_opts["add_rhc_cmds_to_obs"]:
            obs[:, self._obs_map["rhc_cmds_q"]:(self._obs_map["rhc_cmds_q"]+self._n_jnts)] = robot_jnt_q_rhc_applied_next
            obs[:, self._obs_map["rhc_cmds_v"]:(self._obs_map["rhc_cmds_v"]+self._n_jnts)] = robot_jnt_v_rhc_applied_next
            obs[:, self._obs_map["rhc_cmds_eff"]:(self._obs_map["rhc_cmds_eff"]+self._n_jnts)] = robot_jnt_eff_rhc_applied_next
        if self._env_opts["use_action_history"]:
            if self._env_opts["add_prev_actions_stats_to_obs"]: # just add last, std and mean to obs
                obs[:, self._obs_map["action_history_prev"]:(self._obs_map["action_history_prev"]+self.actions_dim())]=self._act_mem_buffer.get(idx=0)
                obs[:, self._obs_map["action_history_avrg"]:(self._obs_map["action_history_avrg"]+self.actions_dim())]=self._act_mem_buffer.mean(clone=False)
                obs[:, self._obs_map["action_history_std"]:(self._obs_map["action_history_std"]+self.actions_dim())]=self._act_mem_buffer.std(clone=False)
            else: # add whole memory buffer to obs
                next_idx=self._obs_map["action_history"]
                for i in range(self._env_opts["actions_history_size"]):
                    obs[:, next_idx:(next_idx+self.actions_dim())]=self._act_mem_buffer.get(idx=i) # get all (n_envs x (obs_dim x horizon))
                    next_idx+=self.actions_dim()

        if self._env_opts["use_action_smoothing"]: # adding smoothed actions
            obs[:, self._obs_map["action_smoothing"]:(self._obs_map["action_smoothing"]+self.actions_dim())]=self.get_actual_actions(normalized=True)
            # NOTE(review): next_idx is only bound in the full-action-history
            # branch above; with action smoothing (or the clock below) enabled
            # but no full history this raises NameError — verify option combos
            next_idx+=self.actions_dim()

        if self._env_opts["add_periodic_clock_to_obs"]:
            obs[:, next_idx:(next_idx+2)]=self._periodic_clock.get()
            next_idx+=2
        if self._env_opts["add_heightmap_obs"]:
            hm = self._robot_state.height_sensor.get(gpu=self._use_gpu)
            obs[:, self._obs_map["heightmap"]:(self._obs_map["heightmap"]+self._height_flat_dim)] = hm
+
    def _get_custom_db_data(self,
            episode_finished,
            ignore_ep_end):
        """Push the latest samples into every custom episodic-db stream.

        episode_finished: bool tensor flagging envs whose episode just ended;
        moved to CPU here because the db buffers live on CPU.
        ignore_ep_end: forwarded as-is to each stream's update.
        """
        episode_finished = episode_finished.cpu()
        self.custom_db_data["AgentTwistRefs"].update(
            new_data=self._agent_refs.rob_refs.root_state.get(data_type="twist", gpu=False),
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)
        self.custom_db_data["RhcFailIdx"].update(new_data=self._rhc_fail_idx(gpu=False),
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)
        self.custom_db_data["RhcContactForces"].update(
            new_data=self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=False),
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)
        # _pow_db_data / _track_error_db are filled by the reward helpers
        # (_mech_pow, _cost_of_transport, _track_err_*wmse)
        self.custom_db_data["Power"].update(
            new_data=self._pow_db_data,
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)
        self.custom_db_data["TrackingError"].update(
            new_data=self._track_error_db,
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)
+
+ # reward functions
+ def _action_rate(self):
+ continuous_actions=self._is_continuous_actions
+ discrete_actions=~self._is_continuous_actions
+ n_c_actions=continuous_actions.sum().item()
+ n_d_actions=discrete_actions.sum().item()
+ actions_prev=self._act_mem_buffer.get(idx=1)
+ actions_now=self._act_mem_buffer.get(idx=0)
+ actions_rate=(actions_now-actions_prev) # actions already normalized
+ actions_rate_c=actions_rate[:, continuous_actions]
+ actions_rate_d=actions_rate[:, discrete_actions]
+
+ actions_rate_sqrd=None # assuming n_c_actions > 0 always
+ actions_rate_sqrd=self._env_opts["action_rate_rew_c_weight"]*torch.sum(actions_rate_c*actions_rate_c, dim=1, keepdim=True)/n_c_actions
+ if discrete_actions.any():
+ actions_rate_sqrd+=self._env_opts["action_rate_rew_d_weight"]*torch.sum(actions_rate_d*actions_rate_d, dim=1, keepdim=True)/n_d_actions
+ return actions_rate_sqrd
+
+ def _mech_pow(self, jnts_vel, jnts_effort, autoscaled: bool = False, drained: bool = True):
+ mech_pow_jnts=(jnts_effort*jnts_vel)*self._power_penalty_weights
+ if drained:
+ mech_pow_jnts.clamp_(0.0,torch.inf) # do not account for regenerative power
+ mech_pow_tot = torch.sum(mech_pow_jnts, dim=1, keepdim=True)
+ self._pow_db_data[:, 1:2]=mech_pow_tot.cpu()
+ if autoscaled:
+ mech_pow_tot=mech_pow_tot/self._power_penalty_weights_sum
+ return mech_pow_tot
+
+ def _cost_of_transport(self, jnts_vel, jnts_effort, v_norm, mass_weight: bool = False):
+ drained_mech_pow=self._mech_pow(jnts_vel=jnts_vel,
+ jnts_effort=jnts_effort,
+ drained=True)
+ CoT=drained_mech_pow/(v_norm+1e-2)
+ if mass_weight:
+ robot_weight=self._rhc_robot_weight
+ CoT=CoT/robot_weight
+
+ # add to db metrics
+ self._pow_db_data[:, 0:1]=CoT.cpu()
+ self._pow_db_data[:, 1:2]=drained_mech_pow.cpu()
+
+ return CoT
+
+ def _jnt_vel_penalty(self, jnts_vel):
+ weighted_jnt_vel = torch.sum(jnts_vel*jnts_vel, dim=1, keepdim=True)/self._n_jnts
+ return weighted_jnt_vel
+
+ def _rhc_fail_idx(self, gpu: bool):
+ rhc_fail_idx = self._rhc_status.rhc_fail_idx.get_torch_mirror(gpu=gpu)
+ return self._env_opts["rhc_fail_idx_scale"]*rhc_fail_idx
+
+ # basic L1 and L2 error functions
+ def _track_err_wmse(self, task_ref, task_meas, scaling, weights):
+ # weighted mean-squared error computation
+ task_error = (task_meas-task_ref)
+ # add to db metrics
+ self._track_error_db[:, :]=torch.abs(task_error)
+ scaled_error=task_error/scaling
+
+ task_wmse = torch.sum(scaled_error*scaled_error*weights, dim=1, keepdim=True)/torch.sum(weights).item()
+ return task_wmse # weighted mean square error (along task dimension)
+
    def _track_err_dir_wmse(self, task_ref, task_meas, scaling, weights):
        """Weighted DIRECTIONAL mean-squared tracking error.

        Decomposes the scaled twist error into components along the commanded
        linear-velocity direction, gravity, and the closing lateral direction
        (and an analogous roll/pitch/vertical triad for the angular part), then
        returns the weighted mean of the squared components. Also logs the raw
        absolute error to the tracking-error db (consistent with
        _track_err_wmse).
        """
        task_error = (task_meas-task_ref)
        # add to db metrics
        self._track_error_db[:, :]=torch.abs(task_error)
        task_error=task_error/scaling

        # projection along commanded direction and gravity, matching paper formulation
        v_ref=task_ref[:, 0:3]
        delta_v=task_error[:, 0:3]

        v_ref_norm=torch.norm(v_ref, dim=1, keepdim=True)
        cmd_dir=v_ref/(v_ref_norm+1e-8)
        # fallback to measured direction if command is (near) zero to avoid degenerate projection
        meas_dir=task_meas[:, 0:3]
        meas_dir=meas_dir/(torch.norm(meas_dir, dim=1, keepdim=True)+1e-8)
        cmd_dir=torch.where((v_ref_norm>1e-6), cmd_dir, meas_dir)

        gravity_dir = self._robot_state.root_state.get(data_type="gn",gpu=self._use_gpu) # normalized gravity in base frame
        gravity_dir = gravity_dir/(torch.norm(gravity_dir, dim=1, keepdim=True)+1e-8)

        forward_error=torch.sum(delta_v*cmd_dir, dim=1, keepdim=True)
        vertical_error=torch.sum(delta_v*gravity_dir, dim=1, keepdim=True)
        # lateral component is what remains after removing forward + vertical parts
        lateral_vec=delta_v - vertical_error*gravity_dir - forward_error*cmd_dir
        lateral_error=torch.norm(lateral_vec, dim=1, keepdim=True)

        # angular directional components: use gravity as vertical, project base x onto the world xy plane for roll, and close the triad with pitch
        base_x = self._base_x_dir
        base_y = self._base_y_dir

        roll_dir = base_x - torch.sum(base_x*gravity_dir, dim=1, keepdim=True)*gravity_dir
        roll_norm = torch.norm(roll_dir, dim=1, keepdim=True)
        roll_dir_alt = base_y - torch.sum(base_y*gravity_dir, dim=1, keepdim=True)*gravity_dir # fallback if base x is almost aligned with gravity
        roll_norm_alt = torch.norm(roll_dir_alt, dim=1, keepdim=True)
        use_alt_roll = roll_norm < 1e-6
        roll_dir = torch.where(use_alt_roll, roll_dir_alt, roll_dir)
        roll_norm = torch.where(use_alt_roll, roll_norm_alt, roll_norm)
        roll_dir = roll_dir/(roll_norm+1e-8)

        # pitch closes the right-handed triad (gravity x roll)
        pitch_dir = torch.cross(gravity_dir, roll_dir, dim=1)
        pitch_dir = pitch_dir/(torch.norm(pitch_dir, dim=1, keepdim=True)+1e-8)

        delta_omega = task_error[:, 3:6]
        omega_roll_error = torch.sum(delta_omega*roll_dir, dim=1, keepdim=True)
        omega_pitch_error = torch.sum(delta_omega*pitch_dir, dim=1, keepdim=True)
        omega_vertical_error = torch.sum(delta_omega*gravity_dir, dim=1, keepdim=True)

        full_error=torch.cat((forward_error, lateral_error, vertical_error, omega_roll_error, omega_pitch_error, omega_vertical_error), dim=1)
        task_wmse_dir = torch.sum(full_error*full_error*weights, dim=1, keepdim=True)/torch.sum(weights).item()
        return task_wmse_dir # weighted mean square error (along task dimension)
+
+ # L2 errors
+ def _tracking_err_rel_wmse(self, task_ref, task_meas, weights, directional: bool = False):
+ ref_norm = task_ref.norm(dim=1,keepdim=True) # norm of the full twist reference
+ self._task_err_scaling[:, :] = ref_norm+1e-2
+ if directional:
+ task_rel_err_wmse=self._track_err_dir_wmse(task_ref=task_ref, task_meas=task_meas,
+ scaling=self._task_err_scaling, weights=weights)
+ else:
+ task_rel_err_wmse=self._track_err_wmse(task_ref=task_ref, task_meas=task_meas,
+ scaling=self._task_err_scaling, weights=weights)
+ return task_rel_err_wmse
+
+ def _tracking_err_wmse(self, task_ref, task_meas, weights, directional: bool = False):
+ self._task_err_scaling[:, :] = 1
+ if directional:
+ task_err_wmse = self._track_err_dir_wmse(task_ref=task_ref,
+ task_meas=task_meas, scaling=self._task_err_scaling, weights=weights)
+ else:
+ task_err_wmse = self._track_err_wmse(task_ref=task_ref,
+ task_meas=task_meas, scaling=self._task_err_scaling, weights=weights)
+ return task_err_wmse
+
+ # L1 errors
+ def _tracking_err_rel_lin(self, task_ref, task_meas, weights, directional):
+ task_rel_err_wmse = self._tracking_err_rel_wmse(task_ref=task_ref,
+ task_meas=task_meas, weights=weights, directional=directional)
+ return task_rel_err_wmse.sqrt()
+
+ def _tracking_err_lin(self, task_ref, task_meas, weights, directional: bool = False):
+ self._task_err_scaling[:, :] = 1
+ task_err_wmse=self._tracking_err_wmse(task_ref=task_ref,
+ task_meas=task_meas, weights=weights, directional=directional)
+ return task_err_wmse.sqrt()
+
+ # reward computation over steps/substeps
+ def _compute_step_rewards(self):
+
+ sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)
+
+ # tracking reward
+ if self._env_opts["use_L1_norm"]: # linear errors
+ task_error_fun = self._tracking_err_lin
+ if self._env_opts["use_relative_error"]:
+ task_error_fun = self._tracking_err_rel_lin
+ else: # quadratic error
+ task_error_fun = self._tracking_err_wmse
+ if self._env_opts["use_relative_error"]:
+ task_error_fun = self._tracking_err_rel_wmse
+
+ agent_task_ref_base_loc = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu) # high level agent refs (hybrid twist)
+ self._get_avrg_step_root_twist(out=self._step_avrg_root_twist_base_loc, base_loc=True)
+ task_error = task_error_fun(task_meas=self._step_avrg_root_twist_base_loc,
+ task_ref=agent_task_ref_base_loc,
+ weights=self._task_err_weights,
+ directional=self._env_opts["directional_tracking"])
+
+ idx=self._reward_map["task_error"]
+ if self._env_opts["use_exp_track_rew"]:
+ sub_rewards[:, idx:(idx+1)] = \
+ self._env_opts["task_track_offset_exp"]*torch.exp(-self._env_opts["task_track_scale_exp"]*task_error)
+ else: # simple linear reward
+ sub_rewards[:, idx:(idx+1)] = \
+ self._env_opts["task_track_offset"]*(1.0-self._env_opts["task_track_scale"]*task_error)
+
+ if self._env_opts["use_fail_idx_weight"]: # add weight based on fail idx
+ fail_idx=self._rhc_fail_idx(gpu=self._use_gpu)
+ sub_rewards[:, idx:(idx+1)]=(1-fail_idx)*sub_rewards[:, idx:(idx+1)]
+ if self._track_rew_smoother is not None: # smooth reward if required
+ self._track_rew_smoother.update(new_signal=sub_rewards[:, 0:1])
+ sub_rewards[:, idx:(idx+1)]=self._track_rew_smoother.get()
+
+ # action rate
+ if self._env_opts["add_action_rate_reward"]:
+ action_rate=self._action_rate()
+ idx=self._reward_map["action_rate"]
+ sub_rewards[:, idx:(idx+1)] = self._env_opts["action_rate_offset"]*(1.0-self._env_opts["action_rate_scale"]*action_rate)
+
+ # mpc vel tracking
+ if self._env_opts["use_rhc_avrg_vel_tracking"]:
+ self._get_avrg_rhc_root_twist(out=self._root_twist_avrg_rhc_base_loc_next,base_loc=True) # get estimated avrg vel
+ # from MPC after stepping
+ task_pred_error=task_error_fun(task_meas=self._root_twist_avrg_rhc_base_loc_next,
+ task_ref=agent_task_ref_base_loc,
+ weights=self._task_pred_err_weights,
+ directional=self._env_opts["directional_tracking"])
+ idx=self._reward_map["rhc_avrg_vel_error"]
+ sub_rewards[:, idx:(idx+1)] = self._env_opts["task_pred_track_offset"]*torch.exp(-self._env_opts["task_pred_track_scale"]*task_pred_error)
+
    def _compute_substep_rewards(self):
        """Compute the reward terms evaluated at substep frequency (energy and
        joint-velocity penalties) and write them into the shared sub-rewards
        buffer. Each term is gated by its env option.

        NOTE(review): when "use_CoT_wrt_ref" is set, v_norm is the norm of the
        FULL 6D reference twist, while the measured branch norms only the
        linear part [:, 0:3] — confirm this asymmetry is intentional.
        """
        sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)

        # joint state is only needed by the energy-based terms
        if self._env_opts["add_CoT_reward"] or self._env_opts["add_power_reward"]:
            jnts_vel = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu)
            jnts_effort = self._robot_state.jnts_state.get(data_type="eff",gpu=self._use_gpu)

        if self._env_opts["add_CoT_reward"]:
            if self._env_opts["use_CoT_wrt_ref"]: # uses v ref norm for computing cot
                agent_task_ref_base_loc = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu)
                v_norm=torch.norm(agent_task_ref_base_loc, dim=1, keepdim=True)
            else: # uses measured velocity
                robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu)
                v_norm=torch.norm(robot_twist_meas_base_loc[:,0:3], dim=1, keepdim=True)
            CoT=self._cost_of_transport(jnts_vel=jnts_vel,jnts_effort=jnts_effort,v_norm=v_norm,
                mass_weight=True
                )
            idx=self._reward_map["CoT"]
            sub_rewards[:, idx:(idx+1)] = self._env_opts["CoT_offset"]*(1-self._env_opts["CoT_scale"]*CoT)
        if self._env_opts["add_power_reward"]:
            weighted_mech_power=self._mech_pow(jnts_vel=jnts_vel,jnts_effort=jnts_effort, drained=True)
            idx=self._reward_map["mech_pow"]
            sub_rewards[:, idx:(idx+1)] = self._env_opts["power_offset"]*(1-self._env_opts["power_scale"]*weighted_mech_power)

        if self._env_opts["add_jnt_v_reward"]:
            jnts_vel = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu)
            jnt_v=self._jnt_vel_penalty(jnts_vel=jnts_vel)
            idx=self._reward_map["jnt_v"]
            sub_rewards[:, idx:(idx+1)] = self._env_opts["jnt_vel_offset"]*(1-self._env_opts["jnt_vel_scale"]*jnt_v)
+
+ def _randomize_task_refs(self,
+ env_indxs: torch.Tensor = None):
+
+ # we randomize the reference in world frame, since it's much more intuitive
+ # (it will be rotated in base frame when provided to the agent and used for rew
+ # computation)
+
+ if self._env_opts["use_pof0"]: # sample from bernoulli distribution
+ torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"]
+ torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega)
+ if env_indxs is None:
+ random_uniform=torch.full_like(self._agent_twist_ref_current_w, fill_value=0.0)
+ torch.nn.init.uniform_(random_uniform, a=-1, b=1)
+ self._agent_twist_ref_current_w[:, :] = random_uniform*self._twist_ref_scale + self._twist_ref_offset
+ self._agent_twist_ref_current_w[:, 0:3] = self._agent_twist_ref_current_w[:, 0:3]*self._bernoulli_coeffs_linvel # linvel
+ self._agent_twist_ref_current_w[:, 3:6] = self._agent_twist_ref_current_w[:, 3:6]*self._bernoulli_coeffs_omega # omega
+ else:
+ random_uniform=torch.full_like(self._agent_twist_ref_current_w[env_indxs, :], fill_value=0.0)
+ torch.nn.init.uniform_(random_uniform, a=-1, b=1)
+ self._agent_twist_ref_current_w[env_indxs, :] = random_uniform * self._twist_ref_scale + self._twist_ref_offset
+ self._agent_twist_ref_current_w[env_indxs, 0:3] = self._agent_twist_ref_current_w[env_indxs, 0:3]*self._bernoulli_coeffs_linvel[env_indxs, :]
+ self._agent_twist_ref_current_w[env_indxs, 3:6] = self._agent_twist_ref_current_w[env_indxs, 3:6]*self._bernoulli_coeffs_omega[env_indxs, :] # omega
+
+ def _get_obs_names(self):
+
+ obs_names = [""] * self.obs_dim()
+
+ # proprioceptive stream of obs
+ next_idx=0
+
+ self._obs_map["gn_base"]=next_idx
+ obs_names[0] = "gn_x_base_loc"
+ obs_names[1] = "gn_y_base_loc"
+ obs_names[2] = "gn_z_base_loc"
+ next_idx+=3
+
+ self._obs_map["linvel_meas"]=next_idx
+ obs_names[next_idx] = "linvel_x_base_loc"
+ obs_names[next_idx+1] = "linvel_y_base_loc"
+ obs_names[next_idx+2] = "linvel_z_base_loc"
+ next_idx+=3
+
+ self._obs_map["omega_meas"]=next_idx
+ obs_names[next_idx] = "omega_x_base_loc"
+ obs_names[next_idx+1] = "omega_y_base_loc"
+ obs_names[next_idx+2] = "omega_z_base_loc"
+ next_idx+=3
+
+ jnt_names=self.get_observed_joints()
+ self._obs_map["q_jnt"]=next_idx
+ for i in range(self._n_jnts): # jnt obs (pos):
+ obs_names[next_idx+i] = f"q_jnt_{jnt_names[i]}"
+ next_idx+=self._n_jnts
+
+ self._obs_map["v_jnt"]=next_idx
+ for i in range(self._n_jnts): # jnt obs (v):
+ obs_names[next_idx+i] = f"v_jnt_{jnt_names[i]}"
+ next_idx+=self._n_jnts
+
+ # references
+ self._obs_map["twist_ref"]=next_idx
+ obs_names[next_idx] = "linvel_x_ref_base_loc"
+ obs_names[next_idx+1] = "linvel_y_ref_base_loc"
+ obs_names[next_idx+2] = "linvel_z_ref_base_loc"
+ obs_names[next_idx+3] = "omega_x_ref_base_loc"
+ obs_names[next_idx+4] = "omega_y_ref_base_loc"
+ obs_names[next_idx+5] = "omega_z_ref_base_loc"
+ next_idx+=6
+
+ # contact forces
+ if self._env_opts["add_mpc_contact_f_to_obs"]:
+ i = 0
+ self._obs_map["contact_f_mpc"]=next_idx
+ for contact in self._contact_names:
+ obs_names[next_idx+i] = f"fc_{contact}_x_base_loc"
+ obs_names[next_idx+i+1] = f"fc_{contact}_y_base_loc"
+ obs_names[next_idx+i+2] = f"fc_{contact}_z_base_loc"
+ i+=3
+ next_idx+=3*len(self._contact_names)
+
+ # data directly from MPC
+ if self._env_opts["add_fail_idx_to_obs"]:
+ self._obs_map["rhc_fail_idx"]=next_idx
+ obs_names[next_idx] = "rhc_fail_idx"
+ next_idx+=1
+ if self._env_opts["add_term_mpc_capsize"]:
+ self._obs_map["gn_base_mpc"]=next_idx
+ obs_names[next_idx] = "gn_x_rhc_base_loc"
+ obs_names[next_idx+1] = "gn_y_rhc_base_loc"
+ obs_names[next_idx+2] = "gn_z_rhc_base_loc"
+ next_idx+=3
+ if self._env_opts["use_rhc_avrg_vel_tracking"]:
+ self._obs_map["avrg_twist_mpc"]=next_idx
+ obs_names[next_idx] = "linvel_x_avrg_rhc"
+ obs_names[next_idx+1] = "linvel_y_avrg_rhc"
+ obs_names[next_idx+2] = "linvel_z_avrg_rhc"
+ obs_names[next_idx+3] = "omega_x_avrg_rhc"
+ obs_names[next_idx+4] = "omega_y_avrg_rhc"
+ obs_names[next_idx+5] = "omega_z_avrg_rhc"
+ next_idx+=6
+
+ if self._env_opts["add_flight_info"]:
+ self._obs_map["flight_info"]=next_idx
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_pos_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_len_remaining_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_len_nominal_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_apex_nominal_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_end_nominal_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+
+ if self._env_opts["add_flight_settings"]:
+ self._obs_map["flight_settings_req"]=next_idx
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_len_req_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_apex_req_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+ for i in range(len(self._contact_names)):
+ obs_names[next_idx+i] = "flight_end_req_"+ self._contact_names[i]
+ next_idx+=len(self._contact_names)
+
+ if self._env_opts["add_rhc_cmds_to_obs"]:
+ self._obs_map["rhc_cmds_q"]=next_idx
+ for i in range(self._n_jnts): # jnt obs (pos):
+ obs_names[next_idx+i] = f"rhc_cmd_q_{jnt_names[i]}"
+ next_idx+=self._n_jnts
+ self._obs_map["rhc_cmds_v"]=next_idx
+ for i in range(self._n_jnts): # jnt obs (pos):
+ obs_names[next_idx+i] = f"rhc_cmd_v_{jnt_names[i]}"
+ next_idx+=self._n_jnts
+ self._obs_map["rhc_cmds_eff"]=next_idx
+ for i in range(self._n_jnts): # jnt obs (pos):
+ obs_names[next_idx+i] = f"rhc_cmd_eff_{jnt_names[i]}"
+ next_idx+=self._n_jnts
+
+ # previous actions info
+ if self._env_opts["use_action_history"]:
+ self._obs_map["action_history"]=next_idx
+ action_names = self._get_action_names()
+ if self._env_opts["add_prev_actions_stats_to_obs"]:
+ self._obs_map["action_history_prev"]=next_idx
+ for act_idx in range(self.actions_dim()):
+ obs_names[next_idx+act_idx] = action_names[act_idx]+f"_prev_act"
+ next_idx+=self.actions_dim()
+ self._obs_map["action_history_avrg"]=next_idx
+ for act_idx in range(self.actions_dim()):
+ obs_names[next_idx+act_idx] = action_names[act_idx]+f"_avrg_act"
+ next_idx+=self.actions_dim()
+ self._obs_map["action_history_std"]=next_idx
+ for act_idx in range(self.actions_dim()):
+ obs_names[next_idx+act_idx] = action_names[act_idx]+f"_std_act"
+ next_idx+=self.actions_dim()
+ else:
+ for i in range(self._env_opts["actions_history_size"]):
+ for act_idx in range(self.actions_dim()):
+ obs_names[next_idx+act_idx] = action_names[act_idx]+f"_m{i+1}_act"
+ next_idx+=self.actions_dim()
+
+ if self._env_opts["use_action_smoothing"]:
+ self._obs_map["action_smoothing"]=next_idx
+ for smoothed_action in range(self.actions_dim()):
+ obs_names[next_idx+smoothed_action] = action_names[smoothed_action]+f"_smoothed"
+ next_idx+=self.actions_dim()
+
+ if self._env_opts["add_periodic_clock_to_obs"]:
+ self._obs_map["clock"]=next_idx
+ obs_names[next_idx] = "clock_cos"
+ obs_names[next_idx+1] = "clock_sin"
+ next_idx+=2
+ if self._env_opts["add_heightmap_obs"] and self._height_grid_size is not None:
+ self._obs_map["heightmap"]=next_idx
+ gs = self._height_grid_size
+ for r in range(gs):
+ for c in range(gs):
+ obs_names[next_idx] = f"height_r{r}_c{c}"
+ next_idx += 1
+
+ return obs_names
+
+ def _set_substep_obs(self):
+ # which obs are to be averaged over substeps?
+
+ self._is_substep_obs[self._obs_map["linvel_meas"]:self._obs_map["linvel_meas"]+3]=True
+ self._is_substep_obs[self._obs_map["omega_meas"]:self._obs_map["omega_meas"]+3]=True
+ self._is_substep_obs[self._obs_map["v_jnt"]:self._obs_map["v_jnt"]+self._n_jnts]=True # also good for noise
+
+ # self._is_substep_obs[self._obs_map["contact_f_mpc"]:self._obs_map["contact_f_mpc"]+3*len(self._contact_names)]=True
+
+ def _get_action_names(self):
+
+ action_names = [""] * self.actions_dim()
+ action_names[0] = "vx_cmd" # twist commands from agent to RHC controller
+ action_names[1] = "vy_cmd"
+ action_names[2] = "vz_cmd"
+ action_names[3] = "roll_omega_cmd"
+ action_names[4] = "pitch_omega_cmd"
+ action_names[5] = "yaw_omega_cmd"
+
+ next_idx=6
+
+ self._actions_map["contact_flag_start"]=next_idx
+ for i in range(len(self._contact_names)):
+ contact=self._contact_names[i]
+ action_names[next_idx] = f"contact_flag_{contact}"
+ next_idx+=1
+
+ return action_names
+
+ def _set_substep_rew(self):
+
+ # which rewards are to be computed at substeps frequency?
+ self._is_substep_rew[self._reward_map["task_error"]]=False
+ if self._env_opts["add_CoT_reward"]:
+ self._is_substep_rew[self._reward_map["CoT"]]=True
+ if self._env_opts["add_power_reward"]:
+ self._is_substep_rew[self._reward_map["mech_pow"]]=True
+ if self._env_opts["add_action_rate_reward"]:
+ self._is_substep_rew[self._reward_map["action_rate"]]=False
+ if self._env_opts["add_jnt_v_reward"]:
+ self._is_substep_rew[self._reward_map["jnt_v"]]=True
+
+ if self._env_opts["use_rhc_avrg_vel_tracking"]:
+ self._is_substep_rew[self._reward_map["rhc_avrg_vel_error"]]=False
+
+ def _get_rewards_names(self):
+
+ counter=0
+ reward_names = []
+
+ # adding rewards
+ reward_names.append("task_error")
+ self._reward_map["task_error"]=counter
+ self._reward_lb_map["task_error"]="task_error_reward_lb"
+ counter+=1
+ if self._env_opts["add_power_reward"] and self._env_opts["add_CoT_reward"]:
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ "Only one between CoT and power reward can be used!",
+ LogType.EXCEP,
+ throw_when_excep=True)
+ if self._env_opts["add_CoT_reward"]:
+ reward_names.append("CoT")
+ self._reward_map["CoT"]=counter
+ self._reward_lb_map["CoT"]="CoT_reward_lb"
+ counter+=1
+ if self._env_opts["add_power_reward"]:
+ reward_names.append("mech_pow")
+ self._reward_map["mech_pow"]=counter
+ self._reward_lb_map["mech_pow"]="power_reward_lb"
+ counter+=1
+ if self._env_opts["add_action_rate_reward"]:
+ reward_names.append("action_rate")
+ self._reward_map["action_rate"]=counter
+ self._reward_lb_map["action_rate"]="action_rate_reward_lb"
+ counter+=1
+ if self._env_opts["add_jnt_v_reward"]:
+ reward_names.append("jnt_v")
+ self._reward_map["jnt_v"]=counter
+ self._reward_lb_map["jnt_v"]="jnt_vel_reward_lb"
+ counter+=1
+ if self._env_opts["use_rhc_avrg_vel_tracking"]:
+ reward_names.append("rhc_avrg_vel_error")
+ self._reward_map["rhc_avrg_vel_error"]=counter
+ self._reward_lb_map["rhc_avrg_vel_error"]="rhc_avrg_vel_reward_lb"
+ counter+=1
+
+ return reward_names
+
+ def _get_sub_trunc_names(self):
+ sub_trunc_names = []
+ sub_trunc_names.append("ep_timeout")
+ if self._env_opts["single_task_ref_per_episode"]:
+ sub_trunc_names.append("task_ref_rand")
+ return sub_trunc_names
+
+ def _get_sub_term_names(self):
+ # to be overridden by child class
+ sub_term_names = []
+ sub_term_names.append("rhc_failure")
+ sub_term_names.append("robot_capsize")
+ if self._env_opts["add_term_mpc_capsize"]:
+ sub_term_names.append("rhc_capsize")
+
+ return sub_term_names
+
+ def _set_jnts_blacklist_pattern(self):
+ # used to exclude pos measurement from wheels
+ self._jnt_q_blacklist_patterns=["wheel"]
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/unitree_b2w_2026_03_28_11_09_54_ID.srdf b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/unitree_b2w_2026_03_28_11_09_54_ID.srdf
new file mode 100644
index 0000000000000000000000000000000000000000..0eccb9d4f451dbd6f85affd6972ecce13a609d04
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/unitree_b2w_2026_03_28_11_09_54_ID.srdf
@@ -0,0 +1,55 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/unitree_b2w_2026_03_28_11_09_54_ID.urdf b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/unitree_b2w_2026_03_28_11_09_54_ID.urdf
new file mode 100644
index 0000000000000000000000000000000000000000..6932d9779b22f20dae4458878e998b39517c997a
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/unitree_b2w_2026_03_28_11_09_54_ID.urdf
@@ -0,0 +1,1429 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ /b2w_gazebo
+ gazebo_ros_control/DefaultRobotHWSim
+
+
+
+
+
+ 10
+
+ base
+ 0 0 0 0 0 0
+ Gazebo/Yellow
+
+
+
+
+
+
+
+ trunk
+ /apply_force/trunk
+
+
+
+ true
+
+ true
+ 1000
+ true
+ __default_topic__
+
+ trunk_imu
+ imu_link
+ 1000.0
+ 0.0
+ 0 0 0
+ 0 0 0
+ imu_link
+
+ 0 0 0 0 0 0
+
+
+
+
+
+ 100
+
+
+ FR_calf_fixed_joint_lump__FR_foot_collision_1
+
+
+
+
+
+ 100
+
+
+ FL_calf_fixed_joint_lump__FL_foot_collision_1
+
+
+
+
+
+ 100
+
+
+ RR_calf_fixed_joint_lump__RR_foot_collision_1
+
+
+
+
+
+ 100
+
+
+ RL_calf_fixed_joint_lump__RL_foot_collision_1
+
+
+
+
+
+
+ Gazebo/Green
+ false
+
+
+ 0.2
+ 0.2
+
+
+
+
+ 0.2
+ 0.2
+ Gazebo/White
+
+
+ 0.2
+ 0.2
+ Gazebo/Red
+
+
+
+ 0.2
+ 0.2
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+ 0.6
+ 0.6
+ 1
+
+
+
+
+
+
+ 0.2
+ 0.2
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+ 0.6
+ 0.6
+ 1
+
+
+
+
+
+
+ 0.2
+ 0.2
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+ 0.6
+ 0.6
+ 1
+
+
+
+
+
+
+ 0.2
+ 0.2
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+
+
+
+ 0.2
+ 0.2
+ 1
+
+
+ 0.6
+ 0.6
+ 1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+ transmission_interface/SimpleTransmission
+
+ hardware_interface/EffortJointInterface
+
+
+ hardware_interface/EffortJointInterface
+ 1
+
+
+
+
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/world_interface_base.py b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/world_interface_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..9dc8f41cb9b4a2c1518bd204e663bb6568902b3d
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/world_interface_base.py
@@ -0,0 +1,1719 @@
+from aug_mpc.controllers.rhc.augmpc_cluster_server import AugMpcClusterServer
+from aug_mpc.utils.shared_data.remote_stepping import RemoteStepperClnt
+from aug_mpc.utils.shared_data.remote_stepping import RemoteResetClnt
+from aug_mpc.utils.shared_data.remote_stepping import RemoteResetRequest
+from aug_mpc.utils.jnt_imp_control_base import JntImpCntrlBase
+from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds
+from aug_mpc.utils.xrdf_gen import generate_srdf, generate_urdf
+from aug_mpc.utils.math_utils import quaternion_difference
+from aug_mpc.utils.custom_arg_parsing import extract_custom_xacro_args, merge_xacro_cmds
+
+from aug_mpc.utils.filtering import FirstOrderFilter
+
+from mpc_hive.utilities.homing import RobotHomer
+from mpc_hive.utilities.shared_data.jnt_imp_control import JntImpCntrlData
+
+from EigenIPC.PyEigenIPC import VLevel, Journal, LogType, dtype
+from EigenIPC.PyEigenIPC import StringTensorServer
+from EigenIPC.PyEigenIPCExt.wrappers.shared_data_view import SharedTWrapper
+
+from typing import List, Dict, TypeVar
+
+import os
+import inspect
+import signal
+import time
+
+import numpy as np
+import torch
+
+from abc import ABC, abstractmethod
+
+JntImpCntrlChild = TypeVar('JntImpCntrlChild', bound='JntImpCntrlBase')
+
+class AugMPCWorldInterfaceBase(ABC):
+
+ def __init__(self,
+ robot_names: List[str],
+ robot_urdf_paths: List[str],
+ robot_srdf_paths: List[str],
+ jnt_imp_config_paths: List[str],
+ n_contacts: List[int],
+ cluster_dt: List[float],
+ use_remote_stepping: List[bool],
+ name: str = "AugMPCWorldInterfaceBase",
+ num_envs: int = 1,
+ debug = False,
+ verbose: bool = False,
+ vlevel: VLevel = VLevel.V1,
+ n_init_step: int = 0,
+ timeout_ms: int = 60000,
+ env_opts: Dict = None,
+ use_gpu: bool = True,
+ dtype: torch.dtype = torch.float32,
+ dump_basepath: str = "/tmp",
+ override_low_lev_controller: bool = False):
+
+ # checks on input args
+ # type checks
+ if not isinstance(robot_names, List):
+ exception = "robot_names must be a list!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not isinstance(robot_urdf_paths, List):
+ exception = "robot_urdf_paths must be a list!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not isinstance(robot_srdf_paths, List):
+ exception = "robot_srdf_paths must be a list!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not isinstance(cluster_dt, List):
+ exception = "cluster_dt must be a list!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not isinstance(use_remote_stepping, List):
+ exception = "use_remote_stepping must be a list!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not isinstance(n_contacts, List):
+ exception = "n_contacts must be a list (of integers)!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not isinstance(jnt_imp_config_paths, List):
+ exception = "jnt_imp_config_paths must be a list paths!"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ # dim checks
+ if not len(robot_urdf_paths) == len(robot_names):
+ exception = f"robot_urdf_paths has len {len(robot_urdf_paths)}" + \
+ f" while robot_names {len(robot_names)}"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not len(robot_srdf_paths) == len(robot_names):
+ exception = f"robot_srdf_paths has len {len(robot_srdf_paths)}" + \
+ f" while robot_names {len(robot_names)}"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not len(cluster_dt) == len(robot_names):
+ exception = f"cluster_dt has len {len(cluster_dt)}" + \
+ f" while robot_names {len(robot_names)}"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not len(use_remote_stepping) == len(robot_names):
+ exception = f"use_remote_stepping has len {len(use_remote_stepping)}" + \
+ f" while robot_names {len(robot_names)}"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not len(robot_srdf_paths) == len(robot_names):
+ exception = f"robot_srdf_paths has len {len(robot_srdf_paths)}" + \
+ f" while robot_names {len(robot_names)}"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if not len(jnt_imp_config_paths) == len(robot_names):
+ exception = f"jnt_imp_config_paths has len {len(jnt_imp_config_paths)}" + \
+ f" while robot_names {len(robot_names)}"
+ Journal.log(self.__class__.__name__,
+ "__init__",
+ exception,
+ LogType.EXCEP,
+ throw_when_excep = True)
+
+ self._remote_exit_flag=None
+
+ self._name=name
+ self._num_envs=num_envs
+ self._debug=debug
+ self._verbose=verbose
+ self._vlevel=vlevel
+ self._force_reconnection=True
+ self._timeout_ms=timeout_ms
+ self._use_gpu=use_gpu
+ self._device = "cuda" if self._use_gpu else "cpu"
+ self._dtype=dtype
+ self._robot_names=robot_names
+ self._env_opts={}
+ self._env_opts["deact_when_failure"]=True
+ self._env_opts["filter_jnt_vel"]=False
+ self._env_opts["filter_cutoff_freq"]=10.0 # [Hz]
+ self._env_opts["filter_sampling_rate"]=100 # rate at which state is filtered [Hz]
+ self._env_opts["add_remote_exit_flag"]=False # add shared data server to trigger a remote exit
+ self._env_opts["wheel_joint_patterns"]=["wheel"]
+ self._env_opts["filter_wheel_pos_ref"]=True
+ self._env_opts["zero_wheel_eff_ref"]=True
+
+ self._env_opts["enable_height_sensor"]=False
+ self._env_opts["height_sensor_resolution"]=0.16
+ self._env_opts["height_sensor_pixels"]=10
+ self._env_opts["height_sensor_lateral_offset"]=0.0
+ self._env_opts["height_sensor_forward_offset"]=0.0
+
+ self._env_opts["run_cluster_bootstrap"] = False
+
+ self._filter_step_ssteps_freq=None
+
+ self._env_opts.update(env_opts)
+
+ self.step_counter = 0 # global step counter
+ self._n_init_steps = n_init_step # n steps to be performed before applying solutions from control clusters
+ self._srdf_dump_paths = robot_srdf_paths
+ self._homers = {}
+ self._homing = None
+ self._jnt_imp_cntrl_shared_data = {}
+ self._jnt_imp_controllers = {}
+ self._jnt_imp_config_paths = {}
+
+ # control cluster data
+ self.cluster_sim_step_counters = {}
+ self.cluster_servers = {}
+ self._trigger_sol = {}
+ self._wait_sol = {}
+ self._cluster_dt = {}
+ self._robot_urdf_paths={}
+ self._robot_srdf_paths={}
+ self._contact_names={}
+ self._num_contacts={}
+
+ for i in range(len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ self._cluster_dt[robot_name]=cluster_dt[i]
+ self._robot_urdf_paths[robot_name]=robot_urdf_paths[i]
+ self._robot_srdf_paths[robot_name]=robot_srdf_paths[i]
+ self._contact_names[robot_name]=None
+ self._num_contacts[robot_name]=n_contacts[i]
+ self._jnt_imp_config_paths[robot_name]=jnt_imp_config_paths[i]
+ # db data
+ self.debug_data = {}
+ self.debug_data["time_to_step_world"] = np.nan
+ self.debug_data["time_to_get_states_from_env"] = np.nan
+ self.debug_data["cluster_sol_time"] = {}
+ self.debug_data["cluster_state_update_dt"] = {}
+ self.debug_data["sim_time"] = {}
+ self.debug_data["cluster_time"] = {}
+
+ self._env_timer = time.perf_counter()
+
+ # remote sim stepping options
+ self._timeout = timeout_ms # timeout for remote stepping
+ self._use_remote_stepping = use_remote_stepping
+ # should use remote stepping
+ self._remote_steppers = {}
+ self._remote_resetters = {}
+ self._remote_reset_requests = {}
+ self._is_first_trigger = {}
+
+ self._closed = False
+
+ self._this_child_path=os.path.abspath(inspect.getfile(self.__class__))
+ self._descr_dump_path=dump_basepath+"/"+f"{self.__class__.__name__}"
+ self._urdf_dump_paths = {}
+ self._srdf_dump_paths = {}
+ self.xrdf_cmd_vals = [] # by default empty, needs to be overriden by
+ # child class
+ self._world_iface_files_server=None
+
+ self._override_low_lev_controller=override_low_lev_controller
+
+ self._root_p = {}
+ self._root_q = {}
+ self._jnts_q = {}
+ self._root_p_prev = {} # used for num differentiation
+ self._root_q_prev = {} # used for num differentiation
+ self._jnts_q_prev = {} # used for num differentiation
+ self._root_v_prev = {} # used for num differentiation
+ self._root_omega_prev = {} # used for num differentiation
+ self._root_p_default = {}
+ self._root_q_default = {}
+ self._jnts_q_default = {}
+
+ self._gravity_normalized = {}
+ self._gravity_normalized_base_loc = {}
+
+ self._root_v = {}
+ self._root_v_base_loc = {}
+ self._root_v_default = {}
+ self._root_omega = {}
+ self._root_omega_base_loc = {}
+ self._root_omega_default = {}
+ self._root_a = {}
+ self._root_a_base_loc = {}
+ self._root_alpha = {}
+ self._root_alpha_base_loc = {}
+
+ self._jnts_v = {}
+ self._jnt_vel_filter = {}
+ self._jnts_v_default = {}
+ self._jnts_eff = {}
+ self._jnts_eff_default = {}
+
+ self._root_pos_offsets = {}
+ self._root_q_offsets = {}
+ self._root_q_offsets_yaw = {}
+ self._root_q_yaw_rel_ws = {}
+
+ self._parse_env_opts()
+
+ self._enable_height_shared = self._env_opts["enable_height_sensor"]
+ self._height_sensor_resolution = self._env_opts["height_sensor_resolution"]
+ self._height_sensor_pixels = self._env_opts["height_sensor_pixels"]
+
+ self._pre_setup() # child's method
+
+ self._init_world() # after this point all info from sim or robot is
+ # available
+
+ self._publish_world_interface_files()
+
+ setup_ok=self._setup()
+ if not setup_ok:
+ self.close()
+
+ self._exit_request=False
+ signal.signal(signal.SIGINT, self.signal_handler)
+
    def signal_handler(self, sig, frame):
        """SIGINT handler: request a graceful shutdown.

        Only sets a flag here; the actual teardown is performed later by
        step() (which calls close() when the flag is set), so no cleanup
        work happens inside signal context.
        """
        Journal.log(self.__class__.__name__,
            "signal_handler",
            "received SIGINT -> cleaning up",
            LogType.WARN)
        self._exit_request=True
+
    def __del__(self):
        # best-effort cleanup on garbage collection; close() is guarded by
        # self._closed, so a double invocation is harmless
        self.close()
+
    def is_closed(self):
        """Return True once close() has completed and shared resources are released."""
        return self._closed
+
    def close(self) -> None:
        """Release every shared-memory/IPC resource owned by this env.

        Idempotent (guarded by self._closed). Membership checks on the
        per-robot dictionaries make this safe to call even when __init__
        or _setup aborted before fully populating them (__del__ and the
        failed-setup path both end up here).
        """
        if not self._closed:
            for i in range(len(self._robot_names)):
                if self._robot_names[i] in self.cluster_servers:
                    self.cluster_servers[self._robot_names[i]].close()
                if self._use_remote_stepping[i]: # remote signaling
                    # steppers/resetters are created together with the reset
                    # requests, so one membership check covers all three
                    if self._robot_names[i] in self._remote_reset_requests:
                        self._remote_reset_requests[self._robot_names[i]].close()
                        self._remote_resetters[self._robot_names[i]].close()
                        self._remote_steppers[self._robot_names[i]].close()
                if self._robot_names[i] in self._jnt_imp_cntrl_shared_data:
                    jnt_imp_shared_data=self._jnt_imp_cntrl_shared_data[self._robot_names[i]]
                    if jnt_imp_shared_data is not None:
                        jnt_imp_shared_data.close()
            if self._remote_exit_flag is not None:
                self._remote_exit_flag.close()
            if self._world_iface_files_server is not None:
                self._world_iface_files_server.close()
            # child-specific teardown (simulator side)
            self._close()
            self._closed=True
+
+ def _collect_world_interface_files(self):
+ files = [self._this_child_path]
+ # prefer generated URDF/SRDF if available, fallback to provided xacros
+ if len(self._urdf_dump_paths) > 0:
+ files.extend(list(self._urdf_dump_paths.values()))
+ else:
+ files.extend(list(self._robot_urdf_paths.values()))
+ if len(self._srdf_dump_paths) > 0:
+ files.extend(list(self._srdf_dump_paths.values()))
+ else:
+ files.extend(list(self._robot_srdf_paths.values()))
+ files.extend(list(self._jnt_imp_config_paths.values()))
+ # remove duplicates while preserving order
+ unique_files=[]
+ for f in files:
+ if f not in unique_files:
+ unique_files.append(f)
+ return unique_files
+
    def _publish_world_interface_files(self):
        """Advertise the world-interface file paths on shared memory.

        Only runs when at least one robot uses remote stepping. The paths
        are joined into a single comma-separated string and written to a
        StringTensorServer (namespaced on the first robot); the write is
        retried until a remote reader is available.
        """

        if not any(self._use_remote_stepping):
            return
        self._world_iface_files_server=StringTensorServer(length=1,
            basename="SharedWorldInterfaceFilesDropDir",
            name_space=self._robot_names[0],
            verbose=self._verbose,
            vlevel=self._vlevel,
            force_reconnection=True)
        self._world_iface_files_server.run()
        combined_paths=", ".join(self._collect_world_interface_files())
        # busy-wait with a small sleep until the write succeeds
        while not self._world_iface_files_server.write_vec([combined_paths], 0):
            Journal.log(self.__class__.__name__,
                "_publish_world_interface_files",
                f"Failed to pub world interface files. Retrying...",
                LogType.WARN)
            time.sleep(0.1)
        Journal.log(self.__class__.__name__,
            "_publish_world_interface_files",
            f"World interface files advertised: {combined_paths}",
            LogType.STAT)
+
    def _setup(self) -> bool:
        """One-shot bring-up of all per-robot infrastructure.

        For each robot this: allocates gravity/yaw work buffers, starts the
        MPC cluster server and (optionally) the remote stepping/reset
        endpoints, loads the SRDF homing, spawns the joint impedance
        controller plus its shared-memory mirror, runs warmup sim steps,
        resets everything and triggers the first solution. Finally the
        optional remote-exit flag server is started.

        Returns:
            bool: True on success; False if any sub-step failed (caller
            is expected to close() the env in that case).
        """

        for i in range(len(self._robot_names)):
            robot_name = self._robot_names[i]

            # normalized gravity vector
            self._gravity_normalized[robot_name]=torch.full_like(self._root_v[robot_name], fill_value=0.0)
            self._gravity_normalized[robot_name][:, 2]=-1.0
            self._gravity_normalized_base_loc[robot_name]=self._gravity_normalized[robot_name].detach().clone()

            # Pre-allocate yaw-related buffers once and reuse them in root_q_yaw_rel().
            q_ref = self._root_q[robot_name]
            self._root_q_offsets_yaw[robot_name] = torch.zeros(
                (self._num_envs,), dtype=q_ref.dtype, device=q_ref.device)
            self._root_q_yaw_rel_ws[robot_name] = {
                "yaw_abs": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device),
                "yaw_rel": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device),
                "yaw_sin": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device),
                "yaw_cos": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device),
                "q_abs_unit": torch.zeros_like(q_ref),
                "q_yaw_abs": torch.zeros_like(q_ref),
                "q_yaw_rel": torch.zeros_like(q_ref),
                "q_yaw_abs_conj": torch.zeros_like(q_ref),
                "q_pr": torch.zeros_like(q_ref),
                "q_rel": torch.zeros_like(q_ref),
            }

            self.cluster_sim_step_counters[robot_name]=0
            self._is_first_trigger[robot_name] = True
            if not isinstance(self._cluster_dt[robot_name], (float)):
                exception = f"cluster_dt[{i}] should be a float!"
                Journal.log(self.__class__.__name__,
                    "_setup",
                    exception,
                    LogType.EXCEP,
                    throw_when_excep = False)
                return False
            # NOTE(review): no-op self-assignment below — looks like a leftover
            # from an earlier refactor; confirm before removing
            self._cluster_dt[robot_name] = self._cluster_dt[robot_name]
            self._trigger_sol[robot_name] = True # allow first trigger
            self._wait_sol[robot_name] = False

            # initialize a lrhc cluster server for communicating with rhc controllers
            self.cluster_servers[robot_name] = AugMpcClusterServer(cluster_size=self._num_envs,
                cluster_dt=self._cluster_dt[robot_name],
                control_dt=self.physics_dt(),
                jnt_names=self._robot_jnt_names(robot_name=robot_name),
                n_contacts=self._n_contacts(robot_name=robot_name),
                contact_linknames=self._contact_names[robot_name],
                verbose=self._verbose,
                vlevel=self._vlevel,
                debug=self._debug,
                robot_name=robot_name,
                use_gpu=self._use_gpu,
                force_reconnection=self._force_reconnection,
                timeout_ms=self._timeout,
                enable_height_sensor=self._enable_height_shared,
                height_grid_size=self._height_sensor_pixels,
                height_grid_resolution=self._height_sensor_resolution)
            self.cluster_servers[robot_name].run()
            self.debug_data["cluster_sol_time"][robot_name] = np.nan
            self.debug_data["cluster_state_update_dt"][robot_name] = np.nan
            self.debug_data["sim_time"][robot_name] = np.nan
            # remote sim stepping
            if self._use_remote_stepping[i]:
                self._remote_steppers[robot_name] = RemoteStepperClnt(namespace=robot_name,
                    verbose=self._debug,
                    vlevel=self._vlevel)
                self._remote_resetters[robot_name] = RemoteResetClnt(namespace=robot_name,
                    verbose=self._debug,
                    vlevel=self._vlevel)
                self._remote_reset_requests[robot_name] = RemoteResetRequest(namespace=robot_name,
                    n_env=self._num_envs,
                    is_server=True,
                    verbose=self._debug,
                    vlevel=self._vlevel,
                    force_reconnection=self._force_reconnection,
                    safe=False)
                self._remote_steppers[robot_name].run()
                self._remote_resetters[robot_name].run()
                self._remote_reset_requests[robot_name].run()
            else:
                self._remote_steppers[robot_name] = None
                self._remote_reset_requests[robot_name] = None
                self._remote_resetters[robot_name] = None

            # homing configuration from the SRDF, broadcast over all envs
            self._homers[robot_name] = RobotHomer(srdf_path=self._srdf_dump_paths[robot_name],
                jnt_names=self._robot_jnt_names(robot_name=robot_name),
                filter=True,
                verbose=self._verbose)
            robot_homing=torch.from_numpy(self._homers[robot_name].get_homing().reshape(1,-1))
            if "cuda" in self._device:
                robot_homing=robot_homing.cuda()
            # NOTE(review): self._homing is overwritten on every loop iteration,
            # so after the loop it holds only the LAST robot's homing — fine for
            # single-robot use; confirm intent for multi-robot setups
            self._homing=robot_homing.repeat(self._num_envs, 1)

            self._jnts_q_default[robot_name] = self._homing
            self._set_jnts_to_homing(robot_name=robot_name)
            self._set_root_to_defconfig(robot_name=robot_name)
            self._reset_sim()

            self._init_safe_cluster_actions(robot_name=robot_name)

            Journal.log(self.__class__.__name__,
                "_setup",
                f"Will use joint impedance config at {self._jnt_imp_config_paths[robot_name]} for {robot_name}",
                LogType.STAT)

            self._jnt_imp_controllers[robot_name] = self._generate_jnt_imp_control(robot_name=robot_name)
            # wheels are velocity-controlled; their position refs may be
            # filtered and effort refs zeroed depending on env opts
            self._jnt_imp_controllers[robot_name].set_velocity_controlled_joints(
                name_patterns=self._env_opts["wheel_joint_patterns"],
                filter_pos_ref=self._env_opts["filter_wheel_pos_ref"],
                zero_eff_ref=self._env_opts["zero_wheel_eff_ref"])
            self._jnt_imp_cntrl_shared_data[robot_name] = JntImpCntrlData(is_server=True,
                n_envs=self._num_envs,
                n_jnts=len(self._robot_jnt_names(robot_name=robot_name)),
                jnt_names=self._robot_jnt_names(robot_name=robot_name),
                namespace=robot_name,
                verbose=self._verbose,
                force_reconnection=self._force_reconnection,
                vlevel=self._vlevel,
                use_gpu=self._use_gpu,
                safe=False)
            self._jnt_imp_cntrl_shared_data[robot_name].run()

            # optional first-order low-pass filter on measured joint velocities
            self._jnt_vel_filter[robot_name]=None
            if self._env_opts["filter_jnt_vel"]:
                self._jnt_vel_filter[robot_name]=FirstOrderFilter(dt=1.0/self._env_opts["filter_sampling_rate"],
                    filter_BW=self._env_opts["filter_cutoff_freq"],
                    rows=self._num_envs,
                    cols=len(self._robot_jnt_names(robot_name=robot_name)),
                    device=self._device,
                    dtype=self._dtype)

            # number of sim steps between filter updates (must be >= 1)
            physics_rate=1.0/self.physics_dt()
            self._filter_step_ssteps_freq=int(physics_rate/self._env_opts["filter_sampling_rate"])
            if self._filter_step_ssteps_freq <=0:
                Journal.log(self.__class__.__name__,
                    "_setup",
                    f"The filter_sampling_rate should be smaller that the physics rate ({physics_rate} Hz)",
                    LogType.EXCEP,
                    throw_when_excep=True)

            for n in range(self._n_init_steps): # run some initialization steps
                if hasattr(self, "_alter_twist_warmup"):
                    self._alter_twist_warmup(robot_name=robot_name, env_indxs=None)
                self._step_world()

            self._read_jnts_state_from_robot(robot_name=robot_name,
                env_indxs=None)
            self._read_root_state_from_robot(robot_name=robot_name,
                env_indxs=None)
            # allow child to perform additional warmup validations (e.g., terrain/tilt)
            # retry_done = False
            if hasattr(self, "_post_warmup_validation"):
                failing = self._post_warmup_validation(robot_name=robot_name)
                if failing is not None and failing.numel() > 0:
                    # retry: reset only failing envs, rerun warmup, revalidate once
                    failing = failing.to(self._device)
                    Journal.log(self.__class__.__name__,
                        "_setup",
                        f"Warmup validation failed for {robot_name}, envs indexes {failing.tolist()}",
                        LogType.EXCEP,
                        throw_when_excep=True)
                else:
                    Journal.log(self.__class__.__name__,
                        "_setup",
                        f"Warmup validation passed for {robot_name}",
                        LogType.INFO)

            # write some inits for all robots
            # self._update_root_offsets(robot_name)
            self._synch_default_root_states(robot_name=robot_name)
            epsi=0.03 # adding a bit of height to avoid initial penetration
            self._root_p_default[robot_name][:, 2]=self._root_p_default[robot_name][:, 2]+epsi

            reset_ok=self._reset(env_indxs=None,
                robot_name=robot_name,
                reset_cluster=True,
                reset_cluster_counter=False,
                randomize=True,
                acquire_offsets=True) # resets everything, updates the cluster with fresh reset states
            # and acquire offsets
            if not reset_ok:
                return False

            # cluster setup here
            control_cluster=self.cluster_servers[robot_name]

            control_cluster.pre_trigger()
            to_be_activated=control_cluster.get_inactive_controllers()
            if to_be_activated is not None:
                control_cluster.activate_controllers(
                    idxs=to_be_activated)

            if self._env_opts["run_cluster_bootstrap"]:
                cluster_setup_ok=self._setup_mpc_cluster(robot_name)
                if not cluster_setup_ok:
                    return False
            self._set_cluster_actions(robot_name=robot_name) # write last cmds
            self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot

            if self._use_remote_stepping[i]:
                step_wait_ok = self._wait_for_remote_step_req(robot_name=robot_name)
                if not step_wait_ok:
                    return False

            self._set_startup_jnt_imp_gains(robot_name=robot_name) # set gains to
            # startup config (usually lower)

            control_cluster.pre_trigger()
            control_cluster.trigger_solution(bootstrap=False) # trigger first solution (in real-time iteration) before first call to step to ensure that first solution is ready when step is called the first time

        if self._env_opts["add_remote_exit_flag"]:
            # NOTE(review): `dtype` here is presumably the shared-memory dtype
            # enum imported at module level (shadowing the `dtype` ctor arg) —
            # confirm against the file's imports
            self._remote_exit_flag=SharedTWrapper(namespace = self._robot_names[0],# use first robot as name
                basename = "IbridoRemoteEnvExitFlag",
                is_server = True,
                n_rows = 1,
                n_cols = 1,
                verbose = True,
                vlevel = self._vlevel,
                safe = False,
                dtype=dtype.Bool,
                force_reconnection=True,
                fill_value = False)
            self._remote_exit_flag.run()

        self._setup_done=True

        return self._setup_done
+
+ def _setup_mpc_cluster(self, robot_name: str):
+
+ control_cluster = self.cluster_servers[robot_name]
+
+ # self._set_state_to_cluster(robot_name=robot_name)
+ rhc_state = control_cluster.get_state()
+ root_twist=rhc_state.root_state.get(data_type="twist", robot_idxs = None, gpu=self._use_gpu)
+ jnt_v=rhc_state.jnts_state.get(data_type="v", robot_idxs = None, gpu=self._use_gpu)
+ root_twist[:, :]=0 # override meas state to make sure MPC bootstrap uses zero velocity
+ jnt_v[:, :]=0
+
+ control_cluster.write_robot_state()
+
+ # trigger bootstrap solution (solvers will run up to convergence)
+ control_cluster.trigger_solution(bootstrap=True) # this will trigger the bootstrap solver with the initial state,
+ # which will run until convergence before returning
+ wait_ok=control_cluster.wait_for_solution() # blocking
+ if not wait_ok:
+ return False
+ failed = control_cluster.get_failed_controllers(gpu=self._use_gpu)
+ if failed is not None:
+ failed_idxs = torch.nonzero(failed).squeeze(-1)
+ if failed_idxs.numel() > 0:
+ Journal.log(self.__class__.__name__,
+ "_setup",
+ f"Bootstrap solution failed for {robot_name} | n_failed: {failed_idxs.numel()}, idxs: {failed_idxs.cpu().tolist()}",
+ LogType.EXCEP,
+ throw_when_excep=False)
+ return False
+
+ return True
+
    def step(self) -> bool:
        """Advance the environment by one simulation step.

        First polls the optional remote exit flag (closing everything if an
        exit was requested), then runs the cluster pre-step logic, steps the
        world and performs post-step bookkeeping. The debug path additionally
        times the world step.

        Returns:
            bool: True if a full step was performed; False if the env is
            closed/not running or a pre-step stage failed.
        """

        success=False

        if self._remote_exit_flag is not None:
            # check for exit request
            self._remote_exit_flag.synch_all(read=True, retry = False)
            self._exit_request=self._exit_request or \
                bool(self._remote_exit_flag.get_numpy_mirror()[0, 0].item())

        # covers both SIGINT and the remote flag
        if self._exit_request:
            self.close()

        if self.is_running() and (not self.is_closed()):
            if self._debug:
                pre_step_ok=self._pre_step_db()
                if not pre_step_ok:
                    return False
                self._env_timer=time.perf_counter()
                self._step_world()
                self.debug_data["time_to_step_world"] = \
                    time.perf_counter() - self._env_timer
                self._post_world_step_db()
                success=True
            else:
                pre_step_ok=self._pre_step()
                if not pre_step_ok:
                    return False
                self._step_world()
                self._post_world_step()
                success=True

        return success
+
    def render(self, mode:str="human") -> None:
        """Render the simulation (delegates to the child-implemented _render_sim)."""
        self._render_sim(mode)
+
    def reset(self,
            env_indxs: torch.Tensor = None,
            reset_cluster: bool = False,
            reset_cluster_counter = False,
            randomize: bool = False,
            reset_sim: bool = False) -> bool:
        """Reset all robots and, optionally, the controllers and the simulator.

        Args:
            env_indxs: env indexes to reset (None -> all envs).
            reset_cluster: also reset the remote controller cluster.
            reset_cluster_counter: zero the per-robot sim step counter.
            randomize: randomize reset states (e.g. yaw).
            reset_sim: additionally reset the simulator itself afterwards.

        Returns:
            bool: True on success, False if any per-robot reset failed
            (annotation corrected: this method does return a bool).
        """

        for i in range(len(self._robot_names)):
            robot_name=self._robot_names[i]
            reset_ok=self._reset(robot_name=robot_name,
                env_indxs=env_indxs,
                randomize=randomize,
                reset_cluster=reset_cluster,
                reset_cluster_counter=reset_cluster_counter)
            if not reset_ok:
                return False
            # restore startup (usually softer) impedance gains on reset envs
            self._set_startup_jnt_imp_gains(robot_name=robot_name,
                env_indxs=env_indxs)

        if reset_sim:
            self._reset_sim()

        return True
+
    def _reset_cluster(self,
            robot_name: str,
            env_indxs: torch.Tensor = None,
            reset_cluster_counter: bool = False) -> bool:
        """Reset the remote controllers for one robot and push fresh state.

        Order matters: controllers are reset first, then the current
        (already reset) measured state is written to shared memory so the
        controllers restart from it.

        Returns:
            bool: True on success, False if the controller reset failed.
        """

        control_cluster = self.cluster_servers[robot_name]

        reset_ok=control_cluster.reset_controllers(idxs=env_indxs)
        if not reset_ok:
            return False

        self._set_state_to_cluster(robot_name=robot_name,
            env_indxs=env_indxs)
        control_cluster.write_robot_state() # writes to shared memory

        if reset_cluster_counter:
            self.cluster_sim_step_counters[robot_name] = 0

        return True
+
    def _step_jnt_vel_filter(self,
            robot_name: str,
            env_indxs: torch.Tensor = None):
        """Feed the latest measured joint velocities into the low-pass filter.

        Callers must guarantee the filter exists for this robot
        (self._jnt_vel_filter[robot_name] is not None) before calling.
        """
        self._jnt_vel_filter[robot_name].update(refk=self.jnts_v(robot_name=robot_name, env_idxs=env_indxs),
            idxs=env_indxs)
+
    def _set_state_to_cluster(self,
            robot_name: str,
            env_indxs: torch.Tensor = None,
            base_loc: bool = True):
        """Copy the latest measured robot state into the cluster state buffers.

        Writes root pose (position relative to the acquired offsets),
        twist/accelerations (base-local when base_loc is True), normalized
        gravity, joint state (velocities optionally low-pass filtered),
        the optional height map and the contact wrenches. Does NOT write
        to shared memory itself — callers follow up with
        control_cluster.write_robot_state().
        """

        if self._debug:
            if not isinstance(env_indxs, (torch.Tensor, type(None))):
                msg = "Provided env_indxs should be a torch tensor of indexes!"
                # NOTE(review): self.journal is not initialized anywhere in view —
                # this debug-only raise may itself AttributeError; confirm
                raise Exception(f"[{self.__class__.__name__}]" + f"[{self.journal.exception}]: " + msg)

        control_cluster = self.cluster_servers[robot_name]
        # floating base
        rhc_state = control_cluster.get_state()
        # configuration
        rhc_state.root_state.set(data=self.root_p_rel(robot_name=robot_name, env_idxs=env_indxs),
            data_type="p", robot_idxs = env_indxs, gpu=self._use_gpu)
        rhc_state.root_state.set(data=self.root_q(robot_name=robot_name, env_idxs=env_indxs),
            data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu)
        # rhc_state.root_state.set(data=self.root_q_yaw_rel(robot_name=robot_name, env_idxs=env_indxs),
        #     data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu)
        # twist
        rhc_state.root_state.set(data=self.root_v(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc),
            data_type="v", robot_idxs = env_indxs, gpu=self._use_gpu)
        rhc_state.root_state.set(data=self.root_omega(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc),
            data_type="omega", robot_idxs = env_indxs, gpu=self._use_gpu)

        # angular accc.
        rhc_state.root_state.set(data=self.root_a(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc),
            data_type="a", robot_idxs = env_indxs, gpu=self._use_gpu)
        rhc_state.root_state.set(data=self.root_alpha(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc),
            data_type="alpha", robot_idxs = env_indxs, gpu=self._use_gpu)

        # gravity vec
        rhc_state.root_state.set(data=self.gravity(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc),
            data_type="gn", robot_idxs = env_indxs, gpu=self._use_gpu)

        # joints
        rhc_state.jnts_state.set(data=self.jnts_q(robot_name=robot_name, env_idxs=env_indxs),
            data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu)

        v_jnts=self.jnts_v(robot_name=robot_name, env_idxs=env_indxs)
        if self._jnt_vel_filter[robot_name] is not None: # apply filtering
            v_jnts=self._jnt_vel_filter[robot_name].get(idxs=env_indxs)
        rhc_state.jnts_state.set(data=v_jnts,
            data_type="v", robot_idxs = env_indxs, gpu=self._use_gpu)
        rhc_state.jnts_state.set(data=self.jnts_eff(robot_name=robot_name, env_idxs=env_indxs),
            data_type="eff", robot_idxs = env_indxs, gpu=self._use_gpu)

        # height map
        if self._enable_height_shared:
            # NOTE(review): self._height_imgs is not initialized in the code in
            # view — presumably populated by the child class; verify
            hdata = self._height_imgs[robot_name]
            if env_indxs is not None:
                hdata = hdata[env_indxs]
            flat = hdata.reshape(hdata.shape[0], -1)
            rhc_state.height_sensor.set(data=flat, data_type=None, robot_idxs=env_indxs, gpu=self._use_gpu)

        # Updating contact state for selected contact links
        self._update_contact_state(robot_name=robot_name, env_indxs=env_indxs)
+
+ def _update_contact_state(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+ for i in range(0, self.cluster_servers[robot_name].n_contact_sensors()):
+ contact_link = self.cluster_servers[robot_name].contact_linknames()[i]
+ f_contact = self._get_contact_f(robot_name=robot_name,
+ contact_link=contact_link,
+ env_indxs=env_indxs)
+ if f_contact is not None:
+ self.cluster_servers[robot_name].get_state().contact_wrenches.set(data=f_contact, data_type="f",
+ contact_name=contact_link,
+ robot_idxs = env_indxs,
+ gpu=self._use_gpu)
+
    def _init_safe_cluster_actions(self,
            robot_name: str):
        """Fill the cluster action buffers with safe defaults.

        Joint position targets are set to the homing configuration, and
        velocity/effort targets to zero, so the simulator has sensible
        commands before the controllers publish their first solution.
        """

        # this does not actually write on shared memory,
        # but it's enough to get safe actions for the simulator before the
        # cluster starts to receive data from the controllers
        control_cluster = self.cluster_servers[robot_name]
        rhc_cmds = control_cluster.get_actions()
        n_jnts = rhc_cmds.n_jnts()

        null_action = torch.zeros((self._num_envs, n_jnts),
            dtype=self._dtype,
            device=self._device)
        rhc_cmds.jnts_state.set(data=self._homing, data_type="q", gpu=self._use_gpu)
        rhc_cmds.jnts_state.set(data=null_action, data_type="v", gpu=self._use_gpu)
        rhc_cmds.jnts_state.set(data=null_action, data_type="eff", gpu=self._use_gpu)
+
    def _pre_step_db(self) -> bool:
        """Cluster pre-step logic, debug variant (with timing instrumentation).

        Per robot: optionally runs the overriding low-level impedance update
        at sim rate, steps the joint-velocity filter at its own rate and, on
        cluster instants, waits for the last MPC solution, applies it,
        pushes fresh measured state to the cluster, handles failures
        (remote handshake or local reset) and triggers the next solution.

        Returns:
            bool: True on success, False if waiting for a solution or a
            reset/handshake failed (annotation corrected: returns bool).
        """

        # cluster step logic here
        for i in range(len(self._robot_names)):
            robot_name = self._robot_names[i]

            if self._override_low_lev_controller:
                # if overriding low-lev jnt imp. this has to run at the highest
                # freq possible
                start=time.perf_counter()
                self._read_jnts_state_from_robot(robot_name=robot_name)
                self.debug_data["time_to_get_states_from_env"]= time.perf_counter()-start

                self._write_state_to_jnt_imp(robot_name=robot_name)
                self._apply_cmds_to_jnt_imp_control(robot_name=robot_name)

            if self._jnt_vel_filter[robot_name] is not None and \
                (self.cluster_sim_step_counters[robot_name]+1) % self._filter_step_ssteps_freq == 0:
                # filter joint vel at a fixed frequency wrt sim steps
                if not self._override_low_lev_controller:
                    # we need a fresh sensor reading
                    self._read_jnts_state_from_robot(robot_name=robot_name)
                self._step_jnt_vel_filter(robot_name=robot_name, env_indxs=None)

            control_cluster = self.cluster_servers[robot_name]
            if control_cluster.is_cluster_instant(self.cluster_sim_step_counters[robot_name]):
                wait_ok=control_cluster.wait_for_solution() # this is blocking
                if not wait_ok:
                    return False
                failed = control_cluster.get_failed_controllers(gpu=self._use_gpu)
                self._set_cluster_actions(robot_name=robot_name) # write last cmds to low level control
                if not self._override_low_lev_controller:
                    self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot
                    # we can update the jnt state just at the rate at which the cluster needs it
                    start=time.perf_counter()
                    self._read_jnts_state_from_robot(robot_name=robot_name, env_indxs=None)
                else:
                    # read state necessary for cluster
                    start=time.perf_counter()
                self._read_root_state_from_robot(robot_name=robot_name,
                    env_indxs=None)
                self.debug_data["time_to_get_states_from_env"]= time.perf_counter()-start
                start=time.perf_counter()
                self._set_state_to_cluster(robot_name=robot_name,
                    env_indxs=None)
                control_cluster.write_robot_state()
                self.debug_data["cluster_state_update_dt"][robot_name] = time.perf_counter()-start

                self._update_jnt_imp_cntrl_shared_data() # only if debug_mode_jnt_imp is enabled

                if self._use_remote_stepping[i]:
                    self._remote_steppers[robot_name].ack() # signal cluster stepping is finished
                    if failed is not None and self._env_opts["deact_when_failure"]: # deactivate robot completely
                        self._deactivate(env_indxs=failed,
                            robot_name=robot_name)
                    wait_reset_ok=self._process_remote_reset_req(robot_name=robot_name) # wait for remote reset request (blocking)
                    wait_step_ok=self._wait_for_remote_step_req(robot_name=robot_name)
                    if not wait_reset_ok or not wait_step_ok:
                        return False
                else:
                    # local handling: reset failed controllers in place
                    if failed is not None:
                        reset_ok=self._reset(env_indxs=failed,
                            robot_name=robot_name,
                            reset_cluster=True,
                            reset_cluster_counter=False,
                            randomize=True)
                        if not reset_ok:
                            return False
                        self._set_startup_jnt_imp_gains(robot_name=robot_name,
                            env_indxs=failed)

                    control_cluster.activate_controllers(idxs=control_cluster.get_inactive_controllers())

                control_cluster.pre_trigger() # performs pre-trigger steps, like retrieving
                # values of some rhc flags on shared memory
                control_cluster.trigger_solution() # trigger only active controllers

        return True
+
    def _pre_step(self) -> bool:
        """Cluster pre-step logic (non-debug twin of _pre_step_db, no timing).

        Per robot: optionally runs the overriding low-level impedance update
        at sim rate, steps the joint-velocity filter at its own rate and, on
        cluster instants, waits for the last MPC solution, applies it,
        pushes fresh measured state to the cluster, handles failures
        (remote handshake or local reset) and triggers the next solution.

        Returns:
            bool: True on success, False if waiting for a solution or a
            reset/handshake failed (annotation corrected: returns bool).
        """

        # cluster step logic here
        for i in range(len(self._robot_names)):
            robot_name = self._robot_names[i]
            if self._override_low_lev_controller:
                # if overriding low-lev jnt imp. this has to run at the highest
                # freq possible
                self._read_jnts_state_from_robot(robot_name=robot_name)
                self._write_state_to_jnt_imp(robot_name=robot_name)
                self._apply_cmds_to_jnt_imp_control(robot_name=robot_name)

            if self._jnt_vel_filter[robot_name] is not None and \
                (self.cluster_sim_step_counters[robot_name]+1) % self._filter_step_ssteps_freq == 0:
                # filter joint vel at a fixed frequency wrt sim steps
                if not self._override_low_lev_controller:
                    # we need a fresh sensor reading
                    self._read_jnts_state_from_robot(robot_name=robot_name)
                self._step_jnt_vel_filter(robot_name=robot_name, env_indxs=None)

            control_cluster = self.cluster_servers[robot_name]
            if control_cluster.is_cluster_instant(self.cluster_sim_step_counters[robot_name]):
                wait_ok=control_cluster.wait_for_solution() # this is blocking
                if not wait_ok:
                    return False
                failed = control_cluster.get_failed_controllers(gpu=self._use_gpu)
                self._set_cluster_actions(robot_name=robot_name) # set last cmds to low level control
                if not self._override_low_lev_controller:
                    self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot
                    # we can update the jnt state just at the rate at which the cluster needs it
                    self._read_jnts_state_from_robot(robot_name=robot_name, env_indxs=None)
                # read state necessary for cluster
                self._read_root_state_from_robot(robot_name=robot_name,
                    env_indxs=None)
                # write last robot state to the cluster of controllers
                self._set_state_to_cluster(robot_name=robot_name,
                    env_indxs=None)
                control_cluster.write_robot_state() # write on shared mem

                if self._use_remote_stepping[i]:
                    self._remote_steppers[robot_name].ack() # signal cluster stepping is finished
                    if failed is not None and self._env_opts["deact_when_failure"]:
                        self._deactivate(env_indxs=failed,
                            robot_name=robot_name)
                    wait_reset_ok=self._process_remote_reset_req(robot_name=robot_name) # wait for remote reset request (blocking)
                    wait_step_ok=self._wait_for_remote_step_req(robot_name=robot_name)
                    if not wait_reset_ok or not wait_step_ok:
                        return False
                else:
                    # local handling: reset failed controllers in place
                    if failed is not None:
                        reset_ok=self._reset(env_indxs=failed,
                            robot_name=robot_name,
                            reset_cluster=True,
                            reset_cluster_counter=False,
                            randomize=True)
                        if not reset_ok:
                            return False
                        self._set_startup_jnt_imp_gains(robot_name=robot_name,
                            env_indxs=failed)
                    control_cluster.activate_controllers(idxs=control_cluster.get_inactive_controllers())

                control_cluster.pre_trigger() # performs pre-trigger steps, like retrieving
                # values of some rhc flags on shared memory
                control_cluster.trigger_solution() # trigger only active controllers

        return True
+
    def _post_world_step_db(self) -> None:
        """Per-sim-step bookkeeping, debug variant.

        Advances the per-robot sim step counters and the global step counter
        (once per step), and records sim time and the last cluster solution
        time when debugging is enabled. (Annotation corrected: nothing is
        returned.)
        """

        for i in range(len(self._robot_names)):
            robot_name = self._robot_names[i]
            control_cluster = self.cluster_servers[robot_name]
            self.cluster_sim_step_counters[robot_name]+=1 # this has to be update with sim freq
            if self._debug:
                self.debug_data["sim_time"][robot_name]=self.world_time(robot_name=robot_name)
                self.debug_data["cluster_sol_time"][robot_name] = \
                    control_cluster.solution_time()

        self.step_counter +=1
+
+ def _post_world_step(self) -> bool:
+
+ for i in range(len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ self.cluster_sim_step_counters[robot_name]+=1
+ self.step_counter +=1
+
    def _reset(self,
            robot_name: str,
            env_indxs: torch.Tensor = None,
            randomize: bool = False,
            reset_cluster: bool = False,
            reset_cluster_counter = False,
            acquire_offsets: bool = False) -> bool:
        """Reset one robot: sim state, impedance control, cached measurements.

        Args:
            robot_name: target robot namespace.
            env_indxs: env indexes to reset (None -> all envs).
            randomize: randomize the reset state.
            reset_cluster: also reset the remote controllers (state is pushed
                to them after the local reset).
            reset_cluster_counter: zero the per-robot sim step counter.
            acquire_offsets: re-acquire root pose offsets from the fresh state.

        Returns:
            bool: True on success, False if the cluster reset failed.
        """

        # resets the state of target robot and env to the defaults
        self._reset_state(env_indxs=env_indxs,
            robot_name=robot_name,
            randomize=randomize)

        # and jnt imp. controllers
        self._reset_jnt_imp_control(robot_name=robot_name,
            env_indxs=env_indxs)

        # read reset state
        self._read_root_state_from_robot(robot_name=robot_name,
            env_indxs=env_indxs)
        self._read_jnts_state_from_robot(robot_name=robot_name,
            env_indxs=env_indxs)

        if self._jnt_vel_filter[robot_name] is not None:
            self._jnt_vel_filter[robot_name].reset(idxs=env_indxs)

        if acquire_offsets:
            self._update_root_offsets(robot_name=robot_name,
                env_indxs=env_indxs)

        if reset_cluster: # reset controllers remotely
            reset_ok=self._reset_cluster(env_indxs=env_indxs,
                robot_name=robot_name,
                reset_cluster_counter=reset_cluster_counter)
            if not reset_ok:
                return False

        return True
+
+ def _randomize_yaw(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ root_q_default = self._root_q_default[robot_name]
+ if env_indxs is None:
+ env_indxs = torch.arange(root_q_default.shape[0])
+
+ num_indices = env_indxs.shape[0]
+ yaw_angles = torch.rand((num_indices,),
+ device=root_q_default.device) * 2 * torch.pi # uniformly distributed random angles
+
+ # Compute cos and sin once
+ cos_half = torch.cos(yaw_angles / 2)
+ root_q_default[env_indxs, :] = torch.stack((cos_half,
+ torch.zeros_like(cos_half),
+ torch.zeros_like(cos_half),
+ torch.sin(yaw_angles / 2)), dim=1).reshape(num_indices, 4)
+
    def _deactivate(self,
            robot_name: str,
            env_indxs: torch.Tensor = None):
        """Disable joint impedance control for the given envs.

        With the impedance controllers deactivated no commands are applied,
        so the robot falls; used when controllers fail.
        """

        # deactivate jnt imp controllers for given robots and envs (makes the robot fall)
        self._jnt_imp_controllers[robot_name].deactivate(robot_indxs=env_indxs)
+
    def _n_contacts(self, robot_name: str) -> List[int]:
        # NOTE(review): returns the single per-robot entry of the n_contacts
        # ctor argument, so the List[int] annotation looks off — confirm the
        # element type before changing it
        return self._num_contacts[robot_name]
+
+ def root_p(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None):
+
+ if env_idxs is None:
+ return self._root_p[robot_name]
+ else:
+ return self._root_p[robot_name][env_idxs, :]
+
+ def root_p_rel(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None):
+
+ if env_idxs is None:
+ rel_pos = torch.sub(self.root_p(robot_name=robot_name),
+ self._root_pos_offsets[robot_name])
+ else:
+ rel_pos = torch.sub(self.root_p(robot_name=robot_name,
+ env_idxs=env_idxs),
+ self._root_pos_offsets[robot_name][env_idxs, :])
+ return rel_pos
+
+ def root_q(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None):
+
+ if env_idxs is None:
+ return self._root_q[robot_name]
+ else:
+ return self._root_q[robot_name][env_idxs, :]
+
+ def root_q_rel(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None):
+
+ if env_idxs is None:
+ return quaternion_difference(self._root_q_offsets[robot_name],
+ self.root_q(robot_name=robot_name))
+ rel_q = quaternion_difference(self._root_q_offsets[robot_name][env_idxs, :],
+ self.root_q(robot_name=robot_name,
+ env_idxs=env_idxs))
+ return rel_q
+
+ def _quat_to_yaw_wxyz(self, q: torch.Tensor, out: torch.Tensor = None):
+ # Quaternion convention is w, x, y, z.
+ w, x, y, z = q[:, 0], q[:, 1], q[:, 2], q[:, 3]
+ num = 2.0 * (w * z + x * y)
+ den = 1.0 - 2.0 * (y * y + z * z)
+ if out is None:
+ return torch.atan2(num, den)
+ return torch.atan2(num, den, out=out)
+
+ def _yaw_to_quat_wxyz(self, yaw: torch.Tensor, like_q: torch.Tensor,
+ out: torch.Tensor = None):
+ q = out
+ if q is None:
+ q = torch.zeros((yaw.shape[0], 4), dtype=like_q.dtype, device=like_q.device)
+ else:
+ q.zero_()
+ q[:, 0] = torch.cos(yaw / 2.0)
+ q[:, 3] = torch.sin(yaw / 2.0)
+ return q
+
+ def _quat_conjugate_wxyz(self, q: torch.Tensor, out: torch.Tensor = None):
+ qi = out
+ if qi is None:
+ qi = torch.empty_like(q)
+ qi[:, :] = q
+ qi[:, 1:] = -qi[:, 1:]
+ return qi
+
+ def _quat_multiply_wxyz(self, q1: torch.Tensor, q2: torch.Tensor,
+ out: torch.Tensor = None):
+ q_out = out
+ if q_out is None:
+ q_out = torch.empty_like(q1)
+ w1, x1, y1, z1 = q1[:, 0], q1[:, 1], q1[:, 2], q1[:, 3]
+ w2, x2, y2, z2 = q2[:, 0], q2[:, 1], q2[:, 2], q2[:, 3]
+ q_out[:, 0] = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
+ q_out[:, 1] = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
+ q_out[:, 2] = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
+ q_out[:, 3] = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
+ return q_out
+
+ def _normalize_quat_wxyz(self, q: torch.Tensor, out: torch.Tensor = None):
+ q_norm = out
+ if q_norm is None:
+ q_norm = torch.empty_like(q)
+ q_norm[:, :] = q
+ q_norm /= torch.clamp(torch.norm(q_norm, dim=1, keepdim=True), min=1e-9)
+ return q_norm
+
    def root_q_yaw_rel(self,
            robot_name: str,
            env_idxs: torch.Tensor = None):
        """Root orientation with the startup yaw removed, pitch/roll preserved.

        Computes q_rel = q_yaw(yaw - yaw_start) * q_pr, where q_pr is the
        pitch/roll component of the current orientation. Quaternions follow
        the w, x, y, z convention.

        The env_idxs-is-None branch reuses the preallocated per-robot
        workspace dict (``out=`` everywhere, no fresh allocations); the
        indexed branch allocates intermediates.
        """
        # Return quaternion with startup yaw removed while preserving current pitch/roll.
        if env_idxs is None:
            ws = self._root_q_yaw_rel_ws[robot_name]
            q_abs = self._root_q[robot_name]
            yaw_start = self._root_q_offsets_yaw[robot_name]

            self._normalize_quat_wxyz(q=q_abs, out=ws["q_abs_unit"])
            self._quat_to_yaw_wxyz(q=ws["q_abs_unit"], out=ws["yaw_abs"])

            # yaw_rel = yaw_abs - yaw_start, then wrapped into (-pi, pi]
            # via atan2(sin, cos)
            torch.sub(ws["yaw_abs"], yaw_start, out=ws["yaw_rel"])
            torch.sin(ws["yaw_rel"], out=ws["yaw_sin"])
            torch.cos(ws["yaw_rel"], out=ws["yaw_cos"])
            torch.atan2(ws["yaw_sin"], ws["yaw_cos"], out=ws["yaw_rel"])

            # Build pure-yaw quaternions for:
            # 1) the current absolute heading and 2) the startup-relative heading.
            self._yaw_to_quat_wxyz(yaw=ws["yaw_abs"], like_q=ws["q_abs_unit"], out=ws["q_yaw_abs"])
            self._yaw_to_quat_wxyz(yaw=ws["yaw_rel"], like_q=ws["q_abs_unit"], out=ws["q_yaw_rel"])

            # Isolate pitch/roll by removing the absolute yaw from the current orientation.
            # For unit quaternions q_pr = q_yaw_abs^{-1} * q_abs.
            self._quat_conjugate_wxyz(q=ws["q_yaw_abs"], out=ws["q_yaw_abs_conj"])
            self._quat_multiply_wxyz(q1=ws["q_yaw_abs_conj"], q2=ws["q_abs_unit"], out=ws["q_pr"])
            # Recompose orientation with relative yaw + current pitch/roll.
            self._quat_multiply_wxyz(q1=ws["q_yaw_rel"], q2=ws["q_pr"], out=ws["q_rel"])

            return self._normalize_quat_wxyz(q=ws["q_rel"], out=ws["q_rel"])

        # Indexed branch: root_q with a tensor index returns a copy (advanced
        # indexing), so the in-place normalize below does not touch stored state.
        q_abs = self.root_q(robot_name=robot_name, env_idxs=env_idxs)
        q_abs = self._normalize_quat_wxyz(q=q_abs, out=q_abs)

        yaw_abs = self._quat_to_yaw_wxyz(q_abs)
        yaw_start = self._root_q_offsets_yaw[robot_name][env_idxs]
        yaw_rel = yaw_abs - yaw_start
        # wrap the yaw difference into (-pi, pi]
        yaw_rel = torch.atan2(torch.sin(yaw_rel), torch.cos(yaw_rel))

        q_yaw_abs = self._yaw_to_quat_wxyz(yaw_abs, like_q=q_abs)
        q_yaw_rel = self._yaw_to_quat_wxyz(yaw_rel, like_q=q_abs)
        q_pr = self._quat_multiply_wxyz(self._quat_conjugate_wxyz(q_yaw_abs), q_abs)
        q_rel = self._quat_multiply_wxyz(q_yaw_rel, q_pr)

        return self._normalize_quat_wxyz(q_rel)
+
+ def root_v(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None,
+ base_loc: bool = True):
+
+ root_v=self._root_v[robot_name]
+ if base_loc:
+ root_v=self._root_v_base_loc[robot_name]
+ if env_idxs is None:
+ return root_v
+ else:
+ return root_v[env_idxs, :]
+
+ def root_omega(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None,
+ base_loc: bool = True):
+
+ root_omega=self._root_omega[robot_name]
+ if base_loc:
+ root_omega=self._root_omega_base_loc[robot_name]
+ if env_idxs is None:
+ return root_omega
+ else:
+ return root_omega[env_idxs, :]
+
+ def root_a(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None,
+ base_loc: bool = True):
+
+ root_a=self._root_a[robot_name]
+ if base_loc:
+ root_a=self._root_a_base_loc[robot_name]
+ if env_idxs is None:
+ return root_a
+ else:
+ return root_a[env_idxs, :]
+
+ def root_alpha(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None,
+ base_loc: bool = True):
+
+ root_alpha=self._root_alpha[robot_name]
+ if base_loc:
+ root_alpha=self._root_alpha_base_loc[robot_name]
+ if env_idxs is None:
+ return root_alpha
+ else:
+ return root_alpha[env_idxs, :]
+
+ def gravity(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None,
+ base_loc: bool = True):
+
+ gravity_loc=self._gravity_normalized[robot_name]
+ if base_loc:
+ gravity_loc=self._gravity_normalized_base_loc[robot_name]
+ if env_idxs is None:
+ return gravity_loc
+ else:
+ return gravity_loc[env_idxs, :]
+
+ def jnts_q(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None):
+
+ if env_idxs is None:
+ return self._jnts_q[robot_name]
+ else:
+ return self._jnts_q[robot_name][env_idxs, :]
+
+ def jnts_v(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None):
+
+ if env_idxs is None:
+ return self._jnts_v[robot_name]
+ else:
+ return self._jnts_v[robot_name][env_idxs, :]
+
+ def jnts_eff(self,
+ robot_name: str,
+ env_idxs: torch.Tensor = None): # (measured) efforts
+
+ if env_idxs is None:
+ return self._jnts_eff[robot_name]
+ else:
+ return self._jnts_eff[robot_name][env_idxs, :]
+
+ def _wait_for_remote_step_req(self,
+ robot_name: str):
+ if not self._remote_steppers[robot_name].wait(self._timeout):
+ Journal.log(self.__class__.__name__,
+ "_wait_for_remote_step_req",
+ "Didn't receive any remote step req within timeout!",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ return False
+ return True
+
+ def _process_remote_reset_req(self,
+ robot_name: str):
+
+ if not self._remote_resetters[robot_name].wait(self._timeout):
+ Journal.log(self.__class__.__name__,
+ "_process_remote_reset_req",
+ "Didn't receive any remote reset req within timeout!",
+ LogType.EXCEP,
+ throw_when_excep = False)
+ return False
+
+ reset_requests = self._remote_reset_requests[robot_name]
+ reset_requests.synch_all(read=True, retry=True) # read reset requests from shared mem
+ to_be_reset = reset_requests.to_be_reset(gpu=self._use_gpu)
+ if to_be_reset is not None:
+ reset_ok=self._reset(env_indxs=to_be_reset,
+ robot_name=robot_name,
+ reset_cluster=True,
+ reset_cluster_counter=False,
+ randomize=True)
+ if not reset_ok:
+ return False
+ self._set_startup_jnt_imp_gains(robot_name=robot_name,
+ env_indxs=to_be_reset) # set gains to startup config (usually lower gains)
+
+ control_cluster = self.cluster_servers[robot_name]
+ control_cluster.activate_controllers(idxs=to_be_reset) # activate controllers
+ # (necessary if failed)
+
+ self._remote_resetters[robot_name].ack() # signal reset performed
+
+ return True
+
+ def _update_jnt_imp_cntrl_shared_data(self):
+ if self._debug:
+ for i in range(0, len(self._robot_names)):
+ robot_name = self._robot_names[i]
+ # updating all the jnt impedance data - > this may introduce some overhead
+ imp_data = self._jnt_imp_cntrl_shared_data[robot_name].imp_data_view
+ # set data
+ imp_data.set(data_type="pos_err",
+ data=self._jnt_imp_controllers[robot_name].pos_err(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="vel_err",
+ data=self._jnt_imp_controllers[robot_name].vel_err(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="pos_gains",
+ data=self._jnt_imp_controllers[robot_name].pos_gains(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="vel_gains",
+ data=self._jnt_imp_controllers[robot_name].vel_gains(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="eff_ff",
+ data=self._jnt_imp_controllers[robot_name].eff_ref(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="pos",
+ data=self._jnt_imp_controllers[robot_name].pos(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="pos_ref",
+ data=self._jnt_imp_controllers[robot_name].pos_ref(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="vel",
+ data=self._jnt_imp_controllers[robot_name].vel(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="vel_ref",
+ data=self._jnt_imp_controllers[robot_name].vel_ref(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="eff",
+ data=self._jnt_imp_controllers[robot_name].eff(),
+ gpu=self._use_gpu)
+ imp_data.set(data_type="imp_eff",
+ data=self._jnt_imp_controllers[robot_name].imp_eff(),
+ gpu=self._use_gpu)
+ # copy from GPU to CPU if using gpu
+ if self._use_gpu:
+ imp_data.synch_mirror(from_gpu=True,non_blocking=True)
+ # even if it's from GPU->CPu we can use non-blocking since it's just for db
+ # purposes
+ # write copies to shared memory
+ imp_data.synch_all(read=False, retry=False)
+
+ def _set_startup_jnt_imp_gains(self,
+ robot_name:str,
+ env_indxs: torch.Tensor = None):
+
+ startup_p_gains=self._jnt_imp_controllers[robot_name].startup_p_gains()
+ startup_d_gains=self._jnt_imp_controllers[robot_name].startup_d_gains()
+
+ if env_indxs is not None:
+ self._jnt_imp_controllers[robot_name].set_gains(robot_indxs=env_indxs,
+ pos_gains=startup_p_gains[env_indxs, :],
+ vel_gains=startup_d_gains[env_indxs, :])
+ else:
+ self._jnt_imp_controllers[robot_name].set_gains(robot_indxs=env_indxs,
+ pos_gains=startup_p_gains[:, :],
+ vel_gains=startup_d_gains[:, :])
+
+ def _write_state_to_jnt_imp(self,
+ robot_name: str):
+
+ # always update ,imp. controller internal state (jnt imp control is supposed to be
+ # always running)
+ self._jnt_imp_controllers[robot_name].update_state(pos=self.jnts_q(robot_name=robot_name),
+ vel = self.jnts_v(robot_name=robot_name),
+ eff = self.jnts_eff(robot_name=robot_name))
+
+ def _set_cluster_actions(self,
+ robot_name):
+ control_cluster = self.cluster_servers[robot_name]
+ actions=control_cluster.get_actions()
+ active_controllers=control_cluster.get_active_controllers(gpu=self._use_gpu)
+
+ if active_controllers is not None:
+ self._jnt_imp_controllers[robot_name].set_refs(
+ pos_ref=actions.jnts_state.get(data_type="q", gpu=self._use_gpu)[active_controllers, :],
+ vel_ref=actions.jnts_state.get(data_type="v", gpu=self._use_gpu)[active_controllers, :],
+ eff_ref=actions.jnts_state.get(data_type="eff", gpu=self._use_gpu)[active_controllers, :],
+ robot_indxs=active_controllers)
+
    def _jnt_imp_reset_overrride(self, robot_name: str):
        # Hook called at the end of a jnt imp controller reset; child classes
        # may override to inject extra reset logic. No-op by default.
        # NOTE(review): name carries a triple-r typo ("overrride"); kept as-is
        # since callers/overriders reference it by this exact name.
        pass
+
+ def _apply_cmds_to_jnt_imp_control(self, robot_name:str):
+
+ self._jnt_imp_controllers[robot_name].apply_cmds()
+
+ def _update_root_offsets(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ if self._debug:
+ for_robots = ""
+ if env_indxs is not None:
+ if not isinstance(env_indxs, torch.Tensor):
+ msg = "Provided env_indxs should be a torch tensor of indexes!"
+ Journal.log(self.__class__.__name__,
+ "update_root_offsets",
+ msg,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if self._use_gpu:
+ if not env_indxs.device.type == "cuda":
+ error = "Provided env_indxs should be on GPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ else:
+ if not env_indxs.device.type == "cpu":
+ error = "Provided env_indxs should be on CPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs.tolist())
+ if self._verbose:
+ Journal.log(self.__class__.__name__,
+ "update_root_offsets",
+ f"updating root offsets " + for_robots,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ # only planar position used
+ if env_indxs is None:
+ self._root_pos_offsets[robot_name][:, 0:2] = self._root_p[robot_name][:, 0:2]
+ self._normalize_quat_wxyz(q=self._root_q[robot_name], out=self._root_q_offsets[robot_name])
+ self._quat_to_yaw_wxyz(q=self._root_q_offsets[robot_name],
+ out=self._root_q_offsets_yaw[robot_name])
+
+ else:
+ self._root_pos_offsets[robot_name][env_indxs, 0:2] = self._root_p[robot_name][env_indxs, 0:2]
+ q_root_norm=self._normalize_quat_wxyz(self._root_q[robot_name][env_indxs, :])
+ self._root_q_offsets[robot_name][env_indxs, :] = q_root_norm
+ self._root_q_offsets_yaw[robot_name][env_indxs] = self._quat_to_yaw_wxyz(q=q_root_norm)
+
+ def _reset_jnt_imp_control(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ if self._debug:
+ for_robots = ""
+ if env_indxs is not None:
+ if not isinstance(env_indxs, torch.Tensor):
+ Journal.log(self.__class__.__name__,
+ "reset_jnt_imp_control",
+ "Provided env_indxs should be a torch tensor of indexes!",
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if self._use_gpu:
+ if not env_indxs.device.type == "cuda":
+ error = "Provided env_indxs should be on GPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ else:
+ if not env_indxs.device.type == "cpu":
+ error = "Provided env_indxs should be on CPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs)
+
+ if self._verbose:
+ Journal.log(self.__class__.__name__,
+ "reset_jnt_imp_control",
+ f"resetting joint impedances " + for_robots,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ # resets all internal data, refs to defaults
+ self._jnt_imp_controllers[robot_name].reset(robot_indxs=env_indxs)
+
+ #restore jnt imp refs to homing
+ if env_indxs is None:
+ self._jnt_imp_controllers[robot_name].set_refs(pos_ref=self._homing[:, :],
+ robot_indxs = None)
+ else:
+ self._jnt_imp_controllers[robot_name].set_refs(pos_ref=self._homing[env_indxs, :],
+ robot_indxs = env_indxs)
+
+ # self._write_state_to_jnt_imp(robot_name=robot_name)
+ # actually applies reset commands to the articulation
+ self._write_state_to_jnt_imp(robot_name=robot_name)
+ self._jnt_imp_reset_overrride(robot_name=robot_name)
+ self._apply_cmds_to_jnt_imp_control(robot_name=robot_name)
+
+ def _synch_default_root_states(self,
+ robot_name: str,
+ env_indxs: torch.Tensor = None):
+
+ if self._debug:
+ for_robots = ""
+ if env_indxs is not None:
+ if not isinstance(env_indxs, torch.Tensor):
+ msg = "Provided env_indxs should be a torch tensor of indexes!"
+ Journal.log(self.__class__.__name__,
+ "synch_default_root_states",
+ msg,
+ LogType.EXCEP,
+ throw_when_excep = True)
+ if self._use_gpu:
+ if not env_indxs.device.type == "cuda":
+ error = "Provided env_indxs should be on GPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ else:
+ if not env_indxs.device.type == "cpu":
+ error = "Provided env_indxs should be on CPU!"
+ Journal.log(self.__class__.__name__,
+ "_step_jnt_imp_control",
+ error,
+ LogType.EXCEP,
+ True)
+ for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs.tolist())
+ if self._verbose:
+ Journal.log(self.__class__.__name__,
+ "synch_default_root_states",
+ f"updating default root states " + for_robots,
+ LogType.STAT,
+ throw_when_excep = True)
+
+ if env_indxs is None:
+ self._root_p_default[robot_name][:, :] = self._root_p[robot_name]
+ self._root_q_default[robot_name][:, :] = self._root_q[robot_name]
+ else:
+ self._root_p_default[robot_name][env_indxs, :] = self._root_p[robot_name][env_indxs, :]
+ self._root_q_default[robot_name][env_indxs, :] = self._root_q[robot_name][env_indxs, :]
+
+ def _generate_rob_descriptions(self,
+ robot_name: str,
+ urdf_path: str,
+ srdf_path: str):
+
+ custom_xacro_args=extract_custom_xacro_args(self._env_opts)
+ Journal.log(self.__class__.__name__,
+ "_generate_rob_descriptions",
+ "generating URDF for robot "+ f"{robot_name}, from URDF {urdf_path}...",
+ LogType.STAT,
+ throw_when_excep = True)
+ xrdf_cmds=self._xrdf_cmds(robot_name=robot_name)
+ xrdf_cmds=merge_xacro_cmds(prev_cmds=xrdf_cmds,
+ new_cmds=custom_xacro_args)
+ self._urdf_dump_paths[robot_name]=generate_urdf(robot_name=robot_name,
+ xacro_path=urdf_path,
+ dump_path=self._descr_dump_path,
+ xrdf_cmds=xrdf_cmds)
+ Journal.log(self.__class__.__name__,
+ "_generate_rob_descriptions",
+ "generating SRDF for robot "+ f"{robot_name}, from SRDF {srdf_path}...",
+ LogType.STAT,
+ throw_when_excep = True)
+ # we also generate SRDF files, which are useful for control
+ self._srdf_dump_paths[robot_name]=generate_srdf(robot_name=robot_name,
+ xacro_path=srdf_path,
+ dump_path=self._descr_dump_path,
+ xrdf_cmds=xrdf_cmds)
+
+ def _xrdf_cmds(self, robot_name:str):
+ urdfpath=self._robot_urdf_paths[robot_name]
+ # we assume directory tree of the robot package is like
+ # robot-ros-pkg/robot_urdf/urdf/robot.urdf.xacro
+ parts = urdfpath.split('/')
+ urdf_descr_root_path = '/'.join(parts[:-2])
+ cmds = get_xrdf_cmds(urdf_descr_root_path=urdf_descr_root_path)
+ return cmds
+
    @abstractmethod
    def current_tstep(self) -> int:
        # Abstract: current control/sim step index; must be provided by child.
        pass
+
    @abstractmethod
    def world_time(self, robot_name: str) -> float:
        # Default: elapsed sim time as step counter * physics dt. Although
        # marked abstract, it carries a body — children may override or reuse
        # it via super().
        return self.cluster_sim_step_counters[robot_name]*self.physics_dt()
+
    @abstractmethod
    def is_running(self) -> bool:
        # Abstract: whether the simulation/control loop is currently running.
        pass
+
    @abstractmethod
    def _get_contact_f(self,
            robot_name: str,
            contact_link: str,
            env_indxs: torch.Tensor) -> torch.Tensor:
        # Abstract: contact force for the given link on the given envs.
        # Default body returns None, i.e. "no contact data available".
        return None
+
    @abstractmethod
    def physics_dt(self) -> float:
        # Abstract: physics integration step size [s].
        pass
+
    @abstractmethod
    def rendering_dt(self) -> float:
        # Abstract: rendering step size [s].
        pass
+
    @abstractmethod
    def set_physics_dt(self, physics_dt: float):
        # Abstract: set the physics integration step size [s].
        pass
+
    @abstractmethod
    def set_rendering_dt(self, rendering_dt: float):
        # Abstract: set the rendering step size [s].
        pass
+
    @abstractmethod
    def _robot_jnt_names(self, robot_name: str) -> List[str]:
        # Abstract: ordered joint names for the given robot.
        pass
+
    @abstractmethod
    def _read_root_state_from_robot(self,
            robot_name: str,
            env_indxs: torch.Tensor = None):
        # Abstract: refresh the cached root state from the robot/simulator.
        # IMPORTANT: Child interfaces should provide root quaternions in w, x, y, z convention.
        pass
+
    @abstractmethod
    def _read_jnts_state_from_robot(self,
            robot_name: str,
            env_indxs: torch.Tensor = None):
        # Abstract: refresh the cached joint state from the robot/simulator.
        pass
+
    @abstractmethod
    def _init_robots_state(self):
        # Abstract hook: initialization of the robots' state (child-defined).
        pass
+
    @abstractmethod
    def _reset_state(self,
            robot_name: str,
            env_indxs: torch.Tensor = None,
            randomize: bool = False):
        # Abstract: reset robot state on the given envs, optionally randomizing.
        pass
+
    @abstractmethod
    def _init_world(self):
        # Abstract hook: world/scene initialization (child-defined).
        pass
+
    @abstractmethod
    def _reset_sim(self) -> None:
        # Abstract hook: full simulator reset (child-defined).
        pass
+
    @abstractmethod
    def _set_jnts_to_homing(self, robot_name: str):
        # Abstract: drive joints back to the homing configuration.
        pass
+
    @abstractmethod
    def _set_root_to_defconfig(self, robot_name: str):
        # Abstract: restore the root to its default configuration.
        pass
+
    @abstractmethod
    def _parse_env_opts(self):
        # Abstract hook: parsing of environment options (child-defined).
        pass
+
    @abstractmethod
    def _pre_setup(self):
        # Abstract hook: work to be done before setup (child-defined).
        pass
+
    @abstractmethod
    def _generate_jnt_imp_control(self) -> JntImpCntrlChild:
        # Abstract: construct the joint impedance controller for this env.
        pass
+
    @abstractmethod
    def _render_sim(self, mode: str = "human") -> None:
        # Abstract: render the simulation (gym-like "mode" argument).
        pass
+
    @abstractmethod
    def _close(self) -> None:
        # Abstract hook: teardown/cleanup (child-defined).
        pass
+
    @abstractmethod
    def _step_world(self) -> None:
        # Abstract: advance the simulation world by one step.
        pass
diff --git a/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/xbot2_basic.yaml b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/xbot2_basic.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ade37f40e9eeccec3b1c6dea4e50b2bf84caec7b
--- /dev/null
+++ b/bundles/b2w/d2026_03_28_h11_m11_s07-B2WPartialCloopWheels_FakePosTrackingEnv/xbot2_basic.yaml
@@ -0,0 +1,48 @@
+XBotInterface:
+ urdf_path: $PWD/kyon.urdf
+ srdf_path: $PWD/kyon.srdf
+
+ModelInterface:
+ model_type: RBDL
+ is_model_floating_base: true
+
+# defaults
+motor_pd:
+ hip_roll*: [500, 10]
+ hip_pitch*: [500, 10]
+ knee_pitch*: [500, 10]
+ wheel_joint*: [0, 30]
+
+# startup pd config
+startup_motor_pd:
+ hip_roll*: [200, 30]
+ hip_pitch*: [200, 30]
+ knee_pitch*: [200, 30]
+ wheel_joint*: [0, 30]
+
+# hal
+xbotcore_device_configs:
+ sim: $PWD/hal/kyon_gz.yaml
+ dummy: $PWD/hal/kyon_dummy.yaml
+
+# threads
+xbotcore_threads:
+ rt_main: {sched: fifo , prio: 60, period: 0.001}
+ nrt_main: {sched: other, prio: 0 , period: 0.005}
+
+# plugins
+xbotcore_plugins:
+
+ homing:
+ thread: rt_main
+ type: homing
+
+ ros_io: {thread: nrt_main, type: ros_io}
+
+ ros_control: {thread: nrt_main, type: ros_control, params: {autostart: {type: bool, value: true}}}
+
+# global parameters
+xbotcore_param:
+ /xbot/hal/joint_safety/filter_autostart: {value: true, type: bool}
+ /xbot/hal/joint_safety/filter_cutoff_hz: {value: 2.0, type: double}
+ /xbot/hal/enable_safety: {value: false, type: bool}
\ No newline at end of file
diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79/metadata.yaml b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1f016635e773c4d6e9f6796aaee342efefde1841
--- /dev/null
+++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79/metadata.yaml
@@ -0,0 +1,74 @@
+rosbag2_bagfile_information:
+ version: 5
+ storage_identifier: sqlite3
+ duration:
+ nanoseconds: 89899902344
+ starting_time:
+ nanoseconds_since_epoch: 49804687
+ message_count: 116015
+ topics_with_message_count:
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_robot_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14501
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_rhc_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14501
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_rhc_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14502
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_rhc_contacts
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14502
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_rhc_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14502
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_hl_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14502
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_robot_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14502
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_heightmap
+ type: visualization_msgs/msg/Marker
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 14502
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_HandShake
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 1
+ compression_format: zstd
+ compression_mode: FILE
+ relative_file_paths:
+ - rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79_0.db3.zstd
+ files:
+ - path: rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79_0.db3
+ starting_time:
+ nanoseconds_since_epoch: 49804687
+ duration:
+ nanoseconds: 89899902344
+ message_count: 116015
\ No newline at end of file
diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79_0.db3.zstd b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79_0.db3.zstd
new file mode 100644
index 0000000000000000000000000000000000000000..a96f1e57621af4c692f8adf95c2b0fee1881d283
--- /dev/null
+++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79/rosbag_centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID_2026-01-20_09-24-36_79_0.db3.zstd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:035860e2f2276589edddabd0c2112bc5058cf078fb93c939352f6106dde04369
+size 5205351
diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/metadata.yaml b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c1eb44b52ed33927dacdeab7262228b997fc20b4
--- /dev/null
+++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/metadata.yaml
@@ -0,0 +1,68 @@
+rosbag2_bagfile_information:
+ version: 5
+ storage_identifier: sqlite3
+ duration:
+ nanoseconds: 89960205078
+ starting_time:
+ nanoseconds_since_epoch: 40039062
+ message_count: 114220
+ topics_with_message_count:
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_robot_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_robot_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_rhc_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_rhc_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_rhc_contacts
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_rhc_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_hl_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 16317
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_HandShake
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 1
+ compression_format: zstd
+ compression_mode: FILE
+ relative_file_paths:
+ - rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3.zstd
+ files:
+ - path: rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3
+ starting_time:
+ nanoseconds_since_epoch: 40039062
+ duration:
+ nanoseconds: 89960205078
+ message_count: 114220
\ No newline at end of file
diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3 b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3
new file mode 100644
index 0000000000000000000000000000000000000000..da0649b6341767e8ea8fb31dcddb4c61f8370856
--- /dev/null
+++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15d925353f9f409466fee90dbd502eb6f19f089a2f1035a53b168b0786c641e4
+size 39010304
diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3.zstd b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3.zstd
new file mode 100644
index 0000000000000000000000000000000000000000..385acbea8c65090a30584ea7841ff7ce9ac7463e
--- /dev/null
+++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5/rosbag_centauro_big_wheels_ub_2026_02_21_13_59_20_ID_2026-02-22_10-51-13_5_0.db3.zstd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9052d81fd0fa7a91866b7860bd22c2f3876a6c75adc93dff5fb0f87374c9bf65
+size 2238099
diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4/metadata.yaml b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f1d7119b777a73c216216399651751589d831f31
--- /dev/null
+++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4/metadata.yaml
@@ -0,0 +1,68 @@
+rosbag2_bagfile_information:
+ version: 5
+ storage_identifier: sqlite3
+ duration:
+ nanoseconds: 89959960938
+ starting_time:
+ nanoseconds_since_epoch: 40039062
+ message_count: 120835
+ topics_with_message_count:
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_robot_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_robot_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_rhc_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_rhc_q
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_rhc_contacts
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_rhc_actuated_jointnames
+ type: std_msgs/msg/String
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_hl_refs
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 17262
+ - topic_metadata:
+ name: /MPCViz_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_HandShake
+ type: std_msgs/msg/Float64MultiArray
+ serialization_format: cdr
+ offered_qos_profiles: "- history: 3\n depth: 0\n reliability: 1\n durability: 1\n deadline:\n sec: 9223372036\n nsec: 854775807\n lifespan:\n sec: 9223372036\n nsec: 854775807\n liveliness: 1\n liveliness_lease_duration:\n sec: 9223372036\n nsec: 854775807\n avoid_ros_namespace_conventions: false"
+ message_count: 1
+ compression_format: zstd
+ compression_mode: FILE
+ relative_file_paths:
+ - rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4_0.db3.zstd
+ files:
+ - path: rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4_0.db3
+ starting_time:
+ nanoseconds_since_epoch: 40039062
+ duration:
+ nanoseconds: 89959960938
+ message_count: 120835
\ No newline at end of file
diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4_0.db3.zstd b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4_0.db3.zstd
new file mode 100644
index 0000000000000000000000000000000000000000..d277e22eee06530a112d409061f64d97d6e9217c
--- /dev/null
+++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4/rosbag_centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID_2026-03-08_10-22-32_4_0.db3.zstd
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aebb896e85c9a107c8e248d5dcde3ff446baeeb7324380315d15299dc54c067e
+size 2349264