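"""Generate a partially synthetic robomimic Lift dataset.

The last SYNTHETIC_DATA_RATIO fraction of the 200 proficient-human (PH) demos
is re-rendered with a learned world model (GenieSimulator) driven by the
original action sequences; the rest are copied verbatim. Everything is then
written back out in robomimic's hdf5 format.
"""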

import json
import os

import cv2
import h5py
import numpy as np
import tqdm

from diffusion_policy.util.pytorch_util import dict_apply
from sim.policy import ReplayPolicy
from sim.simulator import GenieSimulator, ReplaySimulator

SYNTHETIC_DATA_RATIO = 0.25  # fraction of the 200 demos to replace with synthetic rollouts
DP_RES = 84                  # observation resolution expected by the diffusion policy (84x84)


def load_demo(demo_idx: int):
    """Load one real demo (frames + actions) from the source robomimic dataset."""
    with h5py.File('data/robomimic/datasets/lift/ph/image.hdf5', 'r') as f:
        demo = f['data'][f'demo_{demo_idx}']
        actions = demo['actions'][:].astype(np.float32)
        frames = demo['obs']['agentview_image'][:].astype(np.uint8)
        assert len(actions) == len(frames)
        return frames, actions


def main():
    demos = dict()
    demos_len = []
    # Keep the first (1 - SYNTHETIC_DATA_RATIO) of the 200 demos as real data;
    # the remainder is regenerated with the learned simulator.
    start_idx = int(200 * (1 - SYNTHETIC_DATA_RATIO))
    end_idx = 200
    print(f"Generating {end_idx - start_idx} synthetic demos")

    # copy actual data
    for demo_idx in tqdm.tqdm(range(start_idx)):
        frames, actions = load_demo(demo_idx)
        assert frames.shape[-3:] == (DP_RES, DP_RES, 3), frames.shape
        demos[f"demo_{demo_idx}"] = {
            "obs": {
                "agentview_image": frames
            },
            "actions": actions
        }
        demos_len.append(len(actions))
    print(f"Loaded {len(demos)} actual demos")
    # learned world model: temporal-VAE image encoder + STMAR dynamics backbone,
    # prompted with 11 frames/actions and stepped one action at a time
    genie_simulator = GenieSimulator(
        image_encoder_type='temporalvae',
        image_encoder_ckpt='stabilityai/stable-video-diffusion-img2vid',
        quantize=False,
        backbone_type="stmar",
        backbone_ckpt="data/mar_ckpt/robomimic_best",
        prompt_horizon=11,
        action_stride=1,
        domain='robomimic',
    )
    # generate synthetic data (training set only): replay each held-out demo's
    # actions through the learned simulator and record its predicted frames
    for demo_idx in tqdm.tqdm(range(start_idx, end_idx)):
        frames, actions = load_demo(demo_idx)
        replay_policy = ReplayPolicy(actions, 1, 11)
        replay_simulator = ReplaySimulator(frames, 11)
        assert len(replay_policy) == len(replay_simulator), \
            (len(replay_policy), len(replay_simulator))

        # prompt genie with the beginning of the real demo
        genie_simulator.set_initial_state((
            replay_simulator.prompt(),
            replay_policy.prompt()
        ))
        image = genie_simulator.reset()

        this_demo = {
            "obs": {
                "agentview_image": []
            },
            "actions": []
        }
        for _ in range(len(replay_policy)):
            action = replay_policy.generate_action(None)
            this_demo['obs']['agentview_image'].append(cv2.resize(image, (DP_RES, DP_RES)))
            this_demo['actions'].append(action[0])
            image = genie_simulator.step(action)['pred_next_frame']
        this_demo = dict_apply(this_demo, lambda x: np.array(x))
        demos[f"demo_{demo_idx}"] = this_demo
        demos_len.append(len(this_demo['actions']))

    # save everything in robomimic's hdf5 format
    os.makedirs('data/robomimic_synthetic', exist_ok=True)
    with h5py.File(f'data/robomimic_synthetic/robomimic_synthetic{end_idx - start_idx}.hdf5', 'w') as f:
"""
saving format:
data (group)
total (attribute) - number of state-action samples in the dataset
env_args (attribute) - a json string that contains metadata on the environment and relevant arguments used for collecting data. Three keys: env_name, the name of the environment or task to create, env_type, one of robomimic’s supported environment types, and env_kwargs, a dictionary of keyword-arguments to be passed into the environment of type env_name.
demo_0 (group) - group for the first trajectory (every trajectory has a group)
num_samples (attribute) - the number of state-action samples in this trajectory
model_file (attribute) - the xml string corresponding to the MJCF MuJoCo model. Only present for robosuite datasets.
states (dataset) - flattened raw MuJoCo states, ordered by time. Shape (N, D) where N is the length of the trajectory, and D is the dimension of the state vector. Should be empty or have dummy values for non-robosuite datasets.
actions (dataset) - environment actions, ordered by time. Shape (N, A) where N is the length of the trajectory, and A is the action space dimension
rewards (dataset) - environment rewards, ordered by time. Shape (N,) where N is the length of the trajectory.
dones (dataset) - done signal, equal to 1 if playing the corresponding action in the state should terminate the episode. Shape (N,) where N is the length of the trajectory.
obs (group) - group for the observation keys. Each key is stored as a dataset.
<obs_key_1> (dataset) - the first observation key. Note that the name of this dataset and shape will vary. As an example, the name could be “agentview_image”, and the shape could be (N, 84, 84, 3).
...
next_obs (group) - group for the next observations.
<obs_key_1> (dataset) - the first observation key.
...
demo_1 (group) - group for the second trajectory
...
https://robomimic.github.io/docs/datasets/overview.html
"""
        data_group = f.create_group('data')
        data_group.attrs['total'] = sum(demos_len)
        data_group.attrs['env_args'] = json.dumps({
            'env_name': 'Lift',
            'type': 1,
            'env_kwargs': {
                'has_renderer': False,
                'has_offscreen_renderer': True,
                'ignore_done': True,
                'use_object_obs': False,
                'use_camera_obs': True,
                'control_freq': 20,
                'controller_configs': {
                    'type': 'OSC_POSE',
                    'input_max': 1,
                    'input_min': -1,
                    'output_max': [0.05, 0.05, 0.05, 0.5, 0.5, 0.5],
                    'output_min': [-0.05, -0.05, -0.05, -0.5, -0.5, -0.5],
                    'kp': 150,
                    'damping': 1,
                    'impedance_mode': 'fixed',
                    'kp_limits': [0, 300],
                    'damping_limits': [0, 10],
                    'position_limits': None,
                    'orientation_limits': None,
                    'uncouple_pos_ori': True,
                    'control_delta': True,
                    'interpolation': None,
                    'ramp_ratio': 0.2
                },
                'robots': ['Panda'],
                'camera_depths': False,
                'camera_heights': 84,
                'camera_widths': 84,
                'reward_shaping': False,
                'camera_names': ['agentview', 'robot0_eye_in_hand'],
                'render_gpu_device_id': 0
            },
            'use_image_obs': True
        })
        # write each trajectory: actions plus the image observations
        for demo_name, demo_data in demos.items():
            demo_group = data_group.create_group(demo_name)
            demo_group.attrs['num_samples'] = len(demo_data['actions'])
            demo_group.create_dataset('actions', data=demo_data['actions'])
            obs_group = demo_group.create_group('obs')
            for key, value in demo_data['obs'].items():
                obs_group.create_dataset(key, data=value)
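

# A minimal read-back sketch (hypothetical helper, not called by this script):
# it mirrors load_demo() above and can serve as a quick sanity check that the
# synthetic file round-trips through the same access pattern.
def load_synthetic_demo(path: str, demo_idx: int):
    with h5py.File(path, 'r') as f:
        demo = f['data'][f'demo_{demo_idx}']
        actions = demo['actions'][:].astype(np.float32)
        frames = demo['obs']['agentview_image'][:].astype(np.uint8)
    return frames, actions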


if __name__ == '__main__':
    main()