"""Build a mixed real/synthetic robomimic "Lift" image dataset.

The first ``1 - SYNTHETIC_DATA_RATIO`` fraction of the source demos is copied
unchanged.  For the remaining demos, the recorded actions are replayed through
a ``GenieSimulator`` and the frames it predicts are stored in place of the
recorded frames.  Everything is written to a single HDF5 file.
"""
import json
import os

import h5py
import tqdm
import numpy as np
import torch
import cv2
import imageio

from sim.simulator import GenieSimulator, ReplaySimulator
from sim.policy import ReplayPolicy
from diffusion_policy.util.pytorch_util import dict_apply

SYNTHETIC_DATA_RATIO = 0.25
DP_RES = 84

# Demos read from the source dataset (indices 0 .. NUM_DEMOS - 1).
NUM_DEMOS = 200
# Values shared by the Genie simulator and the replay helpers.
PROMPT_HORIZON = 11
ACTION_STRIDE = 1

SOURCE_DATASET_PATH = 'data/robomimic/datasets/lift/ph/image.hdf5'
OUTPUT_PATH_TEMPLATE = 'data/robomimic_synthetic/robomimic_synthetic{num_synthetic}.hdf5'

# Environment metadata stored (JSON-encoded) in the ``env_args`` attribute of
# the output file's ``data`` group.
_ENV_ARGS = {
    'env_name': 'Lift',
    'type': 1,
    'env_kwargs': {
        'has_renderer': False,
        'has_offscreen_renderer': True,
        'ignore_done': True,
        'use_object_obs': False,
        'use_camera_obs': True,
        'control_freq': 20,
        'controller_configs': {
            'type': 'OSC_POSE',
            'input_max': 1,
            'input_min': -1,
            'output_max': [0.05, 0.05, 0.05, 0.5, 0.5, 0.5],
            'output_min': [-0.05, -0.05, -0.05, -0.5, -0.5, -0.5],
            'kp': 150,
            'damping': 1,
            'impedance_mode': 'fixed',
            'kp_limits': [0, 300],
            'damping_limits': [0, 10],
            'position_limits': None,
            'orientation_limits': None,
            'uncouple_pos_ori': True,
            'control_delta': True,
            'interpolation': None,
            'ramp_ratio': 0.2
        },
        'robots': ['Panda'],
        'camera_depths': False,
        'camera_heights': 84,
        'camera_widths': 84,
        'reward_shaping': False,
        'camera_names': ['agentview', 'robot0_eye_in_hand'],
        'render_gpu_device_id': 0
    },
    'use_image_obs': True
}


def load_demo(demo_idx: int, dataset_path: str = SOURCE_DATASET_PATH):
    """Read one demonstration from the source HDF5 dataset.

    Args:
        demo_idx: Index of the demo; the group ``data/demo_{demo_idx}`` is read.
        dataset_path: HDF5 file to read from.

    Returns:
        ``(frames, actions)``: the ``obs/agentview_image`` dataset cast to
        ``uint8`` and the ``actions`` dataset cast to ``float32``.

    Raises:
        AssertionError: If the demo has a different number of frames and
            actions.
    """
    with h5py.File(dataset_path, 'r') as f:
        demo = f['data'][f'demo_{demo_idx}']
        actions = demo['actions'][:].astype(np.float32)
        frames = demo['obs']['agentview_image'][:].astype(np.uint8)
    assert len(actions) == len(frames)
    return frames, actions


def _load_real_demos(num_real: int) -> dict:
    """Load demos ``0 .. num_real - 1`` unchanged, keyed as ``demo_{idx}``."""
    demos = {}
    for demo_idx in tqdm.tqdm(range(num_real)):
        frames, actions = load_demo(demo_idx)
        # Real frames are stored as-is, so they must already be at DP_RES.
        assert frames.shape[-3:] == (DP_RES, DP_RES, 3), frames.shape
        demos[f"demo_{demo_idx}"] = {
            "obs": {
                "agentview_image": frames
            },
            "actions": actions
        }
    return demos


def _build_genie_simulator():
    """Construct the Genie world-model simulator used for synthetic rollouts."""
    return GenieSimulator(
        image_encoder_type='temporalvae',
        image_encoder_ckpt='stabilityai/stable-video-diffusion-img2vid',
        quantize=False,
        backbone_type="stmar",
        backbone_ckpt="data/mar_ckpt/robomimic_best",
        prompt_horizon=PROMPT_HORIZON,
        action_stride=ACTION_STRIDE,
        domain='robomimic',
    )


def _rollout_synthetic_demo(genie_simulator, frames, actions) -> dict:
    """Replay ``actions`` through the Genie simulator and record its frames.

    Returns a dict with the same layout as a real demo
    (``{"obs": {"agentview_image": ...}, "actions": ...}``), converted to
    numpy arrays.
    """
    # NOTE(review): the positional arguments are presumably the action stride
    # and prompt horizon (they match the GenieSimulator settings) -- confirm
    # against sim.policy / sim.simulator.
    replay_policy = ReplayPolicy(actions, ACTION_STRIDE, PROMPT_HORIZON)
    replay_simulator = ReplaySimulator(frames, PROMPT_HORIZON)
    assert len(replay_policy) == len(replay_simulator), \
        (len(replay_policy), len(replay_simulator))

    # Prompt Genie with the recorded frames and actions.
    genie_simulator.set_initial_state((
        replay_simulator.prompt(),
        replay_policy.prompt()
    ))
    image = genie_simulator.reset()

    images = []
    taken_actions = []
    for _ in range(len(replay_policy)):
        action = replay_policy.generate_action(None)
        # Store the current frame with the action taken from it, then advance.
        images.append(cv2.resize(image, (DP_RES, DP_RES)))
        taken_actions.append(action[0])
        image = genie_simulator.step(action)['pred_next_frame']

    rollout = {
        "obs": {
            "agentview_image": images
        },
        "actions": taken_actions
    }
    return dict_apply(rollout, lambda x: np.array(x))


def _write_dataset(output_path: str, demos: dict) -> None:
    """Write ``demos`` to ``output_path`` in a robomimic-style HDF5 layout.

    Only ``total``, ``env_args``, ``num_samples``, ``actions`` and ``obs`` are
    written by this function.
    """
    # Reference: the robomimic saving format.
    #
    # data (group)
    #   total (attribute) - number of state-action samples in the dataset
    #   env_args (attribute) - a json string that contains metadata on the
    #     environment and relevant arguments used for collecting data. Three
    #     keys: env_name, the name of the environment or task to create,
    #     env_type, one of robomimic's supported environment types (written
    #     here under the key 'type'), and env_kwargs, a dictionary of
    #     keyword-arguments to be passed into the environment of type env_name.
    #   demo_0 (group) - group for the first trajectory (every trajectory has
    #     a group)
    #     num_samples (attribute) - the number of state-action samples in this
    #       trajectory
    #     model_file (attribute) - the xml string corresponding to the MJCF
    #       MuJoCo model. Only present for robosuite datasets.
    #     states (dataset) - flattened raw MuJoCo states, ordered by time.
    #       Shape (N, D) where N is the length of the trajectory, and D is the
    #       dimension of the state vector. Should be empty or have dummy
    #       values for non-robosuite datasets.
    #     actions (dataset) - environment actions, ordered by time. Shape
    #       (N, A) where N is the length of the trajectory, and A is the
    #       action space dimension
    #     rewards (dataset) - environment rewards, ordered by time. Shape (N,)
    #       where N is the length of the trajectory.
    #     dones (dataset) - done signal, equal to 1 if playing the
    #       corresponding action in the state should terminate the episode.
    #       Shape (N,) where N is the length of the trajectory.
    #     obs (group) - group for the observation keys. Each key is stored as
    #       a dataset.
    #       <obs_key> (dataset) - the first observation key. Note that the
    #         name of this dataset and shape will vary. As an example, the
    #         name could be "agentview_image", and the shape could be
    #         (N, 84, 84, 3).
    #       ...
    #     next_obs (group) - group for the next observations.
    #       <obs_key> (dataset) - the first observation key.
    #       ...
    #   demo_1 (group) - group for the second trajectory
    #   ...

    # h5py does not create missing parent directories; without this the write
    # fails on a fresh checkout, after all rollouts have already been computed.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with h5py.File(output_path, 'w') as f:
        data_group = f.create_group('data')
        data_group.attrs['total'] = sum(
            len(demo_data['actions']) for demo_data in demos.values()
        )
        data_group.attrs['env_args'] = json.dumps(_ENV_ARGS)

        for demo_name, demo_data in demos.items():
            demo_group = data_group.create_group(demo_name)
            demo_group.attrs['num_samples'] = len(demo_data['actions'])
            demo_group.create_dataset('actions', data=demo_data['actions'])
            obs_group = demo_group.create_group('obs')
            for key, value in demo_data['obs'].items():
                obs_group.create_dataset(key, data=value)


def main():
    """Copy the real demos, generate the synthetic ones, and save the result."""
    start_idx = int(NUM_DEMOS * (1 - SYNTHETIC_DATA_RATIO))
    end_idx = NUM_DEMOS
    print(f"Generating {end_idx - start_idx} synthetic demos")

    # Copy actual data.
    demos = _load_real_demos(start_idx)
    print(f"Loaded {len(demos)} actual demos")

    genie_simulator = _build_genie_simulator()

    # Generate synthetic data for the remaining demo indices.
    for demo_idx in tqdm.tqdm(range(start_idx, end_idx)):
        frames, actions = load_demo(demo_idx)
        demos[f"demo_{demo_idx}"] = _rollout_synthetic_demo(
            genie_simulator, frames, actions
        )

    _write_dataset(
        OUTPUT_PATH_TEMPLATE.format(num_synthetic=end_idx - start_idx),
        demos,
    )


if __name__ == '__main__':
    main()