import json
import os

import h5py
import tqdm
import numpy as np
import torch
import cv2
import imageio

from sim.simulator import GenieSimulator, ReplaySimulator
from sim.policy import ReplayPolicy
from diffusion_policy.util.pytorch_util import dict_apply


# Fraction of the demo set that main() replaces with GenieSimulator rollouts;
# the leading (1 - ratio) share of demos is copied unchanged from the source
# dataset.
SYNTHETIC_DATA_RATIO = 0.25
# Side length, in pixels, of the square frames stored in the output file:
# real frames are asserted to be (DP_RES, DP_RES, 3) and generated frames are
# resized to (DP_RES, DP_RES).
DP_RES = 84


def load_demo(
    demo_idx: int,
    dataset_path: str = 'data/robomimic/datasets/lift/ph/image.hdf5',
):
    """Load one demonstration's frames and actions from a robomimic HDF5 file.

    Args:
        demo_idx: Index of the demonstration; the group
            ``data/demo_{demo_idx}`` is read.
        dataset_path: Path of the HDF5 dataset to read. Defaults to the Lift
            image dataset, so existing one-argument calls behave as before.

    Returns:
        A ``(frames, actions)`` tuple of NumPy arrays: ``frames`` is the
        ``obs/agentview_image`` dataset cast to ``uint8`` and ``actions`` is
        the ``actions`` dataset cast to ``float32``.

    Raises:
        AssertionError: If the demo does not hold exactly one frame per
            action.
    """
    with h5py.File(dataset_path, 'r') as f:
        demo = f['data'][f'demo_{demo_idx}']
        # ``[:]`` copies the data into memory so the arrays stay valid after
        # the file is closed.
        actions = demo['actions'][:].astype(np.float32)
        frames = demo['obs']['agentview_image'][:].astype(np.uint8)
    assert len(actions) == len(frames), (len(actions), len(frames))
    return frames, actions


# Number of demos taken from the source dataset (indices 0 .. _NUM_DEMOS - 1).
_NUM_DEMOS = 200
# Values passed to GenieSimulator as ``prompt_horizon`` / ``action_stride``.
# The same numbers are handed positionally to ReplayPolicy and ReplaySimulator;
# presumably they carry the same meaning there -- TODO confirm against
# sim.policy / sim.simulator.
_PROMPT_HORIZON = 11
_ACTION_STRIDE = 1

# Environment metadata stored verbatim (as JSON) in the ``env_args`` attribute
# of the output file's ``data`` group.
_LIFT_ENV_ARGS = {
    'env_name': 'Lift',
    'type': 1,
    'env_kwargs': {
        'has_renderer': False,
        'has_offscreen_renderer': True,
        'ignore_done': True,
        'use_object_obs': False,
        'use_camera_obs': True,
        'control_freq': 20,
        'controller_configs': {
            'type': 'OSC_POSE',
            'input_max': 1,
            'input_min': -1,
            'output_max': [0.05, 0.05, 0.05, 0.5, 0.5, 0.5],
            'output_min': [-0.05, -0.05, -0.05, -0.5, -0.5, -0.5],
            'kp': 150,
            'damping': 1,
            'impedance_mode': 'fixed',
            'kp_limits': [0, 300],
            'damping_limits': [0, 10],
            'position_limits': None,
            'orientation_limits': None,
            'uncouple_pos_ori': True,
            'control_delta': True,
            'interpolation': None,
            'ramp_ratio': 0.2,
        },
        'robots': ['Panda'],
        'camera_depths': False,
        'camera_heights': 84,
        'camera_widths': 84,
        'reward_shaping': False,
        'camera_names': ['agentview', 'robot0_eye_in_hand'],
        'render_gpu_device_id': 0,
    },
    'use_image_obs': True,
}


def _load_real_demos(num_real):
    """Return ``{"demo_i": demo}`` for the first ``num_real`` recorded demos."""
    demos = {}
    for demo_idx in tqdm.tqdm(range(num_real)):
        frames, actions = load_demo(demo_idx)
        # Real frames are stored untouched, so they must already have the
        # resolution the synthetic frames are resized to.
        assert frames.shape[-3:] == (DP_RES, DP_RES, 3), frames.shape
        demos[f"demo_{demo_idx}"] = {
            "obs": {
                "agentview_image": frames
            },
            "actions": actions
        }
    return demos


def _rollout_synthetic_demo(genie_simulator, frames, actions):
    """Replay ``actions`` through ``genie_simulator`` and return the demo.

    The simulator is prompted from the recorded ``frames``/``actions``; every
    later frame stored in the result is the simulator's prediction, resized
    to ``DP_RES`` x ``DP_RES``.
    """
    replay_policy = ReplayPolicy(actions, _ACTION_STRIDE, _PROMPT_HORIZON)
    replay_simulator = ReplaySimulator(frames, _PROMPT_HORIZON)
    assert len(replay_policy) == len(replay_simulator), \
        (len(replay_policy), len(replay_simulator))

    # Prompt the simulator with the recorded frames and actions.
    genie_simulator.set_initial_state((
        replay_simulator.prompt(),
        replay_policy.prompt()
    ))
    image = genie_simulator.reset()

    rollout = {
        "obs": {
            "agentview_image": []
        },
        "actions": []
    }
    for _ in range(len(replay_policy)):
        action = replay_policy.generate_action(None)
        # Store the frame the action is taken from, then advance the model.
        rollout['obs']['agentview_image'].append(
            cv2.resize(image, (DP_RES, DP_RES)))
        rollout['actions'].append(action[0])
        image = genie_simulator.step(action)['pred_next_frame']

    # Stack the per-step lists into arrays.
    return dict_apply(rollout, np.array)


def _write_dataset(output_path, demos):
    """Write ``demos`` to ``output_path`` in a robomimic-style HDF5 layout.

    Layout written by this function:

        data (group)
            total (attribute)    - number of state-action samples overall
            env_args (attribute) - JSON string with the environment metadata
            demo_<i> (group)     - one group per trajectory
                num_samples (attribute) - samples in this trajectory
                actions (dataset)       - actions, ordered by time
                obs (group)             - one dataset per observation key

    The ``states``, ``rewards``, ``dones`` and ``next_obs`` entries of the
    full robomimic format are not written.
    https://robomimic.github.io/docs/datasets/overview.html
    """
    with h5py.File(output_path, 'w') as f:
        data_group = f.create_group('data')
        data_group.attrs['total'] = sum(
            len(demo_data['actions']) for demo_data in demos.values())
        data_group.attrs['env_args'] = json.dumps(_LIFT_ENV_ARGS)
        for demo_name, demo_data in demos.items():
            demo_group = data_group.create_group(demo_name)
            demo_group.attrs['num_samples'] = len(demo_data['actions'])
            demo_group.create_dataset('actions', data=demo_data['actions'])
            obs_group = demo_group.create_group('obs')
            for key, value in demo_data['obs'].items():
                obs_group.create_dataset(key, data=value)


def main():
    """Build a dataset mixing recorded demos with simulator-generated ones.

    The first ``1 - SYNTHETIC_DATA_RATIO`` share of the demos is copied from
    the source dataset. For each remaining demo, the recorded actions are
    replayed through a ``GenieSimulator`` and the predicted frames are stored
    in place of the recorded ones. The result is written to
    ``data/robomimic_synthetic/robomimic_synthetic<N>.hdf5``, where ``N`` is
    the number of synthetic demos.
    """
    start_idx = int(_NUM_DEMOS * (1 - SYNTHETIC_DATA_RATIO))
    end_idx = _NUM_DEMOS
    print(f"Generating {end_idx - start_idx} synthetic demos")

    # Create the output directory up front: h5py does not create missing
    # parent directories, and failing here is far cheaper than failing after
    # all rollouts have been generated.
    output_path = f'data/robomimic_synthetic/robomimic_synthetic{end_idx - start_idx}.hdf5'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Copy actual data.
    demos = _load_real_demos(start_idx)
    print(f"Loaded {len(demos)} actual demos")

    genie_simulator = GenieSimulator(
        image_encoder_type='temporalvae',
        image_encoder_ckpt='stabilityai/stable-video-diffusion-img2vid',
        quantize=False,
        backbone_type="stmar",
        backbone_ckpt="data/mar_ckpt/robomimic_best",
        prompt_horizon=_PROMPT_HORIZON,
        action_stride=_ACTION_STRIDE,
        domain='robomimic',
    )

    # Generate synthetic data (original note: synthetic data in training set
    # only).
    for demo_idx in tqdm.tqdm(range(start_idx, end_idx)):
        frames, actions = load_demo(demo_idx)
        demos[f"demo_{demo_idx}"] = _rollout_synthetic_demo(
            genie_simulator, frames, actions)

    _write_dataset(output_path, demos)


# Run the dataset generation only when executed as a script, not on import.
if __name__ == '__main__':
    main()