---
# Training / evaluation configuration for a `tdmpc` policy on the `pusht`
# environment, using the `lerobot/pusht_keypoints` dataset.
#
# NOTE(review): this file was restored to block style from a copy whose
# indentation had been collapsed. Nesting was rebuilt from key order and from
# the interpolation paths used below (`${fps}`, `${env.state_dim}`,
# `${env.action_dim}`); confirm parent/child placement against the consumer's
# schema, in particular the keys that follow `image_transforms`.

# --- Run-level settings -----------------------------------------------------
resume: false
device: cuda
use_amp: true
seed: 1
dataset_repo_id: lerobot/pusht_keypoints
video_backend: pyav

# --- Training ---------------------------------------------------------------
training:
  # offline_steps is 0, so every configured step is one of the online_steps.
  offline_steps: 0
  num_workers: 12
  batch_size: 256
  eval_freq: 10000
  log_freq: 250
  save_checkpoint: true
  save_freq: 10000
  online_steps: 1000000
  online_steps_between_rollouts: 1000
  online_rollout_n_episodes: 10
  online_rollout_batch_size: 10
  online_sampling_ratio: 1.0
  online_env_seed: 10000
  online_buffer_capacity: 40000
  online_buffer_seed_size: 0
  do_online_rollout_async: false

  # Image augmentation settings. `enable` is false; the per-transform entries
  # are presumably ignored while it stays false (verify in the consumer).
  image_transforms:
    enable: false
    max_num_transforms: 3
    random_order: false
    brightness:
      weight: 1
      min_max:
        - 0.8
        - 1.2
    contrast:
      weight: 1
      min_max:
        - 0.8
        - 1.2
    saturation:
      weight: 1
      min_max:
        - 0.5
        - 1.5
    hue:
      weight: 1
      min_max:
        - -0.05
        - 0.05
    sharpness:
      weight: 1
      min_max:
        - 0.8
        - 1.2

  # NOTE(review): the three keys below are placed under `training`, not under
  # `image_transforms` -- confirm.
  grad_clip_norm: 10.0
  lr: 0.0003

  # Offsets are spaced 0.1 apart, i.e. 1 / fps for `fps: 10` (presumably
  # seconds -- confirm). The observation lists hold 6 entries
  # (policy.horizon + 1); the action and reward lists hold 5 (policy.horizon).
  delta_timestamps:
    observation.environment_state:
      - 0.0
      - 0.1
      - 0.2
      - 0.3
      - 0.4
      - 0.5
    observation.state:
      - 0.0
      - 0.1
      - 0.2
      - 0.3
      - 0.4
      - 0.5
    action:
      - 0.0
      - 0.1
      - 0.2
      - 0.3
      - 0.4
    next.reward:
      - 0.0
      - 0.1
      - 0.2
      - 0.3
      - 0.4

# --- Evaluation -------------------------------------------------------------
eval:
  n_episodes: 50
  batch_size: 50
  use_async_envs: true

# --- Weights & Biases logging -----------------------------------------------
wandb:
  enable: true
  disable_artifact: true
  project: lerobot
  notes: ''

# --- Environment ------------------------------------------------------------
# Top-level frame rate; referenced by `env.fps` through `${fps}`.
fps: 10

env:
  name: pusht
  task: PushT-v0
  image_size: 96
  # state_dim / action_dim are referenced from the policy input/output shapes.
  state_dim: 2
  action_dim: 2
  fps: ${fps}
  episode_length: 300
  gym:
    obs_type: environment_state_agent_pos
    render_mode: rgb_array
    visualization_width: 384
    visualization_height: 384

# --- Policy -----------------------------------------------------------------
policy:
  name: tdmpc
  pretrained_model_path: null
  n_action_repeats: 1
  horizon: 5
  n_action_steps: 5

  # Input / output shapes and normalization modes, keyed by feature name.
  input_shapes:
    observation.environment_state:
      - 16
    observation.state:
      - ${env.state_dim}
  output_shapes:
    action:
      - ${env.action_dim}
  input_normalization_modes:
    observation.environment_state: min_max
    observation.state: min_max
  output_normalization_modes:
    action: min_max

  # Remaining policy hyperparameters, kept in their original order.
  image_encoder_hidden_dim: 32
  state_encoder_hidden_dim: 256
  latent_dim: 50
  q_ensemble_size: 5
  mlp_dim: 512
  discount: 0.98
  use_mpc: true
  cem_iterations: 6
  max_std: 2.0
  min_std: 0.05
  n_gaussian_samples: 512
  n_pi_samples: 51
  uncertainty_regularizer_coeff: 1.0
  n_elites: 50
  elite_weighting_temperature: 0.5
  gaussian_mean_momentum: 0.1
  max_random_shift_ratio: 0.0476
  reward_coeff: 0.5
  expectile_weight: 0.9
  value_coeff: 0.1
  consistency_coeff: 20.0
  advantage_scaling: 3.0
  pi_coeff: 0.5
  temporal_decay_coeff: 0.5
  target_model_momentum: 0.995