# tdmpc_pusht_keypoints / config.yaml
# alexandersoare's picture
# Upload folder using huggingface_hub
# 9c64ed5 verified
# Run-level settings. These are top-level keys, not part of any section.
# NOTE(review): `use_amp` presumably toggles automatic mixed precision on
# the selected `device` — confirm against the training script.
resume: false
device: cuda
use_amp: true
seed: 1
# Dataset identifier in `<namespace>/<name>` form.
dataset_repo_id: lerobot/pusht_keypoints
video_backend: pyav
# Training settings. Everything from `offline_steps` through
# `delta_timestamps` is nested under `training`, so this section's
# `batch_size` and the per-transform `weight` / `min_max` pairs stay
# distinct from same-named keys elsewhere in the file.
training:
  offline_steps: 0
  num_workers: 12
  batch_size: 256
  eval_freq: 10000
  log_freq: 250
  save_checkpoint: true
  save_freq: 10000
  online_steps: 1000000
  online_steps_between_rollouts: 1000
  online_rollout_n_episodes: 10
  online_rollout_batch_size: 10
  online_sampling_ratio: 1.0
  online_env_seed: 10000
  online_buffer_capacity: 40000
  online_buffer_seed_size: 0
  do_online_rollout_async: false
  # Image augmentation. Each transform carries its own `weight` and a
  # two-element `min_max` range.
  # NOTE(review): presumably none of the transforms are applied while
  # `enable` is false — confirm against the dataset factory.
  image_transforms:
    enable: false
    max_num_transforms: 3
    random_order: false
    brightness:
      weight: 1
      min_max:
        - 0.8
        - 1.2
    contrast:
      weight: 1
      min_max:
        - 0.8
        - 1.2
    saturation:
      weight: 1
      min_max:
        - 0.5
        - 1.5
    hue:
      weight: 1
      min_max:
        - -0.05
        - 0.05
    sharpness:
      weight: 1
      min_max:
        - 0.8
        - 1.2
  # NOTE(review): `grad_clip_norm`, `lr` and `delta_timestamps` are placed
  # directly under `training` (not under `image_transforms`) following the
  # upstream LeRobot schema — confirm.
  grad_clip_norm: 10.0
  lr: 0.0003
  # Offsets requested per dataset key, spaced 0.1 apart. The observation
  # keys list six offsets; `action` and `next.reward` list five.
  # NOTE(review): presumably seconds, i.e. horizon + 1 and horizon frames
  # at `fps: 10` with `policy.horizon: 5` — confirm.
  delta_timestamps:
    observation.environment_state:
      - 0.0
      - 0.1
      - 0.2
      - 0.3
      - 0.4
      - 0.5
    observation.state:
      - 0.0
      - 0.1
      - 0.2
      - 0.3
      - 0.4
      - 0.5
    action:
      - 0.0
      - 0.1
      - 0.2
      - 0.3
      - 0.4
    next.reward:
      - 0.0
      - 0.1
      - 0.2
      - 0.3
      - 0.4
# Evaluation settings. Nested under `eval` so this `batch_size` stays
# separate from the training batch size.
# `batch_size` equals `n_episodes` here.
eval:
  n_episodes: 50
  batch_size: 50
  use_async_envs: true
# Weights & Biases logging settings. Nested under `wandb` so this
# `enable` flag stays separate from other `enable` keys in the file.
wandb:
  enable: true
  disable_artifact: true
  project: lerobot
  # Empty string, not null.
  notes: ''
# Top-level rate (presumably frames per second). The `${fps}`
# interpolation further down in this file resolves to this value.
fps: 10
# Environment settings. `state_dim` and `action_dim` are read by the
# policy section through `${env.state_dim}` / `${env.action_dim}`.
env:
  name: pusht
  task: PushT-v0
  image_size: 96
  state_dim: 2
  action_dim: 2
  # Interpolates the top-level `fps` key. Keeping this under `env` avoids
  # a duplicate top-level `fps` that would reference itself.
  fps: ${fps}
  episode_length: 300
  # NOTE(review): presumably forwarded as keyword arguments when the
  # environment is constructed — confirm against the env factory.
  gym:
    obs_type: environment_state_agent_pos
    render_mode: rgb_array
    visualization_width: 384
    visualization_height: 384
# Policy settings. All keys from `name` through `target_model_momentum`
# are nested under `policy`.
policy:
  name: tdmpc
  pretrained_model_path: null
  n_action_repeats: 1
  horizon: 5
  n_action_steps: 5
  # Per-key feature shapes. The `${env.*}` entries interpolate the
  # dimensions declared in the `env` section.
  input_shapes:
    observation.environment_state:
      - 16
    observation.state:
      - ${env.state_dim}
  output_shapes:
    action:
      - ${env.action_dim}
  # Every listed input and output key uses `min_max` normalization.
  input_normalization_modes:
    observation.environment_state: min_max
    observation.state: min_max
  output_normalization_modes:
    action: min_max
  # Network sizes.
  image_encoder_hidden_dim: 32
  state_encoder_hidden_dim: 256
  latent_dim: 50
  q_ensemble_size: 5
  mlp_dim: 512
  discount: 0.98
  # Planning settings.
  # NOTE(review): names suggest CEM-based model-predictive control —
  # confirm semantics against the policy implementation.
  use_mpc: true
  cem_iterations: 6
  max_std: 2.0
  min_std: 0.05
  n_gaussian_samples: 512
  n_pi_samples: 51
  uncertainty_regularizer_coeff: 1.0
  n_elites: 50
  elite_weighting_temperature: 0.5
  gaussian_mean_momentum: 0.1
  # Training-time coefficients.
  # NOTE(review): presumably loss weights and augmentation/target-network
  # hyperparameters — confirm against the policy implementation.
  max_random_shift_ratio: 0.0476
  reward_coeff: 0.5
  expectile_weight: 0.9
  value_coeff: 0.1
  consistency_coeff: 20.0
  advantage_scaling: 3.0
  pi_coeff: 0.5
  temporal_decay_coeff: 0.5
  target_model_momentum: 0.995