# Action-optimizer settings. `_target_` holds a dotted class path; it is
# presumably consumed by Hydra's `instantiate` - confirm against the loader.
action_optimizer:
  _target_: mbrl.planning.CEMOptimizer
  alpha: 0.1
  clipped_normal: false
  device: cpu:0
  elite_ratio: 0.1
  # `???` is kept verbatim: presumably OmegaConf's "mandatory value missing"
  # marker, meaning both bounds must be supplied before this section is used.
  lower_bound: ???
  num_iterations: 5
  population_size: 350
  return_mean_elites: true
  upper_bound: ???

# Algorithm settings (name: mbpo) together with the nested agent definition.
algorithm:
  agent:
    _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC
    # Single-dimensional action space bounded to [-1.0, 1.0].
    action_space:
      # NOTE(review): gym normally exposes Box as `gym.spaces.Box`; confirm
      # that this target path resolves (or is never instantiated) as intended.
      _target_: gym.env.Box
      high:
        - 1.0
      low:
        - -1.0
      shape:
        - 1
    # These values equal the `sac_*` entries under `overrides` in this file.
    args:
      alpha: 0.2
      automatic_entropy_tuning: true
      device: cpu:0
      gamma: 0.99
      hidden_size: 256
      lr: 0.0003
      policy: Gaussian
      target_entropy: -0.05
      target_update_interval: 4
      tau: 0.005
    num_inputs: 4
  freq_train_model: 200
  initial_exploration_steps: 5000
  learned_rewards: true
  name: mbpo
  normalize: true
  normalize_double_precision: true
  num_eval_episodes: 1
  random_initial_explore: false
  real_data_ratio: 0.0
  sac_samples_action: true
  target_is_delta: true

debug_mode: false |
|
device: cpu:0 |
|
# Dynamics-model settings: an ensemble of 7 members with 4 layers of width 200.
dynamics_model:
  _target_: mbrl.models.GaussianMLP
  # Activation is itself given as a nested `_target_` class path.
  activation_fn_cfg:
    _target_: torch.nn.SiLU
  deterministic: false
  device: cpu:0
  ensemble_size: 7
  hid_size: 200
  in_size: 5
  learn_logvar_bounds: false
  num_layers: 4
  out_size: 5
  propagation_method: random_model

experiment: default |
|
log_frequency_agent: 1000 |
|
# Environment-specific overrides (env: cartpole_continuous). The `cem_*` and
# `sac_*` values equal the corresponding entries under `action_optimizer` and
# `algorithm.agent.args` in this file.
overrides:
  cem_alpha: 0.1
  cem_clipped_normal: false
  cem_elite_ratio: 0.1
  cem_num_iters: 5
  cem_population_size: 350
  effective_model_rollouts_per_step: 400
  env: cartpole_continuous
  epoch_length: 200
  freq_train_model: 200
  model_batch_size: 256
  model_lr: 0.001
  model_wd: 5.0e-05
  num_elites: 5
  num_epochs_to_retain_sac_buffer: 1
  num_sac_updates_per_step: 20
  num_steps: 5000
  patience: 5
  planning_horizon: 15
  # NOTE(review): the meaning of the four positions is not stated in this
  # file - confirm against the consumer before editing.
  rollout_schedule:
    - 1
    - 15
    - 1
    - 1
  sac_alpha: 0.2
  sac_automatic_entropy_tuning: true
  sac_batch_size: 256
  sac_gamma: 0.99
  sac_hidden_size: 256
  sac_lr: 0.0003
  sac_policy: Gaussian
  sac_target_entropy: -0.05
  sac_target_update_interval: 4
  sac_tau: 0.005
  sac_updates_every_steps: 1
  trial_length: 200
  validation_ratio: 0.2

root_dir: ./logs |
|
save_video: false |
|
seed: 0 |
|
|