|
!!python/object/apply:collections.OrderedDict |
|
- - - batch_size |
|
- 256 |
|
- - buffer_size |
|
- 200000 |
|
- - callback |
|
- - utils.callbacks.ParallelTrainCallback: |
|
gradient_steps: 200 |
|
- utils.callbacks.LapTimeCallback |
|
- - ent_coef |
|
- auto |
|
- - env_wrapper |
|
- - gym.wrappers.time_limit.TimeLimit: |
|
max_episode_steps: 10000 |
|
- ae.wrapper.AutoencoderWrapper |
|
- utils.wrappers.HistoryWrapper: |
|
horizon: 2 |
|
- - gamma |
|
- 0.99 |
|
- - gradient_steps |
|
- 256 |
|
- - learning_rate |
|
- 0.00073 |
|
- - learning_starts |
|
- 500 |
|
- - n_timesteps |
|
- 2000000.0 |
|
- - normalize |
|
- '{''norm_obs'': True, ''norm_reward'': False}' |
|
- - policy |
|
- MlpPolicy |
|
- - policy_kwargs |
|
- dict(log_std_init=-3, net_arch=[256, 256], n_critics=2, use_expln=True) |
|
- - sde_sample_freq |
|
- 16 |
|
- - tau |
|
- 0.02 |
|
- - train_freq |
|
- 200 |
|
- - use_sde |
|
- true |
|
- - use_sde_at_warmup |
|
- true |
|
|