!!python/object/apply:collections.OrderedDict
- - - batch_size
    - 512
  - - clip_range
    - 0.2
  - - ent_coef
    - 6.4940755116195606e-06
  - - gae_lambda
    - 0.98
  - - gamma
    - 0.99
  - - learning_rate
    - 0.0004476103728105138
  - - max_grad_norm
    - 1
  - - n_envs
    - 16
  - - n_epochs
    - 20
  - - n_steps
    - 256
  - - n_timesteps
    - 1000000.0
  - - normalize
    - dict(norm_obs=False, norm_reward=True)
  - - policy
    - imitation.policies.base.MlpPolicyWithNormalizeFeaturesExtractor
  - - policy_kwargs
    - dict(activation_fn=nn.Tanh, net_arch=[dict(pi=[64, 64], vf=[64, 64])])
  - - vf_coef
    - 0.25988158989488963