# Hyperparameter configuration serialized as a Python OrderedDict.
#
# Structure: the tag below calls collections.OrderedDict with one positional
# argument, a list of [key, value] pairs, so key order is preserved on load.
#
# NOTE(security): `!!python/object/apply` is a Python-specific tag that
# constructs arbitrary Python objects; safe YAML loaders reject it. Only load
# this file from a trusted source.
!!python/object/apply:collections.OrderedDict
- - - alive_bonus_offset
    - -1
  - - delta_std
    - 0.025
  - - learning_rate
    - 0.03
  - - n_delta
    - 40
  - - n_timesteps
    - 75000000.0
  - - n_top
    - 30
  # The two `dict(...)` values below are plain YAML strings, not mappings.
  # NOTE(review): presumably the consumer evaluates them as Python
  # expressions - confirm against the loading code.
  - - normalize
    - dict(norm_obs=True, norm_reward=False)
  - - policy
    - MlpPolicy
  - - policy_kwargs
    - dict(net_arch=[64, 64])
  - - zero_policy
    - false