# Per-environment hyperparameter presets.
#
# Each top-level key is named after an environment and maps to a preset made
# of some of these sections:
#   n_timesteps         - total timestep budget for the run
#   env_hyperparams     - environment settings (n_envs, normalize, ...)
#   policy_hyperparams  - policy settings (use_sde, activation_fn, ...)
#   algo_hyperparams    - algorithm settings (n_steps, gamma, ...)
#
# Notes:
# - The explicit `!!float` tag is kept on exponent literals: some YAML 1.1
#   loaders (e.g. PyYAML) read a bare `5e5` (no decimal point) as a string.
# - `<<: *anchor` is a YAML 1.1 merge key and merges shallowly: keys written
#   next to it replace the merged-in key wholesale, nested mappings included.

CartPole-v1: &cartpole-defaults
  n_timesteps: !!float 5e5
  env_hyperparams:
    n_envs: 8

# Identical to CartPole-v1 (no overrides).
CartPole-v0:
  <<: *cartpole-defaults

MountainCar-v0:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
    normalize: true

MountainCarContinuous-v0:
  n_timesteps: !!float 1e5
  env_hyperparams:
    n_envs: 4
    normalize: true
  # policy_hyperparams:
  #   use_sde: true
  #   log_std_init: 0.0
  #   init_layers_orthogonal: false
  algo_hyperparams:
    n_steps: 100
    # NOTE(review): sde_sample_freq is still set although the use_sde policy
    # block above is commented out - confirm this is intended.
    sde_sample_freq: 16

Acrobot-v1:
  n_timesteps: !!float 5e5
  env_hyperparams:
    normalize: true
    n_envs: 16

# Tuned
LunarLander-v2:
  device: cpu
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 4
    normalize: true
  algo_hyperparams:
    n_steps: 2
    gamma: 0.9955517404308908
    gae_lambda: 0.9875340918797773
    learning_rate: 0.0013814130817068916
    learning_rate_decay: linear
    ent_coef: !!float 3.388369146384422e-7
    ent_coef_decay: none  # plain scalar: loads as the string "none", not null
    max_grad_norm: 3.33982095073364
    normalize_advantage: true
    vf_coef: 0.1667838310548184

BipedalWalker-v3:
  n_timesteps: !!float 5e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
  algo_hyperparams:
    ent_coef: 0
    max_grad_norm: 0.5
    n_steps: 8
    gae_lambda: 0.9
    vf_coef: 0.4
    gamma: 0.99
    learning_rate: !!float 9.6e-4
    learning_rate_decay: linear

# Anchored as the shared preset for the *BulletEnv-v0 entries below.
HalfCheetahBulletEnv-v0: &pybullet-defaults
  n_timesteps: !!float 2e6
  env_hyperparams:
    n_envs: 4
    normalize: true
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
  # Anchored separately so the algorithm section can be aliased on its own.
  algo_hyperparams: &pybullet-algo-defaults
    n_steps: 8
    ent_coef: 0
    max_grad_norm: 0.5
    gae_lambda: 0.9
    gamma: 0.99
    vf_coef: 0.4
    learning_rate: !!float 9.6e-4
    learning_rate_decay: linear

AntBulletEnv-v0:
  <<: *pybullet-defaults

Walker2DBulletEnv-v0:
  <<: *pybullet-defaults

HopperBulletEnv-v0:
  <<: *pybullet-defaults

# Tuned
CarRacing-v0:
  n_timesteps: !!float 4e6
  env_hyperparams:
    n_envs: 4
    frame_stack: 4
    normalize: true
    normalize_kwargs:
      norm_obs: false
      norm_reward: true
  policy_hyperparams:
    use_sde: true
    log_std_init: -4.839609092563
    init_layers_orthogonal: true
    activation_fn: tanh
    share_features_extractor: false
    cnn_flatten_dim: 256
    hidden_sizes: [256]
  algo_hyperparams:
    n_steps: 64
    learning_rate: 0.000018971962220405576
    gamma: 0.9942776405534832
    gae_lambda: 0.9549244758833236
    ent_coef: 0.0000015666550584860516
    ent_coef_decay: linear
    vf_coef: 0.12164696385898476
    max_grad_norm: 2.2574480552177127
    normalize_advantage: false
    use_rms_prop: false
    sde_sample_freq: 16

# NOTE(review): the leading underscore suggests this is a reusable template
# rather than a runnable environment entry - presumably consumed through the
# anchors it defines; verify against the loader and the rest of the file.
_atari: &atari-defaults
  n_timesteps: !!float 1e7
  env_hyperparams: &atari-env-defaults
    n_envs: 16
    frame_stack: 4
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
    vec_env_class: async
  policy_hyperparams: &atari-policy-defaults
    activation_fn: relu
  algo_hyperparams:
    ent_coef: 0.01
    vf_coef: 0.25