CartPole-v1: &cartpole-defaults
  n_timesteps: !!float 1e5
  env_hyperparams:
    n_envs: 8
  algo_hyperparams:
    n_steps: 32
    batch_size: 256
    n_epochs: 20
    gae_lambda: 0.8
    gamma: 0.98
    ent_coef: 0.0
    learning_rate: 0.001
    learning_rate_decay: linear
    clip_range: 0.2
    clip_range_decay: linear
  eval_params:
    step_freq: !!float 2.5e4
    n_episodes: 10
    save_best: true

CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: !!float 5e4

MountainCar-v0:
  n_timesteps: !!float 1e6
  env_hyperparams:
    normalize: true
    n_envs: 16
  algo_hyperparams:
    n_steps: 16
    n_epochs: 4
    gae_lambda: 0.98
    gamma: 0.99
    ent_coef: 0.0

MountainCarContinuous-v0:
  n_timesteps: !!float 1e5
  env_hyperparams:
    normalize: true
    n_envs: 4
  policy_hyperparams:
    init_layers_orthogonal: false
    # log_std_init: -3.29
  algo_hyperparams:
    n_steps: 512
    batch_size: 256
    n_epochs: 10
    learning_rate: !!float 7.77e-5
    ent_coef: 0.01 # 0.00429
    ent_coef_decay: linear
    clip_range: 0.1
    gae_lambda: 0.9
    max_grad_norm: 5
    vf_coef: 0.19
    # use_sde: true
  eval_params:
    step_freq: 5000
    n_episodes: 10
    save_best: true

Acrobot-v1:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  algo_hyperparams:
    n_steps: 256
    n_epochs: 4
    gae_lambda: 0.94
    gamma: 0.99
    ent_coef: 0.0

LunarLander-v2:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
  algo_hyperparams:
    n_steps: 1024
    batch_size: 64
    n_epochs: 4
    gae_lambda: 0.98
    gamma: 0.999
    ent_coef: 0.01
    ent_coef_decay: linear
    normalize_advantage: false
  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true

CarRacing-v0:
  n_timesteps: !!float 4e6
  env_hyperparams:
    n_envs: 8
    frame_stack: 4
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
    activation_fn: relu
    share_features_extractor: false
    cnn_feature_dim: 256
  algo_hyperparams:
    n_steps: 512
    batch_size: 128
    n_epochs: 10
    learning_rate: !!float 1e-4
    learning_rate_decay: linear
    gamma: 0.99
    gae_lambda: 0.95
    ent_coef: 0.0
    sde_sample_freq: 4
    max_grad_norm: 0.5
    vf_coef: 0.5
    clip_range: 0.2

# BreakoutNoFrameskip-v4
# PongNoFrameskip-v4
# SpaceInvadersNoFrameskip-v4
# QbertNoFrameskip-v4
atari: &atari-defaults
  n_timesteps: !!float 1e7
  policy_hyperparams:
    activation_fn: relu
  env_hyperparams: &atari-env-defaults
    n_envs: 8
    frame_stack: 4
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
    vec_env_class: subproc
  algo_hyperparams:
    n_steps: 128
    batch_size: 256
    n_epochs: 4
    learning_rate: !!float 2.5e-4
    learning_rate_decay: linear
    clip_range: 0.1
    clip_range_decay: linear
    vf_coef: 0.5
    ent_coef: 0.01
  eval_params:
    deterministic: false

HalfCheetahBulletEnv-v0: &pybullet-defaults
  n_timesteps: !!float 2e6
  env_hyperparams: &pybullet-env-defaults
    n_envs: 16
    normalize: true
  policy_hyperparams: &pybullet-policy-defaults
    pi_hidden_sizes: [256, 256]
    v_hidden_sizes: [256, 256]
    activation_fn: relu
  algo_hyperparams: &pybullet-algo-defaults
    n_steps: 512
    batch_size: 128
    n_epochs: 20
    gamma: 0.99
    gae_lambda: 0.9
    ent_coef: 0.0
    sde_sample_freq: 4
    max_grad_norm: 0.5
    vf_coef: 0.5
    learning_rate: !!float 3e-5
    clip_range: 0.4

AntBulletEnv-v0:
  <<: *pybullet-defaults
  policy_hyperparams:
    <<: *pybullet-policy-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults

Walker2DBulletEnv-v0:
  <<: *pybullet-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    clip_range_decay: linear

HopperBulletEnv-v0:
  <<: *pybullet-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    clip_range_decay: linear

HumanoidBulletEnv-v0:
  <<: *pybullet-defaults
  n_timesteps: !!float 1e7
  env_hyperparams:
    <<: *pybullet-env-defaults
    n_envs: 8
  policy_hyperparams:
    <<: *pybullet-policy-defaults
    # log_std_init: -1
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    n_steps: 2048
    batch_size: 64
    n_epochs: 10
    gae_lambda: 0.95
    learning_rate: !!float 2.5e-4
    clip_range: 0.2
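
# Note on the `<<: *anchor` entries above: YAML merge keys do a shallow,
# top-level merge, with locally listed keys taking precedence. Nested maps are
# not merged recursively, which is why HumanoidBulletEnv-v0 re-merges
# *pybullet-env-defaults inside env_hyperparams (to keep normalize: true while
# overriding n_envs) rather than setting n_envs alone.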