# Per-environment training configuration.
#
# Each top-level key is an environment ID; its value groups the run length
# (`n_timesteps`), optional environment settings, policy network settings,
# algorithm settings and evaluation settings for that environment.
#
# NOTE(review): this file was restored from a copy whose line breaks and
# indentation had been flattened. Nesting below follows the key names
# (`*_hyperparams` / `*_params` are sub-mappings; `max_grad_norm` is grouped
# with the other algorithm settings) -- confirm against the consumer's schema.
#
# Keep the `!!float` tags: YAML 1.1 loaders such as PyYAML do not resolve
# exponent literals like `4e5` or `2.5e4` to floats on their own.

CartPole-v1: &cartpole-defaults
  n_timesteps: !!float 4e5
  policy_hyperparams:
    hidden_sizes: [32]
  algo_hyperparams:
    steps_per_epoch: 4096
    pi_lr: 0.01
    gamma: 0.99
    lam: 1
    val_lr: 0.01
    train_v_iters: 80
  eval_params:
    step_freq: !!float 2.5e4
    n_episodes: 10
    save_best: true

CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: !!float 1e5
  # `<<` merges shallowly, so this mapping replaces the inherited
  # `algo_hyperparams` as a whole; every key is therefore restated.
  algo_hyperparams:
    steps_per_epoch: 1024
    pi_lr: 0.01
    gamma: 0.99
    lam: 1
    val_lr: 0.01
    train_v_iters: 80

Acrobot-v1:
  n_timesteps: !!float 2e5
  policy_hyperparams:
    hidden_sizes: [32, 32]
  algo_hyperparams:
    steps_per_epoch: 2048
    pi_lr: 0.005
    gamma: 0.99
    lam: 0.97
    val_lr: 0.01
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 4e4
    n_episodes: 10
    save_best: true

LunarLander-v2:
  n_timesteps: !!float 4e6
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    steps_per_epoch: 2048
    pi_lr: 0.0001
    gamma: 0.999
    lam: 0.97
    val_lr: 0.0001
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true

CarRacing-v0:
  n_timesteps: !!float 4e6
  env_hyperparams:
    frame_stack: 4
    n_envs: 4
    vec_env_class: "dummy"
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    steps_per_epoch: 4000
    pi_lr: !!float 7e-5
    gamma: 0.99
    lam: 0.95
    val_lr: !!float 1e-4
    train_v_iters: 40
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true

HalfCheetahBulletEnv-v0: &pybullet-defaults
  n_timesteps: !!float 2e6
  policy_hyperparams:
    hidden_sizes: [64, 64]
    init_layers_orthogonal: true
  algo_hyperparams:
    steps_per_epoch: 4000
    pi_lr: !!float 3e-4
    gamma: 0.99
    lam: 0.97
    val_lr: !!float 1e-3
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 1e5
    n_episodes: 10
    save_best: true

HopperBulletEnv-v0:
  <<: *pybullet-defaults

AntBulletEnv-v0:
  <<: *pybullet-defaults
  # `<<` is a shallow merge: the two mappings below replace the inherited
  # ones wholesale, so `init_layers_orthogonal` and `steps_per_epoch` from
  # pybullet-defaults are NOT carried over to this entry.
  # NOTE(review): confirm that dropping them is intended.
  policy_hyperparams:
    hidden_sizes: [400, 300]
  algo_hyperparams:
    pi_lr: !!float 7e-4
    gamma: 0.99
    lam: 0.97
    val_lr: !!float 7e-3
    train_v_iters: 80
    max_grad_norm: 0.5

FrozenLake-v1:
  n_timesteps: !!float 8e5
  # NOTE(review): every other entry names its environment section
  # `env_hyperparams`; verify that the consumer really reads `env_params`.
  env_params:
    make_kwargs:
      map_name: 8x8
      is_slippery: true
  policy_hyperparams:
    hidden_sizes: [64]
  algo_hyperparams:
    steps_per_epoch: 2048
    pi_lr: 0.01
    gamma: 0.99
    lam: 0.98
    val_lr: 0.01
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true

SpaceInvadersNoFrameskip-v4: &atari-defaults
  n_timesteps: !!float 1e7
  env_hyperparams:
    frame_stack: 4
    # Plain `1000` rather than `1_000`: underscore-separated integers are a
    # YAML 1.1 feature and load as a string under the YAML 1.2 core schema.
    no_reward_timeout_steps: 1000
    n_envs: 8
    vec_env_class: "subproc"
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    steps_per_epoch: 4096
    pi_lr: !!float 1e-4
    gamma: 0.99
    lam: 0.95
    val_lr: !!float 2e-4
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 1e5
    n_episodes: 10
    save_best: true