sgoodfriend's picture
PPO playing HopperBulletEnv-v0 from https://github.com/sgoodfriend/rl-algo-impls/tree/983cb75e43e51cf4ef57f177194ab9a4a1a8808b
bf22389
CartPole-v1: &cartpole-defaults
n_timesteps: !!float 4e5
algo_hyperparams:
n_steps: 4096
pi_lr: 0.01
gamma: 0.99
gae_lambda: 1
val_lr: 0.01
train_v_iters: 80
eval_hyperparams:
step_freq: !!float 2.5e4
CartPole-v0:
<<: *cartpole-defaults
n_timesteps: !!float 1e5
algo_hyperparams:
n_steps: 1024
pi_lr: 0.01
gamma: 0.99
gae_lambda: 1
val_lr: 0.01
train_v_iters: 80
MountainCar-v0:
n_timesteps: !!float 1e6
env_hyperparams:
normalize: true
n_envs: 16
algo_hyperparams:
n_steps: 200
pi_lr: 0.005
gamma: 0.99
gae_lambda: 0.97
val_lr: 0.01
train_v_iters: 80
max_grad_norm: 0.5
MountainCarContinuous-v0:
n_timesteps: !!float 3e5
env_hyperparams:
normalize: true
n_envs: 4
# policy_hyperparams:
# init_layers_orthogonal: false
# log_std_init: -3.29
# use_sde: true
algo_hyperparams:
n_steps: 1000
pi_lr: !!float 5e-4
gamma: 0.99
gae_lambda: 0.9
val_lr: !!float 1e-3
train_v_iters: 80
max_grad_norm: 5
eval_hyperparams:
step_freq: 5000
Acrobot-v1:
n_timesteps: !!float 2e5
algo_hyperparams:
n_steps: 2048
pi_lr: 0.005
gamma: 0.99
gae_lambda: 0.97
val_lr: 0.01
train_v_iters: 80
max_grad_norm: 0.5
LunarLander-v2:
n_timesteps: !!float 4e6
policy_hyperparams:
hidden_sizes: [256, 256]
algo_hyperparams:
n_steps: 2048
pi_lr: 0.0001
gamma: 0.999
gae_lambda: 0.97
val_lr: 0.0001
train_v_iters: 80
max_grad_norm: 0.5
eval_hyperparams:
deterministic: false
BipedalWalker-v3:
n_timesteps: !!float 10e6
env_hyperparams:
n_envs: 16
normalize: true
policy_hyperparams:
hidden_sizes: [256, 256]
algo_hyperparams:
n_steps: 1600
gae_lambda: 0.95
gamma: 0.99
pi_lr: !!float 1e-4
val_lr: !!float 1e-4
train_v_iters: 80
max_grad_norm: 0.5
eval_hyperparams:
deterministic: false
CarRacing-v0:
n_timesteps: !!float 4e6
env_hyperparams:
frame_stack: 4
n_envs: 4
vec_env_class: sync
policy_hyperparams:
use_sde: true
log_std_init: -2
init_layers_orthogonal: false
activation_fn: relu
cnn_flatten_dim: 256
hidden_sizes: [256]
algo_hyperparams:
n_steps: 1000
pi_lr: !!float 5e-5
gamma: 0.99
gae_lambda: 0.95
val_lr: !!float 1e-4
train_v_iters: 40
max_grad_norm: 0.5
sde_sample_freq: 4
HalfCheetahBulletEnv-v0: &pybullet-defaults
n_timesteps: !!float 2e6
env_hyperparams: &pybullet-env-defaults
normalize: true
policy_hyperparams: &pybullet-policy-defaults
hidden_sizes: [256, 256]
algo_hyperparams: &pybullet-algo-defaults
n_steps: 4000
pi_lr: !!float 3e-4
gamma: 0.99
gae_lambda: 0.97
val_lr: !!float 1e-3
train_v_iters: 80
max_grad_norm: 0.5
AntBulletEnv-v0:
<<: *pybullet-defaults
policy_hyperparams:
<<: *pybullet-policy-defaults
hidden_sizes: [400, 300]
algo_hyperparams:
<<: *pybullet-algo-defaults
pi_lr: !!float 7e-4
val_lr: !!float 7e-3
HopperBulletEnv-v0:
<<: *pybullet-defaults
Walker2DBulletEnv-v0:
<<: *pybullet-defaults
FrozenLake-v1:
n_timesteps: !!float 8e5
env_params:
make_kwargs:
map_name: 8x8
is_slippery: true
policy_hyperparams:
hidden_sizes: [64]
algo_hyperparams:
n_steps: 2048
pi_lr: 0.01
gamma: 0.99
gae_lambda: 0.98
val_lr: 0.01
train_v_iters: 80
max_grad_norm: 0.5
eval_hyperparams:
step_freq: !!float 5e4
n_episodes: 10
save_best: true
_atari: &atari-defaults
n_timesteps: !!float 10e6
env_hyperparams:
n_envs: 2
frame_stack: 4
no_reward_timeout_steps: 1000
no_reward_fire_steps: 500
vec_env_class: async
policy_hyperparams:
activation_fn: relu
algo_hyperparams:
n_steps: 3072
pi_lr: !!float 5e-5
gamma: 0.99
gae_lambda: 0.95
val_lr: !!float 1e-4
train_v_iters: 80
max_grad_norm: 0.5
ent_coef: 0.01
eval_hyperparams:
deterministic: false