sgoodfriend's picture
A2C playing CartPole-v1 from https://github.com/sgoodfriend/rl-algo-impls/tree/0760ef7d52b17f30219a27c18ba52c8895025ae3
464d478
raw
history blame
2.61 kB
# CartPole-v1 settings. Anchored as `cartpole-defaults` so other entries can
# merge them in.
CartPole-v1: &cartpole-defaults
  # `!!float` explicitly tags the exponent-form literal as a float.
  n_timesteps: !!float 5e5
  env_hyperparams:
    n_envs: 8
# CartPole-v0 takes every setting from the `cartpole-defaults` anchor via a
# YAML merge key; nothing is overridden.
CartPole-v0:
  <<: *cartpole-defaults
# MountainCar-v0 settings: only the timestep budget and environment options
# are set here.
MountainCar-v0:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
    normalize: true
# MountainCarContinuous-v0 settings.
MountainCarContinuous-v0:
  n_timesteps: !!float 1e5
  env_hyperparams:
    n_envs: 4
    normalize: true
  # The policy overrides below are intentionally left disabled; uncomment the
  # whole group to re-enable them.
  # policy_hyperparams:
  #   use_sde: true
  #   log_std_init: 0.0
  #   init_layers_orthogonal: false
  algo_hyperparams:
    n_steps: 100
    sde_sample_freq: 16
# Acrobot-v1 settings: only the timestep budget and environment options are
# set here.
Acrobot-v1:
  n_timesteps: !!float 5e5
  env_hyperparams:
    normalize: true
    n_envs: 16
# LunarLander-v2 settings.
LunarLander-v2:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 8
    normalize: true
  algo_hyperparams:
    n_steps: 5
    gamma: 0.995
    # Exponent-form values carry an explicit `!!float` tag.
    learning_rate: !!float 8.3e-4
    learning_rate_decay: linear
    ent_coef: !!float 1e-5
# BipedalWalker-v3 settings: environment, policy and algorithm groups are all
# specified explicitly (no anchors are merged in).
BipedalWalker-v3:
  n_timesteps: !!float 5e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
  algo_hyperparams:
    ent_coef: 0
    max_grad_norm: 0.5
    n_steps: 8
    gae_lambda: 0.9
    vf_coef: 0.4
    gamma: 0.99
    learning_rate: !!float 9.6e-4
    learning_rate_decay: linear
# HalfCheetahBulletEnv-v0 settings. Anchored as `pybullet-defaults`; the other
# PyBullet entries in this file merge this whole mapping.
HalfCheetahBulletEnv-v0: &pybullet-defaults
  n_timesteps: !!float 2e6
  env_hyperparams:
    n_envs: 4
    normalize: true
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
  # Key must be spelled `algo_hyperparams`, like every other entry in this
  # file (it was previously misspelled `algo_hyperaparms`). Entries that merge
  # `pybullet-defaults` inherit this key name as well.
  algo_hyperparams: &pybullet-algo-defaults
    n_steps: 8
    ent_coef: 0
    max_grad_norm: 0.5
    gae_lambda: 0.9
    gamma: 0.99
    vf_coef: 0.4
    learning_rate: !!float 9.6e-4
    learning_rate_decay: linear
# AntBulletEnv-v0 merges the `pybullet-defaults` anchor without overrides.
AntBulletEnv-v0:
  <<: *pybullet-defaults
# Walker2DBulletEnv-v0 merges the `pybullet-defaults` anchor without overrides.
Walker2DBulletEnv-v0:
  <<: *pybullet-defaults
# HopperBulletEnv-v0 merges the `pybullet-defaults` anchor without overrides.
HopperBulletEnv-v0:
  <<: *pybullet-defaults
# CarRacing-v0 settings: environment, policy and algorithm groups are all
# specified explicitly (no anchors are merged in).
CarRacing-v0:
  n_timesteps: !!float 4e6
  env_hyperparams:
    n_envs: 8
    frame_stack: 4
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
    activation_fn: relu
    share_features_extractor: false
    cnn_feature_dim: 256
    # Single-element list.
    hidden_sizes: [256]
  algo_hyperparams:
    n_steps: 8
    learning_rate: !!float 5e-5
    learning_rate_decay: linear
    gamma: 0.99
    gae_lambda: 0.95
    ent_coef: 0
    sde_sample_freq: 4
# Shared Atari template. The whole mapping is anchored as `atari-defaults`,
# and the env/policy sub-mappings carry their own anchors so they can be
# merged individually.
# NOTE(review): the leading underscore suggests this is not itself an
# environment id — confirm how the loader treats `_`-prefixed keys.
_atari: &atari-defaults
  n_timesteps: !!float 1e7
  env_hyperparams: &atari-env-defaults
    n_envs: 16
    frame_stack: 4
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
    vec_env_class: async
  policy_hyperparams: &atari-policy-defaults
    activation_fn: relu
  algo_hyperparams:
    ent_coef: 0.01
    vf_coef: 0.25