# Scraped file-viewer header, kept as comments so it is not parsed as config:
# File size: 3,004 Bytes
# Hash column: db8a108 05b94c0 db8a108 05b94c0 db8a108 05b94c0 db8a108 7c70ebe db8a108
# Line-number gutter: 1-131
# Baseline CartPole settings. The anchor lets CartPole-v0 below reuse them.
# NOTE(review): nesting was reconstructed from the `*_hyperparams` section
# headers after indentation was lost; confirm against the loader's schema.
CartPole-v1: &cartpole-defaults
  # Explicit !!float tag: some YAML 1.1 loaders read a bare `5e4` (no decimal
  # point) as a string rather than a number.
  n_timesteps: !!float 5e4
  env_hyperparams:
    rolling_length: 50
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    learning_rate: !!float 2.3e-3
    batch_size: 64
    buffer_size: 100000
    learning_starts: 1000
    gamma: 0.99
    target_update_interval: 10
    train_freq: 256
    gradient_steps: 128
    exploration_fraction: 0.16
    exploration_final_eps: 0.04
  eval_hyperparams:
    step_freq: !!float 1e4

# Identical to CartPole-v1 except for n_timesteps: keys written explicitly
# next to a `<<` merge take precedence over the merged-in values.
CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: !!float 4e4
# Standalone MountainCar-v0 entry (no anchors or merges).
# NOTE(review): nesting was reconstructed from the `*_hyperparams` section
# headers after indentation was lost; confirm against the loader's schema.
MountainCar-v0:
  # Explicit !!float tag keeps `1.2e5` numeric regardless of the loader.
  n_timesteps: !!float 1.2e5
  env_hyperparams:
    rolling_length: 50
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    learning_rate: !!float 4e-3
    batch_size: 128
    buffer_size: 10000
    learning_starts: 1000
    gamma: 0.98
    target_update_interval: 600
    train_freq: 16
    gradient_steps: 8
    exploration_fraction: 0.2
    exploration_final_eps: 0.07
# Standalone Acrobot-v1 entry (no anchors or merges).
# NOTE(review): nesting was reconstructed from the `*_hyperparams` section
# headers after indentation was lost; confirm against the loader's schema.
Acrobot-v1:
  # Explicit !!float tag: some YAML 1.1 loaders read a bare `1e5` (no decimal
  # point) as a string rather than a number.
  n_timesteps: !!float 1e5
  env_hyperparams:
    rolling_length: 50
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    learning_rate: !!float 6.3e-4
    batch_size: 128
    buffer_size: 50000
    learning_starts: 0
    gamma: 0.99
    target_update_interval: 250
    train_freq: 4
    # NOTE(review): -1 is presumably a sentinel rather than a literal count;
    # confirm its meaning against the algorithm implementation.
    gradient_steps: -1
    exploration_fraction: 0.12
    exploration_final_eps: 0.1
# Standalone LunarLander-v2 entry (no anchors or merges).
# NOTE(review): nesting was reconstructed from the `*_hyperparams` section
# headers after indentation was lost; confirm against the loader's schema.
LunarLander-v2:
  # Explicit !!float tag: some YAML 1.1 loaders read a bare `5e5` (no decimal
  # point) as a string rather than a number.
  n_timesteps: !!float 5e5
  env_hyperparams:
    rolling_length: 50
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    learning_rate: !!float 1e-4
    batch_size: 256
    buffer_size: 100000
    learning_starts: 10000
    gamma: 0.99
    target_update_interval: 250
    train_freq: 8
    # NOTE(review): -1 is presumably a sentinel rather than a literal count;
    # confirm its meaning against the algorithm implementation.
    gradient_steps: -1
    exploration_fraction: 0.12
    exploration_final_eps: 0.1
    # Placed under algo_hyperparams because it sits between the algo and eval
    # section headers in the original key order.
    max_grad_norm: 0.5
  eval_hyperparams:
    # NOTE(review): `25_000` relies on YAML 1.1 underscore-separated integers;
    # a strict YAML 1.2 loader may read it as a string. Confirm the loader.
    step_freq: 25_000
# Shared Atari settings. This entry is the target of the `*atari-defaults`
# aliases below.
# NOTE(review): the leading underscore presumably marks it as a template
# rather than a runnable environment; confirm how the loader treats it.
# NOTE(review): nesting was reconstructed from the `*_hyperparams` section
# headers after indentation was lost; confirm against the loader's schema.
_atari: &atari-defaults
  # Explicit !!float tag: some YAML 1.1 loaders read a bare `1e7` (no decimal
  # point) as a string rather than a number.
  n_timesteps: !!float 1e7
  env_hyperparams:
    frame_stack: 4
    no_reward_timeout_steps: 1_000
    no_reward_fire_steps: 500
    n_envs: 8
    vec_env_class: async
  algo_hyperparams:
    buffer_size: 100000
    learning_rate: !!float 1e-4
    batch_size: 32
    learning_starts: 100000
    target_update_interval: 1000
    train_freq: 8
    gradient_steps: 2
    exploration_fraction: 0.1
    exploration_final_eps: 0.01
  eval_hyperparams:
    deterministic: false

# Atari defaults with a smaller n_timesteps; the explicit key overrides the
# merged-in 1e7.
PongNoFrameskip-v4:
  <<: *atari-defaults
  n_timesteps: !!float 2.5e6

# Atari defaults plus a policy_hyperparams section. `<<` is a shallow merge:
# policy_hyperparams is added as a new top-level section, while
# env/algo/eval_hyperparams are inherited unchanged from _atari.
_impala-atari: &impala-atari-defaults
  <<: *atari-defaults
  policy_hyperparams:
    cnn_style: impala
    cnn_flatten_dim: 256
    init_layers_orthogonal: true
    cnn_layers_init_orthogonal: false

# The entries below merge the impala defaults. Each sets env_id explicitly,
# presumably because the `impala-` prefixed key is not itself an environment
# name; confirm against the loader.
impala-PongNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: PongNoFrameskip-v4
  n_timesteps: !!float 2.5e6  # overrides the inherited 1e7

impala-BreakoutNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: BreakoutNoFrameskip-v4

impala-SpaceInvadersNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: SpaceInvadersNoFrameskip-v4

impala-QbertNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: QbertNoFrameskip-v4
# (end of scraped file content)