{
  "---- Shared parameters ---": "----------------",
  "gamma": 0.99,
  "replay_buffer_size": 1000000,
  "n_initial_samples": 20000,
  "n_epochs": 200,
  "n_training_steps_per_epoch": 250000,
  "n_training_steps_per_online_update": 4,
  "horizon": 27000,
  "starting_eps": 1,
  "ending_eps": 0.01,
  "duration_eps": 250000,
  "batch_size": 32,

  "---- i-DQN ---": "----------------------------",
  "idqn_learning_rate": 6.25e-5,
  "idqn_optimizer_eps": 1.5e-4,
  "idqn_n_step_return": 1,
  "idqn_n_training_steps_per_target_update": 30,
  "idqn_n_training_steps_per_window_shift": 6000,
  "idqn_head_behaviorial_policy": "uniform",
  "idqn_shared_network": true,

  "---- i-IQN ---": "----------------------------",
  "iiqn_learning_rate": 0.00005,
  "iiqn_optimizer_eps": 0.0003125,
  "iiqn_n_step_return": 3,
  "iiqn_n_training_steps_per_target_update": 30,
  "iiqn_n_training_steps_per_window_shift": 6000,
  "iiqn_head_behaviorial_policy": "uniform",
  "iiqn_n_quantiles_policy": 32,
  "iiqn_n_quantiles": 64,
  "iiqn_n_quantiles_target": 64,
  "iiqn_shared_network": true
}