# Unity ML-Agents trainer configuration for the "Kyle" behavior, trained with SAC.
# NOTE(review): the nesting below was reconstructed from the ML-Agents
# configuration schema because the source had lost its indentation; all keys
# and values are unchanged. Confirm against the original file if available.
behaviors:
  Kyle:
    trainer_type: sac

    # SAC optimizer / replay-buffer settings.
    hyperparameters:
      learning_rate: 0.0003
      # NOTE(review): `linear` decays the learning rate over max_steps; the
      # ML-Agents docs suggest `constant` for SAC -- confirm this is intended.
      learning_rate_schedule: linear
      batch_size: 1000
      buffer_size: 1000000
      tau: 0.005
      steps_per_update: 30.0
      init_entcoef: 1.0
      reward_signal_steps_per_update: 30.0

    # Policy network architecture.
    network_settings:
      normalize: true
      hidden_units: 256
      num_layers: 3
      vis_encode_type: simple
      # Recurrent memory settings.
      # NOTE(review): sequence_length equals batch_size (1000), so each batch
      # presumably holds a single sequence -- verify this is deliberate.
      memory:
        sequence_length: 1000
        memory_size: 256

    reward_signals:
      # Reward supplied by the environment.
      extrinsic:
        gamma: 0.995
        strength: 1.0
      # Intrinsic curiosity reward, weighted at 0.1 relative to extrinsic (1.0).
      curiosity:
        strength: 0.1
        gamma: 0.995
        network_settings:
          hidden_units: 256
        learning_rate: 0.0001

    # Checkpointing and run-length settings.
    keep_checkpoints: 40
    checkpoint_interval: 500000
    max_steps: 20000000
    time_horizon: 1000
    summary_freq: 10000