#-*-Python-*-
# Create replay_buffer
agent/CircularBuffer.buffer_size = 200000
meta/CircularBuffer.buffer_size = 200000
agent/CircularBuffer.scope = "agent"
meta/CircularBuffer.scope = "meta"

# Config train
train_uvf.environment = @create_maze_env()
train_uvf.agent_class = %AGENT_CLASS
train_uvf.meta_agent_class = %META_CLASS
train_uvf.state_preprocess_class = %STATE_PREPROCESS_CLASS
train_uvf.inverse_dynamics_class = %INVERSE_DYNAMICS_CLASS
train_uvf.replay_buffer = @agent/CircularBuffer()
train_uvf.meta_replay_buffer = @meta/CircularBuffer()
train_uvf.critic_optimizer = @critic/AdamOptimizer()
train_uvf.actor_optimizer = @actor/AdamOptimizer()
train_uvf.meta_critic_optimizer = @meta_critic/AdamOptimizer()
train_uvf.meta_actor_optimizer = @meta_actor/AdamOptimizer()
train_uvf.repr_optimizer = @repr/AdamOptimizer()
train_uvf.num_episodes_train = 25000
train_uvf.batch_size = 100
train_uvf.initial_episodes = 5
train_uvf.gamma = 0.99
train_uvf.meta_gamma = 0.99
train_uvf.reward_scale_factor = 1.0
train_uvf.target_update_period = 2
train_uvf.num_updates_per_observation = 1
train_uvf.num_collect_per_update = 1
train_uvf.num_collect_per_meta_update = 10
train_uvf.debug_summaries = False
train_uvf.log_every_n_steps = 1000
train_uvf.save_policy_every_n_steps = 100000

# Config Optimizers
critic/AdamOptimizer.learning_rate = 0.001
critic/AdamOptimizer.beta1 = 0.9
critic/AdamOptimizer.beta2 = 0.999
actor/AdamOptimizer.learning_rate = 0.0001
actor/AdamOptimizer.beta1 = 0.9
actor/AdamOptimizer.beta2 = 0.999
meta_critic/AdamOptimizer.learning_rate = 0.001
meta_critic/AdamOptimizer.beta1 = 0.9
meta_critic/AdamOptimizer.beta2 = 0.999
meta_actor/AdamOptimizer.learning_rate = 0.0001
meta_actor/AdamOptimizer.beta1 = 0.9
meta_actor/AdamOptimizer.beta2 = 0.999
repr/AdamOptimizer.learning_rate = 0.0001
repr/AdamOptimizer.beta1 = 0.9
repr/AdamOptimizer.beta2 = 0.999
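
# Usage sketch (added comment, not part of the original bindings): a gin file
# like this is normally parsed before the configurable training entry point is
# called. The file name 'base_uvf.gin' below is an assumption, and the
# %AGENT_CLASS, %META_CLASS, %STATE_PREPROCESS_CLASS, and
# %INVERSE_DYNAMICS_CLASS macros are expected to be bound by a companion gin
# file or extra bindings supplied at parse time.
#
#   import gin
#
#   # Bind everything above (replay buffers, train_uvf arguments, optimizer
#   # scopes) so that gin-configured constructors pick these values up.
#   gin.parse_config_file('base_uvf.gin')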