---
# Training/evaluation configuration for PPO on Gymnasium Hopper-v4.
# NOTE(review): the original file was collapsed onto a single line (invalid
# YAML); structure reconstructed from the key stream — values unchanged.

# Run-level settings (device, mode, checkpointing, parallelism).
general_cfg:
  algo_name: PPO
  collect_traj: false
  device: cuda
  env_name: gym
  # Load an existing checkpoint instead of training from scratch.
  load_checkpoint: true
  load_model_step: best  # presumably selects the best-scoring checkpoint — verify against loader
  load_path: Train_single_Hopper-v4_PPO_20230526-135633
  max_episode: 100
  max_step: 1000  # per-episode step cap
  mode: test  # evaluation run, not training
  model_save_fre: 500  # presumably model-save frequency in steps/episodes — TODO confirm units
  mp_backend: single
  n_learners: 1
  n_workers: 2
  online_eval: true
  online_eval_episode: 10
  seed: 1
  share_buffer: true

# Algorithm hyperparameters (PPO, clip variant).
algo_cfg:
  actor_hidden_dim: 256
  # Actor MLP: two 256-unit linear layers with ReLU activations.
  actor_layers:
    - activation: relu
      layer_size:
        - 256
      layer_type: linear
    - activation: relu
      layer_size:
        - 256
      layer_type: linear
  actor_lr: 0.0003
  batch_size: 256
  buffer_size: 100000
  buffer_type: ONPOLICY_QUE
  continuous: true  # continuous action space (Hopper-v4)
  critic_hidden_dim: 256
  # Critic MLP mirrors the actor architecture.
  critic_layers:
    - activation: relu
      layer_size:
        - 256
      layer_type: linear
    - activation: relu
      layer_size:
        - 256
      layer_type: linear
  critic_loss_coef: 0.5
  critic_lr: 0.001
  entropy_coef: 0.01
  eps_clip: 0.2  # PPO clip range
  # Epsilon schedule keys — presumably unused by PPO itself; verify consumer.
  epsilon_decay: 500
  epsilon_end: 0.01
  epsilon_start: 0.95
  gamma: 0.95  # discount factor
  independ_actor: true  # presumably: actor and critic do not share a network — confirm
  k_epochs: 8  # PPO update epochs per batch
  # KL-penalty parameters — only relevant when ppo_type is the KL variant.
  kl_alpha: 2
  kl_beta: 1.5
  kl_lambda: 0.5
  kl_target: 0.1
  lr: 0.0001
  min_policy: 0
  ppo_type: clip  # clipped-surrogate PPO (vs. KL-penalty variant)
  sgd_batch_size: 128
  share_optimizer: false
  target_update: 4

# Environment settings (Gymnasium).
env_cfg:
  id: Hopper-v4
  # Keys skipped when forwarding this mapping as env kwargs.
  ignore_params:
    - wrapper
    - ignore_params
  new_step_api: true
  render_mode: null  # no rendering during this run
  wrapper: null  # no custom env wrapper