---
# Run configuration with three top-level sections: general_cfg, algo_cfg
# and env_cfg. Keys inside each section are kept in alphabetical order.

# General run settings.
general_cfg:
  algo_name: A2C
  collect_traj: false
  device: cpu
  env_name: gym
  interact_summary_fre: 1
  load_checkpoint: false
  load_model_step: best
  # NOTE(review): this path names a DQN / CartPole-v1 run, while this file
  # selects A2C on Pendulum-v1. Presumably it is only read when
  # load_checkpoint is true - confirm before enabling checkpoint loading.
  load_path: Train_single_CartPole-v1_DQN_20230515-211721
  max_episode: 500
  max_step: 200
  mode: train
  model_save_fre: 10
  model_summary_fre: 1
  mp_backend: single
  n_learners: 1
  n_workers: 2
  online_eval: true
  online_eval_episode: 10
  seed: 1
  share_buffer: true

# Algorithm settings.
algo_cfg:
  action_type: continuous
  actor_hidden_dim: 256
  # List of layer specifications; each entry has an activation, a list of
  # sizes and a layer type.
  actor_layers:
    - activation: relu
      layer_size:
        - 256
      layer_type: linear
  actor_lr: 0.0001
  batch_size: 256
  buffer_type: ONPOLICY_QUE
  critic_hidden_dim: 256
  # Same entry layout as actor_layers.
  critic_layers:
    - activation: relu
      layer_size:
        - 256
      layer_type: linear
  critic_loss_coef: 0.5
  critic_lr: 0.005
  entropy_coef: 0.01
  gamma: 0.9
  independ_actor: true
  k_epochs: 4
  # NOTE(review): lr is set alongside actor_lr and critic_lr; which one the
  # consumer reads is not visible from this file - TODO confirm.
  lr: 0.0001
  min_policy: 0
  n_steps_per_learn: 1
  sgd_batch_size: 32
  share_optimizer: false

# Environment settings.
env_cfg:
  id: Pendulum-v1
  # NOTE(review): the list names two keys of this same section, including
  # ignore_params itself; presumably the consumer skips them - confirm.
  ignore_params:
    - wrapper
    - ignore_params
  render_mode: null
  wrapper: null