---
# Training configuration: DDPG (algo_name) on Pendulum-v1 (env_cfg.id).
# The three top-level sections are general_cfg, algo_cfg and env_cfg.

# Run-level settings.
general_cfg:
  algo_name: DDPG
  collect_traj: false
  device: cuda
  env_name: gym
  load_checkpoint: false
  load_model_step: best
  # NOTE(review): this path names a CartPole-v1 DQN run, which does not match
  # algo_name/env_cfg.id here. Presumably it is only read when load_checkpoint
  # is true - confirm before enabling checkpoint loading.
  load_path: Train_single_CartPole-v1_DQN_20230515-211721
  max_episode: 200
  max_step: 200
  mode: train
  model_save_fre: 2000
  mp_backend: ray
  n_learners: 1
  n_workers: 4
  online_eval: true
  online_eval_episode: 20
  seed: 10
  share_buffer: true

# Algorithm hyperparameters and network layouts.
algo_cfg:
  action_type: dpg
  # Two linear layers of size 256 with relu activation.
  actor_layers:
    - activation: relu
      layer_size:
        - 256
      layer_type: linear
    - activation: relu
      layer_size:
        - 256
      layer_type: linear
  actor_lr: 0.0001
  batch_size: 128
  buffer_size: 8000
  buffer_type: REPLAY_QUE
  # Same layout as actor_layers.
  critic_layers:
    - activation: relu
      layer_size:
        - 256
      layer_type: linear
    - activation: relu
      layer_size:
        - 256
      layer_type: linear
  critic_lr: 0.001
  gamma: 0.99
  policy_loss_weight: 0.002
  tau: 0.001
  # .inf / -.inf are YAML floats (positive / negative infinity), not strings.
  # NOTE(review): presumably this leaves values unbounded - confirm in the
  # consumer.
  value_max: .inf
  value_min: -.inf

# Environment settings.
env_cfg:
  id: Pendulum-v1
  ignore_params:
    - wrapper
    - ignore_params
  render_mode: null
  wrapper: null