johnjim0816 committed on
Commit
7e0d2ec
β€’
1 Parent(s): ccb908b

update CartPole-v1 PPO

Browse files
Files changed (45) hide show
  1. CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/config.yaml +0 -35
  2. CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/logs/log.txt +0 -51
  3. CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/results/learning_curve.png +0 -0
  4. CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/results/res.csv +0 -11
  5. CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/config.yaml +0 -31
  6. CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/logs/log.txt +0 -52
  7. CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/models/actor.pth +0 -3
  8. CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/models/critic.pth +0 -3
  9. CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/results/learning_curve.png +0 -0
  10. CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/results/res.csv +0 -11
  11. CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/config.yaml +0 -32
  12. CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/logs/log.txt +0 -53
  13. CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/models/actor.pth +0 -3
  14. CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/models/critic.pth +0 -3
  15. CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/results/learning_curve.png +0 -0
  16. CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/results/res.csv +0 -11
  17. CartPole-v1/Test_single_CartPole-v1_PPO_20230517-134853/config.yaml +65 -0
  18. CartPole-v1/Test_single_CartPole-v1_PPO_20230517-134853/logs/log.txt +69 -0
  19. CartPole-v1/{Train_CartPole-v1_mp_PPO_20230401-223204/tb_logs/events.out.tfevents.1680359524.dell-Precision-5820-Tower.31414.0 → Test_single_CartPole-v1_PPO_20230517-134853/tb_logs/interact/events.out.tfevents.1684302533.JMac.local.61381.0} +2 -2
  20. CartPole-v1/{Test_CartPole-v1_PPO_20230401-223412/tb_logs/events.out.tfevents.1680359652.dell-Precision-5820-Tower.4337.0 → Test_single_CartPole-v1_PPO_20230517-134853/tb_logs/model/events.out.tfevents.1684302533.JMac.local.61381.1} +1 -1
  21. CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/config.yaml +0 -31
  22. CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/logs/log.txt +0 -252
  23. CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/models/actor.pth +0 -3
  24. CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/models/critic.pth +0 -3
  25. CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/results/learning_curve.png +0 -0
  26. CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/results/res.csv +0 -201
  27. CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/config.yaml +0 -32
  28. CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/logs/log.txt +0 -43
  29. CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/models/actor.pth +0 -3
  30. CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/models/critic.pth +0 -3
  31. CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/results/learning_curve.png +0 -0
  32. CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/results/res.csv +0 -302
  33. CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/config.yaml +65 -0
  34. CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/logs/log.txt +270 -0
  35. CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/10 +0 -0
  36. CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/20 +0 -0
  37. CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/30 +0 -0
  38. CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/40 +0 -0
  39. CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/50 +0 -0
  40. CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/60 +0 -0
  41. CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/70 +0 -0
  42. CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/80 +0 -0
  43. CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/best +0 -0
  44. CartPole-v1/{Test_CartPole-v1_PPO-KL_20221217-204214/models/actor.pth → Train_single_CartPole-v1_PPO_20230517-134440/tb_logs/interact/events.out.tfevents.1684302280.JMac.local.60840.0} +2 -2
  45. CartPole-v1/{Test_CartPole-v1_PPO-KL_20221217-204214/models/critic.pth → Train_single_CartPole-v1_PPO_20230517-134440/tb_logs/model/events.out.tfevents.1684302280.JMac.local.60840.1} +2 -2
CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/config.yaml DELETED
@@ -1,35 +0,0 @@
1
- general_cfg:
2
- algo_name: PPO
3
- device: cuda
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_CartPole-v1_PPO_20221217-204003
9
- max_steps: 200
10
- mode: test
11
- new_step_api: true
12
- render: false
13
- save_fig: true
14
- seed: 1
15
- show_fig: false
16
- test_eps: 10
17
- train_eps: 200
18
- wrapper: null
19
- algo_cfg:
20
- actor_hidden_dim: 256
21
- actor_lr: 0.0003
22
- continuous: false
23
- critic_hidden_dim: 256
24
- critic_lr: 0.001
25
- entropy_coef: 0.01
26
- eps_clip: 0.2
27
- gamma: 0.99
28
- k_epochs: 4
29
- kl_alpha: 2
30
- kl_beta: 1.5
31
- kl_lambda: 0.5
32
- kl_target: 0.01
33
- ppo_type: kl
34
- sgd_batch_size: 64
35
- train_batch_size: 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/logs/log.txt DELETED
@@ -1,51 +0,0 @@
1
- 2022-12-17 20:42:14 - r - INFO: - Hyperparameters:
2
- 2022-12-17 20:42:14 - r - INFO: - ================================================================================
3
- 2022-12-17 20:42:14 - r - INFO: - Name Value Type
4
- 2022-12-17 20:42:14 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2022-12-17 20:42:14 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2022-12-17 20:42:14 - r - INFO: - wrapper None <class 'str'>
7
- 2022-12-17 20:42:14 - r - INFO: - render 0 <class 'bool'>
8
- 2022-12-17 20:42:14 - r - INFO: - algo_name PPO <class 'str'>
9
- 2022-12-17 20:42:14 - r - INFO: - mode test <class 'str'>
10
- 2022-12-17 20:42:14 - r - INFO: - seed 1 <class 'int'>
11
- 2022-12-17 20:42:14 - r - INFO: - device cuda <class 'str'>
12
- 2022-12-17 20:42:14 - r - INFO: - train_eps 200 <class 'int'>
13
- 2022-12-17 20:42:14 - r - INFO: - test_eps 10 <class 'int'>
14
- 2022-12-17 20:42:14 - r - INFO: - eval_eps 10 <class 'int'>
15
- 2022-12-17 20:42:14 - r - INFO: - eval_per_episode 5 <class 'int'>
16
- 2022-12-17 20:42:14 - r - INFO: - max_steps 200 <class 'int'>
17
- 2022-12-17 20:42:14 - r - INFO: - load_checkpoint 1 <class 'bool'>
18
- 2022-12-17 20:42:14 - r - INFO: - load_path Train_CartPole-v1_PPO_20221217-204003 <class 'str'>
19
- 2022-12-17 20:42:14 - r - INFO: - show_fig 0 <class 'bool'>
20
- 2022-12-17 20:42:14 - r - INFO: - save_fig 1 <class 'bool'>
21
- 2022-12-17 20:42:14 - r - INFO: - ppo_type kl <class 'str'>
22
- 2022-12-17 20:42:14 - r - INFO: - continuous 0 <class 'bool'>
23
- 2022-12-17 20:42:14 - r - INFO: - gamma 0.99 <class 'float'>
24
- 2022-12-17 20:42:14 - r - INFO: - k_epochs 4 <class 'int'>
25
- 2022-12-17 20:42:14 - r - INFO: - actor_lr 0.0003 <class 'float'>
26
- 2022-12-17 20:42:14 - r - INFO: - critic_lr 0.001 <class 'float'>
27
- 2022-12-17 20:42:14 - r - INFO: - eps_clip 0.2 <class 'float'>
28
- 2022-12-17 20:42:14 - r - INFO: - entropy_coef 0.01 <class 'float'>
29
- 2022-12-17 20:42:14 - r - INFO: - train_batch_size 100 <class 'int'>
30
- 2022-12-17 20:42:14 - r - INFO: - sgd_batch_size 64 <class 'int'>
31
- 2022-12-17 20:42:14 - r - INFO: - actor_hidden_dim 256 <class 'int'>
32
- 2022-12-17 20:42:14 - r - INFO: - critic_hidden_dim 256 <class 'int'>
33
- 2022-12-17 20:42:14 - r - INFO: - kl_alpha 2 <class 'int'>
34
- 2022-12-17 20:42:14 - r - INFO: - kl_beta 1.5 <class 'float'>
35
- 2022-12-17 20:42:14 - r - INFO: - kl_lambda 0.5 <class 'float'>
36
- 2022-12-17 20:42:14 - r - INFO: - kl_target 0.01 <class 'float'>
37
- 2022-12-17 20:42:14 - r - INFO: - ================================================================================
38
- 2022-12-17 20:42:15 - r - INFO: - n_states: 4, n_actions: 2
39
- 2022-12-17 20:42:16 - r - INFO: - Start testing!
40
- 2022-12-17 20:42:16 - r - INFO: - Env: CartPole-v1, Algorithm: PPO, Device: cuda
41
- 2022-12-17 20:42:17 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
42
- 2022-12-17 20:42:17 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
43
- 2022-12-17 20:42:18 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
44
- 2022-12-17 20:42:18 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
45
- 2022-12-17 20:42:18 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
46
- 2022-12-17 20:42:19 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
47
- 2022-12-17 20:42:19 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
48
- 2022-12-17 20:42:19 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
49
- 2022-12-17 20:42:19 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
50
- 2022-12-17 20:42:20 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
51
- 2022-12-17 20:42:20 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/results/learning_curve.png DELETED
Binary file (25.5 kB)
 
CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,200.0,200
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,200.0,200
7
- 5,200.0,200
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,200.0,200
11
- 9,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/config.yaml DELETED
@@ -1,31 +0,0 @@
1
- general_cfg:
2
- algo_name: PPO
3
- device: cuda
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_CartPole-v1_PPO_20230220-212959
9
- max_steps: 200
10
- mode: test
11
- new_step_api: true
12
- render: false
13
- save_fig: true
14
- seed: 1
15
- show_fig: false
16
- test_eps: 10
17
- train_eps: 200
18
- wrapper: null
19
- algo_cfg:
20
- actor_hidden_dim: 256
21
- actor_lr: 0.0003
22
- continuous: false
23
- critic_hidden_dim: 256
24
- critic_lr: 0.001
25
- entropy_coef: 0.01
26
- eps_clip: 0.2
27
- gamma: 0.99
28
- k_epochs: 4
29
- ppo_type: clip
30
- sgd_batch_size: 128
31
- train_batch_size: 256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/logs/log.txt DELETED
@@ -1,52 +0,0 @@
1
- 2023-02-20 21:31:53 - r - INFO: - Hyperparameters:
2
- 2023-02-20 21:31:53 - r - INFO: - ================================================================================
3
- 2023-02-20 21:31:53 - r - INFO: - Name Value Type
4
- 2023-02-20 21:31:53 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-02-20 21:31:53 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-02-20 21:31:53 - r - INFO: - wrapper None <class 'str'>
7
- 2023-02-20 21:31:53 - r - INFO: - render 0 <class 'bool'>
8
- 2023-02-20 21:31:53 - r - INFO: - algo_name PPO <class 'str'>
9
- 2023-02-20 21:31:53 - r - INFO: - mode test <class 'str'>
10
- 2023-02-20 21:31:53 - r - INFO: - seed 1 <class 'int'>
11
- 2023-02-20 21:31:53 - r - INFO: - device cuda <class 'str'>
12
- 2023-02-20 21:31:53 - r - INFO: - train_eps 200 <class 'int'>
13
- 2023-02-20 21:31:53 - r - INFO: - test_eps 10 <class 'int'>
14
- 2023-02-20 21:31:53 - r - INFO: - eval_eps 10 <class 'int'>
15
- 2023-02-20 21:31:53 - r - INFO: - eval_per_episode 5 <class 'int'>
16
- 2023-02-20 21:31:53 - r - INFO: - max_steps 200 <class 'int'>
17
- 2023-02-20 21:31:53 - r - INFO: - load_checkpoint 1 <class 'bool'>
18
- 2023-02-20 21:31:53 - r - INFO: - load_path Train_CartPole-v1_PPO_20230220-212959 <class 'str'>
19
- 2023-02-20 21:31:53 - r - INFO: - show_fig 0 <class 'bool'>
20
- 2023-02-20 21:31:53 - r - INFO: - save_fig 1 <class 'bool'>
21
- 2023-02-20 21:31:53 - r - INFO: - ppo_type clip <class 'str'>
22
- 2023-02-20 21:31:53 - r - INFO: - continuous 0 <class 'bool'>
23
- 2023-02-20 21:31:53 - r - INFO: - gamma 0.99 <class 'float'>
24
- 2023-02-20 21:31:53 - r - INFO: - k_epochs 4 <class 'int'>
25
- 2023-02-20 21:31:53 - r - INFO: - actor_lr 0.0003 <class 'float'>
26
- 2023-02-20 21:31:53 - r - INFO: - critic_lr 0.001 <class 'float'>
27
- 2023-02-20 21:31:53 - r - INFO: - eps_clip 0.2 <class 'float'>
28
- 2023-02-20 21:31:53 - r - INFO: - entropy_coef 0.01 <class 'float'>
29
- 2023-02-20 21:31:53 - r - INFO: - train_batch_size 256 <class 'int'>
30
- 2023-02-20 21:31:53 - r - INFO: - sgd_batch_size 128 <class 'int'>
31
- 2023-02-20 21:31:53 - r - INFO: - actor_hidden_dim 256 <class 'int'>
32
- 2023-02-20 21:31:53 - r - INFO: - critic_hidden_dim 256 <class 'int'>
33
- 2023-02-20 21:31:53 - r - INFO: - task_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Test_CartPole-v1_PPO_20230220-213153 <class 'str'>
34
- 2023-02-20 21:31:53 - r - INFO: - model_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Test_CartPole-v1_PPO_20230220-213153/models <class 'str'>
35
- 2023-02-20 21:31:53 - r - INFO: - res_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Test_CartPole-v1_PPO_20230220-213153/results <class 'str'>
36
- 2023-02-20 21:31:53 - r - INFO: - log_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Test_CartPole-v1_PPO_20230220-213153/logs <class 'str'>
37
- 2023-02-20 21:31:53 - r - INFO: - traj_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Test_CartPole-v1_PPO_20230220-213153/traj <class 'str'>
38
- 2023-02-20 21:31:53 - r - INFO: - ================================================================================
39
- 2023-02-20 21:31:53 - r - INFO: - n_states: 4, n_actions: 2
40
- 2023-02-20 21:31:54 - r - INFO: - Start testing!
41
- 2023-02-20 21:31:54 - r - INFO: - Env: CartPole-v1, Algorithm: PPO, Device: cuda
42
- 2023-02-20 21:31:55 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
43
- 2023-02-20 21:31:55 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
44
- 2023-02-20 21:31:56 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
45
- 2023-02-20 21:31:56 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
46
- 2023-02-20 21:31:56 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
47
- 2023-02-20 21:31:56 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
48
- 2023-02-20 21:31:56 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
49
- 2023-02-20 21:31:57 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
50
- 2023-02-20 21:31:57 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
51
- 2023-02-20 21:31:57 - r - INFO: - Episode: 10/10, Reward: 189.000, Step: 189
52
- 2023-02-20 21:31:57 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/models/actor.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d07e4388597f766e04099380da27cb55fd877e8d26cdd14eab48bc097525216
3
- size 272215
 
 
 
 
CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/models/critic.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6743877f9534c272b5e6d8bae3cbc87b1fa32bb21af2935c28e503122e042c2d
3
- size 271191
 
 
 
 
CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/results/learning_curve.png DELETED
Binary file (24.9 kB)
 
CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,200.0,200
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,200.0,200
7
- 5,200.0,200
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,200.0,200
11
- 9,189.0,189
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/config.yaml DELETED
@@ -1,32 +0,0 @@
1
- general_cfg:
2
- algo_name: PPO
3
- device: cpu
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_CartPole-v1_mp_PPO_20230401-223204
9
- max_steps: 200
10
- mode: test
11
- n_workers: 1
12
- new_step_api: true
13
- render: false
14
- save_fig: true
15
- seed: 1
16
- show_fig: false
17
- test_eps: 10
18
- train_eps: 300
19
- wrapper: null
20
- algo_cfg:
21
- actor_hidden_dim: 256
22
- actor_lr: 0.0003
23
- continuous: false
24
- critic_hidden_dim: 256
25
- critic_lr: 0.001
26
- entropy_coef: 0.01
27
- eps_clip: 0.2
28
- gamma: 0.99
29
- k_epochs: 4
30
- ppo_type: clip
31
- sgd_batch_size: 128
32
- train_batch_size: 256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/logs/log.txt DELETED
@@ -1,53 +0,0 @@
1
- 2023-04-01 22:34:12 - r - INFO: - Hyperparameters:
2
- 2023-04-01 22:34:12 - r - INFO: - ================================================================================
3
- 2023-04-01 22:34:12 - r - INFO: - Name Value Type
4
- 2023-04-01 22:34:12 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-04-01 22:34:12 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-01 22:34:12 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-01 22:34:12 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-01 22:34:12 - r - INFO: - algo_name PPO <class 'str'>
9
- 2023-04-01 22:34:12 - r - INFO: - mode test <class 'str'>
10
- 2023-04-01 22:34:12 - r - INFO: - seed 1 <class 'int'>
11
- 2023-04-01 22:34:12 - r - INFO: - device cpu <class 'str'>
12
- 2023-04-01 22:34:12 - r - INFO: - train_eps 300 <class 'int'>
13
- 2023-04-01 22:34:12 - r - INFO: - test_eps 10 <class 'int'>
14
- 2023-04-01 22:34:12 - r - INFO: - eval_eps 10 <class 'int'>
15
- 2023-04-01 22:34:12 - r - INFO: - eval_per_episode 5 <class 'int'>
16
- 2023-04-01 22:34:12 - r - INFO: - max_steps 200 <class 'int'>
17
- 2023-04-01 22:34:12 - r - INFO: - load_checkpoint 1 <class 'bool'>
18
- 2023-04-01 22:34:12 - r - INFO: - load_path Train_CartPole-v1_mp_PPO_20230401-223204 <class 'str'>
19
- 2023-04-01 22:34:12 - r - INFO: - show_fig 0 <class 'bool'>
20
- 2023-04-01 22:34:12 - r - INFO: - save_fig 1 <class 'bool'>
21
- 2023-04-01 22:34:12 - r - INFO: - n_workers 1 <class 'int'>
22
- 2023-04-01 22:34:12 - r - INFO: - ppo_type clip <class 'str'>
23
- 2023-04-01 22:34:12 - r - INFO: - continuous 0 <class 'bool'>
24
- 2023-04-01 22:34:12 - r - INFO: - gamma 0.99 <class 'float'>
25
- 2023-04-01 22:34:12 - r - INFO: - k_epochs 4 <class 'int'>
26
- 2023-04-01 22:34:12 - r - INFO: - actor_lr 0.0003 <class 'float'>
27
- 2023-04-01 22:34:12 - r - INFO: - critic_lr 0.001 <class 'float'>
28
- 2023-04-01 22:34:12 - r - INFO: - eps_clip 0.2 <class 'float'>
29
- 2023-04-01 22:34:12 - r - INFO: - entropy_coef 0.01 <class 'float'>
30
- 2023-04-01 22:34:12 - r - INFO: - train_batch_size 256 <class 'int'>
31
- 2023-04-01 22:34:12 - r - INFO: - sgd_batch_size 128 <class 'int'>
32
- 2023-04-01 22:34:12 - r - INFO: - actor_hidden_dim 256 <class 'int'>
33
- 2023-04-01 22:34:12 - r - INFO: - critic_hidden_dim 256 <class 'int'>
34
- 2023-04-01 22:34:12 - r - INFO: - task_dir /home/dingli/joyrl_offline/tasks/Test_CartPole-v1_PPO_20230401-223412 <class 'str'>
35
- 2023-04-01 22:34:12 - r - INFO: - res_dir /home/dingli/joyrl_offline/tasks/Test_CartPole-v1_PPO_20230401-223412/results <class 'str'>
36
- 2023-04-01 22:34:12 - r - INFO: - log_dir /home/dingli/joyrl_offline/tasks/Test_CartPole-v1_PPO_20230401-223412/logs <class 'str'>
37
- 2023-04-01 22:34:12 - r - INFO: - traj_dir /home/dingli/joyrl_offline/tasks/Test_CartPole-v1_PPO_20230401-223412/traj <class 'str'>
38
- 2023-04-01 22:34:12 - r - INFO: - tb_dir /home/dingli/joyrl_offline/tasks/Test_CartPole-v1_PPO_20230401-223412/tb_logs <class 'str'>
39
- 2023-04-01 22:34:12 - r - INFO: - ================================================================================
40
- 2023-04-01 22:34:12 - r - INFO: - n_states: 4, n_actions: 2
41
- 2023-04-01 22:34:12 - r - INFO: - Start testing!
42
- 2023-04-01 22:34:12 - r - INFO: - Env: CartPole-v1, Algorithm: PPO, Device: cpu
43
- 2023-04-01 22:34:12 - r - INFO: - Episode: 1/10, Reward: 136.000, Step: 136
44
- 2023-04-01 22:34:12 - r - INFO: - Episode: 2/10, Reward: 136.000, Step: 136
45
- 2023-04-01 22:34:12 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
46
- 2023-04-01 22:34:12 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
47
- 2023-04-01 22:34:12 - r - INFO: - Episode: 5/10, Reward: 187.000, Step: 187
48
- 2023-04-01 22:34:13 - r - INFO: - Episode: 6/10, Reward: 192.000, Step: 192
49
- 2023-04-01 22:34:13 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
50
- 2023-04-01 22:34:13 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
51
- 2023-04-01 22:34:13 - r - INFO: - Episode: 9/10, Reward: 159.000, Step: 159
52
- 2023-04-01 22:34:13 - r - INFO: - Episode: 10/10, Reward: 124.000, Step: 124
53
- 2023-04-01 22:34:13 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/models/actor.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6a3e48d551bcba327ff4c5d3cc464a6a94b83eda543a54d231016e021e8cbd3
3
- size 272151
 
 
 
 
CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/models/critic.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4db7aeb3805e1deb11428a34a600a40068a0f711986f38fdf9e0f9895f8a45c
3
- size 271127
 
 
 
 
CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/results/learning_curve.png DELETED
Binary file (38.8 kB)
 
CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,136.0,136
3
- 1,136.0,136
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,187.0,187
7
- 5,192.0,192
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,159.0,159
11
- 9,124.0,124
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_single_CartPole-v1_PPO_20230517-134853/config.yaml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general_cfg:
2
+ algo_name: PPO
3
+ collect_traj: false
4
+ device: cpu
5
+ env_name: gym
6
+ load_checkpoint: true
7
+ load_model_step: best
8
+ load_path: Train_single_CartPole-v1_PPO_20230517-134440
9
+ max_episode: 10
10
+ max_step: 200
11
+ mode: test
12
+ model_save_fre: 10
13
+ mp_backend: single
14
+ n_workers: 2
15
+ online_eval: true
16
+ online_eval_episode: 10
17
+ seed: 1
18
+ algo_cfg:
19
+ actor_hidden_dim: 256
20
+ actor_layers:
21
+ - activation: relu
22
+ layer_dim:
23
+ - 256
24
+ layer_type: linear
25
+ - activation: relu
26
+ layer_dim:
27
+ - 256
28
+ layer_type: linear
29
+ actor_lr: 0.0003
30
+ batch_size: 256
31
+ buffer_type: ONPOLICY_QUE
32
+ continuous: false
33
+ critic_hidden_dim: 256
34
+ critic_layers:
35
+ - activation: relu
36
+ layer_dim:
37
+ - 256
38
+ layer_type: linear
39
+ - activation: relu
40
+ layer_dim:
41
+ - 256
42
+ layer_type: linear
43
+ critic_loss_coef: 0.5
44
+ critic_lr: 0.001
45
+ entropy_coef: 0.01
46
+ eps_clip: 0.2
47
+ gamma: 0.99
48
+ independ_actor: true
49
+ k_epochs: 4
50
+ kl_alpha: 2
51
+ kl_beta: 1.5
52
+ kl_lambda: 0.5
53
+ kl_target: 0.1
54
+ lr: 0.0001
55
+ min_policy: 0
56
+ ppo_type: clip
57
+ sgd_batch_size: 128
58
+ share_optimizer: false
59
+ env_cfg:
60
+ id: CartPole-v1
61
+ ignore_params:
62
+ - wrapper
63
+ - ignore_params
64
+ render_mode: null
65
+ wrapper: null
CartPole-v1/Test_single_CartPole-v1_PPO_20230517-134853/logs/log.txt ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - General Configs:
2
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - ================================================================================
3
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - Name Value Type
4
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - env_name gym <class 'str'>
5
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - algo_name PPO <class 'str'>
6
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - mode test <class 'str'>
7
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - device cpu <class 'str'>
8
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - seed 1 <class 'int'>
9
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - max_episode 10 <class 'int'>
10
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - max_step 200 <class 'int'>
11
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
12
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - mp_backend single <class 'str'>
13
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - n_workers 2 <class 'int'>
14
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
15
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
16
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - model_save_fre 10 <class 'int'>
17
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - load_checkpoint 1 <class 'bool'>
18
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_PPO_20230517-134440 <class 'str'>
19
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - load_model_step best <class 'str'>
20
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - ================================================================================
21
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - Algo Configs:
22
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - ================================================================================
23
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - Name Value Type
24
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - independ_actor 1 <class 'bool'>
25
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - share_optimizer 0 <class 'bool'>
26
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - ppo_type clip <class 'str'>
27
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - eps_clip 0.2 <class 'float'>
28
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - kl_target 0.1 <class 'float'>
29
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - kl_lambda 0.5 <class 'float'>
30
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - kl_beta 1.5 <class 'float'>
31
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - kl_alpha 2 <class 'int'>
32
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - continuous 0 <class 'bool'>
33
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
34
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - k_epochs 4 <class 'int'>
35
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
36
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - actor_lr 0.0003 <class 'float'>
37
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - critic_lr 0.001 <class 'float'>
38
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - critic_loss_coef 0.5 <class 'float'>
39
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - entropy_coef 0.01 <class 'float'>
40
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - batch_size 256 <class 'int'>
41
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - sgd_batch_size 128 <class 'int'>
42
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - actor_hidden_dim 256 <class 'int'>
43
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - critic_hidden_dim 256 <class 'int'>
44
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - min_policy 0 <class 'int'>
45
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - actor_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
46
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - critic_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
47
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - buffer_type ONPOLICY_QUE <class 'str'>
48
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - ================================================================================
49
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - Env Configs:
50
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - ================================================================================
51
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - Name Value Type
52
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
53
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - render_mode None <class 'str'>
54
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - wrapper None <class 'str'>
55
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
56
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - ================================================================================
57
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
58
+ 2023-05-17 13:48:53 - SimpleLog - INFO: - Start testing!
59
+ 2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 0, ep_reward: 200.0, ep_step: 200
60
+ 2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 1, ep_reward: 200.0, ep_step: 200
61
+ 2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 2, ep_reward: 200.0, ep_step: 200
62
+ 2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 3, ep_reward: 200.0, ep_step: 200
63
+ 2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 4, ep_reward: 200.0, ep_step: 200
64
+ 2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 5, ep_reward: 200.0, ep_step: 200
65
+ 2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 6, ep_reward: 200.0, ep_step: 200
66
+ 2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 7, ep_reward: 200.0, ep_step: 200
67
+ 2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 8, ep_reward: 200.0, ep_step: 200
68
+ 2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 9, ep_reward: 200.0, ep_step: 200
69
+ 2023-05-17 13:48:54 - SimpleLog - INFO: - Finish testing! total time consumed: 0.50s
CartPole-v1/{Train_CartPole-v1_mp_PPO_20230401-223204/tb_logs/events.out.tfevents.1680359524.dell-Precision-5820-Tower.31414.0 → Test_single_CartPole-v1_PPO_20230517-134853/tb_logs/interact/events.out.tfevents.1684302533.JMac.local.61381.0} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cfeaaafbbeba1e7f3b3b9e7741cd5d212f6edfa464aa67711ba39abad513a3b
3
- size 40
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:192f3f9444f62d66ccd1d47939a013e4b5a4c4a0aafcbd0862831a91c15db487
3
+ size 1056
CartPole-v1/{Test_CartPole-v1_PPO_20230401-223412/tb_logs/events.out.tfevents.1680359652.dell-Precision-5820-Tower.4337.0 → Test_single_CartPole-v1_PPO_20230517-134853/tb_logs/model/events.out.tfevents.1684302533.JMac.local.61381.1} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ddf29ee69cf62909c79397c263b36e21520613cb43c5bfe70a96b2ac2ff0871
3
  size 40
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ba2d347be4135f91135b543380189593fd17038b8a32b037a3d7ab5938a2f7a
3
  size 40
CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/config.yaml DELETED
@@ -1,31 +0,0 @@
1
- general_cfg:
2
- algo_name: PPO
3
- device: cuda
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: false
8
- load_path: Train_CartPole-v1_DQN_20221026-054757
9
- max_steps: 200
10
- mode: train
11
- new_step_api: true
12
- render: false
13
- save_fig: true
14
- seed: 1
15
- show_fig: false
16
- test_eps: 10
17
- train_eps: 200
18
- wrapper: null
19
- algo_cfg:
20
- actor_hidden_dim: 256
21
- actor_lr: 0.0003
22
- continuous: false
23
- critic_hidden_dim: 256
24
- critic_lr: 0.001
25
- entropy_coef: 0.01
26
- eps_clip: 0.2
27
- gamma: 0.99
28
- k_epochs: 4
29
- ppo_type: clip
30
- sgd_batch_size: 128
31
- train_batch_size: 256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/logs/log.txt DELETED
@@ -1,252 +0,0 @@
1
- 2023-02-20 21:29:59 - r - INFO: - Hyperparameters:
2
- 2023-02-20 21:29:59 - r - INFO: - ================================================================================
3
- 2023-02-20 21:29:59 - r - INFO: - Name Value Type
4
- 2023-02-20 21:29:59 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-02-20 21:29:59 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-02-20 21:29:59 - r - INFO: - wrapper None <class 'str'>
7
- 2023-02-20 21:29:59 - r - INFO: - render 0 <class 'bool'>
8
- 2023-02-20 21:29:59 - r - INFO: - algo_name PPO <class 'str'>
9
- 2023-02-20 21:29:59 - r - INFO: - mode train <class 'str'>
10
- 2023-02-20 21:29:59 - r - INFO: - seed 1 <class 'int'>
11
- 2023-02-20 21:29:59 - r - INFO: - device cuda <class 'str'>
12
- 2023-02-20 21:29:59 - r - INFO: - train_eps 200 <class 'int'>
13
- 2023-02-20 21:29:59 - r - INFO: - test_eps 10 <class 'int'>
14
- 2023-02-20 21:29:59 - r - INFO: - eval_eps 10 <class 'int'>
15
- 2023-02-20 21:29:59 - r - INFO: - eval_per_episode 5 <class 'int'>
16
- 2023-02-20 21:29:59 - r - INFO: - max_steps 200 <class 'int'>
17
- 2023-02-20 21:29:59 - r - INFO: - load_checkpoint 0 <class 'bool'>
18
- 2023-02-20 21:29:59 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
19
- 2023-02-20 21:29:59 - r - INFO: - show_fig 0 <class 'bool'>
20
- 2023-02-20 21:29:59 - r - INFO: - save_fig 1 <class 'bool'>
21
- 2023-02-20 21:29:59 - r - INFO: - ppo_type clip <class 'str'>
22
- 2023-02-20 21:29:59 - r - INFO: - continuous 0 <class 'bool'>
23
- 2023-02-20 21:29:59 - r - INFO: - gamma 0.99 <class 'float'>
24
- 2023-02-20 21:29:59 - r - INFO: - k_epochs 4 <class 'int'>
25
- 2023-02-20 21:29:59 - r - INFO: - actor_lr 0.0003 <class 'float'>
26
- 2023-02-20 21:29:59 - r - INFO: - critic_lr 0.001 <class 'float'>
27
- 2023-02-20 21:29:59 - r - INFO: - eps_clip 0.2 <class 'float'>
28
- 2023-02-20 21:29:59 - r - INFO: - entropy_coef 0.01 <class 'float'>
29
- 2023-02-20 21:29:59 - r - INFO: - train_batch_size 256 <class 'int'>
30
- 2023-02-20 21:29:59 - r - INFO: - sgd_batch_size 128 <class 'int'>
31
- 2023-02-20 21:29:59 - r - INFO: - actor_hidden_dim 256 <class 'int'>
32
- 2023-02-20 21:29:59 - r - INFO: - critic_hidden_dim 256 <class 'int'>
33
- 2023-02-20 21:29:59 - r - INFO: - task_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Train_CartPole-v1_PPO_20230220-212959 <class 'str'>
34
- 2023-02-20 21:29:59 - r - INFO: - model_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Train_CartPole-v1_PPO_20230220-212959/models <class 'str'>
35
- 2023-02-20 21:29:59 - r - INFO: - res_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Train_CartPole-v1_PPO_20230220-212959/results <class 'str'>
36
- 2023-02-20 21:29:59 - r - INFO: - log_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Train_CartPole-v1_PPO_20230220-212959/logs <class 'str'>
37
- 2023-02-20 21:29:59 - r - INFO: - traj_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Train_CartPole-v1_PPO_20230220-212959/traj <class 'str'>
38
- 2023-02-20 21:29:59 - r - INFO: - ================================================================================
39
- 2023-02-20 21:29:59 - r - INFO: - n_states: 4, n_actions: 2
40
- 2023-02-20 21:29:59 - r - INFO: - Start training!
41
- 2023-02-20 21:29:59 - r - INFO: - Env: CartPole-v1, Algorithm: PPO, Device: cuda
42
- 2023-02-20 21:30:01 - r - INFO: - Episode: 1/200, Reward: 25.000, Step: 25
43
- 2023-02-20 21:30:01 - r - INFO: - Episode: 2/200, Reward: 15.000, Step: 15
44
- 2023-02-20 21:30:01 - r - INFO: - Episode: 3/200, Reward: 13.000, Step: 13
45
- 2023-02-20 21:30:01 - r - INFO: - Episode: 4/200, Reward: 14.000, Step: 14
46
- 2023-02-20 21:30:01 - r - INFO: - Episode: 5/200, Reward: 25.000, Step: 25
47
- 2023-02-20 21:30:01 - r - INFO: - Current episode 5 has the best eval reward: 23.900
48
- 2023-02-20 21:30:01 - r - INFO: - Episode: 6/200, Reward: 41.000, Step: 41
49
- 2023-02-20 21:30:01 - r - INFO: - Episode: 7/200, Reward: 33.000, Step: 33
50
- 2023-02-20 21:30:01 - r - INFO: - Episode: 8/200, Reward: 12.000, Step: 12
51
- 2023-02-20 21:30:01 - r - INFO: - Episode: 9/200, Reward: 20.000, Step: 20
52
- 2023-02-20 21:30:01 - r - INFO: - Episode: 10/200, Reward: 33.000, Step: 33
53
- 2023-02-20 21:30:02 - r - INFO: - Current episode 10 has the best eval reward: 27.400
54
- 2023-02-20 21:30:02 - r - INFO: - Episode: 11/200, Reward: 19.000, Step: 19
55
- 2023-02-20 21:30:02 - r - INFO: - Episode: 12/200, Reward: 35.000, Step: 35
56
- 2023-02-20 21:30:02 - r - INFO: - Episode: 13/200, Reward: 35.000, Step: 35
57
- 2023-02-20 21:30:02 - r - INFO: - Episode: 14/200, Reward: 9.000, Step: 9
58
- 2023-02-20 21:30:02 - r - INFO: - Episode: 15/200, Reward: 32.000, Step: 32
59
- 2023-02-20 21:30:02 - r - INFO: - Current episode 15 has the best eval reward: 37.600
60
- 2023-02-20 21:30:02 - r - INFO: - Episode: 16/200, Reward: 29.000, Step: 29
61
- 2023-02-20 21:30:02 - r - INFO: - Episode: 17/200, Reward: 17.000, Step: 17
62
- 2023-02-20 21:30:02 - r - INFO: - Episode: 18/200, Reward: 17.000, Step: 17
63
- 2023-02-20 21:30:02 - r - INFO: - Episode: 19/200, Reward: 17.000, Step: 17
64
- 2023-02-20 21:30:02 - r - INFO: - Episode: 20/200, Reward: 20.000, Step: 20
65
- 2023-02-20 21:30:03 - r - INFO: - Episode: 21/200, Reward: 24.000, Step: 24
66
- 2023-02-20 21:30:03 - r - INFO: - Episode: 22/200, Reward: 44.000, Step: 44
67
- 2023-02-20 21:30:03 - r - INFO: - Episode: 23/200, Reward: 39.000, Step: 39
68
- 2023-02-20 21:30:03 - r - INFO: - Episode: 24/200, Reward: 48.000, Step: 48
69
- 2023-02-20 21:30:03 - r - INFO: - Episode: 25/200, Reward: 52.000, Step: 52
70
- 2023-02-20 21:30:03 - r - INFO: - Episode: 26/200, Reward: 32.000, Step: 32
71
- 2023-02-20 21:30:03 - r - INFO: - Episode: 27/200, Reward: 45.000, Step: 45
72
- 2023-02-20 21:30:04 - r - INFO: - Episode: 28/200, Reward: 68.000, Step: 68
73
- 2023-02-20 21:30:04 - r - INFO: - Episode: 29/200, Reward: 45.000, Step: 45
74
- 2023-02-20 21:30:04 - r - INFO: - Episode: 30/200, Reward: 16.000, Step: 16
75
- 2023-02-20 21:30:04 - r - INFO: - Current episode 30 has the best eval reward: 45.300
76
- 2023-02-20 21:30:04 - r - INFO: - Episode: 31/200, Reward: 38.000, Step: 38
77
- 2023-02-20 21:30:04 - r - INFO: - Episode: 32/200, Reward: 17.000, Step: 17
78
- 2023-02-20 21:30:04 - r - INFO: - Episode: 33/200, Reward: 35.000, Step: 35
79
- 2023-02-20 21:30:04 - r - INFO: - Episode: 34/200, Reward: 12.000, Step: 12
80
- 2023-02-20 21:30:04 - r - INFO: - Episode: 35/200, Reward: 56.000, Step: 56
81
- 2023-02-20 21:30:05 - r - INFO: - Episode: 36/200, Reward: 36.000, Step: 36
82
- 2023-02-20 21:30:05 - r - INFO: - Episode: 37/200, Reward: 15.000, Step: 15
83
- 2023-02-20 21:30:05 - r - INFO: - Episode: 38/200, Reward: 25.000, Step: 25
84
- 2023-02-20 21:30:05 - r - INFO: - Episode: 39/200, Reward: 28.000, Step: 28
85
- 2023-02-20 21:30:05 - r - INFO: - Episode: 40/200, Reward: 56.000, Step: 56
86
- 2023-02-20 21:30:05 - r - INFO: - Episode: 41/200, Reward: 18.000, Step: 18
87
- 2023-02-20 21:30:05 - r - INFO: - Episode: 42/200, Reward: 33.000, Step: 33
88
- 2023-02-20 21:30:05 - r - INFO: - Episode: 43/200, Reward: 30.000, Step: 30
89
- 2023-02-20 21:30:05 - r - INFO: - Episode: 44/200, Reward: 30.000, Step: 30
90
- 2023-02-20 21:30:06 - r - INFO: - Episode: 45/200, Reward: 28.000, Step: 28
91
- 2023-02-20 21:30:06 - r - INFO: - Episode: 46/200, Reward: 38.000, Step: 38
92
- 2023-02-20 21:30:06 - r - INFO: - Episode: 47/200, Reward: 70.000, Step: 70
93
- 2023-02-20 21:30:06 - r - INFO: - Episode: 48/200, Reward: 18.000, Step: 18
94
- 2023-02-20 21:30:06 - r - INFO: - Episode: 49/200, Reward: 16.000, Step: 16
95
- 2023-02-20 21:30:06 - r - INFO: - Episode: 50/200, Reward: 36.000, Step: 36
96
- 2023-02-20 21:30:07 - r - INFO: - Current episode 50 has the best eval reward: 48.700
97
- 2023-02-20 21:30:07 - r - INFO: - Episode: 51/200, Reward: 26.000, Step: 26
98
- 2023-02-20 21:30:07 - r - INFO: - Episode: 52/200, Reward: 34.000, Step: 34
99
- 2023-02-20 21:30:07 - r - INFO: - Episode: 53/200, Reward: 70.000, Step: 70
100
- 2023-02-20 21:30:07 - r - INFO: - Episode: 54/200, Reward: 39.000, Step: 39
101
- 2023-02-20 21:30:07 - r - INFO: - Episode: 55/200, Reward: 87.000, Step: 87
102
- 2023-02-20 21:30:08 - r - INFO: - Episode: 56/200, Reward: 75.000, Step: 75
103
- 2023-02-20 21:30:08 - r - INFO: - Episode: 57/200, Reward: 21.000, Step: 21
104
- 2023-02-20 21:30:08 - r - INFO: - Episode: 58/200, Reward: 72.000, Step: 72
105
- 2023-02-20 21:30:08 - r - INFO: - Episode: 59/200, Reward: 43.000, Step: 43
106
- 2023-02-20 21:30:08 - r - INFO: - Episode: 60/200, Reward: 48.000, Step: 48
107
- 2023-02-20 21:30:09 - r - INFO: - Episode: 61/200, Reward: 64.000, Step: 64
108
- 2023-02-20 21:30:09 - r - INFO: - Episode: 62/200, Reward: 135.000, Step: 135
109
- 2023-02-20 21:30:09 - r - INFO: - Episode: 63/200, Reward: 108.000, Step: 108
110
- 2023-02-20 21:30:09 - r - INFO: - Episode: 64/200, Reward: 38.000, Step: 38
111
- 2023-02-20 21:30:09 - r - INFO: - Episode: 65/200, Reward: 22.000, Step: 22
112
- 2023-02-20 21:30:10 - r - INFO: - Current episode 65 has the best eval reward: 68.000
113
- 2023-02-20 21:30:10 - r - INFO: - Episode: 66/200, Reward: 60.000, Step: 60
114
- 2023-02-20 21:30:10 - r - INFO: - Episode: 67/200, Reward: 74.000, Step: 74
115
- 2023-02-20 21:30:10 - r - INFO: - Episode: 68/200, Reward: 93.000, Step: 93
116
- 2023-02-20 21:30:10 - r - INFO: - Episode: 69/200, Reward: 55.000, Step: 55
117
- 2023-02-20 21:30:10 - r - INFO: - Episode: 70/200, Reward: 48.000, Step: 48
118
- 2023-02-20 21:30:11 - r - INFO: - Episode: 71/200, Reward: 29.000, Step: 29
119
- 2023-02-20 21:30:11 - r - INFO: - Episode: 72/200, Reward: 59.000, Step: 59
120
- 2023-02-20 21:30:11 - r - INFO: - Episode: 73/200, Reward: 35.000, Step: 35
121
- 2023-02-20 21:30:11 - r - INFO: - Episode: 74/200, Reward: 40.000, Step: 40
122
- 2023-02-20 21:30:11 - r - INFO: - Episode: 75/200, Reward: 113.000, Step: 113
123
- 2023-02-20 21:30:12 - r - INFO: - Episode: 76/200, Reward: 114.000, Step: 114
124
- 2023-02-20 21:30:12 - r - INFO: - Episode: 77/200, Reward: 52.000, Step: 52
125
- 2023-02-20 21:30:13 - r - INFO: - Episode: 78/200, Reward: 139.000, Step: 139
126
- 2023-02-20 21:30:13 - r - INFO: - Episode: 79/200, Reward: 138.000, Step: 138
127
- 2023-02-20 21:30:13 - r - INFO: - Episode: 80/200, Reward: 54.000, Step: 54
128
- 2023-02-20 21:30:14 - r - INFO: - Current episode 80 has the best eval reward: 110.900
129
- 2023-02-20 21:30:14 - r - INFO: - Episode: 81/200, Reward: 156.000, Step: 156
130
- 2023-02-20 21:30:15 - r - INFO: - Episode: 82/200, Reward: 140.000, Step: 140
131
- 2023-02-20 21:30:15 - r - INFO: - Episode: 83/200, Reward: 144.000, Step: 144
132
- 2023-02-20 21:30:15 - r - INFO: - Episode: 84/200, Reward: 118.000, Step: 118
133
- 2023-02-20 21:30:15 - r - INFO: - Episode: 85/200, Reward: 156.000, Step: 156
134
- 2023-02-20 21:30:16 - r - INFO: - Episode: 86/200, Reward: 135.000, Step: 135
135
- 2023-02-20 21:30:17 - r - INFO: - Episode: 87/200, Reward: 144.000, Step: 144
136
- 2023-02-20 21:30:17 - r - INFO: - Episode: 88/200, Reward: 160.000, Step: 160
137
- 2023-02-20 21:30:17 - r - INFO: - Episode: 89/200, Reward: 30.000, Step: 30
138
- 2023-02-20 21:30:17 - r - INFO: - Episode: 90/200, Reward: 194.000, Step: 194
139
- 2023-02-20 21:30:19 - r - INFO: - Current episode 90 has the best eval reward: 169.300
140
- 2023-02-20 21:30:20 - r - INFO: - Episode: 91/200, Reward: 200.000, Step: 200
141
- 2023-02-20 21:30:20 - r - INFO: - Episode: 92/200, Reward: 200.000, Step: 200
142
- 2023-02-20 21:30:20 - r - INFO: - Episode: 93/200, Reward: 160.000, Step: 160
143
- 2023-02-20 21:30:21 - r - INFO: - Episode: 94/200, Reward: 200.000, Step: 200
144
- 2023-02-20 21:30:21 - r - INFO: - Episode: 95/200, Reward: 59.000, Step: 59
145
- 2023-02-20 21:30:23 - r - INFO: - Episode: 96/200, Reward: 200.000, Step: 200
146
- 2023-02-20 21:30:23 - r - INFO: - Episode: 97/200, Reward: 182.000, Step: 182
147
- 2023-02-20 21:30:23 - r - INFO: - Episode: 98/200, Reward: 125.000, Step: 125
148
- 2023-02-20 21:30:23 - r - INFO: - Episode: 99/200, Reward: 140.000, Step: 140
149
- 2023-02-20 21:30:24 - r - INFO: - Episode: 100/200, Reward: 146.000, Step: 146
150
- 2023-02-20 21:30:25 - r - INFO: - Episode: 101/200, Reward: 130.000, Step: 130
151
- 2023-02-20 21:30:26 - r - INFO: - Episode: 102/200, Reward: 74.000, Step: 74
152
- 2023-02-20 21:30:26 - r - INFO: - Episode: 103/200, Reward: 167.000, Step: 167
153
- 2023-02-20 21:30:26 - r - INFO: - Episode: 104/200, Reward: 171.000, Step: 171
154
- 2023-02-20 21:30:26 - r - INFO: - Episode: 105/200, Reward: 150.000, Step: 150
155
- 2023-02-20 21:30:28 - r - INFO: - Episode: 106/200, Reward: 105.000, Step: 105
156
- 2023-02-20 21:30:28 - r - INFO: - Episode: 107/200, Reward: 65.000, Step: 65
157
- 2023-02-20 21:30:28 - r - INFO: - Episode: 108/200, Reward: 170.000, Step: 170
158
- 2023-02-20 21:30:29 - r - INFO: - Episode: 109/200, Reward: 172.000, Step: 172
159
- 2023-02-20 21:30:29 - r - INFO: - Episode: 110/200, Reward: 164.000, Step: 164
160
- 2023-02-20 21:30:30 - r - INFO: - Current episode 110 has the best eval reward: 180.000
161
- 2023-02-20 21:30:31 - r - INFO: - Episode: 111/200, Reward: 148.000, Step: 148
162
- 2023-02-20 21:30:31 - r - INFO: - Episode: 112/200, Reward: 116.000, Step: 116
163
- 2023-02-20 21:30:31 - r - INFO: - Episode: 113/200, Reward: 59.000, Step: 59
164
- 2023-02-20 21:30:31 - r - INFO: - Episode: 114/200, Reward: 200.000, Step: 200
165
- 2023-02-20 21:30:31 - r - INFO: - Episode: 115/200, Reward: 36.000, Step: 36
166
- 2023-02-20 21:30:33 - r - INFO: - Episode: 116/200, Reward: 200.000, Step: 200
167
- 2023-02-20 21:30:34 - r - INFO: - Episode: 117/200, Reward: 200.000, Step: 200
168
- 2023-02-20 21:30:34 - r - INFO: - Episode: 118/200, Reward: 158.000, Step: 158
169
- 2023-02-20 21:30:34 - r - INFO: - Episode: 119/200, Reward: 200.000, Step: 200
170
- 2023-02-20 21:30:34 - r - INFO: - Episode: 120/200, Reward: 200.000, Step: 200
171
- 2023-02-20 21:30:37 - r - INFO: - Current episode 120 has the best eval reward: 200.000
172
- 2023-02-20 21:30:37 - r - INFO: - Episode: 121/200, Reward: 200.000, Step: 200
173
- 2023-02-20 21:30:38 - r - INFO: - Episode: 122/200, Reward: 172.000, Step: 172
174
- 2023-02-20 21:30:38 - r - INFO: - Episode: 123/200, Reward: 137.000, Step: 137
175
- 2023-02-20 21:30:38 - r - INFO: - Episode: 124/200, Reward: 189.000, Step: 189
176
- 2023-02-20 21:30:38 - r - INFO: - Episode: 125/200, Reward: 200.000, Step: 200
177
- 2023-02-20 21:30:40 - r - INFO: - Episode: 126/200, Reward: 200.000, Step: 200
178
- 2023-02-20 21:30:41 - r - INFO: - Episode: 127/200, Reward: 197.000, Step: 197
179
- 2023-02-20 21:30:41 - r - INFO: - Episode: 128/200, Reward: 125.000, Step: 125
180
- 2023-02-20 21:30:41 - r - INFO: - Episode: 129/200, Reward: 194.000, Step: 194
181
- 2023-02-20 21:30:41 - r - INFO: - Episode: 130/200, Reward: 167.000, Step: 167
182
- 2023-02-20 21:30:43 - r - INFO: - Episode: 131/200, Reward: 135.000, Step: 135
183
- 2023-02-20 21:30:43 - r - INFO: - Episode: 132/200, Reward: 200.000, Step: 200
184
- 2023-02-20 21:30:44 - r - INFO: - Episode: 133/200, Reward: 200.000, Step: 200
185
- 2023-02-20 21:30:44 - r - INFO: - Episode: 134/200, Reward: 170.000, Step: 170
186
- 2023-02-20 21:30:44 - r - INFO: - Episode: 135/200, Reward: 195.000, Step: 195
187
- 2023-02-20 21:30:47 - r - INFO: - Episode: 136/200, Reward: 150.000, Step: 150
188
- 2023-02-20 21:30:47 - r - INFO: - Episode: 137/200, Reward: 187.000, Step: 187
189
- 2023-02-20 21:30:47 - r - INFO: - Episode: 138/200, Reward: 172.000, Step: 172
190
- 2023-02-20 21:30:47 - r - INFO: - Episode: 139/200, Reward: 124.000, Step: 124
191
- 2023-02-20 21:30:47 - r - INFO: - Episode: 140/200, Reward: 105.000, Step: 105
192
- 2023-02-20 21:30:49 - r - INFO: - Episode: 141/200, Reward: 49.000, Step: 49
193
- 2023-02-20 21:30:49 - r - INFO: - Episode: 142/200, Reward: 108.000, Step: 108
194
- 2023-02-20 21:30:49 - r - INFO: - Episode: 143/200, Reward: 117.000, Step: 117
195
- 2023-02-20 21:30:50 - r - INFO: - Episode: 144/200, Reward: 136.000, Step: 136
196
- 2023-02-20 21:30:50 - r - INFO: - Episode: 145/200, Reward: 120.000, Step: 120
197
- 2023-02-20 21:30:52 - r - INFO: - Episode: 146/200, Reward: 172.000, Step: 172
198
- 2023-02-20 21:30:52 - r - INFO: - Episode: 147/200, Reward: 134.000, Step: 134
199
- 2023-02-20 21:30:52 - r - INFO: - Episode: 148/200, Reward: 200.000, Step: 200
200
- 2023-02-20 21:30:53 - r - INFO: - Episode: 149/200, Reward: 200.000, Step: 200
201
- 2023-02-20 21:30:53 - r - INFO: - Episode: 150/200, Reward: 150.000, Step: 150
202
- 2023-02-20 21:30:55 - r - INFO: - Episode: 151/200, Reward: 190.000, Step: 190
203
- 2023-02-20 21:30:55 - r - INFO: - Episode: 152/200, Reward: 200.000, Step: 200
204
- 2023-02-20 21:30:56 - r - INFO: - Episode: 153/200, Reward: 200.000, Step: 200
205
- 2023-02-20 21:30:56 - r - INFO: - Episode: 154/200, Reward: 200.000, Step: 200
206
- 2023-02-20 21:30:56 - r - INFO: - Episode: 155/200, Reward: 179.000, Step: 179
207
- 2023-02-20 21:30:59 - r - INFO: - Episode: 156/200, Reward: 200.000, Step: 200
208
- 2023-02-20 21:30:59 - r - INFO: - Episode: 157/200, Reward: 200.000, Step: 200
209
- 2023-02-20 21:30:59 - r - INFO: - Episode: 158/200, Reward: 200.000, Step: 200
210
- 2023-02-20 21:31:00 - r - INFO: - Episode: 159/200, Reward: 200.000, Step: 200
211
- 2023-02-20 21:31:00 - r - INFO: - Episode: 160/200, Reward: 195.000, Step: 195
212
- 2023-02-20 21:31:02 - r - INFO: - Episode: 161/200, Reward: 195.000, Step: 195
213
- 2023-02-20 21:31:02 - r - INFO: - Episode: 162/200, Reward: 142.000, Step: 142
214
- 2023-02-20 21:31:03 - r - INFO: - Episode: 163/200, Reward: 200.000, Step: 200
215
- 2023-02-20 21:31:03 - r - INFO: - Episode: 164/200, Reward: 108.000, Step: 108
216
- 2023-02-20 21:31:03 - r - INFO: - Episode: 165/200, Reward: 200.000, Step: 200
217
- 2023-02-20 21:31:05 - r - INFO: - Episode: 166/200, Reward: 165.000, Step: 165
218
- 2023-02-20 21:31:05 - r - INFO: - Episode: 167/200, Reward: 153.000, Step: 153
219
- 2023-02-20 21:31:05 - r - INFO: - Episode: 168/200, Reward: 85.000, Step: 85
220
- 2023-02-20 21:31:05 - r - INFO: - Episode: 169/200, Reward: 139.000, Step: 139
221
- 2023-02-20 21:31:06 - r - INFO: - Episode: 170/200, Reward: 155.000, Step: 155
222
- 2023-02-20 21:31:08 - r - INFO: - Episode: 171/200, Reward: 166.000, Step: 166
223
- 2023-02-20 21:31:08 - r - INFO: - Episode: 172/200, Reward: 182.000, Step: 182
224
- 2023-02-20 21:31:08 - r - INFO: - Episode: 173/200, Reward: 190.000, Step: 190
225
- 2023-02-20 21:31:08 - r - INFO: - Episode: 174/200, Reward: 35.000, Step: 35
226
- 2023-02-20 21:31:09 - r - INFO: - Episode: 175/200, Reward: 124.000, Step: 124
227
- 2023-02-20 21:31:11 - r - INFO: - Episode: 176/200, Reward: 114.000, Step: 114
228
- 2023-02-20 21:31:11 - r - INFO: - Episode: 177/200, Reward: 200.000, Step: 200
229
- 2023-02-20 21:31:11 - r - INFO: - Episode: 178/200, Reward: 200.000, Step: 200
230
- 2023-02-20 21:31:12 - r - INFO: - Episode: 179/200, Reward: 200.000, Step: 200
231
- 2023-02-20 21:31:12 - r - INFO: - Episode: 180/200, Reward: 200.000, Step: 200
232
- 2023-02-20 21:31:14 - r - INFO: - Episode: 181/200, Reward: 110.000, Step: 110
233
- 2023-02-20 21:31:14 - r - INFO: - Episode: 182/200, Reward: 128.000, Step: 128
234
- 2023-02-20 21:31:14 - r - INFO: - Episode: 183/200, Reward: 107.000, Step: 107
235
- 2023-02-20 21:31:15 - r - INFO: - Episode: 184/200, Reward: 192.000, Step: 192
236
- 2023-02-20 21:31:15 - r - INFO: - Episode: 185/200, Reward: 106.000, Step: 106
237
- 2023-02-20 21:31:16 - r - INFO: - Episode: 186/200, Reward: 32.000, Step: 32
238
- 2023-02-20 21:31:16 - r - INFO: - Episode: 187/200, Reward: 107.000, Step: 107
239
- 2023-02-20 21:31:17 - r - INFO: - Episode: 188/200, Reward: 129.000, Step: 129
240
- 2023-02-20 21:31:17 - r - INFO: - Episode: 189/200, Reward: 122.000, Step: 122
241
- 2023-02-20 21:31:17 - r - INFO: - Episode: 190/200, Reward: 126.000, Step: 126
242
- 2023-02-20 21:31:18 - r - INFO: - Episode: 191/200, Reward: 120.000, Step: 120
243
- 2023-02-20 21:31:19 - r - INFO: - Episode: 192/200, Reward: 127.000, Step: 127
244
- 2023-02-20 21:31:19 - r - INFO: - Episode: 193/200, Reward: 132.000, Step: 132
245
- 2023-02-20 21:31:19 - r - INFO: - Episode: 194/200, Reward: 128.000, Step: 128
246
- 2023-02-20 21:31:19 - r - INFO: - Episode: 195/200, Reward: 142.000, Step: 142
247
- 2023-02-20 21:31:21 - r - INFO: - Episode: 196/200, Reward: 137.000, Step: 137
248
- 2023-02-20 21:31:21 - r - INFO: - Episode: 197/200, Reward: 125.000, Step: 125
249
- 2023-02-20 21:31:21 - r - INFO: - Episode: 198/200, Reward: 118.000, Step: 118
250
- 2023-02-20 21:31:22 - r - INFO: - Episode: 199/200, Reward: 158.000, Step: 158
251
- 2023-02-20 21:31:22 - r - INFO: - Episode: 200/200, Reward: 144.000, Step: 144
252
- 2023-02-20 21:31:23 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/models/actor.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d07e4388597f766e04099380da27cb55fd877e8d26cdd14eab48bc097525216
3
- size 272215
 
 
 
 
CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/models/critic.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6743877f9534c272b5e6d8bae3cbc87b1fa32bb21af2935c28e503122e042c2d
3
- size 271191
 
 
 
 
CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/results/learning_curve.png DELETED
Binary file (77.4 kB)
 
CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/results/res.csv DELETED
@@ -1,201 +0,0 @@
1
- episodes,rewards,steps
2
- 0,25.0,25
3
- 1,15.0,15
4
- 2,13.0,13
5
- 3,14.0,14
6
- 4,25.0,25
7
- 5,41.0,41
8
- 6,33.0,33
9
- 7,12.0,12
10
- 8,20.0,20
11
- 9,33.0,33
12
- 10,19.0,19
13
- 11,35.0,35
14
- 12,35.0,35
15
- 13,9.0,9
16
- 14,32.0,32
17
- 15,29.0,29
18
- 16,17.0,17
19
- 17,17.0,17
20
- 18,17.0,17
21
- 19,20.0,20
22
- 20,24.0,24
23
- 21,44.0,44
24
- 22,39.0,39
25
- 23,48.0,48
26
- 24,52.0,52
27
- 25,32.0,32
28
- 26,45.0,45
29
- 27,68.0,68
30
- 28,45.0,45
31
- 29,16.0,16
32
- 30,38.0,38
33
- 31,17.0,17
34
- 32,35.0,35
35
- 33,12.0,12
36
- 34,56.0,56
37
- 35,36.0,36
38
- 36,15.0,15
39
- 37,25.0,25
40
- 38,28.0,28
41
- 39,56.0,56
42
- 40,18.0,18
43
- 41,33.0,33
44
- 42,30.0,30
45
- 43,30.0,30
46
- 44,28.0,28
47
- 45,38.0,38
48
- 46,70.0,70
49
- 47,18.0,18
50
- 48,16.0,16
51
- 49,36.0,36
52
- 50,26.0,26
53
- 51,34.0,34
54
- 52,70.0,70
55
- 53,39.0,39
56
- 54,87.0,87
57
- 55,75.0,75
58
- 56,21.0,21
59
- 57,72.0,72
60
- 58,43.0,43
61
- 59,48.0,48
62
- 60,64.0,64
63
- 61,135.0,135
64
- 62,108.0,108
65
- 63,38.0,38
66
- 64,22.0,22
67
- 65,60.0,60
68
- 66,74.0,74
69
- 67,93.0,93
70
- 68,55.0,55
71
- 69,48.0,48
72
- 70,29.0,29
73
- 71,59.0,59
74
- 72,35.0,35
75
- 73,40.0,40
76
- 74,113.0,113
77
- 75,114.0,114
78
- 76,52.0,52
79
- 77,139.0,139
80
- 78,138.0,138
81
- 79,54.0,54
82
- 80,156.0,156
83
- 81,140.0,140
84
- 82,144.0,144
85
- 83,118.0,118
86
- 84,156.0,156
87
- 85,135.0,135
88
- 86,144.0,144
89
- 87,160.0,160
90
- 88,30.0,30
91
- 89,194.0,194
92
- 90,200.0,200
93
- 91,200.0,200
94
- 92,160.0,160
95
- 93,200.0,200
96
- 94,59.0,59
97
- 95,200.0,200
98
- 96,182.0,182
99
- 97,125.0,125
100
- 98,140.0,140
101
- 99,146.0,146
102
- 100,130.0,130
103
- 101,74.0,74
104
- 102,167.0,167
105
- 103,171.0,171
106
- 104,150.0,150
107
- 105,105.0,105
108
- 106,65.0,65
109
- 107,170.0,170
110
- 108,172.0,172
111
- 109,164.0,164
112
- 110,148.0,148
113
- 111,116.0,116
114
- 112,59.0,59
115
- 113,200.0,200
116
- 114,36.0,36
117
- 115,200.0,200
118
- 116,200.0,200
119
- 117,158.0,158
120
- 118,200.0,200
121
- 119,200.0,200
122
- 120,200.0,200
123
- 121,172.0,172
124
- 122,137.0,137
125
- 123,189.0,189
126
- 124,200.0,200
127
- 125,200.0,200
128
- 126,197.0,197
129
- 127,125.0,125
130
- 128,194.0,194
131
- 129,167.0,167
132
- 130,135.0,135
133
- 131,200.0,200
134
- 132,200.0,200
135
- 133,170.0,170
136
- 134,195.0,195
137
- 135,150.0,150
138
- 136,187.0,187
139
- 137,172.0,172
140
- 138,124.0,124
141
- 139,105.0,105
142
- 140,49.0,49
143
- 141,108.0,108
144
- 142,117.0,117
145
- 143,136.0,136
146
- 144,120.0,120
147
- 145,172.0,172
148
- 146,134.0,134
149
- 147,200.0,200
150
- 148,200.0,200
151
- 149,150.0,150
152
- 150,190.0,190
153
- 151,200.0,200
154
- 152,200.0,200
155
- 153,200.0,200
156
- 154,179.0,179
157
- 155,200.0,200
158
- 156,200.0,200
159
- 157,200.0,200
160
- 158,200.0,200
161
- 159,195.0,195
162
- 160,195.0,195
163
- 161,142.0,142
164
- 162,200.0,200
165
- 163,108.0,108
166
- 164,200.0,200
167
- 165,165.0,165
168
- 166,153.0,153
169
- 167,85.0,85
170
- 168,139.0,139
171
- 169,155.0,155
172
- 170,166.0,166
173
- 171,182.0,182
174
- 172,190.0,190
175
- 173,35.0,35
176
- 174,124.0,124
177
- 175,114.0,114
178
- 176,200.0,200
179
- 177,200.0,200
180
- 178,200.0,200
181
- 179,200.0,200
182
- 180,110.0,110
183
- 181,128.0,128
184
- 182,107.0,107
185
- 183,192.0,192
186
- 184,106.0,106
187
- 185,32.0,32
188
- 186,107.0,107
189
- 187,129.0,129
190
- 188,122.0,122
191
- 189,126.0,126
192
- 190,120.0,120
193
- 191,127.0,127
194
- 192,132.0,132
195
- 193,128.0,128
196
- 194,142.0,142
197
- 195,137.0,137
198
- 196,125.0,125
199
- 197,118.0,118
200
- 198,158.0,158
201
- 199,144.0,144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/config.yaml DELETED
@@ -1,32 +0,0 @@
1
- general_cfg:
2
- algo_name: PPO
3
- device: cpu
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: false
8
- load_path: Train_CartPole-v1_DQN_20221026-054757
9
- max_steps: 200
10
- mode: train
11
- n_workers: 3
12
- new_step_api: true
13
- render: false
14
- save_fig: true
15
- seed: 1
16
- show_fig: false
17
- test_eps: 10
18
- train_eps: 300
19
- wrapper: null
20
- algo_cfg:
21
- actor_hidden_dim: 256
22
- actor_lr: 0.0003
23
- continuous: false
24
- critic_hidden_dim: 256
25
- critic_lr: 0.001
26
- entropy_coef: 0.01
27
- eps_clip: 0.2
28
- gamma: 0.99
29
- k_epochs: 4
30
- ppo_type: clip
31
- sgd_batch_size: 128
32
- train_batch_size: 256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/logs/log.txt DELETED
@@ -1,43 +0,0 @@
1
- 2023-04-01 22:32:04 - r - INFO: - Hyperparameters:
2
- 2023-04-01 22:32:04 - r - INFO: - ================================================================================
3
- 2023-04-01 22:32:04 - r - INFO: - Name Value Type
4
- 2023-04-01 22:32:04 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-04-01 22:32:04 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-01 22:32:04 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-01 22:32:04 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-01 22:32:04 - r - INFO: - algo_name PPO <class 'str'>
9
- 2023-04-01 22:32:04 - r - INFO: - mode train <class 'str'>
10
- 2023-04-01 22:32:04 - r - INFO: - seed 1 <class 'int'>
11
- 2023-04-01 22:32:04 - r - INFO: - device cpu <class 'str'>
12
- 2023-04-01 22:32:04 - r - INFO: - train_eps 300 <class 'int'>
13
- 2023-04-01 22:32:04 - r - INFO: - test_eps 10 <class 'int'>
14
- 2023-04-01 22:32:04 - r - INFO: - eval_eps 10 <class 'int'>
15
- 2023-04-01 22:32:04 - r - INFO: - eval_per_episode 5 <class 'int'>
16
- 2023-04-01 22:32:04 - r - INFO: - max_steps 200 <class 'int'>
17
- 2023-04-01 22:32:04 - r - INFO: - load_checkpoint 0 <class 'bool'>
18
- 2023-04-01 22:32:04 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
19
- 2023-04-01 22:32:04 - r - INFO: - show_fig 0 <class 'bool'>
20
- 2023-04-01 22:32:04 - r - INFO: - save_fig 1 <class 'bool'>
21
- 2023-04-01 22:32:04 - r - INFO: - n_workers 3 <class 'int'>
22
- 2023-04-01 22:32:04 - r - INFO: - ppo_type clip <class 'str'>
23
- 2023-04-01 22:32:04 - r - INFO: - continuous 0 <class 'bool'>
24
- 2023-04-01 22:32:04 - r - INFO: - gamma 0.99 <class 'float'>
25
- 2023-04-01 22:32:04 - r - INFO: - k_epochs 4 <class 'int'>
26
- 2023-04-01 22:32:04 - r - INFO: - actor_lr 0.0003 <class 'float'>
27
- 2023-04-01 22:32:04 - r - INFO: - critic_lr 0.001 <class 'float'>
28
- 2023-04-01 22:32:04 - r - INFO: - eps_clip 0.2 <class 'float'>
29
- 2023-04-01 22:32:04 - r - INFO: - entropy_coef 0.01 <class 'float'>
30
- 2023-04-01 22:32:04 - r - INFO: - train_batch_size 256 <class 'int'>
31
- 2023-04-01 22:32:04 - r - INFO: - sgd_batch_size 128 <class 'int'>
32
- 2023-04-01 22:32:04 - r - INFO: - actor_hidden_dim 256 <class 'int'>
33
- 2023-04-01 22:32:04 - r - INFO: - critic_hidden_dim 256 <class 'int'>
34
- 2023-04-01 22:32:04 - r - INFO: - task_dir /home/dingli/joyrl_offline/tasks/Train_CartPole-v1_mp_PPO_20230401-223204 <class 'str'>
35
- 2023-04-01 22:32:04 - r - INFO: - res_dir /home/dingli/joyrl_offline/tasks/Train_CartPole-v1_mp_PPO_20230401-223204/results <class 'str'>
36
- 2023-04-01 22:32:04 - r - INFO: - log_dir /home/dingli/joyrl_offline/tasks/Train_CartPole-v1_mp_PPO_20230401-223204/logs <class 'str'>
37
- 2023-04-01 22:32:04 - r - INFO: - traj_dir /home/dingli/joyrl_offline/tasks/Train_CartPole-v1_mp_PPO_20230401-223204/traj <class 'str'>
38
- 2023-04-01 22:32:04 - r - INFO: - tb_dir /home/dingli/joyrl_offline/tasks/Train_CartPole-v1_mp_PPO_20230401-223204/tb_logs <class 'str'>
39
- 2023-04-01 22:32:04 - r - INFO: - ================================================================================
40
- 2023-04-01 22:32:04 - r - INFO: - n_states: 4, n_actions: 2
41
- 2023-04-01 22:32:04 - r - INFO: - Start training!
42
- 2023-04-01 22:32:04 - r - INFO: - Env: CartPole-v1, Algorithm: PPO, Device: cpu
43
- 2023-04-01 22:33:16 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/models/actor.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6a3e48d551bcba327ff4c5d3cc464a6a94b83eda543a54d231016e021e8cbd3
3
- size 272151
 
 
 
 
CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/models/critic.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4db7aeb3805e1deb11428a34a600a40068a0f711986f38fdf9e0f9895f8a45c
3
- size 271127
 
 
 
 
CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/results/learning_curve.png DELETED
Binary file (86.7 kB)
 
CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/results/res.csv DELETED
@@ -1,302 +0,0 @@
1
- episodes,rewards
2
- 0,39.0
3
- 1,10.0
4
- 2,11.0
5
- 3,10.0
6
- 4,30.0
7
- 5,28.0
8
- 6,9.0
9
- 7,12.0
10
- 8,13.0
11
- 9,16.0
12
- 10,15.0
13
- 11,16.0
14
- 12,38.0
15
- 13,33.0
16
- 14,11.0
17
- 15,20.0
18
- 16,14.0
19
- 17,43.0
20
- 18,22.0
21
- 19,11.0
22
- 20,34.0
23
- 21,12.0
24
- 22,29.0
25
- 23,12.0
26
- 24,39.0
27
- 25,11.0
28
- 26,13.0
29
- 27,25.0
30
- 28,26.0
31
- 29,36.0
32
- 30,24.0
33
- 31,18.0
34
- 32,40.0
35
- 33,13.0
36
- 34,30.0
37
- 35,24.0
38
- 36,22.0
39
- 37,10.0
40
- 38,32.0
41
- 39,40.0
42
- 40,15.0
43
- 41,57.0
44
- 42,45.0
45
- 43,47.0
46
- 44,38.0
47
- 45,53.0
48
- 46,13.0
49
- 47,26.0
50
- 48,19.0
51
- 49,16.0
52
- 50,30.0
53
- 51,12.0
54
- 52,18.0
55
- 53,62.0
56
- 54,16.0
57
- 55,46.0
58
- 56,40.0
59
- 57,25.0
60
- 58,41.0
61
- 59,82.0
62
- 60,18.0
63
- 61,29.0
64
- 62,43.0
65
- 63,26.0
66
- 64,28.0
67
- 65,17.0
68
- 66,48.0
69
- 67,31.0
70
- 68,35.0
71
- 69,31.0
72
- 70,43.0
73
- 71,49.0
74
- 72,25.0
75
- 73,65.0
76
- 74,16.0
77
- 75,37.0
78
- 76,44.0
79
- 77,37.0
80
- 78,16.0
81
- 79,73.0
82
- 80,23.0
83
- 81,34.0
84
- 82,20.0
85
- 83,55.0
86
- 84,18.0
87
- 85,16.0
88
- 86,56.0
89
- 87,22.0
90
- 88,40.0
91
- 89,40.0
92
- 90,63.0
93
- 91,52.0
94
- 92,43.0
95
- 93,38.0
96
- 94,34.0
97
- 95,61.0
98
- 96,34.0
99
- 97,32.0
100
- 98,47.0
101
- 99,21.0
102
- 100,24.0
103
- 101,54.0
104
- 102,52.0
105
- 103,13.0
106
- 104,26.0
107
- 105,64.0
108
- 106,63.0
109
- 107,88.0
110
- 108,65.0
111
- 109,114.0
112
- 110,84.0
113
- 111,58.0
114
- 112,148.0
115
- 113,32.0
116
- 114,85.0
117
- 115,67.0
118
- 116,45.0
119
- 117,170.0
120
- 118,137.0
121
- 119,159.0
122
- 120,60.0
123
- 121,200.0
124
- 122,78.0
125
- 123,58.0
126
- 124,60.0
127
- 125,191.0
128
- 126,61.0
129
- 127,99.0
130
- 128,26.0
131
- 129,79.0
132
- 130,109.0
133
- 131,173.0
134
- 132,61.0
135
- 133,92.0
136
- 134,175.0
137
- 135,106.0
138
- 136,32.0
139
- 137,84.0
140
- 138,105.0
141
- 139,78.0
142
- 140,78.0
143
- 141,60.0
144
- 142,93.0
145
- 143,69.0
146
- 144,85.0
147
- 145,106.0
148
- 146,148.0
149
- 147,93.0
150
- 148,119.0
151
- 149,139.0
152
- 150,124.0
153
- 151,173.0
154
- 152,200.0
155
- 153,26.0
156
- 154,151.0
157
- 155,122.0
158
- 156,182.0
159
- 157,62.0
160
- 158,38.0
161
- 159,145.0
162
- 160,75.0
163
- 161,78.0
164
- 162,200.0
165
- 163,189.0
166
- 164,94.0
167
- 165,122.0
168
- 166,95.0
169
- 167,200.0
170
- 168,154.0
171
- 169,200.0
172
- 170,200.0
173
- 171,125.0
174
- 172,200.0
175
- 173,187.0
176
- 174,165.0
177
- 175,154.0
178
- 176,200.0
179
- 177,200.0
180
- 178,180.0
181
- 179,200.0
182
- 180,191.0
183
- 181,168.0
184
- 182,200.0
185
- 183,78.0
186
- 184,124.0
187
- 185,137.0
188
- 186,165.0
189
- 187,77.0
190
- 188,144.0
191
- 189,200.0
192
- 190,74.0
193
- 191,200.0
194
- 192,99.0
195
- 193,183.0
196
- 194,200.0
197
- 195,49.0
198
- 196,127.0
199
- 197,148.0
200
- 198,188.0
201
- 199,200.0
202
- 200,113.0
203
- 201,200.0
204
- 202,146.0
205
- 203,130.0
206
- 204,128.0
207
- 205,200.0
208
- 206,35.0
209
- 207,58.0
210
- 208,130.0
211
- 209,66.0
212
- 210,62.0
213
- 211,163.0
214
- 212,147.0
215
- 213,106.0
216
- 214,200.0
217
- 215,114.0
218
- 216,147.0
219
- 217,101.0
220
- 218,175.0
221
- 219,88.0
222
- 220,144.0
223
- 221,200.0
224
- 222,200.0
225
- 223,199.0
226
- 224,200.0
227
- 225,199.0
228
- 226,142.0
229
- 227,200.0
230
- 228,200.0
231
- 229,164.0
232
- 230,200.0
233
- 231,181.0
234
- 232,200.0
235
- 233,200.0
236
- 234,200.0
237
- 235,105.0
238
- 236,200.0
239
- 237,200.0
240
- 238,200.0
241
- 239,169.0
242
- 240,200.0
243
- 241,179.0
244
- 242,108.0
245
- 243,177.0
246
- 244,132.0
247
- 245,116.0
248
- 246,75.0
249
- 247,161.0
250
- 248,192.0
251
- 249,184.0
252
- 250,200.0
253
- 251,115.0
254
- 252,200.0
255
- 253,200.0
256
- 254,129.0
257
- 255,40.0
258
- 256,80.0
259
- 257,190.0
260
- 258,200.0
261
- 259,200.0
262
- 260,151.0
263
- 261,138.0
264
- 262,187.0
265
- 263,163.0
266
- 264,101.0
267
- 265,116.0
268
- 266,147.0
269
- 267,80.0
270
- 268,200.0
271
- 269,104.0
272
- 270,200.0
273
- 271,176.0
274
- 272,157.0
275
- 273,200.0
276
- 274,200.0
277
- 275,125.0
278
- 276,77.0
279
- 277,200.0
280
- 278,200.0
281
- 279,200.0
282
- 280,200.0
283
- 281,200.0
284
- 282,174.0
285
- 283,200.0
286
- 284,200.0
287
- 285,25.0
288
- 286,150.0
289
- 287,200.0
290
- 288,200.0
291
- 289,200.0
292
- 290,163.0
293
- 291,200.0
294
- 292,200.0
295
- 293,200.0
296
- 294,200.0
297
- 295,200.0
298
- 296,200.0
299
- 297,123.0
300
- 298,154.0
301
- 299,200.0
302
- 300,200.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/config.yaml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general_cfg:
2
+ algo_name: PPO
3
+ collect_traj: false
4
+ device: cpu
5
+ env_name: gym
6
+ load_checkpoint: false
7
+ load_model_step: best
8
+ load_path: Train_single_CartPole-v1_DQN_20230515-211721
9
+ max_episode: 200
10
+ max_step: 200
11
+ mode: train
12
+ model_save_fre: 10
13
+ mp_backend: single
14
+ n_workers: 2
15
+ online_eval: true
16
+ online_eval_episode: 10
17
+ seed: 1
18
+ algo_cfg:
19
+ actor_hidden_dim: 256
20
+ actor_layers:
21
+ - activation: relu
22
+ layer_dim:
23
+ - 256
24
+ layer_type: linear
25
+ - activation: relu
26
+ layer_dim:
27
+ - 256
28
+ layer_type: linear
29
+ actor_lr: 0.0003
30
+ batch_size: 256
31
+ buffer_type: ONPOLICY_QUE
32
+ continuous: false
33
+ critic_hidden_dim: 256
34
+ critic_layers:
35
+ - activation: relu
36
+ layer_dim:
37
+ - 256
38
+ layer_type: linear
39
+ - activation: relu
40
+ layer_dim:
41
+ - 256
42
+ layer_type: linear
43
+ critic_loss_coef: 0.5
44
+ critic_lr: 0.001
45
+ entropy_coef: 0.01
46
+ eps_clip: 0.2
47
+ gamma: 0.99
48
+ independ_actor: true
49
+ k_epochs: 4
50
+ kl_alpha: 2
51
+ kl_beta: 1.5
52
+ kl_lambda: 0.5
53
+ kl_target: 0.1
54
+ lr: 0.0001
55
+ min_policy: 0
56
+ ppo_type: clip
57
+ sgd_batch_size: 128
58
+ share_optimizer: false
59
+ env_cfg:
60
+ id: CartPole-v1
61
+ ignore_params:
62
+ - wrapper
63
+ - ignore_params
64
+ render_mode: null
65
+ wrapper: null
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/logs/log.txt ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - General Configs:
2
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - ================================================================================
3
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - Name Value Type
4
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - env_name gym <class 'str'>
5
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - algo_name PPO <class 'str'>
6
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - mode train <class 'str'>
7
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - device cpu <class 'str'>
8
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - seed 1 <class 'int'>
9
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - max_episode 200 <class 'int'>
10
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - max_step 200 <class 'int'>
11
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
12
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - mp_backend single <class 'str'>
13
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - n_workers 2 <class 'int'>
14
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
15
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
16
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - model_save_fre 10 <class 'int'>
17
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
18
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 <class 'str'>
19
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - load_model_step best <class 'str'>
20
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - ================================================================================
21
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - Algo Configs:
22
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - ================================================================================
23
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - Name Value Type
24
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - independ_actor 1 <class 'bool'>
25
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - share_optimizer 0 <class 'bool'>
26
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - ppo_type clip <class 'str'>
27
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - eps_clip 0.2 <class 'float'>
28
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - kl_target 0.1 <class 'float'>
29
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - kl_lambda 0.5 <class 'float'>
30
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - kl_beta 1.5 <class 'float'>
31
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - kl_alpha 2 <class 'int'>
32
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - continuous 0 <class 'bool'>
33
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
34
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - k_epochs 4 <class 'int'>
35
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
36
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - actor_lr 0.0003 <class 'float'>
37
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - critic_lr 0.001 <class 'float'>
38
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - critic_loss_coef 0.5 <class 'float'>
39
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - entropy_coef 0.01 <class 'float'>
40
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - batch_size 256 <class 'int'>
41
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - sgd_batch_size 128 <class 'int'>
42
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - actor_hidden_dim 256 <class 'int'>
43
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - critic_hidden_dim 256 <class 'int'>
44
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - min_policy 0 <class 'int'>
45
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - actor_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
46
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - critic_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
47
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - buffer_type ONPOLICY_QUE <class 'str'>
48
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - ================================================================================
49
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - Env Configs:
50
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - ================================================================================
51
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - Name Value Type
52
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
53
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - render_mode None <class 'str'>
54
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - wrapper None <class 'str'>
55
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
56
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - ================================================================================
57
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
58
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - Start training!
59
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - episode: 0, ep_reward: 13.0, ep_step: 13
60
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - episode: 1, ep_reward: 30.0, ep_step: 30
61
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - episode: 2, ep_reward: 15.0, ep_step: 15
62
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - episode: 3, ep_reward: 10.0, ep_step: 10
63
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - episode: 4, ep_reward: 38.0, ep_step: 38
64
+ 2023-05-17 13:44:40 - SimpleLog - INFO: - episode: 5, ep_reward: 18.0, ep_step: 18
65
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 6, ep_reward: 20.0, ep_step: 20
66
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 7, ep_reward: 26.0, ep_step: 26
67
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 8, ep_reward: 9.0, ep_step: 9
68
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 9, ep_reward: 12.0, ep_step: 12
69
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 10, ep_reward: 47.0, ep_step: 47
70
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 11, ep_reward: 21.0, ep_step: 21
71
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 12, ep_reward: 27.0, ep_step: 27
72
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 13, ep_reward: 12.0, ep_step: 12
73
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 14, ep_reward: 37.0, ep_step: 37
74
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 15, ep_reward: 20.0, ep_step: 20
75
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 16, ep_reward: 36.0, ep_step: 36
76
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 17, ep_reward: 21.0, ep_step: 21
77
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 18, ep_reward: 40.0, ep_step: 40
78
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 19, ep_reward: 15.0, ep_step: 15
79
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 20, ep_reward: 14.0, ep_step: 14
80
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 21, ep_reward: 89.0, ep_step: 89
81
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 22, ep_reward: 27.0, ep_step: 27
82
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 23, ep_reward: 38.0, ep_step: 38
83
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 24, ep_reward: 29.0, ep_step: 29
84
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 25, ep_reward: 20.0, ep_step: 20
85
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 26, ep_reward: 50.0, ep_step: 50
86
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 27, ep_reward: 44.0, ep_step: 44
87
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 28, ep_reward: 30.0, ep_step: 30
88
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 29, ep_reward: 24.0, ep_step: 24
89
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 30, ep_reward: 19.0, ep_step: 19
90
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 31, ep_reward: 19.0, ep_step: 19
91
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 32, ep_reward: 28.0, ep_step: 28
92
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 33, ep_reward: 59.0, ep_step: 59
93
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 34, ep_reward: 22.0, ep_step: 22
94
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 35, ep_reward: 33.0, ep_step: 33
95
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 36, ep_reward: 18.0, ep_step: 18
96
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 37, ep_reward: 13.0, ep_step: 13
97
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 38, ep_reward: 68.0, ep_step: 68
98
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 39, ep_reward: 20.0, ep_step: 20
99
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 40, ep_reward: 17.0, ep_step: 17
100
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 41, ep_reward: 32.0, ep_step: 32
101
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 42, ep_reward: 17.0, ep_step: 17
102
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 43, ep_reward: 35.0, ep_step: 35
103
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 44, ep_reward: 18.0, ep_step: 18
104
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 45, ep_reward: 23.0, ep_step: 23
105
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 46, ep_reward: 15.0, ep_step: 15
106
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 47, ep_reward: 19.0, ep_step: 19
107
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 48, ep_reward: 31.0, ep_step: 31
108
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 49, ep_reward: 39.0, ep_step: 39
109
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 50, ep_reward: 26.0, ep_step: 26
110
+ 2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 51, ep_reward: 82.0, ep_step: 82
111
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 52, ep_reward: 80.0, ep_step: 80
112
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 53, ep_reward: 20.0, ep_step: 20
113
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 54, ep_reward: 39.0, ep_step: 39
114
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 55, ep_reward: 74.0, ep_step: 74
115
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 56, ep_reward: 72.0, ep_step: 72
116
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 57, ep_reward: 28.0, ep_step: 28
117
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 58, ep_reward: 65.0, ep_step: 65
118
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 59, ep_reward: 54.0, ep_step: 54
119
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 60, ep_reward: 79.0, ep_step: 79
120
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 61, ep_reward: 55.0, ep_step: 55
121
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 62, ep_reward: 43.0, ep_step: 43
122
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 63, ep_reward: 84.0, ep_step: 84
123
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 64, ep_reward: 39.0, ep_step: 39
124
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 65, ep_reward: 115.0, ep_step: 115
125
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 66, ep_reward: 87.0, ep_step: 87
126
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 67, ep_reward: 71.0, ep_step: 71
127
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 68, ep_reward: 52.0, ep_step: 52
128
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - update_step: 10, online_eval_reward: 147.000
129
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 147.000, save the best model!
130
+ 2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 69, ep_reward: 71.0, ep_step: 71
131
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 70, ep_reward: 48.0, ep_step: 48
132
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 71, ep_reward: 104.0, ep_step: 104
133
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 72, ep_reward: 30.0, ep_step: 30
134
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 73, ep_reward: 152.0, ep_step: 152
135
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 74, ep_reward: 51.0, ep_step: 51
136
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 75, ep_reward: 19.0, ep_step: 19
137
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 76, ep_reward: 44.0, ep_step: 44
138
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 77, ep_reward: 138.0, ep_step: 138
139
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 78, ep_reward: 26.0, ep_step: 26
140
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 79, ep_reward: 53.0, ep_step: 53
141
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 80, ep_reward: 108.0, ep_step: 108
142
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 81, ep_reward: 97.0, ep_step: 97
143
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 82, ep_reward: 87.0, ep_step: 87
144
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 83, ep_reward: 51.0, ep_step: 51
145
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 84, ep_reward: 101.0, ep_step: 101
146
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 85, ep_reward: 32.0, ep_step: 32
147
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 86, ep_reward: 76.0, ep_step: 76
148
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 87, ep_reward: 75.0, ep_step: 75
149
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 88, ep_reward: 44.0, ep_step: 44
150
+ 2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 89, ep_reward: 200.0, ep_step: 200
151
+ 2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 90, ep_reward: 126.0, ep_step: 126
152
+ 2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 91, ep_reward: 115.0, ep_step: 115
153
+ 2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 92, ep_reward: 29.0, ep_step: 29
154
+ 2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 93, ep_reward: 152.0, ep_step: 152
155
+ 2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 94, ep_reward: 78.0, ep_step: 78
156
+ 2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 95, ep_reward: 120.0, ep_step: 120
157
+ 2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 96, ep_reward: 140.0, ep_step: 140
158
+ 2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 97, ep_reward: 128.0, ep_step: 128
159
+ 2023-05-17 13:44:44 - SimpleLog - INFO: - update_step: 20, online_eval_reward: 129.000
160
+ 2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 98, ep_reward: 102.0, ep_step: 102
161
+ 2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 99, ep_reward: 87.0, ep_step: 87
162
+ 2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 100, ep_reward: 107.0, ep_step: 107
163
+ 2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 101, ep_reward: 66.0, ep_step: 66
164
+ 2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 102, ep_reward: 192.0, ep_step: 192
165
+ 2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 103, ep_reward: 125.0, ep_step: 125
166
+ 2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 104, ep_reward: 113.0, ep_step: 113
167
+ 2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 105, ep_reward: 138.0, ep_step: 138
168
+ 2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 106, ep_reward: 112.0, ep_step: 112
169
+ 2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 107, ep_reward: 164.0, ep_step: 164
170
+ 2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 108, ep_reward: 108.0, ep_step: 108
171
+ 2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 109, ep_reward: 28.0, ep_step: 28
172
+ 2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 110, ep_reward: 200.0, ep_step: 200
173
+ 2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 111, ep_reward: 200.0, ep_step: 200
174
+ 2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 112, ep_reward: 200.0, ep_step: 200
175
+ 2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 113, ep_reward: 200.0, ep_step: 200
176
+ 2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 114, ep_reward: 151.0, ep_step: 151
177
+ 2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 115, ep_reward: 107.0, ep_step: 107
178
+ 2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 116, ep_reward: 66.0, ep_step: 66
179
+ 2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 117, ep_reward: 94.0, ep_step: 94
180
+ 2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 118, ep_reward: 25.0, ep_step: 25
181
+ 2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 119, ep_reward: 47.0, ep_step: 47
182
+ 2023-05-17 13:44:46 - SimpleLog - INFO: - update_step: 30, online_eval_reward: 155.000
183
+ 2023-05-17 13:44:46 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 155.000, save the best model!
184
+ 2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 120, ep_reward: 136.0, ep_step: 136
185
+ 2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 121, ep_reward: 119.0, ep_step: 119
186
+ 2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 122, ep_reward: 136.0, ep_step: 136
187
+ 2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 123, ep_reward: 182.0, ep_step: 182
188
+ 2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 124, ep_reward: 99.0, ep_step: 99
189
+ 2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 125, ep_reward: 33.0, ep_step: 33
190
+ 2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 126, ep_reward: 161.0, ep_step: 161
191
+ 2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 127, ep_reward: 171.0, ep_step: 171
192
+ 2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 128, ep_reward: 172.0, ep_step: 172
193
+ 2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 129, ep_reward: 178.0, ep_step: 178
194
+ 2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 130, ep_reward: 167.0, ep_step: 167
195
+ 2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 131, ep_reward: 149.0, ep_step: 149
196
+ 2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 132, ep_reward: 127.0, ep_step: 127
197
+ 2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 133, ep_reward: 112.0, ep_step: 112
198
+ 2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 134, ep_reward: 52.0, ep_step: 52
199
+ 2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 135, ep_reward: 48.0, ep_step: 48
200
+ 2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 136, ep_reward: 110.0, ep_step: 110
201
+ 2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 137, ep_reward: 170.0, ep_step: 170
202
+ 2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 138, ep_reward: 147.0, ep_step: 147
203
+ 2023-05-17 13:44:48 - SimpleLog - INFO: - update_step: 40, online_eval_reward: 200.000
204
+ 2023-05-17 13:44:48 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
205
+ 2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 139, ep_reward: 127.0, ep_step: 127
206
+ 2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 140, ep_reward: 164.0, ep_step: 164
207
+ 2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 141, ep_reward: 200.0, ep_step: 200
208
+ 2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 142, ep_reward: 103.0, ep_step: 103
209
+ 2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 143, ep_reward: 200.0, ep_step: 200
210
+ 2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 144, ep_reward: 200.0, ep_step: 200
211
+ 2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 145, ep_reward: 147.0, ep_step: 147
212
+ 2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 146, ep_reward: 200.0, ep_step: 200
213
+ 2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 147, ep_reward: 157.0, ep_step: 157
214
+ 2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 148, ep_reward: 200.0, ep_step: 200
215
+ 2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 149, ep_reward: 163.0, ep_step: 163
216
+ 2023-05-17 13:44:50 - SimpleLog - INFO: - episode: 150, ep_reward: 200.0, ep_step: 200
217
+ 2023-05-17 13:44:50 - SimpleLog - INFO: - episode: 151, ep_reward: 200.0, ep_step: 200
218
+ 2023-05-17 13:44:50 - SimpleLog - INFO: - episode: 152, ep_reward: 146.0, ep_step: 146
219
+ 2023-05-17 13:44:50 - SimpleLog - INFO: - episode: 153, ep_reward: 118.0, ep_step: 118
220
+ 2023-05-17 13:44:50 - SimpleLog - INFO: - update_step: 50, online_eval_reward: 200.000
221
+ 2023-05-17 13:44:50 - SimpleLog - INFO: - episode: 154, ep_reward: 191.0, ep_step: 191
222
+ 2023-05-17 13:44:50 - SimpleLog - INFO: - episode: 155, ep_reward: 200.0, ep_step: 200
223
+ 2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 156, ep_reward: 153.0, ep_step: 153
224
+ 2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 157, ep_reward: 200.0, ep_step: 200
225
+ 2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 158, ep_reward: 160.0, ep_step: 160
226
+ 2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 159, ep_reward: 137.0, ep_step: 137
227
+ 2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 160, ep_reward: 132.0, ep_step: 132
228
+ 2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 161, ep_reward: 147.0, ep_step: 147
229
+ 2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 162, ep_reward: 142.0, ep_step: 142
230
+ 2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 163, ep_reward: 133.0, ep_step: 133
231
+ 2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 164, ep_reward: 183.0, ep_step: 183
232
+ 2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 165, ep_reward: 161.0, ep_step: 161
233
+ 2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 166, ep_reward: 186.0, ep_step: 186
234
+ 2023-05-17 13:44:52 - SimpleLog - INFO: - episode: 167, ep_reward: 200.0, ep_step: 200
235
+ 2023-05-17 13:44:52 - SimpleLog - INFO: - episode: 168, ep_reward: 195.0, ep_step: 195
236
+ 2023-05-17 13:44:52 - SimpleLog - INFO: - update_step: 60, online_eval_reward: 200.000
237
+ 2023-05-17 13:44:52 - SimpleLog - INFO: - episode: 169, ep_reward: 200.0, ep_step: 200
238
+ 2023-05-17 13:44:52 - SimpleLog - INFO: - episode: 170, ep_reward: 200.0, ep_step: 200
239
+ 2023-05-17 13:44:52 - SimpleLog - INFO: - episode: 171, ep_reward: 200.0, ep_step: 200
240
+ 2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 172, ep_reward: 200.0, ep_step: 200
241
+ 2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 173, ep_reward: 200.0, ep_step: 200
242
+ 2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 174, ep_reward: 200.0, ep_step: 200
243
+ 2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 175, ep_reward: 153.0, ep_step: 153
244
+ 2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 176, ep_reward: 158.0, ep_step: 158
245
+ 2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 177, ep_reward: 53.0, ep_step: 53
246
+ 2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 178, ep_reward: 157.0, ep_step: 157
247
+ 2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 179, ep_reward: 133.0, ep_step: 133
248
+ 2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 180, ep_reward: 126.0, ep_step: 126
249
+ 2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 181, ep_reward: 147.0, ep_step: 147
250
+ 2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 182, ep_reward: 145.0, ep_step: 145
251
+ 2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 183, ep_reward: 32.0, ep_step: 32
252
+ 2023-05-17 13:44:54 - SimpleLog - INFO: - episode: 184, ep_reward: 136.0, ep_step: 136
253
+ 2023-05-17 13:44:54 - SimpleLog - INFO: - episode: 185, ep_reward: 153.0, ep_step: 153
254
+ 2023-05-17 13:44:54 - SimpleLog - INFO: - update_step: 70, online_eval_reward: 200.000
255
+ 2023-05-17 13:44:54 - SimpleLog - INFO: - episode: 186, ep_reward: 199.0, ep_step: 199
256
+ 2023-05-17 13:44:54 - SimpleLog - INFO: - episode: 187, ep_reward: 200.0, ep_step: 200
257
+ 2023-05-17 13:44:54 - SimpleLog - INFO: - episode: 188, ep_reward: 200.0, ep_step: 200
258
+ 2023-05-17 13:44:54 - SimpleLog - INFO: - episode: 189, ep_reward: 193.0, ep_step: 193
259
+ 2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 190, ep_reward: 167.0, ep_step: 167
260
+ 2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 191, ep_reward: 200.0, ep_step: 200
261
+ 2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 192, ep_reward: 200.0, ep_step: 200
262
+ 2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 193, ep_reward: 200.0, ep_step: 200
263
+ 2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 194, ep_reward: 200.0, ep_step: 200
264
+ 2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 195, ep_reward: 200.0, ep_step: 200
265
+ 2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 196, ep_reward: 200.0, ep_step: 200
266
+ 2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 197, ep_reward: 200.0, ep_step: 200
267
+ 2023-05-17 13:44:56 - SimpleLog - INFO: - episode: 198, ep_reward: 200.0, ep_step: 200
268
+ 2023-05-17 13:44:56 - SimpleLog - INFO: - update_step: 80, online_eval_reward: 200.000
269
+ 2023-05-17 13:44:56 - SimpleLog - INFO: - episode: 199, ep_reward: 187.0, ep_step: 187
270
+ 2023-05-17 13:44:56 - SimpleLog - INFO: - Finish training! total time consumed: 15.62s
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/10 ADDED
Binary file (544 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/20 ADDED
Binary file (544 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/30 ADDED
Binary file (544 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/40 ADDED
Binary file (544 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/50 ADDED
Binary file (544 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/60 ADDED
Binary file (544 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/70 ADDED
Binary file (544 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/80 ADDED
Binary file (544 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/best ADDED
Binary file (544 kB). View file
 
CartPole-v1/{Test_CartPole-v1_PPO-KL_20221217-204214/models/actor.pth β†’ Train_single_CartPole-v1_PPO_20230517-134440/tb_logs/interact/events.out.tfevents.1684302280.JMac.local.60840.0} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffb5b77fd6cfb12b05d0565beee7740f4fa416356f845f856f5af472262d6726
3
- size 272215
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb3386d3c6b92bb605ad7e8d7b3df8091705eb6f30172bb0663894b85308602d
3
+ size 20980
CartPole-v1/{Test_CartPole-v1_PPO-KL_20221217-204214/models/critic.pth β†’ Train_single_CartPole-v1_PPO_20230517-134440/tb_logs/model/events.out.tfevents.1684302280.JMac.local.60840.1} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55e913c00d51a429d08ba5956d4ee79c8a4c4ad029072c539bb53b684c874cdd
3
- size 271191
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41d25a9a4855f35bdfc28b0a6ac033431ec9e3076deb21795f21d721a1583fa9
3
+ size 12920