johnjim0816 commited on
Commit
8ba2a32
1 Parent(s): 54d82df

update Cartpole-v1 PER DQN

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/config.yaml +0 -48
  2. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/logs/log.txt +0 -54
  3. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/models/checkpoint.pt +0 -3
  4. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/learning_curve.png +0 -0
  5. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/res.csv +0 -11
  6. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/config.yaml +0 -55
  7. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/logs/log.txt +0 -58
  8. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/learning_curve.png +0 -0
  9. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/res.csv +0 -11
  10. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/config.yaml +0 -55
  11. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/logs/log.txt +0 -58
  12. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/models/checkpoint.pt +0 -3
  13. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/learning_curve.png +0 -0
  14. ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/res.csv +0 -11
  15. ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/logs/log.txt +0 -57
  16. ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/results/trajs_0.pkl +0 -3
  17. ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/model/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.1 +0 -3
  18. ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/videos/video.gif +0 -3
  19. ClassControl/CartPole-v1/{Test_single_CartPole-v1_DQN_20230515-211926 → Test_single_CartPole-v1_PER_DQN_20230518-232330}/config.yaml +17 -13
  20. ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/logs/log.txt +61 -0
  21. ClassControl/CartPole-v1/{Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/interact/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.0 → Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/interact/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.0} +1 -1
  22. ClassControl/CartPole-v1/{Test_CartPole-v1_PER_DQN_20230331-231442/tb_logs/events.out.tfevents.1680275682.DESKTOP-H34HQIQ.290376.0 → Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/model/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.1} +1 -1
  23. ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/config.yaml +0 -48
  24. ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/logs/log.txt +0 -260
  25. ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/models/checkpoint.pt +0 -3
  26. ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/learning_curve.png +0 -0
  27. ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/res.csv +0 -201
  28. ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/tb_logs/events.out.tfevents.1680277069.DESKTOP-H34HQIQ.305216.0 +0 -3
  29. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/config.yaml +0 -48
  30. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/logs/log.txt +0 -267
  31. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/models/checkpoint.pt +0 -3
  32. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/learning_curve.png +0 -0
  33. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/res.csv +0 -201
  34. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/tb_logs/events.out.tfevents.1680274695.DESKTOP-H34HQIQ.317208.0 +0 -3
  35. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/config.yaml +0 -55
  36. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/logs/log.txt +0 -48
  37. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/models/checkpoint.pt +0 -3
  38. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/learning_curve.png +0 -0
  39. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/res.csv +0 -202
  40. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/config.yaml +0 -55
  41. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/logs/log.txt +0 -48
  42. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/models/checkpoint.pt +0 -3
  43. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/learning_curve.png +0 -0
  44. ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/res.csv +0 -251
  45. ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/config.yaml +0 -45
  46. ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/logs/log.txt +0 -166
  47. ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1000 +0 -0
  48. ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1500 +0 -0
  49. ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2000 +0 -0
  50. ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2500 +0 -0
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/config.yaml DELETED
@@ -1,48 +0,0 @@
1
- general_cfg:
2
- algo_name: PER_DQN
3
- device: cuda
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_CartPole-v1_PER_DQN_20230331-225815
9
- max_steps: 200
10
- mode: test
11
- new_step_api: true
12
- render: false
13
- save_fig: true
14
- seed: 1
15
- show_fig: false
16
- test_eps: 10
17
- train_eps: 100
18
- wrapper: null
19
- algo_cfg:
20
- batch_size: 64
21
- buffer_size: 100000
22
- epsilon_decay: 500
23
- epsilon_end: 0.01
24
- epsilon_start: 0.95
25
- gamma: 0.99
26
- hidden_dim: 256
27
- lr: 0.0001
28
- per_alpha: 0.6
29
- per_beta: 0.4
30
- per_beta_annealing: 0.001
31
- per_epsilon: 0.01
32
- target_update: 4
33
- value_layers:
34
- - activation: relu
35
- layer_dim:
36
- - n_states
37
- - 256
38
- layer_type: linear
39
- - activation: relu
40
- layer_dim:
41
- - 256
42
- - 256
43
- layer_type: linear
44
- - activation: none
45
- layer_dim:
46
- - 256
47
- - n_actions
48
- layer_type: linear
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/logs/log.txt DELETED
@@ -1,54 +0,0 @@
1
- 2023-03-31 23:14:42 - r - INFO: - Hyperparameters:
2
- 2023-03-31 23:14:42 - r - INFO: - ================================================================================
3
- 2023-03-31 23:14:42 - r - INFO: - Name Value Type
4
- 2023-03-31 23:14:42 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-03-31 23:14:42 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-03-31 23:14:42 - r - INFO: - wrapper None <class 'str'>
7
- 2023-03-31 23:14:42 - r - INFO: - render 0 <class 'bool'>
8
- 2023-03-31 23:14:42 - r - INFO: - algo_name PER_DQN <class 'str'>
9
- 2023-03-31 23:14:42 - r - INFO: - mode test <class 'str'>
10
- 2023-03-31 23:14:42 - r - INFO: - seed 1 <class 'int'>
11
- 2023-03-31 23:14:42 - r - INFO: - device cuda <class 'str'>
12
- 2023-03-31 23:14:42 - r - INFO: - train_eps 100 <class 'int'>
13
- 2023-03-31 23:14:42 - r - INFO: - test_eps 10 <class 'int'>
14
- 2023-03-31 23:14:42 - r - INFO: - eval_eps 10 <class 'int'>
15
- 2023-03-31 23:14:42 - r - INFO: - eval_per_episode 5 <class 'int'>
16
- 2023-03-31 23:14:42 - r - INFO: - max_steps 200 <class 'int'>
17
- 2023-03-31 23:14:42 - r - INFO: - load_checkpoint 1 <class 'bool'>
18
- 2023-03-31 23:14:42 - r - INFO: - load_path Train_CartPole-v1_PER_DQN_20230331-225815 <class 'str'>
19
- 2023-03-31 23:14:42 - r - INFO: - show_fig 0 <class 'bool'>
20
- 2023-03-31 23:14:42 - r - INFO: - save_fig 1 <class 'bool'>
21
- 2023-03-31 23:14:42 - r - INFO: - epsilon_start 0.95 <class 'float'>
22
- 2023-03-31 23:14:42 - r - INFO: - epsilon_end 0.01 <class 'float'>
23
- 2023-03-31 23:14:42 - r - INFO: - epsilon_decay 500 <class 'int'>
24
- 2023-03-31 23:14:42 - r - INFO: - hidden_dim 256 <class 'int'>
25
- 2023-03-31 23:14:42 - r - INFO: - gamma 0.99 <class 'float'>
26
- 2023-03-31 23:14:42 - r - INFO: - lr 0.0001 <class 'float'>
27
- 2023-03-31 23:14:42 - r - INFO: - buffer_size 100000 <class 'int'>
28
- 2023-03-31 23:14:42 - r - INFO: - per_alpha 0.6 <class 'float'>
29
- 2023-03-31 23:14:42 - r - INFO: - per_beta 0.4 <class 'float'>
30
- 2023-03-31 23:14:42 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
31
- 2023-03-31 23:14:42 - r - INFO: - per_epsilon 0.01 <class 'float'>
32
- 2023-03-31 23:14:42 - r - INFO: - batch_size 64 <class 'int'>
33
- 2023-03-31 23:14:42 - r - INFO: - target_update 4 <class 'int'>
34
- 2023-03-31 23:14:42 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
35
- 2023-03-31 23:14:42 - r - INFO: - task_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442 <class 'str'>
36
- 2023-03-31 23:14:42 - r - INFO: - res_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442/results <class 'str'>
37
- 2023-03-31 23:14:42 - r - INFO: - log_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442/logs <class 'str'>
38
- 2023-03-31 23:14:42 - r - INFO: - traj_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442/traj <class 'str'>
39
- 2023-03-31 23:14:42 - r - INFO: - tb_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442/tb_logs <class 'str'>
40
- 2023-03-31 23:14:42 - r - INFO: - ================================================================================
41
- 2023-03-31 23:14:42 - r - INFO: - n_states: 4, n_actions: 2
42
- 2023-03-31 23:14:43 - r - INFO: - Start testing!
43
- 2023-03-31 23:14:43 - r - INFO: - Env: CartPole-v1, Algorithm: PER_DQN, Device: cuda
44
- 2023-03-31 23:14:44 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
45
- 2023-03-31 23:14:44 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
46
- 2023-03-31 23:14:44 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
47
- 2023-03-31 23:14:44 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
48
- 2023-03-31 23:14:44 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
49
- 2023-03-31 23:14:44 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
50
- 2023-03-31 23:14:44 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
51
- 2023-03-31 23:14:44 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
52
- 2023-03-31 23:14:44 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
53
- 2023-03-31 23:14:44 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
54
- 2023-03-31 23:14:44 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/models/checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c438616b97ca890557a9e9b1cd42decfc5decc64e5aee660d89158290e92683d
3
- size 272471
 
 
 
 
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/learning_curve.png DELETED
Binary file (26.1 kB)
 
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,200.0,200
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,200.0,200
7
- 5,200.0,200
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,200.0,200
11
- 9,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/config.yaml DELETED
@@ -1,55 +0,0 @@
1
- general_cfg:
2
- algo_name: PER_DQN
3
- device: cpu
4
- env_name: gym
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_gym_PER_DQN_20230415-215002
9
- max_steps: 200
10
- mode: test
11
- mp_backend: mp
12
- n_workers: 1
13
- new_step_api: true
14
- render: false
15
- render_mode: human
16
- save_fig: true
17
- seed: 1
18
- show_fig: false
19
- test_eps: 10
20
- train_eps: 200
21
- wrapper: null
22
- algo_cfg:
23
- batch_size: 64
24
- buffer_size: 100000
25
- epsilon_decay: 500
26
- epsilon_end: 0.01
27
- epsilon_start: 0.95
28
- gamma: 0.99
29
- hidden_dim: 256
30
- lr: 0.0001
31
- per_alpha: 0.6
32
- per_beta: 0.4
33
- per_beta_annealing: 0.001
34
- per_epsilon: 0.01
35
- target_update: 4
36
- value_layers:
37
- - activation: relu
38
- layer_dim:
39
- - n_states
40
- - 256
41
- layer_type: linear
42
- - activation: relu
43
- layer_dim:
44
- - 256
45
- - 256
46
- layer_type: linear
47
- - activation: none
48
- layer_dim:
49
- - 256
50
- - n_actions
51
- layer_type: linear
52
- env_cfg:
53
- id: CartPole-v1
54
- new_step_api: true
55
- render_mode: null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/logs/log.txt DELETED
@@ -1,58 +0,0 @@
1
- 2023-04-15 21:51:47 - r - INFO: - Hyperparameters:
2
- 2023-04-15 21:51:47 - r - INFO: - ================================================================================
3
- 2023-04-15 21:51:47 - r - INFO: - Name Value Type
4
- 2023-04-15 21:51:47 - r - INFO: - env_name gym <class 'str'>
5
- 2023-04-15 21:51:47 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-15 21:51:47 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-15 21:51:47 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-15 21:51:47 - r - INFO: - render_mode None <class 'str'>
9
- 2023-04-15 21:51:47 - r - INFO: - algo_name PER_DQN <class 'str'>
10
- 2023-04-15 21:51:47 - r - INFO: - mode test <class 'str'>
11
- 2023-04-15 21:51:47 - r - INFO: - mp_backend mp <class 'str'>
12
- 2023-04-15 21:51:47 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-15 21:51:47 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-15 21:51:47 - r - INFO: - train_eps 200 <class 'int'>
15
- 2023-04-15 21:51:47 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-15 21:51:47 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-15 21:51:47 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-15 21:51:47 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-15 21:51:47 - r - INFO: - load_checkpoint 1 <class 'bool'>
20
- 2023-04-15 21:51:47 - r - INFO: - load_path Train_gym_PER_DQN_20230415-215002 <class 'str'>
21
- 2023-04-15 21:51:47 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-15 21:51:47 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-15 21:51:47 - r - INFO: - n_workers 1 <class 'int'>
24
- 2023-04-15 21:51:47 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-15 21:51:47 - r - INFO: - epsilon_end 0.01 <class 'float'>
26
- 2023-04-15 21:51:47 - r - INFO: - epsilon_decay 500 <class 'int'>
27
- 2023-04-15 21:51:47 - r - INFO: - hidden_dim 256 <class 'int'>
28
- 2023-04-15 21:51:47 - r - INFO: - gamma 0.99 <class 'float'>
29
- 2023-04-15 21:51:47 - r - INFO: - lr 0.0001 <class 'float'>
30
- 2023-04-15 21:51:47 - r - INFO: - buffer_size 100000 <class 'int'>
31
- 2023-04-15 21:51:47 - r - INFO: - per_alpha 0.6 <class 'float'>
32
- 2023-04-15 21:51:47 - r - INFO: - per_beta 0.4 <class 'float'>
33
- 2023-04-15 21:51:47 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
34
- 2023-04-15 21:51:47 - r - INFO: - per_epsilon 0.01 <class 'float'>
35
- 2023-04-15 21:51:47 - r - INFO: - batch_size 64 <class 'int'>
36
- 2023-04-15 21:51:47 - r - INFO: - target_update 4 <class 'int'>
37
- 2023-04-15 21:51:47 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
38
- 2023-04-15 21:51:47 - r - INFO: - id CartPole-v1 <class 'str'>
39
- 2023-04-15 21:51:47 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147 <class 'str'>
40
- 2023-04-15 21:51:47 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147/results <class 'str'>
41
- 2023-04-15 21:51:47 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147/logs <class 'str'>
42
- 2023-04-15 21:51:47 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147/traj <class 'str'>
43
- 2023-04-15 21:51:47 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147/videos <class 'str'>
44
- 2023-04-15 21:51:47 - r - INFO: - ================================================================================
45
- 2023-04-15 21:51:47 - r - INFO: - n_states: 4, n_actions: 2
46
- 2023-04-15 21:51:47 - r - INFO: - Start testing!
47
- 2023-04-15 21:51:47 - r - INFO: - Env: gym, Algorithm: PER_DQN, Device: cpu
48
- 2023-04-15 21:51:47 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
49
- 2023-04-15 21:51:47 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
50
- 2023-04-15 21:51:47 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
51
- 2023-04-15 21:51:47 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
52
- 2023-04-15 21:51:47 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
53
- 2023-04-15 21:51:47 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
54
- 2023-04-15 21:51:47 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
55
- 2023-04-15 21:51:47 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
56
- 2023-04-15 21:51:47 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
57
- 2023-04-15 21:51:47 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
58
- 2023-04-15 21:51:47 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/learning_curve.png DELETED
Binary file (27.1 kB)
 
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,200.0,200
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,200.0,200
7
- 5,200.0,200
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,200.0,200
11
- 9,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/config.yaml DELETED
@@ -1,55 +0,0 @@
1
- general_cfg:
2
- algo_name: PER_DQN
3
- device: cpu
4
- env_name: gym
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_CartPole-v1_PER_DQN_ray_20230415-215738
9
- max_steps: 200
10
- mode: test
11
- mp_backend: ray
12
- n_workers: 1
13
- new_step_api: true
14
- render: false
15
- render_mode: human
16
- save_fig: true
17
- seed: 1
18
- show_fig: false
19
- test_eps: 10
20
- train_eps: 250
21
- wrapper: null
22
- algo_cfg:
23
- batch_size: 64
24
- buffer_size: 100000
25
- epsilon_decay: 500
26
- epsilon_end: 0.01
27
- epsilon_start: 0.95
28
- gamma: 0.99
29
- hidden_dim: 256
30
- lr: 0.0001
31
- per_alpha: 0.6
32
- per_beta: 0.4
33
- per_beta_annealing: 0.001
34
- per_epsilon: 0.01
35
- target_update: 4
36
- value_layers:
37
- - activation: relu
38
- layer_dim:
39
- - n_states
40
- - 256
41
- layer_type: linear
42
- - activation: relu
43
- layer_dim:
44
- - 256
45
- - 256
46
- layer_type: linear
47
- - activation: none
48
- layer_dim:
49
- - 256
50
- - n_actions
51
- layer_type: linear
52
- env_cfg:
53
- id: CartPole-v1
54
- new_step_api: true
55
- render_mode: null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/logs/log.txt DELETED
@@ -1,58 +0,0 @@
1
- 2023-04-15 22:05:40 - r - INFO: - Hyperparameters:
2
- 2023-04-15 22:05:40 - r - INFO: - ================================================================================
3
- 2023-04-15 22:05:40 - r - INFO: - Name Value Type
4
- 2023-04-15 22:05:40 - r - INFO: - env_name gym <class 'str'>
5
- 2023-04-15 22:05:40 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-15 22:05:40 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-15 22:05:40 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-15 22:05:40 - r - INFO: - render_mode None <class 'str'>
9
- 2023-04-15 22:05:40 - r - INFO: - algo_name PER_DQN <class 'str'>
10
- 2023-04-15 22:05:40 - r - INFO: - mode test <class 'str'>
11
- 2023-04-15 22:05:40 - r - INFO: - mp_backend ray <class 'str'>
12
- 2023-04-15 22:05:40 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-15 22:05:40 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-15 22:05:40 - r - INFO: - train_eps 250 <class 'int'>
15
- 2023-04-15 22:05:40 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-15 22:05:40 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-15 22:05:40 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-15 22:05:40 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-15 22:05:40 - r - INFO: - load_checkpoint 1 <class 'bool'>
20
- 2023-04-15 22:05:40 - r - INFO: - load_path Train_CartPole-v1_PER_DQN_ray_20230415-215738 <class 'str'>
21
- 2023-04-15 22:05:40 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-15 22:05:40 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-15 22:05:40 - r - INFO: - n_workers 1 <class 'int'>
24
- 2023-04-15 22:05:40 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-15 22:05:40 - r - INFO: - epsilon_end 0.01 <class 'float'>
26
- 2023-04-15 22:05:40 - r - INFO: - epsilon_decay 500 <class 'int'>
27
- 2023-04-15 22:05:40 - r - INFO: - hidden_dim 256 <class 'int'>
28
- 2023-04-15 22:05:40 - r - INFO: - gamma 0.99 <class 'float'>
29
- 2023-04-15 22:05:40 - r - INFO: - lr 0.0001 <class 'float'>
30
- 2023-04-15 22:05:40 - r - INFO: - buffer_size 100000 <class 'int'>
31
- 2023-04-15 22:05:40 - r - INFO: - per_alpha 0.6 <class 'float'>
32
- 2023-04-15 22:05:40 - r - INFO: - per_beta 0.4 <class 'float'>
33
- 2023-04-15 22:05:40 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
34
- 2023-04-15 22:05:40 - r - INFO: - per_epsilon 0.01 <class 'float'>
35
- 2023-04-15 22:05:40 - r - INFO: - batch_size 64 <class 'int'>
36
- 2023-04-15 22:05:40 - r - INFO: - target_update 4 <class 'int'>
37
- 2023-04-15 22:05:40 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
38
- 2023-04-15 22:05:40 - r - INFO: - id CartPole-v1 <class 'str'>
39
- 2023-04-15 22:05:40 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540 <class 'str'>
40
- 2023-04-15 22:05:40 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540/results <class 'str'>
41
- 2023-04-15 22:05:40 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540/logs <class 'str'>
42
- 2023-04-15 22:05:40 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540/traj <class 'str'>
43
- 2023-04-15 22:05:40 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540/videos <class 'str'>
44
- 2023-04-15 22:05:40 - r - INFO: - ================================================================================
45
- 2023-04-15 22:05:40 - r - INFO: - n_states: 4, n_actions: 2
46
- 2023-04-15 22:05:40 - r - INFO: - Start testing!
47
- 2023-04-15 22:05:40 - r - INFO: - Env: gym, Algorithm: PER_DQN, Device: cpu
48
- 2023-04-15 22:05:40 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
49
- 2023-04-15 22:05:40 - r - INFO: - Episode: 2/10, Reward: 199.000, Step: 199
50
- 2023-04-15 22:05:40 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
51
- 2023-04-15 22:05:40 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
52
- 2023-04-15 22:05:40 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
53
- 2023-04-15 22:05:40 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
54
- 2023-04-15 22:05:40 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
55
- 2023-04-15 22:05:40 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
56
- 2023-04-15 22:05:41 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
57
- 2023-04-15 22:05:41 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
58
- 2023-04-15 22:05:41 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/models/checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0efe3ec576afef2311748067e61af0fe6c939f7a2c2a1500001987a5d0092ce3
3
- size 272407
 
 
 
 
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/learning_curve.png DELETED
Binary file (32.6 kB)
 
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,199.0,199
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,200.0,200
7
- 5,200.0,200
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,200.0,200
11
- 9,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/logs/log.txt DELETED
@@ -1,57 +0,0 @@
1
- 2023-05-15 21:19:26 - SimpleLog - INFO: - General Configs:
2
- 2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================
3
- 2023-05-15 21:19:26 - SimpleLog - INFO: - Name Value Type
4
- 2023-05-15 21:19:26 - SimpleLog - INFO: - env_name gym <class 'str'>
5
- 2023-05-15 21:19:26 - SimpleLog - INFO: - algo_name DQN <class 'str'>
6
- 2023-05-15 21:19:26 - SimpleLog - INFO: - mode test <class 'str'>
7
- 2023-05-15 21:19:26 - SimpleLog - INFO: - collect_traj 1 <class 'bool'>
8
- 2023-05-15 21:19:26 - SimpleLog - INFO: - mp_backend single <class 'str'>
9
- 2023-05-15 21:19:26 - SimpleLog - INFO: - n_workers 1 <class 'int'>
10
- 2023-05-15 21:19:26 - SimpleLog - INFO: - seed 1 <class 'int'>
11
- 2023-05-15 21:19:26 - SimpleLog - INFO: - device cpu <class 'str'>
12
- 2023-05-15 21:19:26 - SimpleLog - INFO: - max_episode 10 <class 'int'>
13
- 2023-05-15 21:19:26 - SimpleLog - INFO: - max_step 200 <class 'int'>
14
- 2023-05-15 21:19:26 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
15
- 2023-05-15 21:19:26 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
16
- 2023-05-15 21:19:26 - SimpleLog - INFO: - load_checkpoint 1 <class 'bool'>
17
- 2023-05-15 21:19:26 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 <class 'str'>
18
- 2023-05-15 21:19:26 - SimpleLog - INFO: - show_fig 0 <class 'bool'>
19
- 2023-05-15 21:19:26 - SimpleLog - INFO: - save_fig 1 <class 'bool'>
20
- 2023-05-15 21:19:26 - SimpleLog - INFO: - load_model_step best <class 'str'>
21
- 2023-05-15 21:19:26 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
22
- 2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================
23
- 2023-05-15 21:19:26 - SimpleLog - INFO: - Algo Configs:
24
- 2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================
25
- 2023-05-15 21:19:26 - SimpleLog - INFO: - Name Value Type
26
- 2023-05-15 21:19:26 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
27
- 2023-05-15 21:19:26 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
28
- 2023-05-15 21:19:26 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
29
- 2023-05-15 21:19:26 - SimpleLog - INFO: - gamma 0.95 <class 'float'>
30
- 2023-05-15 21:19:26 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
31
- 2023-05-15 21:19:26 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
32
- 2023-05-15 21:19:26 - SimpleLog - INFO: - batch_size 64 <class 'int'>
33
- 2023-05-15 21:19:26 - SimpleLog - INFO: - target_update 4 <class 'int'>
34
- 2023-05-15 21:19:26 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
35
- 2023-05-15 21:19:26 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
36
- 2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================
37
- 2023-05-15 21:19:26 - SimpleLog - INFO: - Env Configs:
38
- 2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================
39
- 2023-05-15 21:19:26 - SimpleLog - INFO: - Name Value Type
40
- 2023-05-15 21:19:26 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
41
- 2023-05-15 21:19:26 - SimpleLog - INFO: - render_mode rgb_array <class 'str'>
42
- 2023-05-15 21:19:26 - SimpleLog - INFO: - wrapper None <class 'str'>
43
- 2023-05-15 21:19:26 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
44
- 2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================
45
- 2023-05-15 21:19:26 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
46
- 2023-05-15 21:19:26 - SimpleLog - INFO: - Start testing!
47
- 2023-05-15 21:19:26 - SimpleLog - INFO: - episode: 0, ep_reward: 200.0, ep_step: 200
48
- 2023-05-15 21:19:30 - SimpleLog - INFO: - episode: 1, ep_reward: 200.0, ep_step: 200
49
- 2023-05-15 21:19:30 - SimpleLog - INFO: - episode: 2, ep_reward: 200.0, ep_step: 200
50
- 2023-05-15 21:19:31 - SimpleLog - INFO: - episode: 3, ep_reward: 200.0, ep_step: 200
51
- 2023-05-15 21:19:31 - SimpleLog - INFO: - episode: 4, ep_reward: 200.0, ep_step: 200
52
- 2023-05-15 21:19:31 - SimpleLog - INFO: - episode: 5, ep_reward: 200.0, ep_step: 200
53
- 2023-05-15 21:19:32 - SimpleLog - INFO: - episode: 6, ep_reward: 200.0, ep_step: 200
54
- 2023-05-15 21:19:32 - SimpleLog - INFO: - episode: 7, ep_reward: 200.0, ep_step: 200
55
- 2023-05-15 21:19:33 - SimpleLog - INFO: - episode: 8, ep_reward: 200.0, ep_step: 200
56
- 2023-05-15 21:19:33 - SimpleLog - INFO: - episode: 9, ep_reward: 200.0, ep_step: 200
57
- 2023-05-15 21:19:33 - SimpleLog - INFO: - Finish testing! total time consumed: 7.28s
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/results/trajs_0.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fae5aa5ceb51833f761621229159f743bbc8e8a6766007136b3f2af48a1a001
3
- size 130746
 
 
 
 
ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/model/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.1 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:778bf4752bfe3bce34855fa51be3e7fdeb15c8d13d02779f6ba433435fa2fdf4
3
- size 40
 
 
 
 
ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/videos/video.gif DELETED

Git LFS Details

  • SHA256: a417ad8def4a6907872c3de9cd2883536a4b41e1c0d36bb98af3830d6eb76739
  • Pointer size: 131 Bytes
  • Size of remote file: 132 kB
ClassControl/CartPole-v1/{Test_single_CartPole-v1_DQN_20230515-211926 → Test_single_CartPole-v1_PER_DQN_20230518-232330}/config.yaml RENAMED
@@ -1,39 +1,43 @@
1
  general_cfg:
2
- algo_name: DQN
3
- collect_traj: true
4
- device: cpu
5
  env_name: gym
6
  load_checkpoint: true
7
  load_model_step: best
8
- load_path: Train_single_CartPole-v1_DQN_20230515-211721
9
  max_episode: 10
10
  max_step: 200
11
  mode: test
12
  model_save_fre: 500
13
  mp_backend: single
14
- n_workers: 1
 
15
  online_eval: true
16
  online_eval_episode: 10
17
- save_fig: true
18
  seed: 1
19
- show_fig: false
20
  algo_cfg:
21
  batch_size: 64
22
  buffer_size: 100000
23
- buffer_type: REPLAY_QUE
24
- epsilon_decay: 500
25
  epsilon_end: 0.01
26
  epsilon_start: 0.95
27
- gamma: 0.95
28
  lr: 0.0001
 
 
 
 
29
  target_update: 4
30
  value_layers:
31
  - activation: relu
32
- layer_dim:
33
  - 256
34
  layer_type: linear
35
  - activation: relu
36
- layer_dim:
37
  - 256
38
  layer_type: linear
39
  env_cfg:
@@ -41,5 +45,5 @@ env_cfg:
41
  ignore_params:
42
  - wrapper
43
  - ignore_params
44
- render_mode: rgb_array
45
  wrapper: null
 
1
  general_cfg:
2
+ algo_name: PER_DQN
3
+ collect_traj: false
4
+ device: cuda
5
  env_name: gym
6
  load_checkpoint: true
7
  load_model_step: best
8
+ load_path: Train_single_CartPole-v1_PER_DQN_20230518-232215
9
  max_episode: 10
10
  max_step: 200
11
  mode: test
12
  model_save_fre: 500
13
  mp_backend: single
14
+ n_learners: 1
15
+ n_workers: 2
16
  online_eval: true
17
  online_eval_episode: 10
 
18
  seed: 1
19
+ share_buffer: true
20
  algo_cfg:
21
  batch_size: 64
22
  buffer_size: 100000
23
+ buffer_type: PER_QUE
24
+ epsilon_decay: 1000
25
  epsilon_end: 0.01
26
  epsilon_start: 0.95
27
+ gamma: 0.99
28
  lr: 0.0001
29
+ per_alpha: 0.6
30
+ per_beta: 0.4
31
+ per_beta_annealing: 0.001
32
+ per_epsilon: 0.01
33
  target_update: 4
34
  value_layers:
35
  - activation: relu
36
+ layer_size:
37
  - 256
38
  layer_type: linear
39
  - activation: relu
40
+ layer_size:
41
  - 256
42
  layer_type: linear
43
  env_cfg:
 
45
  ignore_params:
46
  - wrapper
47
  - ignore_params
48
+ render_mode: null
49
  wrapper: null
ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/logs/log.txt ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - General Configs:
2
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================
3
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - Name Value Type
4
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - env_name gym <class 'str'>
5
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - algo_name PER_DQN <class 'str'>
6
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - mode test <class 'str'>
7
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - device cuda <class 'str'>
8
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - seed 1 <class 'int'>
9
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - max_episode 10 <class 'int'>
10
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - max_step 200 <class 'int'>
11
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
12
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - mp_backend single <class 'str'>
13
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - n_workers 2 <class 'int'>
14
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - n_learners 1 <class 'int'>
15
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - share_buffer 1 <class 'bool'>
16
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
17
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
18
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
19
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - load_checkpoint 1 <class 'bool'>
20
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_PER_DQN_20230518-232215 <class 'str'>
21
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - load_model_step best <class 'str'>
22
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================
23
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - Algo Configs:
24
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================
25
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - Name Value Type
26
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
27
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
28
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - epsilon_decay 1000 <class 'int'>
29
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
30
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
31
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - buffer_type PER_QUE <class 'str'>
32
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
33
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - per_alpha 0.6 <class 'float'>
34
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - per_beta 0.4 <class 'float'>
35
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - per_beta_annealing 0.001 <class 'float'>
36
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - per_epsilon 0.01 <class 'float'>
37
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - batch_size 64 <class 'int'>
38
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - target_update 4 <class 'int'>
39
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}] <class 'str'>
40
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================
41
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - Env Configs:
42
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================
43
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - Name Value Type
44
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
45
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - render_mode None <class 'str'>
46
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - wrapper None <class 'str'>
47
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
48
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================
49
+ 2023-05-18 23:23:30 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
50
+ 2023-05-18 23:23:31 - SimpleLog - INFO: - Start testing!
51
+ 2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 0, ep_reward: 200.0, ep_step: 200
52
+ 2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 1, ep_reward: 200.0, ep_step: 200
53
+ 2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 2, ep_reward: 200.0, ep_step: 200
54
+ 2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 3, ep_reward: 200.0, ep_step: 200
55
+ 2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 4, ep_reward: 200.0, ep_step: 200
56
+ 2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 5, ep_reward: 200.0, ep_step: 200
57
+ 2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 6, ep_reward: 200.0, ep_step: 200
58
+ 2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 7, ep_reward: 200.0, ep_step: 200
59
+ 2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 8, ep_reward: 200.0, ep_step: 200
60
+ 2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 9, ep_reward: 200.0, ep_step: 200
61
+ 2023-05-18 23:23:33 - SimpleLog - INFO: - Finish testing! total time consumed: 2.60s
ClassControl/CartPole-v1/{Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/interact/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.0 → Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/interact/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.0} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f5348239a851416ae5ead991b86721f16f71d5541ce3e49671133bb408edec2
3
  size 1056
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc3ae7a23e9f72498a6b6190433f9925fc02af039e03defef47a6fda915a140c
3
  size 1056
ClassControl/CartPole-v1/{Test_CartPole-v1_PER_DQN_20230331-231442/tb_logs/events.out.tfevents.1680275682.DESKTOP-H34HQIQ.290376.0 → Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/model/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.1} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5157a74a9c33c3bb078d1509c6bd0013251490c4413c68decd709494f31a8d60
3
  size 40
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70fea5313fdd99c138f919e97e5556cb1d9e4370727b560fe1eeb6469d023588
3
  size 40
ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/config.yaml DELETED
@@ -1,48 +0,0 @@
1
- general_cfg:
2
- algo_name: PER_DQN
3
- device: cuda
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: false
8
- load_path: Train_CartPole-v1_PER_DQN
9
- max_steps: 200
10
- mode: train
11
- new_step_api: true
12
- render: false
13
- save_fig: true
14
- seed: 1
15
- show_fig: false
16
- test_eps: 10
17
- train_eps: 200
18
- wrapper: null
19
- algo_cfg:
20
- batch_size: 64
21
- buffer_size: 100000
22
- epsilon_decay: 500
23
- epsilon_end: 0.01
24
- epsilon_start: 0.95
25
- gamma: 0.99
26
- hidden_dim: 256
27
- lr: 0.0001
28
- per_alpha: 0.6
29
- per_beta: 0.4
30
- per_beta_annealing: 0.001
31
- per_epsilon: 0.01
32
- target_update: 4
33
- value_layers:
34
- - activation: relu
35
- layer_dim:
36
- - n_states
37
- - 256
38
- layer_type: linear
39
- - activation: relu
40
- layer_dim:
41
- - 256
42
- - 256
43
- layer_type: linear
44
- - activation: none
45
- layer_dim:
46
- - 256
47
- - n_actions
48
- layer_type: linear
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/logs/log.txt DELETED
@@ -1,260 +0,0 @@
1
- 2023-03-31 23:37:49 - r - INFO: - Hyperparameters:
2
- 2023-03-31 23:37:49 - r - INFO: - ================================================================================
3
- 2023-03-31 23:37:49 - r - INFO: - Name Value Type
4
- 2023-03-31 23:37:49 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-03-31 23:37:49 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-03-31 23:37:49 - r - INFO: - wrapper None <class 'str'>
7
- 2023-03-31 23:37:49 - r - INFO: - render 0 <class 'bool'>
8
- 2023-03-31 23:37:49 - r - INFO: - algo_name PER_DQN <class 'str'>
9
- 2023-03-31 23:37:49 - r - INFO: - mode train <class 'str'>
10
- 2023-03-31 23:37:49 - r - INFO: - seed 1 <class 'int'>
11
- 2023-03-31 23:37:49 - r - INFO: - device cuda <class 'str'>
12
- 2023-03-31 23:37:49 - r - INFO: - train_eps 200 <class 'int'>
13
- 2023-03-31 23:37:49 - r - INFO: - test_eps 10 <class 'int'>
14
- 2023-03-31 23:37:49 - r - INFO: - eval_eps 10 <class 'int'>
15
- 2023-03-31 23:37:49 - r - INFO: - eval_per_episode 5 <class 'int'>
16
- 2023-03-31 23:37:49 - r - INFO: - max_steps 200 <class 'int'>
17
- 2023-03-31 23:37:49 - r - INFO: - load_checkpoint 0 <class 'bool'>
18
- 2023-03-31 23:37:49 - r - INFO: - load_path Train_CartPole-v1_PER_DQN <class 'str'>
19
- 2023-03-31 23:37:49 - r - INFO: - show_fig 0 <class 'bool'>
20
- 2023-03-31 23:37:49 - r - INFO: - save_fig 1 <class 'bool'>
21
- 2023-03-31 23:37:49 - r - INFO: - epsilon_start 0.95 <class 'float'>
22
- 2023-03-31 23:37:49 - r - INFO: - epsilon_end 0.01 <class 'float'>
23
- 2023-03-31 23:37:49 - r - INFO: - epsilon_decay 500 <class 'int'>
24
- 2023-03-31 23:37:49 - r - INFO: - hidden_dim 256 <class 'int'>
25
- 2023-03-31 23:37:49 - r - INFO: - gamma 0.99 <class 'float'>
26
- 2023-03-31 23:37:49 - r - INFO: - lr 0.0001 <class 'float'>
27
- 2023-03-31 23:37:49 - r - INFO: - buffer_size 100000 <class 'int'>
28
- 2023-03-31 23:37:49 - r - INFO: - per_alpha 0.6 <class 'float'>
29
- 2023-03-31 23:37:49 - r - INFO: - per_beta 0.4 <class 'float'>
30
- 2023-03-31 23:37:49 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
31
- 2023-03-31 23:37:49 - r - INFO: - per_epsilon 0.01 <class 'float'>
32
- 2023-03-31 23:37:49 - r - INFO: - batch_size 64 <class 'int'>
33
- 2023-03-31 23:37:49 - r - INFO: - target_update 4 <class 'int'>
34
- 2023-03-31 23:37:49 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
35
- 2023-03-31 23:37:49 - r - INFO: - task_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749 <class 'str'>
36
- 2023-03-31 23:37:49 - r - INFO: - res_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749/results <class 'str'>
37
- 2023-03-31 23:37:49 - r - INFO: - log_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749/logs <class 'str'>
38
- 2023-03-31 23:37:49 - r - INFO: - traj_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749/traj <class 'str'>
39
- 2023-03-31 23:37:49 - r - INFO: - tb_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749/tb_logs <class 'str'>
40
- 2023-03-31 23:37:49 - r - INFO: - ================================================================================
41
- 2023-03-31 23:37:49 - r - INFO: - n_states: 4, n_actions: 2
42
- 2023-03-31 23:37:50 - r - INFO: - Start training!
43
- 2023-03-31 23:37:50 - r - INFO: - Env: CartPole-v1, Algorithm: PER_DQN, Device: cuda
44
- 2023-03-31 23:37:51 - r - INFO: - Episode: 1/200, Reward: 16.000, Step: 16
45
- 2023-03-31 23:37:51 - r - INFO: - Episode: 2/200, Reward: 15.000, Step: 15
46
- 2023-03-31 23:37:51 - r - INFO: - Episode: 3/200, Reward: 25.000, Step: 25
47
- 2023-03-31 23:37:51 - r - INFO: - Episode: 4/200, Reward: 16.000, Step: 16
48
- 2023-03-31 23:37:51 - r - INFO: - Episode: 5/200, Reward: 20.000, Step: 20
49
- 2023-03-31 23:37:51 - r - INFO: - Current episode 5 has the best eval reward: 9.000
50
- 2023-03-31 23:37:51 - r - INFO: - Episode: 6/200, Reward: 10.000, Step: 10
51
- 2023-03-31 23:37:51 - r - INFO: - Episode: 7/200, Reward: 24.000, Step: 24
52
- 2023-03-31 23:37:51 - r - INFO: - Episode: 8/200, Reward: 20.000, Step: 20
53
- 2023-03-31 23:37:51 - r - INFO: - Episode: 9/200, Reward: 20.000, Step: 20
54
- 2023-03-31 23:37:51 - r - INFO: - Episode: 10/200, Reward: 25.000, Step: 25
55
- 2023-03-31 23:37:51 - r - INFO: - Current episode 10 has the best eval reward: 9.100
56
- 2023-03-31 23:37:51 - r - INFO: - Episode: 11/200, Reward: 9.000, Step: 9
57
- 2023-03-31 23:37:51 - r - INFO: - Episode: 12/200, Reward: 23.000, Step: 23
58
- 2023-03-31 23:37:51 - r - INFO: - Episode: 13/200, Reward: 14.000, Step: 14
59
- 2023-03-31 23:37:51 - r - INFO: - Episode: 14/200, Reward: 12.000, Step: 12
60
- 2023-03-31 23:37:51 - r - INFO: - Episode: 15/200, Reward: 11.000, Step: 11
61
- 2023-03-31 23:37:51 - r - INFO: - Episode: 16/200, Reward: 17.000, Step: 17
62
- 2023-03-31 23:37:51 - r - INFO: - Episode: 17/200, Reward: 10.000, Step: 10
63
- 2023-03-31 23:37:51 - r - INFO: - Episode: 18/200, Reward: 17.000, Step: 17
64
- 2023-03-31 23:37:51 - r - INFO: - Episode: 19/200, Reward: 10.000, Step: 10
65
- 2023-03-31 23:37:51 - r - INFO: - Episode: 20/200, Reward: 10.000, Step: 10
66
- 2023-03-31 23:37:52 - r - INFO: - Episode: 21/200, Reward: 22.000, Step: 22
67
- 2023-03-31 23:37:52 - r - INFO: - Episode: 22/200, Reward: 18.000, Step: 18
68
- 2023-03-31 23:37:52 - r - INFO: - Episode: 23/200, Reward: 13.000, Step: 13
69
- 2023-03-31 23:37:52 - r - INFO: - Episode: 24/200, Reward: 13.000, Step: 13
70
- 2023-03-31 23:37:52 - r - INFO: - Episode: 25/200, Reward: 9.000, Step: 9
71
- 2023-03-31 23:37:52 - r - INFO: - Current episode 25 has the best eval reward: 9.600
72
- 2023-03-31 23:37:52 - r - INFO: - Episode: 26/200, Reward: 10.000, Step: 10
73
- 2023-03-31 23:37:52 - r - INFO: - Episode: 27/200, Reward: 13.000, Step: 13
74
- 2023-03-31 23:37:52 - r - INFO: - Episode: 28/200, Reward: 11.000, Step: 11
75
- 2023-03-31 23:37:52 - r - INFO: - Episode: 29/200, Reward: 10.000, Step: 10
76
- 2023-03-31 23:37:52 - r - INFO: - Episode: 30/200, Reward: 12.000, Step: 12
77
- 2023-03-31 23:37:52 - r - INFO: - Episode: 31/200, Reward: 14.000, Step: 14
78
- 2023-03-31 23:37:52 - r - INFO: - Episode: 32/200, Reward: 11.000, Step: 11
79
- 2023-03-31 23:37:52 - r - INFO: - Episode: 33/200, Reward: 18.000, Step: 18
80
- 2023-03-31 23:37:52 - r - INFO: - Episode: 34/200, Reward: 10.000, Step: 10
81
- 2023-03-31 23:37:52 - r - INFO: - Episode: 35/200, Reward: 10.000, Step: 10
82
- 2023-03-31 23:37:52 - r - INFO: - Episode: 36/200, Reward: 8.000, Step: 8
83
- 2023-03-31 23:37:52 - r - INFO: - Episode: 37/200, Reward: 12.000, Step: 12
84
- 2023-03-31 23:37:52 - r - INFO: - Episode: 38/200, Reward: 10.000, Step: 10
85
- 2023-03-31 23:37:52 - r - INFO: - Episode: 39/200, Reward: 11.000, Step: 11
86
- 2023-03-31 23:37:52 - r - INFO: - Episode: 40/200, Reward: 10.000, Step: 10
87
- 2023-03-31 23:37:53 - r - INFO: - Episode: 41/200, Reward: 9.000, Step: 9
88
- 2023-03-31 23:37:53 - r - INFO: - Episode: 42/200, Reward: 12.000, Step: 12
89
- 2023-03-31 23:37:53 - r - INFO: - Episode: 43/200, Reward: 9.000, Step: 9
90
- 2023-03-31 23:37:53 - r - INFO: - Episode: 44/200, Reward: 13.000, Step: 13
91
- 2023-03-31 23:37:53 - r - INFO: - Episode: 45/200, Reward: 13.000, Step: 13
92
- 2023-03-31 23:37:53 - r - INFO: - Episode: 46/200, Reward: 12.000, Step: 12
93
- 2023-03-31 23:37:53 - r - INFO: - Episode: 47/200, Reward: 10.000, Step: 10
94
- 2023-03-31 23:37:53 - r - INFO: - Episode: 48/200, Reward: 10.000, Step: 10
95
- 2023-03-31 23:37:53 - r - INFO: - Episode: 49/200, Reward: 10.000, Step: 10
96
- 2023-03-31 23:37:53 - r - INFO: - Episode: 50/200, Reward: 13.000, Step: 13
97
- 2023-03-31 23:37:53 - r - INFO: - Episode: 51/200, Reward: 10.000, Step: 10
98
- 2023-03-31 23:37:53 - r - INFO: - Episode: 52/200, Reward: 15.000, Step: 15
99
- 2023-03-31 23:37:53 - r - INFO: - Episode: 53/200, Reward: 18.000, Step: 18
100
- 2023-03-31 23:37:53 - r - INFO: - Episode: 54/200, Reward: 18.000, Step: 18
101
- 2023-03-31 23:37:53 - r - INFO: - Episode: 55/200, Reward: 16.000, Step: 16
102
- 2023-03-31 23:37:53 - r - INFO: - Current episode 55 has the best eval reward: 28.000
103
- 2023-03-31 23:37:53 - r - INFO: - Episode: 56/200, Reward: 47.000, Step: 47
104
- 2023-03-31 23:37:54 - r - INFO: - Episode: 57/200, Reward: 87.000, Step: 87
105
- 2023-03-31 23:37:54 - r - INFO: - Episode: 58/200, Reward: 20.000, Step: 20
106
- 2023-03-31 23:37:54 - r - INFO: - Episode: 59/200, Reward: 47.000, Step: 47
107
- 2023-03-31 23:37:54 - r - INFO: - Episode: 60/200, Reward: 17.000, Step: 17
108
- 2023-03-31 23:37:54 - r - INFO: - Episode: 61/200, Reward: 37.000, Step: 37
109
- 2023-03-31 23:37:54 - r - INFO: - Episode: 62/200, Reward: 43.000, Step: 43
110
- 2023-03-31 23:37:54 - r - INFO: - Episode: 63/200, Reward: 33.000, Step: 33
111
- 2023-03-31 23:37:55 - r - INFO: - Episode: 64/200, Reward: 18.000, Step: 18
112
- 2023-03-31 23:37:55 - r - INFO: - Episode: 65/200, Reward: 29.000, Step: 29
113
- 2023-03-31 23:37:55 - r - INFO: - Current episode 65 has the best eval reward: 30.700
114
- 2023-03-31 23:37:55 - r - INFO: - Episode: 66/200, Reward: 30.000, Step: 30
115
- 2023-03-31 23:37:55 - r - INFO: - Episode: 67/200, Reward: 23.000, Step: 23
116
- 2023-03-31 23:37:55 - r - INFO: - Episode: 68/200, Reward: 26.000, Step: 26
117
- 2023-03-31 23:37:55 - r - INFO: - Episode: 69/200, Reward: 18.000, Step: 18
118
- 2023-03-31 23:37:55 - r - INFO: - Episode: 70/200, Reward: 20.000, Step: 20
119
- 2023-03-31 23:37:55 - r - INFO: - Episode: 71/200, Reward: 26.000, Step: 26
120
- 2023-03-31 23:37:55 - r - INFO: - Episode: 72/200, Reward: 16.000, Step: 16
121
- 2023-03-31 23:37:55 - r - INFO: - Episode: 73/200, Reward: 23.000, Step: 23
122
- 2023-03-31 23:37:56 - r - INFO: - Episode: 74/200, Reward: 30.000, Step: 30
123
- 2023-03-31 23:37:56 - r - INFO: - Episode: 75/200, Reward: 23.000, Step: 23
124
- 2023-03-31 23:37:56 - r - INFO: - Episode: 76/200, Reward: 26.000, Step: 26
125
- 2023-03-31 23:37:56 - r - INFO: - Episode: 77/200, Reward: 34.000, Step: 34
126
- 2023-03-31 23:37:56 - r - INFO: - Episode: 78/200, Reward: 29.000, Step: 29
127
- 2023-03-31 23:37:56 - r - INFO: - Episode: 79/200, Reward: 32.000, Step: 32
128
- 2023-03-31 23:37:56 - r - INFO: - Episode: 80/200, Reward: 23.000, Step: 23
129
- 2023-03-31 23:37:57 - r - INFO: - Episode: 81/200, Reward: 32.000, Step: 32
130
- 2023-03-31 23:37:57 - r - INFO: - Episode: 82/200, Reward: 72.000, Step: 72
131
- 2023-03-31 23:37:57 - r - INFO: - Episode: 83/200, Reward: 105.000, Step: 105
132
- 2023-03-31 23:37:58 - r - INFO: - Episode: 84/200, Reward: 63.000, Step: 63
133
- 2023-03-31 23:37:58 - r - INFO: - Episode: 85/200, Reward: 119.000, Step: 119
134
- 2023-03-31 23:37:59 - r - INFO: - Current episode 85 has the best eval reward: 86.500
135
- 2023-03-31 23:37:59 - r - INFO: - Episode: 86/200, Reward: 52.000, Step: 52
136
- 2023-03-31 23:37:59 - r - INFO: - Episode: 87/200, Reward: 155.000, Step: 155
137
- 2023-03-31 23:38:00 - r - INFO: - Episode: 88/200, Reward: 79.000, Step: 79
138
- 2023-03-31 23:38:00 - r - INFO: - Episode: 89/200, Reward: 44.000, Step: 44
139
- 2023-03-31 23:38:00 - r - INFO: - Episode: 90/200, Reward: 140.000, Step: 140
140
- 2023-03-31 23:38:01 - r - INFO: - Episode: 91/200, Reward: 86.000, Step: 86
141
- 2023-03-31 23:38:01 - r - INFO: - Episode: 92/200, Reward: 183.000, Step: 183
142
- 2023-03-31 23:38:02 - r - INFO: - Episode: 93/200, Reward: 112.000, Step: 112
143
- 2023-03-31 23:38:03 - r - INFO: - Episode: 94/200, Reward: 190.000, Step: 190
144
- 2023-03-31 23:38:03 - r - INFO: - Episode: 95/200, Reward: 200.000, Step: 200
145
- 2023-03-31 23:38:04 - r - INFO: - Current episode 95 has the best eval reward: 164.200
146
- 2023-03-31 23:38:05 - r - INFO: - Episode: 96/200, Reward: 157.000, Step: 157
147
- 2023-03-31 23:38:05 - r - INFO: - Episode: 97/200, Reward: 200.000, Step: 200
148
- 2023-03-31 23:38:06 - r - INFO: - Episode: 98/200, Reward: 200.000, Step: 200
149
- 2023-03-31 23:38:07 - r - INFO: - Episode: 99/200, Reward: 200.000, Step: 200
150
- 2023-03-31 23:38:08 - r - INFO: - Episode: 100/200, Reward: 200.000, Step: 200
151
- 2023-03-31 23:38:08 - r - INFO: - Current episode 100 has the best eval reward: 200.000
152
- 2023-03-31 23:38:09 - r - INFO: - Episode: 101/200, Reward: 200.000, Step: 200
153
- 2023-03-31 23:38:10 - r - INFO: - Episode: 102/200, Reward: 200.000, Step: 200
154
- 2023-03-31 23:38:11 - r - INFO: - Episode: 103/200, Reward: 200.000, Step: 200
155
- 2023-03-31 23:38:12 - r - INFO: - Episode: 104/200, Reward: 200.000, Step: 200
156
- 2023-03-31 23:38:12 - r - INFO: - Episode: 105/200, Reward: 200.000, Step: 200
157
- 2023-03-31 23:38:13 - r - INFO: - Current episode 105 has the best eval reward: 200.000
158
- 2023-03-31 23:38:14 - r - INFO: - Episode: 106/200, Reward: 200.000, Step: 200
159
- 2023-03-31 23:38:15 - r - INFO: - Episode: 107/200, Reward: 200.000, Step: 200
160
- 2023-03-31 23:38:16 - r - INFO: - Episode: 108/200, Reward: 200.000, Step: 200
161
- 2023-03-31 23:38:17 - r - INFO: - Episode: 109/200, Reward: 200.000, Step: 200
162
- 2023-03-31 23:38:17 - r - INFO: - Episode: 110/200, Reward: 200.000, Step: 200
163
- 2023-03-31 23:38:19 - r - INFO: - Episode: 111/200, Reward: 200.000, Step: 200
164
- 2023-03-31 23:38:20 - r - INFO: - Episode: 112/200, Reward: 200.000, Step: 200
165
- 2023-03-31 23:38:21 - r - INFO: - Episode: 113/200, Reward: 200.000, Step: 200
166
- 2023-03-31 23:38:22 - r - INFO: - Episode: 114/200, Reward: 200.000, Step: 200
167
- 2023-03-31 23:38:23 - r - INFO: - Episode: 115/200, Reward: 190.000, Step: 190
168
- 2023-03-31 23:38:24 - r - INFO: - Episode: 116/200, Reward: 200.000, Step: 200
169
- 2023-03-31 23:38:25 - r - INFO: - Episode: 117/200, Reward: 200.000, Step: 200
170
- 2023-03-31 23:38:26 - r - INFO: - Episode: 118/200, Reward: 200.000, Step: 200
171
- 2023-03-31 23:38:27 - r - INFO: - Episode: 119/200, Reward: 200.000, Step: 200
172
- 2023-03-31 23:38:28 - r - INFO: - Episode: 120/200, Reward: 200.000, Step: 200
173
- 2023-03-31 23:38:28 - r - INFO: - Current episode 120 has the best eval reward: 200.000
174
- 2023-03-31 23:38:29 - r - INFO: - Episode: 121/200, Reward: 200.000, Step: 200
175
- 2023-03-31 23:38:30 - r - INFO: - Episode: 122/200, Reward: 200.000, Step: 200
176
- 2023-03-31 23:38:31 - r - INFO: - Episode: 123/200, Reward: 200.000, Step: 200
177
- 2023-03-31 23:38:32 - r - INFO: - Episode: 124/200, Reward: 198.000, Step: 198
178
- 2023-03-31 23:38:33 - r - INFO: - Episode: 125/200, Reward: 200.000, Step: 200
179
- 2023-03-31 23:38:35 - r - INFO: - Episode: 126/200, Reward: 188.000, Step: 188
180
- 2023-03-31 23:38:36 - r - INFO: - Episode: 127/200, Reward: 200.000, Step: 200
181
- 2023-03-31 23:38:37 - r - INFO: - Episode: 128/200, Reward: 200.000, Step: 200
182
- 2023-03-31 23:38:38 - r - INFO: - Episode: 129/200, Reward: 175.000, Step: 175
183
- 2023-03-31 23:38:39 - r - INFO: - Episode: 130/200, Reward: 200.000, Step: 200
184
- 2023-03-31 23:38:41 - r - INFO: - Episode: 131/200, Reward: 200.000, Step: 200
185
- 2023-03-31 23:38:42 - r - INFO: - Episode: 132/200, Reward: 172.000, Step: 172
186
- 2023-03-31 23:38:43 - r - INFO: - Episode: 133/200, Reward: 200.000, Step: 200
187
- 2023-03-31 23:38:44 - r - INFO: - Episode: 134/200, Reward: 200.000, Step: 200
188
- 2023-03-31 23:38:45 - r - INFO: - Episode: 135/200, Reward: 179.000, Step: 179
189
- 2023-03-31 23:38:46 - r - INFO: - Episode: 136/200, Reward: 200.000, Step: 200
190
- 2023-03-31 23:38:47 - r - INFO: - Episode: 137/200, Reward: 200.000, Step: 200
191
- 2023-03-31 23:38:49 - r - INFO: - Episode: 138/200, Reward: 200.000, Step: 200
192
- 2023-03-31 23:38:49 - r - INFO: - Episode: 139/200, Reward: 161.000, Step: 161
193
- 2023-03-31 23:38:51 - r - INFO: - Episode: 140/200, Reward: 200.000, Step: 200
194
- 2023-03-31 23:38:52 - r - INFO: - Episode: 141/200, Reward: 150.000, Step: 150
195
- 2023-03-31 23:38:53 - r - INFO: - Episode: 142/200, Reward: 200.000, Step: 200
196
- 2023-03-31 23:38:54 - r - INFO: - Episode: 143/200, Reward: 200.000, Step: 200
197
- 2023-03-31 23:38:55 - r - INFO: - Episode: 144/200, Reward: 170.000, Step: 170
198
- 2023-03-31 23:38:56 - r - INFO: - Episode: 145/200, Reward: 200.000, Step: 200
199
- 2023-03-31 23:38:58 - r - INFO: - Episode: 146/200, Reward: 200.000, Step: 200
200
- 2023-03-31 23:38:59 - r - INFO: - Episode: 147/200, Reward: 160.000, Step: 160
201
- 2023-03-31 23:39:00 - r - INFO: - Episode: 148/200, Reward: 160.000, Step: 160
202
- 2023-03-31 23:39:01 - r - INFO: - Episode: 149/200, Reward: 200.000, Step: 200
203
- 2023-03-31 23:39:02 - r - INFO: - Episode: 150/200, Reward: 200.000, Step: 200
204
- 2023-03-31 23:39:04 - r - INFO: - Episode: 151/200, Reward: 177.000, Step: 177
205
- 2023-03-31 23:39:05 - r - INFO: - Episode: 152/200, Reward: 193.000, Step: 193
206
- 2023-03-31 23:39:06 - r - INFO: - Episode: 153/200, Reward: 182.000, Step: 182
207
- 2023-03-31 23:39:08 - r - INFO: - Episode: 154/200, Reward: 176.000, Step: 176
208
- 2023-03-31 23:39:09 - r - INFO: - Episode: 155/200, Reward: 200.000, Step: 200
209
- 2023-03-31 23:39:11 - r - INFO: - Episode: 156/200, Reward: 200.000, Step: 200
210
- 2023-03-31 23:39:12 - r - INFO: - Episode: 157/200, Reward: 171.000, Step: 171
211
- 2023-03-31 23:39:13 - r - INFO: - Episode: 158/200, Reward: 192.000, Step: 192
212
- 2023-03-31 23:39:14 - r - INFO: - Episode: 159/200, Reward: 200.000, Step: 200
213
- 2023-03-31 23:39:15 - r - INFO: - Episode: 160/200, Reward: 179.000, Step: 179
214
- 2023-03-31 23:39:17 - r - INFO: - Episode: 161/200, Reward: 177.000, Step: 177
215
- 2023-03-31 23:39:18 - r - INFO: - Episode: 162/200, Reward: 199.000, Step: 199
216
- 2023-03-31 23:39:19 - r - INFO: - Episode: 163/200, Reward: 200.000, Step: 200
217
- 2023-03-31 23:39:21 - r - INFO: - Episode: 164/200, Reward: 186.000, Step: 186
218
- 2023-03-31 23:39:22 - r - INFO: - Episode: 165/200, Reward: 178.000, Step: 178
219
- 2023-03-31 23:39:23 - r - INFO: - Episode: 166/200, Reward: 200.000, Step: 200
220
- 2023-03-31 23:39:25 - r - INFO: - Episode: 167/200, Reward: 200.000, Step: 200
221
- 2023-03-31 23:39:26 - r - INFO: - Episode: 168/200, Reward: 200.000, Step: 200
222
- 2023-03-31 23:39:27 - r - INFO: - Episode: 169/200, Reward: 179.000, Step: 179
223
- 2023-03-31 23:39:29 - r - INFO: - Episode: 170/200, Reward: 200.000, Step: 200
224
- 2023-03-31 23:39:31 - r - INFO: - Episode: 171/200, Reward: 200.000, Step: 200
225
- 2023-03-31 23:39:32 - r - INFO: - Episode: 172/200, Reward: 200.000, Step: 200
226
- 2023-03-31 23:39:34 - r - INFO: - Episode: 173/200, Reward: 200.000, Step: 200
227
- 2023-03-31 23:39:35 - r - INFO: - Episode: 174/200, Reward: 200.000, Step: 200
228
- 2023-03-31 23:39:36 - r - INFO: - Episode: 175/200, Reward: 200.000, Step: 200
229
- 2023-03-31 23:39:37 - r - INFO: - Current episode 175 has the best eval reward: 200.000
230
- 2023-03-31 23:39:38 - r - INFO: - Episode: 176/200, Reward: 200.000, Step: 200
231
- 2023-03-31 23:39:40 - r - INFO: - Episode: 177/200, Reward: 200.000, Step: 200
232
- 2023-03-31 23:39:41 - r - INFO: - Episode: 178/200, Reward: 200.000, Step: 200
233
- 2023-03-31 23:39:43 - r - INFO: - Episode: 179/200, Reward: 200.000, Step: 200
234
- 2023-03-31 23:39:44 - r - INFO: - Episode: 180/200, Reward: 200.000, Step: 200
235
- 2023-03-31 23:39:45 - r - INFO: - Current episode 180 has the best eval reward: 200.000
236
- 2023-03-31 23:39:46 - r - INFO: - Episode: 181/200, Reward: 200.000, Step: 200
237
- 2023-03-31 23:39:47 - r - INFO: - Episode: 182/200, Reward: 200.000, Step: 200
238
- 2023-03-31 23:39:49 - r - INFO: - Episode: 183/200, Reward: 200.000, Step: 200
239
- 2023-03-31 23:39:50 - r - INFO: - Episode: 184/200, Reward: 200.000, Step: 200
240
- 2023-03-31 23:39:52 - r - INFO: - Episode: 185/200, Reward: 200.000, Step: 200
241
- 2023-03-31 23:39:52 - r - INFO: - Current episode 185 has the best eval reward: 200.000
242
- 2023-03-31 23:39:54 - r - INFO: - Episode: 186/200, Reward: 200.000, Step: 200
243
- 2023-03-31 23:39:55 - r - INFO: - Episode: 187/200, Reward: 200.000, Step: 200
244
- 2023-03-31 23:39:57 - r - INFO: - Episode: 188/200, Reward: 200.000, Step: 200
245
- 2023-03-31 23:39:58 - r - INFO: - Episode: 189/200, Reward: 200.000, Step: 200
246
- 2023-03-31 23:40:00 - r - INFO: - Episode: 190/200, Reward: 200.000, Step: 200
247
- 2023-03-31 23:40:00 - r - INFO: - Current episode 190 has the best eval reward: 200.000
248
- 2023-03-31 23:40:02 - r - INFO: - Episode: 191/200, Reward: 200.000, Step: 200
249
- 2023-03-31 23:40:03 - r - INFO: - Episode: 192/200, Reward: 200.000, Step: 200
250
- 2023-03-31 23:40:05 - r - INFO: - Episode: 193/200, Reward: 200.000, Step: 200
251
- 2023-03-31 23:40:06 - r - INFO: - Episode: 194/200, Reward: 200.000, Step: 200
252
- 2023-03-31 23:40:08 - r - INFO: - Episode: 195/200, Reward: 200.000, Step: 200
253
- 2023-03-31 23:40:09 - r - INFO: - Current episode 195 has the best eval reward: 200.000
254
- 2023-03-31 23:40:10 - r - INFO: - Episode: 196/200, Reward: 200.000, Step: 200
255
- 2023-03-31 23:40:13 - r - INFO: - Episode: 197/200, Reward: 200.000, Step: 200
256
- 2023-03-31 23:40:17 - r - INFO: - Episode: 198/200, Reward: 200.000, Step: 200
257
- 2023-03-31 23:40:24 - r - INFO: - Episode: 199/200, Reward: 200.000, Step: 200
258
- 2023-03-31 23:40:29 - r - INFO: - Episode: 200/200, Reward: 200.000, Step: 200
259
- 2023-03-31 23:40:32 - r - INFO: - Current episode 200 has the best eval reward: 200.000
260
- 2023-03-31 23:40:32 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/models/checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4ba22dbfbe3211e48c45027f9c4efb9981cdf6ddbd972b57201fb68ca90d2fd
3
- size 272471
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/learning_curve.png DELETED
Binary file (50.7 kB)
 
ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/res.csv DELETED
@@ -1,201 +0,0 @@
1
- episodes,rewards,steps
2
- 0,16.0,16
3
- 1,15.0,15
4
- 2,25.0,25
5
- 3,16.0,16
6
- 4,20.0,20
7
- 5,10.0,10
8
- 6,24.0,24
9
- 7,20.0,20
10
- 8,20.0,20
11
- 9,25.0,25
12
- 10,9.0,9
13
- 11,23.0,23
14
- 12,14.0,14
15
- 13,12.0,12
16
- 14,11.0,11
17
- 15,17.0,17
18
- 16,10.0,10
19
- 17,17.0,17
20
- 18,10.0,10
21
- 19,10.0,10
22
- 20,22.0,22
23
- 21,18.0,18
24
- 22,13.0,13
25
- 23,13.0,13
26
- 24,9.0,9
27
- 25,10.0,10
28
- 26,13.0,13
29
- 27,11.0,11
30
- 28,10.0,10
31
- 29,12.0,12
32
- 30,14.0,14
33
- 31,11.0,11
34
- 32,18.0,18
35
- 33,10.0,10
36
- 34,10.0,10
37
- 35,8.0,8
38
- 36,12.0,12
39
- 37,10.0,10
40
- 38,11.0,11
41
- 39,10.0,10
42
- 40,9.0,9
43
- 41,12.0,12
44
- 42,9.0,9
45
- 43,13.0,13
46
- 44,13.0,13
47
- 45,12.0,12
48
- 46,10.0,10
49
- 47,10.0,10
50
- 48,10.0,10
51
- 49,13.0,13
52
- 50,10.0,10
53
- 51,15.0,15
54
- 52,18.0,18
55
- 53,18.0,18
56
- 54,16.0,16
57
- 55,47.0,47
58
- 56,87.0,87
59
- 57,20.0,20
60
- 58,47.0,47
61
- 59,17.0,17
62
- 60,37.0,37
63
- 61,43.0,43
64
- 62,33.0,33
65
- 63,18.0,18
66
- 64,29.0,29
67
- 65,30.0,30
68
- 66,23.0,23
69
- 67,26.0,26
70
- 68,18.0,18
71
- 69,20.0,20
72
- 70,26.0,26
73
- 71,16.0,16
74
- 72,23.0,23
75
- 73,30.0,30
76
- 74,23.0,23
77
- 75,26.0,26
78
- 76,34.0,34
79
- 77,29.0,29
80
- 78,32.0,32
81
- 79,23.0,23
82
- 80,32.0,32
83
- 81,72.0,72
84
- 82,105.0,105
85
- 83,63.0,63
86
- 84,119.0,119
87
- 85,52.0,52
88
- 86,155.0,155
89
- 87,79.0,79
90
- 88,44.0,44
91
- 89,140.0,140
92
- 90,86.0,86
93
- 91,183.0,183
94
- 92,112.0,112
95
- 93,190.0,190
96
- 94,200.0,200
97
- 95,157.0,157
98
- 96,200.0,200
99
- 97,200.0,200
100
- 98,200.0,200
101
- 99,200.0,200
102
- 100,200.0,200
103
- 101,200.0,200
104
- 102,200.0,200
105
- 103,200.0,200
106
- 104,200.0,200
107
- 105,200.0,200
108
- 106,200.0,200
109
- 107,200.0,200
110
- 108,200.0,200
111
- 109,200.0,200
112
- 110,200.0,200
113
- 111,200.0,200
114
- 112,200.0,200
115
- 113,200.0,200
116
- 114,190.0,190
117
- 115,200.0,200
118
- 116,200.0,200
119
- 117,200.0,200
120
- 118,200.0,200
121
- 119,200.0,200
122
- 120,200.0,200
123
- 121,200.0,200
124
- 122,200.0,200
125
- 123,198.0,198
126
- 124,200.0,200
127
- 125,188.0,188
128
- 126,200.0,200
129
- 127,200.0,200
130
- 128,175.0,175
131
- 129,200.0,200
132
- 130,200.0,200
133
- 131,172.0,172
134
- 132,200.0,200
135
- 133,200.0,200
136
- 134,179.0,179
137
- 135,200.0,200
138
- 136,200.0,200
139
- 137,200.0,200
140
- 138,161.0,161
141
- 139,200.0,200
142
- 140,150.0,150
143
- 141,200.0,200
144
- 142,200.0,200
145
- 143,170.0,170
146
- 144,200.0,200
147
- 145,200.0,200
148
- 146,160.0,160
149
- 147,160.0,160
150
- 148,200.0,200
151
- 149,200.0,200
152
- 150,177.0,177
153
- 151,193.0,193
154
- 152,182.0,182
155
- 153,176.0,176
156
- 154,200.0,200
157
- 155,200.0,200
158
- 156,171.0,171
159
- 157,192.0,192
160
- 158,200.0,200
161
- 159,179.0,179
162
- 160,177.0,177
163
- 161,199.0,199
164
- 162,200.0,200
165
- 163,186.0,186
166
- 164,178.0,178
167
- 165,200.0,200
168
- 166,200.0,200
169
- 167,200.0,200
170
- 168,179.0,179
171
- 169,200.0,200
172
- 170,200.0,200
173
- 171,200.0,200
174
- 172,200.0,200
175
- 173,200.0,200
176
- 174,200.0,200
177
- 175,200.0,200
178
- 176,200.0,200
179
- 177,200.0,200
180
- 178,200.0,200
181
- 179,200.0,200
182
- 180,200.0,200
183
- 181,200.0,200
184
- 182,200.0,200
185
- 183,200.0,200
186
- 184,200.0,200
187
- 185,200.0,200
188
- 186,200.0,200
189
- 187,200.0,200
190
- 188,200.0,200
191
- 189,200.0,200
192
- 190,200.0,200
193
- 191,200.0,200
194
- 192,200.0,200
195
- 193,200.0,200
196
- 194,200.0,200
197
- 195,200.0,200
198
- 196,200.0,200
199
- 197,200.0,200
200
- 198,200.0,200
201
- 199,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/tb_logs/events.out.tfevents.1680277069.DESKTOP-H34HQIQ.305216.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ead3d7b1b3efd92eecfb7f314b1922f372c92614db7819dbfa6e06770b12d37
3
- size 40
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/config.yaml DELETED
@@ -1,48 +0,0 @@
1
- general_cfg:
2
- algo_name: PER_DQN
3
- device: cuda
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: false
8
- load_path: Train_CartPole-v1_PER_DQN
9
- max_steps: 200
10
- mode: train
11
- new_step_api: true
12
- render: false
13
- save_fig: true
14
- seed: 1
15
- show_fig: false
16
- test_eps: 10
17
- train_eps: 200
18
- wrapper: null
19
- algo_cfg:
20
- batch_size: 64
21
- buffer_size: 100000
22
- epsilon_decay: 500
23
- epsilon_end: 0.01
24
- epsilon_start: 0.95
25
- gamma: 0.99
26
- hidden_dim: 256
27
- lr: 0.0001
28
- per_alpha: 0.6
29
- per_beta: 0.4
30
- per_beta_annealing: 0.001
31
- per_epsilon: 0.01
32
- target_update: 4
33
- value_layers:
34
- - activation: relu
35
- layer_dim:
36
- - n_states
37
- - 256
38
- layer_type: linear
39
- - activation: relu
40
- layer_dim:
41
- - 256
42
- - 256
43
- layer_type: linear
44
- - activation: none
45
- layer_dim:
46
- - 256
47
- - n_actions
48
- layer_type: linear
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/logs/log.txt DELETED
@@ -1,267 +0,0 @@
1
- 2023-03-31 22:58:15 - r - INFO: - Hyperparameters:
2
- 2023-03-31 22:58:15 - r - INFO: - ================================================================================
3
- 2023-03-31 22:58:15 - r - INFO: - Name Value Type
4
- 2023-03-31 22:58:15 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-03-31 22:58:15 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-03-31 22:58:15 - r - INFO: - wrapper None <class 'str'>
7
- 2023-03-31 22:58:15 - r - INFO: - render 0 <class 'bool'>
8
- 2023-03-31 22:58:15 - r - INFO: - algo_name PER_DQN <class 'str'>
9
- 2023-03-31 22:58:15 - r - INFO: - mode train <class 'str'>
10
- 2023-03-31 22:58:15 - r - INFO: - seed 1 <class 'int'>
11
- 2023-03-31 22:58:15 - r - INFO: - device cuda <class 'str'>
12
- 2023-03-31 22:58:15 - r - INFO: - train_eps 200 <class 'int'>
13
- 2023-03-31 22:58:15 - r - INFO: - test_eps 10 <class 'int'>
14
- 2023-03-31 22:58:15 - r - INFO: - eval_eps 10 <class 'int'>
15
- 2023-03-31 22:58:15 - r - INFO: - eval_per_episode 5 <class 'int'>
16
- 2023-03-31 22:58:15 - r - INFO: - max_steps 200 <class 'int'>
17
- 2023-03-31 22:58:15 - r - INFO: - load_checkpoint 0 <class 'bool'>
18
- 2023-03-31 22:58:15 - r - INFO: - load_path Train_CartPole-v1_PER_DQN <class 'str'>
19
- 2023-03-31 22:58:15 - r - INFO: - show_fig 0 <class 'bool'>
20
- 2023-03-31 22:58:15 - r - INFO: - save_fig 1 <class 'bool'>
21
- 2023-03-31 22:58:15 - r - INFO: - epsilon_start 0.95 <class 'float'>
22
- 2023-03-31 22:58:15 - r - INFO: - epsilon_end 0.01 <class 'float'>
23
- 2023-03-31 22:58:15 - r - INFO: - epsilon_decay 500 <class 'int'>
24
- 2023-03-31 22:58:15 - r - INFO: - hidden_dim 256 <class 'int'>
25
- 2023-03-31 22:58:15 - r - INFO: - gamma 0.99 <class 'float'>
26
- 2023-03-31 22:58:15 - r - INFO: - lr 0.0001 <class 'float'>
27
- 2023-03-31 22:58:15 - r - INFO: - buffer_size 100000 <class 'int'>
28
- 2023-03-31 22:58:15 - r - INFO: - per_alpha 0.6 <class 'float'>
29
- 2023-03-31 22:58:15 - r - INFO: - per_beta 0.4 <class 'float'>
30
- 2023-03-31 22:58:15 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
31
- 2023-03-31 22:58:15 - r - INFO: - per_epsilon 0.01 <class 'float'>
32
- 2023-03-31 22:58:15 - r - INFO: - batch_size 64 <class 'int'>
33
- 2023-03-31 22:58:15 - r - INFO: - target_update 4 <class 'int'>
34
- 2023-03-31 22:58:15 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
35
- 2023-03-31 22:58:15 - r - INFO: - task_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815 <class 'str'>
36
- 2023-03-31 22:58:15 - r - INFO: - res_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815/results <class 'str'>
37
- 2023-03-31 22:58:15 - r - INFO: - log_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815/logs <class 'str'>
38
- 2023-03-31 22:58:15 - r - INFO: - traj_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815/traj <class 'str'>
39
- 2023-03-31 22:58:15 - r - INFO: - tb_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815/tb_logs <class 'str'>
40
- 2023-03-31 22:58:15 - r - INFO: - ================================================================================
41
- 2023-03-31 22:58:15 - r - INFO: - n_states: 4, n_actions: 2
42
- 2023-03-31 22:58:16 - r - INFO: - Start training!
43
- 2023-03-31 22:58:16 - r - INFO: - Env: CartPole-v1, Algorithm: PER_DQN, Device: cuda
44
- 2023-03-31 22:58:17 - r - INFO: - Episode: 1/200, Reward: 15.000, Step: 15
45
- 2023-03-31 22:58:17 - r - INFO: - Episode: 2/200, Reward: 29.000, Step: 29
46
- 2023-03-31 22:58:17 - r - INFO: - Episode: 3/200, Reward: 13.000, Step: 13
47
- 2023-03-31 22:58:17 - r - INFO: - Episode: 4/200, Reward: 14.000, Step: 14
48
- 2023-03-31 22:58:17 - r - INFO: - Episode: 5/200, Reward: 11.000, Step: 11
49
- 2023-03-31 22:58:17 - r - INFO: - Current episode 5 has the best eval reward: 9.600
50
- 2023-03-31 22:58:17 - r - INFO: - Episode: 6/200, Reward: 39.000, Step: 39
51
- 2023-03-31 22:58:18 - r - INFO: - Episode: 7/200, Reward: 35.000, Step: 35
52
- 2023-03-31 22:58:18 - r - INFO: - Episode: 8/200, Reward: 16.000, Step: 16
53
- 2023-03-31 22:58:18 - r - INFO: - Episode: 9/200, Reward: 13.000, Step: 13
54
- 2023-03-31 22:58:18 - r - INFO: - Episode: 10/200, Reward: 12.000, Step: 12
55
- 2023-03-31 22:58:18 - r - INFO: - Episode: 11/200, Reward: 11.000, Step: 11
56
- 2023-03-31 22:58:18 - r - INFO: - Episode: 12/200, Reward: 34.000, Step: 34
57
- 2023-03-31 22:58:18 - r - INFO: - Episode: 13/200, Reward: 15.000, Step: 15
58
- 2023-03-31 22:58:18 - r - INFO: - Episode: 14/200, Reward: 23.000, Step: 23
59
- 2023-03-31 22:58:18 - r - INFO: - Episode: 15/200, Reward: 9.000, Step: 9
60
- 2023-03-31 22:58:19 - r - INFO: - Episode: 16/200, Reward: 19.000, Step: 19
61
- 2023-03-31 22:58:19 - r - INFO: - Episode: 17/200, Reward: 9.000, Step: 9
62
- 2023-03-31 22:58:19 - r - INFO: - Episode: 18/200, Reward: 10.000, Step: 10
63
- 2023-03-31 22:58:19 - r - INFO: - Episode: 19/200, Reward: 16.000, Step: 16
64
- 2023-03-31 22:58:19 - r - INFO: - Episode: 20/200, Reward: 19.000, Step: 19
65
- 2023-03-31 22:58:19 - r - INFO: - Current episode 20 has the best eval reward: 9.700
66
- 2023-03-31 22:58:19 - r - INFO: - Episode: 21/200, Reward: 11.000, Step: 11
67
- 2023-03-31 22:58:19 - r - INFO: - Episode: 22/200, Reward: 10.000, Step: 10
68
- 2023-03-31 22:58:19 - r - INFO: - Episode: 23/200, Reward: 14.000, Step: 14
69
- 2023-03-31 22:58:19 - r - INFO: - Episode: 24/200, Reward: 12.000, Step: 12
70
- 2023-03-31 22:58:19 - r - INFO: - Episode: 25/200, Reward: 16.000, Step: 16
71
- 2023-03-31 22:58:19 - r - INFO: - Episode: 26/200, Reward: 11.000, Step: 11
72
- 2023-03-31 22:58:19 - r - INFO: - Episode: 27/200, Reward: 10.000, Step: 10
73
- 2023-03-31 22:58:19 - r - INFO: - Episode: 28/200, Reward: 16.000, Step: 16
74
- 2023-03-31 22:58:19 - r - INFO: - Episode: 29/200, Reward: 12.000, Step: 12
75
- 2023-03-31 22:58:20 - r - INFO: - Episode: 30/200, Reward: 16.000, Step: 16
76
- 2023-03-31 22:58:20 - r - INFO: - Episode: 31/200, Reward: 11.000, Step: 11
77
- 2023-03-31 22:58:20 - r - INFO: - Episode: 32/200, Reward: 8.000, Step: 8
78
- 2023-03-31 22:58:20 - r - INFO: - Episode: 33/200, Reward: 8.000, Step: 8
79
- 2023-03-31 22:58:20 - r - INFO: - Episode: 34/200, Reward: 12.000, Step: 12
80
- 2023-03-31 22:58:20 - r - INFO: - Episode: 35/200, Reward: 10.000, Step: 10
81
- 2023-03-31 22:58:20 - r - INFO: - Episode: 36/200, Reward: 9.000, Step: 9
82
- 2023-03-31 22:58:20 - r - INFO: - Episode: 37/200, Reward: 11.000, Step: 11
83
- 2023-03-31 22:58:20 - r - INFO: - Episode: 38/200, Reward: 10.000, Step: 10
84
- 2023-03-31 22:58:20 - r - INFO: - Episode: 39/200, Reward: 11.000, Step: 11
85
- 2023-03-31 22:58:20 - r - INFO: - Episode: 40/200, Reward: 10.000, Step: 10
86
- 2023-03-31 22:58:20 - r - INFO: - Episode: 41/200, Reward: 10.000, Step: 10
87
- 2023-03-31 22:58:20 - r - INFO: - Episode: 42/200, Reward: 10.000, Step: 10
88
- 2023-03-31 22:58:20 - r - INFO: - Episode: 43/200, Reward: 10.000, Step: 10
89
- 2023-03-31 22:58:20 - r - INFO: - Episode: 44/200, Reward: 9.000, Step: 9
90
- 2023-03-31 22:58:20 - r - INFO: - Episode: 45/200, Reward: 11.000, Step: 11
91
- 2023-03-31 22:58:21 - r - INFO: - Current episode 45 has the best eval reward: 10.600
92
- 2023-03-31 22:58:21 - r - INFO: - Episode: 46/200, Reward: 10.000, Step: 10
93
- 2023-03-31 22:58:21 - r - INFO: - Episode: 47/200, Reward: 10.000, Step: 10
94
- 2023-03-31 22:58:21 - r - INFO: - Episode: 48/200, Reward: 11.000, Step: 11
95
- 2023-03-31 22:58:21 - r - INFO: - Episode: 49/200, Reward: 10.000, Step: 10
96
- 2023-03-31 22:58:21 - r - INFO: - Episode: 50/200, Reward: 13.000, Step: 13
97
- 2023-03-31 22:58:21 - r - INFO: - Episode: 51/200, Reward: 18.000, Step: 18
98
- 2023-03-31 22:58:21 - r - INFO: - Episode: 52/200, Reward: 12.000, Step: 12
99
- 2023-03-31 22:58:21 - r - INFO: - Episode: 53/200, Reward: 10.000, Step: 10
100
- 2023-03-31 22:58:21 - r - INFO: - Episode: 54/200, Reward: 10.000, Step: 10
101
- 2023-03-31 22:58:21 - r - INFO: - Episode: 55/200, Reward: 11.000, Step: 11
102
- 2023-03-31 22:58:21 - r - INFO: - Episode: 56/200, Reward: 8.000, Step: 8
103
- 2023-03-31 22:58:21 - r - INFO: - Episode: 57/200, Reward: 16.000, Step: 16
104
- 2023-03-31 22:58:21 - r - INFO: - Episode: 58/200, Reward: 11.000, Step: 11
105
- 2023-03-31 22:58:21 - r - INFO: - Episode: 59/200, Reward: 9.000, Step: 9
106
- 2023-03-31 22:58:22 - r - INFO: - Episode: 60/200, Reward: 9.000, Step: 9
107
- 2023-03-31 22:58:22 - r - INFO: - Episode: 61/200, Reward: 10.000, Step: 10
108
- 2023-03-31 22:58:22 - r - INFO: - Episode: 62/200, Reward: 10.000, Step: 10
109
- 2023-03-31 22:58:22 - r - INFO: - Episode: 63/200, Reward: 9.000, Step: 9
110
- 2023-03-31 22:58:22 - r - INFO: - Episode: 64/200, Reward: 8.000, Step: 8
111
- 2023-03-31 22:58:22 - r - INFO: - Episode: 65/200, Reward: 10.000, Step: 10
112
- 2023-03-31 22:58:22 - r - INFO: - Episode: 66/200, Reward: 9.000, Step: 9
113
- 2023-03-31 22:58:22 - r - INFO: - Episode: 67/200, Reward: 10.000, Step: 10
114
- 2023-03-31 22:58:22 - r - INFO: - Episode: 68/200, Reward: 12.000, Step: 12
115
- 2023-03-31 22:58:22 - r - INFO: - Episode: 69/200, Reward: 12.000, Step: 12
116
- 2023-03-31 22:58:22 - r - INFO: - Episode: 70/200, Reward: 12.000, Step: 12
117
- 2023-03-31 22:58:22 - r - INFO: - Current episode 70 has the best eval reward: 12.500
118
- 2023-03-31 22:58:22 - r - INFO: - Episode: 71/200, Reward: 10.000, Step: 10
119
- 2023-03-31 22:58:22 - r - INFO: - Episode: 72/200, Reward: 13.000, Step: 13
120
- 2023-03-31 22:58:22 - r - INFO: - Episode: 73/200, Reward: 20.000, Step: 20
121
- 2023-03-31 22:58:23 - r - INFO: - Episode: 74/200, Reward: 12.000, Step: 12
122
- 2023-03-31 22:58:23 - r - INFO: - Episode: 75/200, Reward: 13.000, Step: 13
123
- 2023-03-31 22:58:23 - r - INFO: - Current episode 75 has the best eval reward: 13.000
124
- 2023-03-31 22:58:23 - r - INFO: - Episode: 76/200, Reward: 15.000, Step: 15
125
- 2023-03-31 22:58:23 - r - INFO: - Episode: 77/200, Reward: 13.000, Step: 13
126
- 2023-03-31 22:58:23 - r - INFO: - Episode: 78/200, Reward: 19.000, Step: 19
127
- 2023-03-31 22:58:23 - r - INFO: - Episode: 79/200, Reward: 14.000, Step: 14
128
- 2023-03-31 22:58:23 - r - INFO: - Episode: 80/200, Reward: 12.000, Step: 12
129
- 2023-03-31 22:58:23 - r - INFO: - Current episode 80 has the best eval reward: 15.400
130
- 2023-03-31 22:58:23 - r - INFO: - Episode: 81/200, Reward: 13.000, Step: 13
131
- 2023-03-31 22:58:24 - r - INFO: - Episode: 82/200, Reward: 14.000, Step: 14
132
- 2023-03-31 22:58:24 - r - INFO: - Episode: 83/200, Reward: 13.000, Step: 13
133
- 2023-03-31 22:58:24 - r - INFO: - Episode: 84/200, Reward: 13.000, Step: 13
134
- 2023-03-31 22:58:24 - r - INFO: - Episode: 85/200, Reward: 14.000, Step: 14
135
- 2023-03-31 22:58:24 - r - INFO: - Current episode 85 has the best eval reward: 16.000
136
- 2023-03-31 22:58:24 - r - INFO: - Episode: 86/200, Reward: 18.000, Step: 18
137
- 2023-03-31 22:58:24 - r - INFO: - Episode: 87/200, Reward: 23.000, Step: 23
138
- 2023-03-31 22:58:24 - r - INFO: - Episode: 88/200, Reward: 13.000, Step: 13
139
- 2023-03-31 22:58:24 - r - INFO: - Episode: 89/200, Reward: 13.000, Step: 13
140
- 2023-03-31 22:58:25 - r - INFO: - Episode: 90/200, Reward: 21.000, Step: 21
141
- 2023-03-31 22:58:25 - r - INFO: - Current episode 90 has the best eval reward: 18.800
142
- 2023-03-31 22:58:25 - r - INFO: - Episode: 91/200, Reward: 17.000, Step: 17
143
- 2023-03-31 22:58:25 - r - INFO: - Episode: 92/200, Reward: 23.000, Step: 23
144
- 2023-03-31 22:58:25 - r - INFO: - Episode: 93/200, Reward: 16.000, Step: 16
145
- 2023-03-31 22:58:25 - r - INFO: - Episode: 94/200, Reward: 22.000, Step: 22
146
- 2023-03-31 22:58:25 - r - INFO: - Episode: 95/200, Reward: 23.000, Step: 23
147
- 2023-03-31 22:58:26 - r - INFO: - Current episode 95 has the best eval reward: 22.000
148
- 2023-03-31 22:58:26 - r - INFO: - Episode: 96/200, Reward: 14.000, Step: 14
149
- 2023-03-31 22:58:26 - r - INFO: - Episode: 97/200, Reward: 20.000, Step: 20
150
- 2023-03-31 22:58:26 - r - INFO: - Episode: 98/200, Reward: 24.000, Step: 24
151
- 2023-03-31 22:58:26 - r - INFO: - Episode: 99/200, Reward: 21.000, Step: 21
152
- 2023-03-31 22:58:26 - r - INFO: - Episode: 100/200, Reward: 22.000, Step: 22
153
- 2023-03-31 22:58:27 - r - INFO: - Episode: 101/200, Reward: 21.000, Step: 21
154
- 2023-03-31 22:58:27 - r - INFO: - Episode: 102/200, Reward: 19.000, Step: 19
155
- 2023-03-31 22:58:27 - r - INFO: - Episode: 103/200, Reward: 18.000, Step: 18
156
- 2023-03-31 22:58:27 - r - INFO: - Episode: 104/200, Reward: 18.000, Step: 18
157
- 2023-03-31 22:58:27 - r - INFO: - Episode: 105/200, Reward: 23.000, Step: 23
158
- 2023-03-31 22:58:27 - r - INFO: - Episode: 106/200, Reward: 16.000, Step: 16
159
- 2023-03-31 22:58:27 - r - INFO: - Episode: 107/200, Reward: 19.000, Step: 19
160
- 2023-03-31 22:58:28 - r - INFO: - Episode: 108/200, Reward: 18.000, Step: 18
161
- 2023-03-31 22:58:28 - r - INFO: - Episode: 109/200, Reward: 21.000, Step: 21
162
- 2023-03-31 22:58:28 - r - INFO: - Episode: 110/200, Reward: 24.000, Step: 24
163
- 2023-03-31 22:58:28 - r - INFO: - Current episode 110 has the best eval reward: 23.300
164
- 2023-03-31 22:58:28 - r - INFO: - Episode: 111/200, Reward: 24.000, Step: 24
165
- 2023-03-31 22:58:28 - r - INFO: - Episode: 112/200, Reward: 27.000, Step: 27
166
- 2023-03-31 22:58:29 - r - INFO: - Episode: 113/200, Reward: 35.000, Step: 35
167
- 2023-03-31 22:58:29 - r - INFO: - Episode: 114/200, Reward: 23.000, Step: 23
168
- 2023-03-31 22:58:29 - r - INFO: - Episode: 115/200, Reward: 29.000, Step: 29
169
- 2023-03-31 22:58:29 - r - INFO: - Current episode 115 has the best eval reward: 24.100
170
- 2023-03-31 22:58:29 - r - INFO: - Episode: 116/200, Reward: 25.000, Step: 25
171
- 2023-03-31 22:58:29 - r - INFO: - Episode: 117/200, Reward: 20.000, Step: 20
172
- 2023-03-31 22:58:30 - r - INFO: - Episode: 118/200, Reward: 23.000, Step: 23
173
- 2023-03-31 22:58:30 - r - INFO: - Episode: 119/200, Reward: 21.000, Step: 21
174
- 2023-03-31 22:58:30 - r - INFO: - Episode: 120/200, Reward: 23.000, Step: 23
175
- 2023-03-31 22:58:30 - r - INFO: - Current episode 120 has the best eval reward: 24.500
176
- 2023-03-31 22:58:30 - r - INFO: - Episode: 121/200, Reward: 17.000, Step: 17
177
- 2023-03-31 22:58:30 - r - INFO: - Episode: 122/200, Reward: 19.000, Step: 19
178
- 2023-03-31 22:58:30 - r - INFO: - Episode: 123/200, Reward: 19.000, Step: 19
179
- 2023-03-31 22:58:31 - r - INFO: - Episode: 124/200, Reward: 21.000, Step: 21
180
- 2023-03-31 22:58:31 - r - INFO: - Episode: 125/200, Reward: 24.000, Step: 24
181
- 2023-03-31 22:58:31 - r - INFO: - Current episode 125 has the best eval reward: 25.600
182
- 2023-03-31 22:58:31 - r - INFO: - Episode: 126/200, Reward: 23.000, Step: 23
183
- 2023-03-31 22:58:31 - r - INFO: - Episode: 127/200, Reward: 22.000, Step: 22
184
- 2023-03-31 22:58:31 - r - INFO: - Episode: 128/200, Reward: 23.000, Step: 23
185
- 2023-03-31 22:58:31 - r - INFO: - Episode: 129/200, Reward: 22.000, Step: 22
186
- 2023-03-31 22:58:32 - r - INFO: - Episode: 130/200, Reward: 28.000, Step: 28
187
- 2023-03-31 22:58:32 - r - INFO: - Current episode 130 has the best eval reward: 29.800
188
- 2023-03-31 22:58:32 - r - INFO: - Episode: 131/200, Reward: 32.000, Step: 32
189
- 2023-03-31 22:58:32 - r - INFO: - Episode: 132/200, Reward: 35.000, Step: 35
190
- 2023-03-31 22:58:32 - r - INFO: - Episode: 133/200, Reward: 27.000, Step: 27
191
- 2023-03-31 22:58:33 - r - INFO: - Episode: 134/200, Reward: 24.000, Step: 24
192
- 2023-03-31 22:58:33 - r - INFO: - Episode: 135/200, Reward: 37.000, Step: 37
193
- 2023-03-31 22:58:33 - r - INFO: - Current episode 135 has the best eval reward: 35.700
194
- 2023-03-31 22:58:33 - r - INFO: - Episode: 136/200, Reward: 33.000, Step: 33
195
- 2023-03-31 22:58:34 - r - INFO: - Episode: 137/200, Reward: 39.000, Step: 39
196
- 2023-03-31 22:58:34 - r - INFO: - Episode: 138/200, Reward: 24.000, Step: 24
197
- 2023-03-31 22:58:34 - r - INFO: - Episode: 139/200, Reward: 24.000, Step: 24
198
- 2023-03-31 22:58:34 - r - INFO: - Episode: 140/200, Reward: 40.000, Step: 40
199
- 2023-03-31 22:58:35 - r - INFO: - Current episode 140 has the best eval reward: 40.200
200
- 2023-03-31 22:58:35 - r - INFO: - Episode: 141/200, Reward: 31.000, Step: 31
201
- 2023-03-31 22:58:35 - r - INFO: - Episode: 142/200, Reward: 30.000, Step: 30
202
- 2023-03-31 22:58:35 - r - INFO: - Episode: 143/200, Reward: 25.000, Step: 25
203
- 2023-03-31 22:58:35 - r - INFO: - Episode: 144/200, Reward: 23.000, Step: 23
204
- 2023-03-31 22:58:35 - r - INFO: - Episode: 145/200, Reward: 29.000, Step: 29
205
- 2023-03-31 22:58:36 - r - INFO: - Current episode 145 has the best eval reward: 58.500
206
- 2023-03-31 22:58:36 - r - INFO: - Episode: 146/200, Reward: 51.000, Step: 51
207
- 2023-03-31 22:58:37 - r - INFO: - Episode: 147/200, Reward: 73.000, Step: 73
208
- 2023-03-31 22:58:37 - r - INFO: - Episode: 148/200, Reward: 38.000, Step: 38
209
- 2023-03-31 22:58:37 - r - INFO: - Episode: 149/200, Reward: 37.000, Step: 37
210
- 2023-03-31 22:58:37 - r - INFO: - Episode: 150/200, Reward: 32.000, Step: 32
211
- 2023-03-31 22:58:38 - r - INFO: - Episode: 151/200, Reward: 43.000, Step: 43
212
- 2023-03-31 22:58:38 - r - INFO: - Episode: 152/200, Reward: 29.000, Step: 29
213
- 2023-03-31 22:58:38 - r - INFO: - Episode: 153/200, Reward: 33.000, Step: 33
214
- 2023-03-31 22:58:38 - r - INFO: - Episode: 154/200, Reward: 31.000, Step: 31
215
- 2023-03-31 22:58:39 - r - INFO: - Episode: 155/200, Reward: 41.000, Step: 41
216
- 2023-03-31 22:58:39 - r - INFO: - Episode: 156/200, Reward: 79.000, Step: 79
217
- 2023-03-31 22:58:40 - r - INFO: - Episode: 157/200, Reward: 47.000, Step: 47
218
- 2023-03-31 22:58:40 - r - INFO: - Episode: 158/200, Reward: 32.000, Step: 32
219
- 2023-03-31 22:58:40 - r - INFO: - Episode: 159/200, Reward: 36.000, Step: 36
220
- 2023-03-31 22:58:41 - r - INFO: - Episode: 160/200, Reward: 76.000, Step: 76
221
- 2023-03-31 22:58:41 - r - INFO: - Current episode 160 has the best eval reward: 75.000
222
- 2023-03-31 22:58:41 - r - INFO: - Episode: 161/200, Reward: 73.000, Step: 73
223
- 2023-03-31 22:58:42 - r - INFO: - Episode: 162/200, Reward: 59.000, Step: 59
224
- 2023-03-31 22:58:42 - r - INFO: - Episode: 163/200, Reward: 102.000, Step: 102
225
- 2023-03-31 22:58:43 - r - INFO: - Episode: 164/200, Reward: 87.000, Step: 87
226
- 2023-03-31 22:58:44 - r - INFO: - Episode: 165/200, Reward: 94.000, Step: 94
227
- 2023-03-31 22:58:44 - r - INFO: - Current episode 165 has the best eval reward: 143.300
228
- 2023-03-31 22:58:45 - r - INFO: - Episode: 166/200, Reward: 116.000, Step: 116
229
- 2023-03-31 22:58:46 - r - INFO: - Episode: 167/200, Reward: 135.000, Step: 135
230
- 2023-03-31 22:58:47 - r - INFO: - Episode: 168/200, Reward: 140.000, Step: 140
231
- 2023-03-31 22:58:48 - r - INFO: - Episode: 169/200, Reward: 167.000, Step: 167
232
- 2023-03-31 22:58:49 - r - INFO: - Episode: 170/200, Reward: 128.000, Step: 128
233
- 2023-03-31 22:58:50 - r - INFO: - Current episode 170 has the best eval reward: 157.400
234
- 2023-03-31 22:58:51 - r - INFO: - Episode: 171/200, Reward: 200.000, Step: 200
235
- 2023-03-31 22:58:52 - r - INFO: - Episode: 172/200, Reward: 135.000, Step: 135
236
- 2023-03-31 22:58:53 - r - INFO: - Episode: 173/200, Reward: 163.000, Step: 163
237
- 2023-03-31 22:58:54 - r - INFO: - Episode: 174/200, Reward: 180.000, Step: 180
238
- 2023-03-31 22:58:56 - r - INFO: - Episode: 175/200, Reward: 185.000, Step: 185
239
- 2023-03-31 22:58:56 - r - INFO: - Current episode 175 has the best eval reward: 165.700
240
- 2023-03-31 22:58:57 - r - INFO: - Episode: 176/200, Reward: 200.000, Step: 200
241
- 2023-03-31 22:58:59 - r - INFO: - Episode: 177/200, Reward: 200.000, Step: 200
242
- 2023-03-31 22:59:00 - r - INFO: - Episode: 178/200, Reward: 200.000, Step: 200
243
- 2023-03-31 22:59:01 - r - INFO: - Episode: 179/200, Reward: 200.000, Step: 200
244
- 2023-03-31 22:59:03 - r - INFO: - Episode: 180/200, Reward: 200.000, Step: 200
245
- 2023-03-31 22:59:04 - r - INFO: - Current episode 180 has the best eval reward: 200.000
246
- 2023-03-31 22:59:05 - r - INFO: - Episode: 181/200, Reward: 200.000, Step: 200
247
- 2023-03-31 22:59:06 - r - INFO: - Episode: 182/200, Reward: 200.000, Step: 200
248
- 2023-03-31 22:59:08 - r - INFO: - Episode: 183/200, Reward: 200.000, Step: 200
249
- 2023-03-31 22:59:09 - r - INFO: - Episode: 184/200, Reward: 200.000, Step: 200
250
- 2023-03-31 22:59:10 - r - INFO: - Episode: 185/200, Reward: 200.000, Step: 200
251
- 2023-03-31 22:59:12 - r - INFO: - Episode: 186/200, Reward: 200.000, Step: 200
252
- 2023-03-31 22:59:18 - r - INFO: - Episode: 187/200, Reward: 200.000, Step: 200
253
- 2023-03-31 22:59:22 - r - INFO: - Episode: 188/200, Reward: 200.000, Step: 200
254
- 2023-03-31 22:59:24 - r - INFO: - Episode: 189/200, Reward: 200.000, Step: 200
255
- 2023-03-31 22:59:26 - r - INFO: - Episode: 190/200, Reward: 200.000, Step: 200
256
- 2023-03-31 22:59:28 - r - INFO: - Episode: 191/200, Reward: 200.000, Step: 200
257
- 2023-03-31 22:59:29 - r - INFO: - Episode: 192/200, Reward: 200.000, Step: 200
258
- 2023-03-31 22:59:30 - r - INFO: - Episode: 193/200, Reward: 200.000, Step: 200
259
- 2023-03-31 22:59:32 - r - INFO: - Episode: 194/200, Reward: 200.000, Step: 200
260
- 2023-03-31 22:59:33 - r - INFO: - Episode: 195/200, Reward: 200.000, Step: 200
261
- 2023-03-31 22:59:35 - r - INFO: - Episode: 196/200, Reward: 200.000, Step: 200
262
- 2023-03-31 22:59:37 - r - INFO: - Episode: 197/200, Reward: 200.000, Step: 200
263
- 2023-03-31 22:59:38 - r - INFO: - Episode: 198/200, Reward: 200.000, Step: 200
264
- 2023-03-31 22:59:39 - r - INFO: - Episode: 199/200, Reward: 200.000, Step: 200
265
- 2023-03-31 22:59:40 - r - INFO: - Episode: 200/200, Reward: 200.000, Step: 200
266
- 2023-03-31 22:59:41 - r - INFO: - Current episode 200 has the best eval reward: 200.000
267
- 2023-03-31 22:59:41 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/models/checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c438616b97ca890557a9e9b1cd42decfc5decc64e5aee660d89158290e92683d
3
- size 272471
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/learning_curve.png DELETED
Binary file (46.2 kB)
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/res.csv DELETED
@@ -1,201 +0,0 @@
1
- episodes,rewards,steps
2
- 0,15.0,15
3
- 1,29.0,29
4
- 2,13.0,13
5
- 3,14.0,14
6
- 4,11.0,11
7
- 5,39.0,39
8
- 6,35.0,35
9
- 7,16.0,16
10
- 8,13.0,13
11
- 9,12.0,12
12
- 10,11.0,11
13
- 11,34.0,34
14
- 12,15.0,15
15
- 13,23.0,23
16
- 14,9.0,9
17
- 15,19.0,19
18
- 16,9.0,9
19
- 17,10.0,10
20
- 18,16.0,16
21
- 19,19.0,19
22
- 20,11.0,11
23
- 21,10.0,10
24
- 22,14.0,14
25
- 23,12.0,12
26
- 24,16.0,16
27
- 25,11.0,11
28
- 26,10.0,10
29
- 27,16.0,16
30
- 28,12.0,12
31
- 29,16.0,16
32
- 30,11.0,11
33
- 31,8.0,8
34
- 32,8.0,8
35
- 33,12.0,12
36
- 34,10.0,10
37
- 35,9.0,9
38
- 36,11.0,11
39
- 37,10.0,10
40
- 38,11.0,11
41
- 39,10.0,10
42
- 40,10.0,10
43
- 41,10.0,10
44
- 42,10.0,10
45
- 43,9.0,9
46
- 44,11.0,11
47
- 45,10.0,10
48
- 46,10.0,10
49
- 47,11.0,11
50
- 48,10.0,10
51
- 49,13.0,13
52
- 50,18.0,18
53
- 51,12.0,12
54
- 52,10.0,10
55
- 53,10.0,10
56
- 54,11.0,11
57
- 55,8.0,8
58
- 56,16.0,16
59
- 57,11.0,11
60
- 58,9.0,9
61
- 59,9.0,9
62
- 60,10.0,10
63
- 61,10.0,10
64
- 62,9.0,9
65
- 63,8.0,8
66
- 64,10.0,10
67
- 65,9.0,9
68
- 66,10.0,10
69
- 67,12.0,12
70
- 68,12.0,12
71
- 69,12.0,12
72
- 70,10.0,10
73
- 71,13.0,13
74
- 72,20.0,20
75
- 73,12.0,12
76
- 74,13.0,13
77
- 75,15.0,15
78
- 76,13.0,13
79
- 77,19.0,19
80
- 78,14.0,14
81
- 79,12.0,12
82
- 80,13.0,13
83
- 81,14.0,14
84
- 82,13.0,13
85
- 83,13.0,13
86
- 84,14.0,14
87
- 85,18.0,18
88
- 86,23.0,23
89
- 87,13.0,13
90
- 88,13.0,13
91
- 89,21.0,21
92
- 90,17.0,17
93
- 91,23.0,23
94
- 92,16.0,16
95
- 93,22.0,22
96
- 94,23.0,23
97
- 95,14.0,14
98
- 96,20.0,20
99
- 97,24.0,24
100
- 98,21.0,21
101
- 99,22.0,22
102
- 100,21.0,21
103
- 101,19.0,19
104
- 102,18.0,18
105
- 103,18.0,18
106
- 104,23.0,23
107
- 105,16.0,16
108
- 106,19.0,19
109
- 107,18.0,18
110
- 108,21.0,21
111
- 109,24.0,24
112
- 110,24.0,24
113
- 111,27.0,27
114
- 112,35.0,35
115
- 113,23.0,23
116
- 114,29.0,29
117
- 115,25.0,25
118
- 116,20.0,20
119
- 117,23.0,23
120
- 118,21.0,21
121
- 119,23.0,23
122
- 120,17.0,17
123
- 121,19.0,19
124
- 122,19.0,19
125
- 123,21.0,21
126
- 124,24.0,24
127
- 125,23.0,23
128
- 126,22.0,22
129
- 127,23.0,23
130
- 128,22.0,22
131
- 129,28.0,28
132
- 130,32.0,32
133
- 131,35.0,35
134
- 132,27.0,27
135
- 133,24.0,24
136
- 134,37.0,37
137
- 135,33.0,33
138
- 136,39.0,39
139
- 137,24.0,24
140
- 138,24.0,24
141
- 139,40.0,40
142
- 140,31.0,31
143
- 141,30.0,30
144
- 142,25.0,25
145
- 143,23.0,23
146
- 144,29.0,29
147
- 145,51.0,51
148
- 146,73.0,73
149
- 147,38.0,38
150
- 148,37.0,37
151
- 149,32.0,32
152
- 150,43.0,43
153
- 151,29.0,29
154
- 152,33.0,33
155
- 153,31.0,31
156
- 154,41.0,41
157
- 155,79.0,79
158
- 156,47.0,47
159
- 157,32.0,32
160
- 158,36.0,36
161
- 159,76.0,76
162
- 160,73.0,73
163
- 161,59.0,59
164
- 162,102.0,102
165
- 163,87.0,87
166
- 164,94.0,94
167
- 165,116.0,116
168
- 166,135.0,135
169
- 167,140.0,140
170
- 168,167.0,167
171
- 169,128.0,128
172
- 170,200.0,200
173
- 171,135.0,135
174
- 172,163.0,163
175
- 173,180.0,180
176
- 174,185.0,185
177
- 175,200.0,200
178
- 176,200.0,200
179
- 177,200.0,200
180
- 178,200.0,200
181
- 179,200.0,200
182
- 180,200.0,200
183
- 181,200.0,200
184
- 182,200.0,200
185
- 183,200.0,200
186
- 184,200.0,200
187
- 185,200.0,200
188
- 186,200.0,200
189
- 187,200.0,200
190
- 188,200.0,200
191
- 189,200.0,200
192
- 190,200.0,200
193
- 191,200.0,200
194
- 192,200.0,200
195
- 193,200.0,200
196
- 194,200.0,200
197
- 195,200.0,200
198
- 196,200.0,200
199
- 197,200.0,200
200
- 198,200.0,200
201
- 199,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/tb_logs/events.out.tfevents.1680274695.DESKTOP-H34HQIQ.317208.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4daaaaabe093b8f9d6baf9504a0c5b9e14d2ea89477d20323c5eacbf5942b64
3
- size 40
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/config.yaml DELETED
@@ -1,55 +0,0 @@
1
- general_cfg:
2
- algo_name: PER_DQN
3
- device: cpu
4
- env_name: gym
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: false
8
- load_path: Train_CartPole-v1_PER_DQN
9
- max_steps: 200
10
- mode: train
11
- mp_backend: mp
12
- n_workers: 2
13
- new_step_api: true
14
- render: false
15
- render_mode: human
16
- save_fig: true
17
- seed: 1
18
- show_fig: false
19
- test_eps: 10
20
- train_eps: 200
21
- wrapper: null
22
- algo_cfg:
23
- batch_size: 64
24
- buffer_size: 100000
25
- epsilon_decay: 500
26
- epsilon_end: 0.01
27
- epsilon_start: 0.95
28
- gamma: 0.99
29
- hidden_dim: 256
30
- lr: 0.0001
31
- per_alpha: 0.6
32
- per_beta: 0.4
33
- per_beta_annealing: 0.001
34
- per_epsilon: 0.01
35
- target_update: 4
36
- value_layers:
37
- - activation: relu
38
- layer_dim:
39
- - n_states
40
- - 256
41
- layer_type: linear
42
- - activation: relu
43
- layer_dim:
44
- - 256
45
- - 256
46
- layer_type: linear
47
- - activation: none
48
- layer_dim:
49
- - 256
50
- - n_actions
51
- layer_type: linear
52
- env_cfg:
53
- id: CartPole-v1
54
- new_step_api: true
55
- render_mode: null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/logs/log.txt DELETED
@@ -1,48 +0,0 @@
1
- 2023-04-15 21:50:02 - r - INFO: - Hyperparameters:
2
- 2023-04-15 21:50:02 - r - INFO: - ================================================================================
3
- 2023-04-15 21:50:02 - r - INFO: - Name Value Type
4
- 2023-04-15 21:50:02 - r - INFO: - env_name gym <class 'str'>
5
- 2023-04-15 21:50:02 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-15 21:50:02 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-15 21:50:02 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-15 21:50:02 - r - INFO: - render_mode None <class 'str'>
9
- 2023-04-15 21:50:02 - r - INFO: - algo_name PER_DQN <class 'str'>
10
- 2023-04-15 21:50:02 - r - INFO: - mode train <class 'str'>
11
- 2023-04-15 21:50:02 - r - INFO: - mp_backend mp <class 'str'>
12
- 2023-04-15 21:50:02 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-15 21:50:02 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-15 21:50:02 - r - INFO: - train_eps 200 <class 'int'>
15
- 2023-04-15 21:50:02 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-15 21:50:02 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-15 21:50:02 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-15 21:50:02 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-15 21:50:02 - r - INFO: - load_checkpoint 0 <class 'bool'>
20
- 2023-04-15 21:50:02 - r - INFO: - load_path Train_CartPole-v1_PER_DQN <class 'str'>
21
- 2023-04-15 21:50:02 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-15 21:50:02 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-15 21:50:02 - r - INFO: - n_workers 2 <class 'int'>
24
- 2023-04-15 21:50:02 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-15 21:50:02 - r - INFO: - epsilon_end 0.01 <class 'float'>
26
- 2023-04-15 21:50:02 - r - INFO: - epsilon_decay 500 <class 'int'>
27
- 2023-04-15 21:50:02 - r - INFO: - hidden_dim 256 <class 'int'>
28
- 2023-04-15 21:50:02 - r - INFO: - gamma 0.99 <class 'float'>
29
- 2023-04-15 21:50:02 - r - INFO: - lr 0.0001 <class 'float'>
30
- 2023-04-15 21:50:02 - r - INFO: - buffer_size 100000 <class 'int'>
31
- 2023-04-15 21:50:02 - r - INFO: - per_alpha 0.6 <class 'float'>
32
- 2023-04-15 21:50:02 - r - INFO: - per_beta 0.4 <class 'float'>
33
- 2023-04-15 21:50:02 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
34
- 2023-04-15 21:50:02 - r - INFO: - per_epsilon 0.01 <class 'float'>
35
- 2023-04-15 21:50:02 - r - INFO: - batch_size 64 <class 'int'>
36
- 2023-04-15 21:50:02 - r - INFO: - target_update 4 <class 'int'>
37
- 2023-04-15 21:50:02 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
38
- 2023-04-15 21:50:02 - r - INFO: - id CartPole-v1 <class 'str'>
39
- 2023-04-15 21:50:02 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002 <class 'str'>
40
- 2023-04-15 21:50:02 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002/results <class 'str'>
41
- 2023-04-15 21:50:02 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002/logs <class 'str'>
42
- 2023-04-15 21:50:02 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002/traj <class 'str'>
43
- 2023-04-15 21:50:02 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002/videos <class 'str'>
44
- 2023-04-15 21:50:02 - r - INFO: - ================================================================================
45
- 2023-04-15 21:50:02 - r - INFO: - n_states: 4, n_actions: 2
46
- 2023-04-15 21:50:02 - r - INFO: - Start training!
47
- 2023-04-15 21:50:02 - r - INFO: - Env: gym, Algorithm: PER_DQN, Device: cpu
48
- 2023-04-15 21:51:00 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/models/checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4251c7f141686d5391c5c933b493b27a184102ccf1596bead1dccaa6cc0bd9a
3
- size 272407
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/learning_curve.png DELETED
Binary file (45.2 kB)
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/res.csv DELETED
@@ -1,202 +0,0 @@
1
- episodes,rewards
2
- 0,18.0
3
- 1,19.0
4
- 2,17.0
5
- 3,24.0
6
- 4,18.0
7
- 5,17.0
8
- 6,13.0
9
- 7,16.0
10
- 8,21.0
11
- 9,30.0
12
- 10,17.0
13
- 11,18.0
14
- 12,11.0
15
- 13,13.0
16
- 14,16.0
17
- 15,14.0
18
- 16,28.0
19
- 17,12.0
20
- 18,14.0
21
- 19,19.0
22
- 20,11.0
23
- 21,10.0
24
- 22,31.0
25
- 23,23.0
26
- 24,22.0
27
- 25,11.0
28
- 26,16.0
29
- 27,12.0
30
- 28,12.0
31
- 29,16.0
32
- 30,12.0
33
- 31,16.0
34
- 32,14.0
35
- 33,21.0
36
- 34,12.0
37
- 35,9.0
38
- 36,9.0
39
- 37,26.0
40
- 38,11.0
41
- 39,22.0
42
- 40,17.0
43
- 41,21.0
44
- 42,16.0
45
- 43,27.0
46
- 44,13.0
47
- 45,18.0
48
- 46,19.0
49
- 47,11.0
50
- 48,11.0
51
- 49,16.0
52
- 50,10.0
53
- 51,9.0
54
- 52,9.0
55
- 53,16.0
56
- 54,9.0
57
- 55,12.0
58
- 56,11.0
59
- 57,11.0
60
- 58,10.0
61
- 59,12.0
62
- 60,10.0
63
- 61,14.0
64
- 62,11.0
65
- 63,12.0
66
- 64,12.0
67
- 65,18.0
68
- 66,12.0
69
- 67,16.0
70
- 68,14.0
71
- 69,23.0
72
- 70,20.0
73
- 71,23.0
74
- 72,17.0
75
- 73,18.0
76
- 74,22.0
77
- 75,22.0
78
- 76,49.0
79
- 77,24.0
80
- 78,60.0
81
- 79,35.0
82
- 80,51.0
83
- 81,78.0
84
- 82,49.0
85
- 83,75.0
86
- 84,100.0
87
- 85,78.0
88
- 86,61.0
89
- 87,65.0
90
- 88,86.0
91
- 89,105.0
92
- 90,54.0
93
- 91,60.0
94
- 92,37.0
95
- 93,149.0
96
- 94,44.0
97
- 95,104.0
98
- 96,200.0
99
- 97,112.0
100
- 98,163.0
101
- 99,167.0
102
- 100,113.0
103
- 101,152.0
104
- 102,200.0
105
- 103,200.0
106
- 104,200.0
107
- 105,200.0
108
- 106,200.0
109
- 107,200.0
110
- 108,200.0
111
- 109,200.0
112
- 110,200.0
113
- 111,200.0
114
- 112,200.0
115
- 113,200.0
116
- 114,200.0
117
- 115,200.0
118
- 116,200.0
119
- 117,200.0
120
- 118,200.0
121
- 119,200.0
122
- 120,200.0
123
- 121,200.0
124
- 122,200.0
125
- 123,200.0
126
- 124,200.0
127
- 125,200.0
128
- 126,200.0
129
- 127,200.0
130
- 128,200.0
131
- 129,200.0
132
- 130,191.0
133
- 131,200.0
134
- 132,189.0
135
- 133,200.0
136
- 134,200.0
137
- 135,200.0
138
- 136,185.0
139
- 137,200.0
140
- 138,197.0
141
- 139,200.0
142
- 140,188.0
143
- 141,200.0
144
- 142,199.0
145
- 143,200.0
146
- 144,200.0
147
- 145,200.0
148
- 146,200.0
149
- 147,200.0
150
- 148,200.0
151
- 149,200.0
152
- 150,200.0
153
- 151,200.0
154
- 152,200.0
155
- 153,200.0
156
- 154,200.0
157
- 155,200.0
158
- 156,200.0
159
- 157,200.0
160
- 158,200.0
161
- 159,200.0
162
- 160,200.0
163
- 161,200.0
164
- 162,200.0
165
- 163,200.0
166
- 164,200.0
167
- 165,200.0
168
- 166,200.0
169
- 167,200.0
170
- 168,200.0
171
- 169,200.0
172
- 170,200.0
173
- 171,200.0
174
- 172,200.0
175
- 173,200.0
176
- 174,200.0
177
- 175,200.0
178
- 176,200.0
179
- 177,200.0
180
- 178,200.0
181
- 179,200.0
182
- 180,200.0
183
- 181,200.0
184
- 182,200.0
185
- 183,200.0
186
- 184,200.0
187
- 185,200.0
188
- 186,200.0
189
- 187,200.0
190
- 188,200.0
191
- 189,200.0
192
- 190,200.0
193
- 191,200.0
194
- 192,200.0
195
- 193,200.0
196
- 194,200.0
197
- 195,200.0
198
- 196,200.0
199
- 197,200.0
200
- 198,200.0
201
- 199,200.0
202
- 200,200.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/config.yaml DELETED
@@ -1,55 +0,0 @@
1
- general_cfg:
2
- algo_name: PER_DQN
3
- device: cpu
4
- env_name: gym
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: false
8
- load_path: Train_CartPole-v1_PER_DQN
9
- max_steps: 200
10
- mode: train
11
- mp_backend: ray
12
- n_workers: 2
13
- new_step_api: true
14
- render: false
15
- render_mode: human
16
- save_fig: true
17
- seed: 1
18
- show_fig: false
19
- test_eps: 10
20
- train_eps: 250
21
- wrapper: null
22
- algo_cfg:
23
- batch_size: 64
24
- buffer_size: 100000
25
- epsilon_decay: 500
26
- epsilon_end: 0.01
27
- epsilon_start: 0.95
28
- gamma: 0.99
29
- hidden_dim: 256
30
- lr: 0.0001
31
- per_alpha: 0.6
32
- per_beta: 0.4
33
- per_beta_annealing: 0.001
34
- per_epsilon: 0.01
35
- target_update: 4
36
- value_layers:
37
- - activation: relu
38
- layer_dim:
39
- - n_states
40
- - 256
41
- layer_type: linear
42
- - activation: relu
43
- layer_dim:
44
- - 256
45
- - 256
46
- layer_type: linear
47
- - activation: none
48
- layer_dim:
49
- - 256
50
- - n_actions
51
- layer_type: linear
52
- env_cfg:
53
- id: CartPole-v1
54
- new_step_api: true
55
- render_mode: null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/logs/log.txt DELETED
@@ -1,48 +0,0 @@
1
- 2023-04-15 21:57:38 - r - INFO: - Hyperparameters:
2
- 2023-04-15 21:57:38 - r - INFO: - ================================================================================
3
- 2023-04-15 21:57:38 - r - INFO: - Name Value Type
4
- 2023-04-15 21:57:38 - r - INFO: - env_name gym <class 'str'>
5
- 2023-04-15 21:57:38 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-15 21:57:38 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-15 21:57:38 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-15 21:57:38 - r - INFO: - render_mode None <class 'str'>
9
- 2023-04-15 21:57:38 - r - INFO: - algo_name PER_DQN <class 'str'>
10
- 2023-04-15 21:57:38 - r - INFO: - mode train <class 'str'>
11
- 2023-04-15 21:57:38 - r - INFO: - mp_backend ray <class 'str'>
12
- 2023-04-15 21:57:38 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-15 21:57:38 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-15 21:57:38 - r - INFO: - train_eps 250 <class 'int'>
15
- 2023-04-15 21:57:38 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-15 21:57:38 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-15 21:57:38 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-15 21:57:38 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-15 21:57:38 - r - INFO: - load_checkpoint 0 <class 'bool'>
20
- 2023-04-15 21:57:38 - r - INFO: - load_path Train_CartPole-v1_PER_DQN <class 'str'>
21
- 2023-04-15 21:57:38 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-15 21:57:38 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-15 21:57:38 - r - INFO: - n_workers 2 <class 'int'>
24
- 2023-04-15 21:57:38 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-15 21:57:38 - r - INFO: - epsilon_end 0.01 <class 'float'>
26
- 2023-04-15 21:57:38 - r - INFO: - epsilon_decay 500 <class 'int'>
27
- 2023-04-15 21:57:38 - r - INFO: - hidden_dim 256 <class 'int'>
28
- 2023-04-15 21:57:38 - r - INFO: - gamma 0.99 <class 'float'>
29
- 2023-04-15 21:57:38 - r - INFO: - lr 0.0001 <class 'float'>
30
- 2023-04-15 21:57:38 - r - INFO: - buffer_size 100000 <class 'int'>
31
- 2023-04-15 21:57:38 - r - INFO: - per_alpha 0.6 <class 'float'>
32
- 2023-04-15 21:57:38 - r - INFO: - per_beta 0.4 <class 'float'>
33
- 2023-04-15 21:57:38 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
34
- 2023-04-15 21:57:38 - r - INFO: - per_epsilon 0.01 <class 'float'>
35
- 2023-04-15 21:57:38 - r - INFO: - batch_size 64 <class 'int'>
36
- 2023-04-15 21:57:38 - r - INFO: - target_update 4 <class 'int'>
37
- 2023-04-15 21:57:38 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
38
- 2023-04-15 21:57:38 - r - INFO: - id CartPole-v1 <class 'str'>
39
- 2023-04-15 21:57:38 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738 <class 'str'>
40
- 2023-04-15 21:57:38 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738/results <class 'str'>
41
- 2023-04-15 21:57:38 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738/logs <class 'str'>
42
- 2023-04-15 21:57:38 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738/traj <class 'str'>
43
- 2023-04-15 21:57:38 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738/videos <class 'str'>
44
- 2023-04-15 21:57:38 - r - INFO: - ================================================================================
45
- 2023-04-15 21:57:40 - r - INFO: - n_states: 4, n_actions: 2
46
- 2023-04-15 21:57:40 - r - INFO: - Start training!
47
- 2023-04-15 21:57:40 - r - INFO: - Env: gym, Algorithm: PER_DQN, Device: cpu
48
- 2023-04-15 22:00:44 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/models/checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0efe3ec576afef2311748067e61af0fe6c939f7a2c2a1500001987a5d0092ce3
3
- size 272407
 
 
 
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/learning_curve.png DELETED
Binary file (52.8 kB)
 
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/res.csv DELETED
@@ -1,251 +0,0 @@
1
- episodes,rewards
2
- 0,18.0
3
- 1,18.0
4
- 2,39.0
5
- 3,28.0
6
- 4,15.0
7
- 5,14.0
8
- 6,39.0
9
- 7,52.0
10
- 8,36.0
11
- 9,28.0
12
- 10,13.0
13
- 11,16.0
14
- 12,20.0
15
- 13,19.0
16
- 14,31.0
17
- 15,11.0
18
- 16,10.0
19
- 17,22.0
20
- 18,23.0
21
- 19,16.0
22
- 20,11.0
23
- 21,12.0
24
- 22,12.0
25
- 23,12.0
26
- 24,16.0
27
- 25,14.0
28
- 26,16.0
29
- 27,12.0
30
- 28,21.0
31
- 29,25.0
32
- 30,9.0
33
- 31,10.0
34
- 32,9.0
35
- 33,41.0
36
- 34,22.0
37
- 35,19.0
38
- 36,13.0
39
- 37,12.0
40
- 38,16.0
41
- 39,13.0
42
- 40,13.0
43
- 41,9.0
44
- 42,11.0
45
- 43,13.0
46
- 44,11.0
47
- 45,11.0
48
- 46,11.0
49
- 47,11.0
50
- 48,10.0
51
- 49,11.0
52
- 50,10.0
53
- 51,14.0
54
- 52,12.0
55
- 53,9.0
56
- 54,10.0
57
- 55,9.0
58
- 56,10.0
59
- 57,10.0
60
- 58,12.0
61
- 59,9.0
62
- 60,10.0
63
- 61,9.0
64
- 62,11.0
65
- 63,13.0
66
- 64,10.0
67
- 65,12.0
68
- 66,15.0
69
- 67,9.0
70
- 68,11.0
71
- 69,10.0
72
- 70,10.0
73
- 71,9.0
74
- 72,10.0
75
- 73,9.0
76
- 74,11.0
77
- 75,9.0
78
- 76,10.0
79
- 77,9.0
80
- 78,9.0
81
- 79,11.0
82
- 80,11.0
83
- 81,10.0
84
- 82,12.0
85
- 83,29.0
86
- 84,14.0
87
- 85,11.0
88
- 86,14.0
89
- 87,10.0
90
- 88,10.0
91
- 89,15.0
92
- 90,18.0
93
- 91,16.0
94
- 92,15.0
95
- 93,17.0
96
- 94,12.0
97
- 95,70.0
98
- 96,27.0
99
- 97,23.0
100
- 98,115.0
101
- 99,77.0
102
- 100,34.0
103
- 101,25.0
104
- 102,18.0
105
- 103,24.0
106
- 104,19.0
107
- 105,29.0
108
- 106,33.0
109
- 107,77.0
110
- 108,44.0
111
- 109,35.0
112
- 110,51.0
113
- 111,31.0
114
- 112,53.0
115
- 113,28.0
116
- 114,33.0
117
- 115,47.0
118
- 116,69.0
119
- 117,30.0
120
- 118,30.0
121
- 119,59.0
122
- 120,41.0
123
- 121,33.0
124
- 122,82.0
125
- 123,58.0
126
- 124,31.0
127
- 125,40.0
128
- 126,38.0
129
- 127,57.0
130
- 128,34.0
131
- 129,47.0
132
- 130,36.0
133
- 131,32.0
134
- 132,38.0
135
- 133,37.0
136
- 134,57.0
137
- 135,33.0
138
- 136,52.0
139
- 137,72.0
140
- 138,55.0
141
- 139,88.0
142
- 140,50.0
143
- 141,35.0
144
- 142,49.0
145
- 143,35.0
146
- 144,54.0
147
- 145,39.0
148
- 146,34.0
149
- 147,47.0
150
- 148,34.0
151
- 149,61.0
152
- 150,39.0
153
- 151,54.0
154
- 152,69.0
155
- 153,72.0
156
- 154,65.0
157
- 155,51.0
158
- 156,101.0
159
- 157,40.0
160
- 158,49.0
161
- 159,65.0
162
- 160,43.0
163
- 161,47.0
164
- 162,154.0
165
- 163,88.0
166
- 164,99.0
167
- 165,72.0
168
- 166,152.0
169
- 167,53.0
170
- 168,74.0
171
- 169,87.0
172
- 170,62.0
173
- 171,104.0
174
- 172,80.0
175
- 173,113.0
176
- 174,75.0
177
- 175,200.0
178
- 176,69.0
179
- 177,200.0
180
- 178,200.0
181
- 179,200.0
182
- 180,130.0
183
- 181,200.0
184
- 182,150.0
185
- 183,191.0
186
- 184,200.0
187
- 185,200.0
188
- 186,200.0
189
- 187,196.0
190
- 188,175.0
191
- 189,200.0
192
- 190,200.0
193
- 191,200.0
194
- 192,200.0
195
- 193,200.0
196
- 194,200.0
197
- 195,200.0
198
- 196,200.0
199
- 197,200.0
200
- 198,200.0
201
- 199,200.0
202
- 200,197.0
203
- 201,200.0
204
- 202,200.0
205
- 203,200.0
206
- 204,200.0
207
- 205,200.0
208
- 206,200.0
209
- 207,200.0
210
- 208,200.0
211
- 209,200.0
212
- 210,200.0
213
- 211,200.0
214
- 212,200.0
215
- 213,200.0
216
- 214,200.0
217
- 215,200.0
218
- 216,200.0
219
- 217,200.0
220
- 218,200.0
221
- 219,200.0
222
- 220,200.0
223
- 221,200.0
224
- 222,200.0
225
- 223,200.0
226
- 224,200.0
227
- 225,200.0
228
- 226,200.0
229
- 227,200.0
230
- 228,200.0
231
- 229,200.0
232
- 230,200.0
233
- 231,200.0
234
- 232,200.0
235
- 233,200.0
236
- 234,200.0
237
- 235,200.0
238
- 236,200.0
239
- 237,200.0
240
- 238,200.0
241
- 239,200.0
242
- 240,200.0
243
- 241,200.0
244
- 242,200.0
245
- 243,200.0
246
- 244,200.0
247
- 245,200.0
248
- 246,200.0
249
- 247,200.0
250
- 248,200.0
251
- 249,200.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/config.yaml DELETED
@@ -1,45 +0,0 @@
1
- general_cfg:
2
- algo_name: DQN
3
- collect_traj: true
4
- device: cpu
5
- env_name: gym
6
- load_checkpoint: false
7
- load_model_step: best
8
- load_path: Train_single_CartPole-v1_DQN_20230515-211721
9
- max_episode: 100
10
- max_step: 200
11
- mode: train
12
- model_save_fre: 500
13
- mp_backend: ray
14
- n_workers: 2
15
- online_eval: true
16
- online_eval_episode: 10
17
- save_fig: true
18
- seed: 1
19
- show_fig: false
20
- algo_cfg:
21
- batch_size: 64
22
- buffer_size: 100000
23
- buffer_type: REPLAY_QUE
24
- epsilon_decay: 500
25
- epsilon_end: 0.01
26
- epsilon_start: 0.95
27
- gamma: 0.95
28
- lr: 0.0001
29
- target_update: 4
30
- value_layers:
31
- - activation: relu
32
- layer_dim:
33
- - 256
34
- layer_type: linear
35
- - activation: relu
36
- layer_dim:
37
- - 256
38
- layer_type: linear
39
- env_cfg:
40
- id: CartPole-v1
41
- ignore_params:
42
- - wrapper
43
- - ignore_params
44
- render_mode: null
45
- wrapper: null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/logs/log.txt DELETED
@@ -1,166 +0,0 @@
1
- 2023-05-15 22:19:16 - SimpleLog - INFO: - General Configs:
2
- 2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================
3
- 2023-05-15 22:19:16 - SimpleLog - INFO: - Name Value Type
4
- 2023-05-15 22:19:16 - SimpleLog - INFO: - env_name gym <class 'str'>
5
- 2023-05-15 22:19:16 - SimpleLog - INFO: - algo_name DQN <class 'str'>
6
- 2023-05-15 22:19:16 - SimpleLog - INFO: - mode train <class 'str'>
7
- 2023-05-15 22:19:16 - SimpleLog - INFO: - collect_traj 1 <class 'bool'>
8
- 2023-05-15 22:19:16 - SimpleLog - INFO: - mp_backend ray <class 'str'>
9
- 2023-05-15 22:19:16 - SimpleLog - INFO: - n_workers 2 <class 'int'>
10
- 2023-05-15 22:19:16 - SimpleLog - INFO: - seed 1 <class 'int'>
11
- 2023-05-15 22:19:16 - SimpleLog - INFO: - device cpu <class 'str'>
12
- 2023-05-15 22:19:16 - SimpleLog - INFO: - max_episode 100 <class 'int'>
13
- 2023-05-15 22:19:16 - SimpleLog - INFO: - max_step 200 <class 'int'>
14
- 2023-05-15 22:19:16 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
15
- 2023-05-15 22:19:16 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
16
- 2023-05-15 22:19:16 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
17
- 2023-05-15 22:19:16 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 <class 'str'>
18
- 2023-05-15 22:19:16 - SimpleLog - INFO: - show_fig 0 <class 'bool'>
19
- 2023-05-15 22:19:16 - SimpleLog - INFO: - save_fig 1 <class 'bool'>
20
- 2023-05-15 22:19:16 - SimpleLog - INFO: - load_model_step best <class 'str'>
21
- 2023-05-15 22:19:16 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
22
- 2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================
23
- 2023-05-15 22:19:16 - SimpleLog - INFO: - Algo Configs:
24
- 2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================
25
- 2023-05-15 22:19:16 - SimpleLog - INFO: - Name Value Type
26
- 2023-05-15 22:19:16 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
27
- 2023-05-15 22:19:16 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
28
- 2023-05-15 22:19:16 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
29
- 2023-05-15 22:19:16 - SimpleLog - INFO: - gamma 0.95 <class 'float'>
30
- 2023-05-15 22:19:16 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
31
- 2023-05-15 22:19:16 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
32
- 2023-05-15 22:19:16 - SimpleLog - INFO: - batch_size 64 <class 'int'>
33
- 2023-05-15 22:19:16 - SimpleLog - INFO: - target_update 4 <class 'int'>
34
- 2023-05-15 22:19:16 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
35
- 2023-05-15 22:19:16 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
36
- 2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================
37
- 2023-05-15 22:19:16 - SimpleLog - INFO: - Env Configs:
38
- 2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================
39
- 2023-05-15 22:19:16 - SimpleLog - INFO: - Name Value Type
40
- 2023-05-15 22:19:16 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
41
- 2023-05-15 22:19:16 - SimpleLog - INFO: - render_mode None <class 'str'>
42
- 2023-05-15 22:19:16 - SimpleLog - INFO: - wrapper None <class 'str'>
43
- 2023-05-15 22:19:16 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
44
- 2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================
45
- 2023-05-15 22:19:21 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
46
- 2023-05-15 22:19:24 - RayLog - INFO: - Worker 0 finished episode 0 with reward 12.0 in 12 steps
47
- 2023-05-15 22:19:24 - RayLog - INFO: - Worker 1 finished episode 0 with reward 22.0 in 22 steps
48
- 2023-05-15 22:19:24 - RayLog - INFO: - Worker 0 finished episode 1 with reward 21.0 in 21 steps
49
- 2023-05-15 22:19:25 - RayLog - INFO: - Worker 0 finished episode 3 with reward 18.0 in 18 steps
50
- 2023-05-15 22:19:25 - RayLog - INFO: - Worker 1 finished episode 2 with reward 32.0 in 32 steps
51
- 2023-05-15 22:19:25 - RayLog - INFO: - Worker 1 finished episode 5 with reward 13.0 in 13 steps
52
- 2023-05-15 22:19:25 - RayLog - INFO: - Worker 0 finished episode 4 with reward 23.0 in 23 steps
53
- 2023-05-15 22:19:25 - RayLog - INFO: - Worker 1 finished episode 6 with reward 9.0 in 9 steps
54
- 2023-05-15 22:19:25 - RayLog - INFO: - Worker 0 finished episode 7 with reward 12.0 in 12 steps
55
- 2023-05-15 22:19:25 - RayLog - INFO: - Worker 1 finished episode 8 with reward 11.0 in 11 steps
56
- 2023-05-15 22:19:26 - RayLog - INFO: - Worker 1 finished episode 10 with reward 17.0 in 17 steps
57
- 2023-05-15 22:19:26 - RayLog - INFO: - Worker 0 finished episode 9 with reward 19.0 in 19 steps
58
- 2023-05-15 22:19:26 - RayLog - INFO: - Worker 1 finished episode 11 with reward 9.0 in 9 steps
59
- 2023-05-15 22:19:26 - RayLog - INFO: - Worker 1 finished episode 13 with reward 14.0 in 14 steps
60
- 2023-05-15 22:19:26 - RayLog - INFO: - Worker 0 finished episode 12 with reward 25.0 in 25 steps
61
- 2023-05-15 22:19:26 - RayLog - INFO: - Worker 1 finished episode 14 with reward 13.0 in 13 steps
62
- 2023-05-15 22:19:27 - RayLog - INFO: - Worker 0 finished episode 15 with reward 12.0 in 12 steps
63
- 2023-05-15 22:19:27 - RayLog - INFO: - Worker 1 finished episode 16 with reward 13.0 in 13 steps
64
- 2023-05-15 22:19:27 - RayLog - INFO: - Worker 0 finished episode 17 with reward 17.0 in 17 steps
65
- 2023-05-15 22:19:27 - RayLog - INFO: - Worker 1 finished episode 18 with reward 9.0 in 9 steps
66
- 2023-05-15 22:19:27 - RayLog - INFO: - Worker 0 finished episode 19 with reward 13.0 in 13 steps
67
- 2023-05-15 22:19:27 - RayLog - INFO: - Worker 1 finished episode 20 with reward 11.0 in 11 steps
68
- 2023-05-15 22:19:27 - RayLog - INFO: - Worker 0 finished episode 21 with reward 11.0 in 11 steps
69
- 2023-05-15 22:19:27 - RayLog - INFO: - Worker 1 finished episode 22 with reward 11.0 in 11 steps
70
- 2023-05-15 22:19:28 - RayLog - INFO: - Worker 1 finished episode 24 with reward 11.0 in 11 steps
71
- 2023-05-15 22:19:28 - RayLog - INFO: - Worker 1 finished episode 25 with reward 9.0 in 9 steps
72
- 2023-05-15 22:19:28 - RayLog - INFO: - Worker 0 finished episode 23 with reward 23.0 in 23 steps
73
- 2023-05-15 22:19:29 - RayLog - INFO: - Worker 1 finished episode 26 with reward 15.0 in 15 steps
74
- 2023-05-15 22:19:29 - RayLog - INFO: - Worker 1 finished episode 28 with reward 16.0 in 16 steps
75
- 2023-05-15 22:19:29 - RayLog - INFO: - Worker 0 finished episode 27 with reward 38.0 in 38 steps
76
- 2023-05-15 22:19:29 - RayLog - INFO: - Worker 1 finished episode 29 with reward 11.0 in 11 steps
77
- 2023-05-15 22:19:29 - RayLog - INFO: - Worker 0 finished episode 30 with reward 13.0 in 13 steps
78
- 2023-05-15 22:19:29 - RayLog - INFO: - Worker 1 finished episode 31 with reward 12.0 in 12 steps
79
- 2023-05-15 22:19:30 - RayLog - INFO: - Worker 1 finished episode 33 with reward 12.0 in 12 steps
80
- 2023-05-15 22:19:30 - RayLog - INFO: - Worker 0 finished episode 32 with reward 14.0 in 14 steps
81
- 2023-05-15 22:19:30 - RayLog - INFO: - Worker 0 finished episode 35 with reward 9.0 in 9 steps
82
- 2023-05-15 22:19:30 - RayLog - INFO: - Worker 1 finished episode 34 with reward 11.0 in 11 steps
83
- 2023-05-15 22:19:32 - RayLog - INFO: - update_step: 500, online_eval_reward: 9.000
84
- 2023-05-15 22:19:32 - RayLog - INFO: - current update step obtain a better online_eval_reward: 9.000, save the best model!
85
- 2023-05-15 22:19:32 - RayLog - INFO: - Worker 0 finished episode 36 with reward 11.0 in 11 steps
86
- 2023-05-15 22:19:32 - RayLog - INFO: - Worker 1 finished episode 37 with reward 14.0 in 14 steps
87
- 2023-05-15 22:19:32 - RayLog - INFO: - Worker 0 finished episode 38 with reward 12.0 in 12 steps
88
- 2023-05-15 22:19:32 - RayLog - INFO: - Worker 1 finished episode 39 with reward 13.0 in 13 steps
89
- 2023-05-15 22:19:32 - RayLog - INFO: - Worker 0 finished episode 40 with reward 11.0 in 11 steps
90
- 2023-05-15 22:19:32 - RayLog - INFO: - Worker 1 finished episode 41 with reward 10.0 in 10 steps
91
- 2023-05-15 22:19:33 - RayLog - INFO: - Worker 0 finished episode 42 with reward 11.0 in 11 steps
92
- 2023-05-15 22:19:33 - RayLog - INFO: - Worker 1 finished episode 43 with reward 10.0 in 10 steps
93
- 2023-05-15 22:19:33 - RayLog - INFO: - Worker 0 finished episode 44 with reward 10.0 in 10 steps
94
- 2023-05-15 22:19:33 - RayLog - INFO: - Worker 1 finished episode 45 with reward 21.0 in 21 steps
95
- 2023-05-15 22:19:34 - RayLog - INFO: - Worker 0 finished episode 46 with reward 36.0 in 36 steps
96
- 2023-05-15 22:19:34 - RayLog - INFO: - Worker 1 finished episode 47 with reward 30.0 in 30 steps
97
- 2023-05-15 22:19:34 - RayLog - INFO: - Worker 1 finished episode 49 with reward 19.0 in 19 steps
98
- 2023-05-15 22:19:34 - RayLog - INFO: - Worker 0 finished episode 48 with reward 28.0 in 28 steps
99
- 2023-05-15 22:19:35 - RayLog - INFO: - Worker 1 finished episode 50 with reward 17.0 in 17 steps
100
- 2023-05-15 22:19:35 - RayLog - INFO: - Worker 0 finished episode 51 with reward 28.0 in 28 steps
101
- 2023-05-15 22:19:35 - RayLog - INFO: - Worker 1 finished episode 52 with reward 23.0 in 23 steps
102
- 2023-05-15 22:19:36 - RayLog - INFO: - Worker 0 finished episode 53 with reward 46.0 in 46 steps
103
- 2023-05-15 22:19:36 - RayLog - INFO: - Worker 1 finished episode 54 with reward 37.0 in 37 steps
104
- 2023-05-15 22:19:36 - RayLog - INFO: - Worker 1 finished episode 56 with reward 27.0 in 27 steps
105
- 2023-05-15 22:19:37 - RayLog - INFO: - Worker 0 finished episode 55 with reward 56.0 in 56 steps
106
- 2023-05-15 22:19:37 - RayLog - INFO: - update_step: 1000, online_eval_reward: 94.000
107
- 2023-05-15 22:19:37 - RayLog - INFO: - current update step obtain a better online_eval_reward: 94.000, save the best model!
108
- 2023-05-15 22:19:37 - RayLog - INFO: - Worker 1 finished episode 57 with reward 35.0 in 35 steps
109
- 2023-05-15 22:19:38 - RayLog - INFO: - Worker 1 finished episode 59 with reward 29.0 in 29 steps
110
- 2023-05-15 22:19:38 - RayLog - INFO: - Worker 0 finished episode 58 with reward 65.0 in 65 steps
111
- 2023-05-15 22:19:39 - RayLog - INFO: - Worker 1 finished episode 60 with reward 37.0 in 37 steps
112
- 2023-05-15 22:19:39 - RayLog - INFO: - Worker 1 finished episode 62 with reward 34.0 in 34 steps
113
- 2023-05-15 22:19:40 - RayLog - INFO: - Worker 0 finished episode 61 with reward 70.0 in 70 steps
114
- 2023-05-15 22:19:40 - RayLog - INFO: - Worker 1 finished episode 63 with reward 39.0 in 39 steps
115
- 2023-05-15 22:19:41 - RayLog - INFO: - Worker 1 finished episode 65 with reward 35.0 in 35 steps
116
- 2023-05-15 22:19:41 - RayLog - INFO: - Worker 0 finished episode 64 with reward 55.0 in 55 steps
117
- 2023-05-15 22:19:42 - RayLog - INFO: - Worker 1 finished episode 66 with reward 37.0 in 37 steps
118
- 2023-05-15 22:19:42 - RayLog - INFO: - Worker 0 finished episode 67 with reward 53.0 in 53 steps
119
- 2023-05-15 22:19:42 - RayLog - INFO: - Worker 1 finished episode 68 with reward 32.0 in 32 steps
120
- 2023-05-15 22:19:42 - RayLog - INFO: - update_step: 1500, online_eval_reward: 57.000
121
- 2023-05-15 22:19:43 - RayLog - INFO: - Worker 1 finished episode 70 with reward 51.0 in 51 steps
122
- 2023-05-15 22:19:44 - RayLog - INFO: - Worker 0 finished episode 69 with reward 67.0 in 67 steps
123
- 2023-05-15 22:19:44 - RayLog - INFO: - Worker 1 finished episode 71 with reward 40.0 in 40 steps
124
- 2023-05-15 22:19:45 - RayLog - INFO: - Worker 0 finished episode 72 with reward 68.0 in 68 steps
125
- 2023-05-15 22:19:46 - RayLog - INFO: - Worker 1 finished episode 73 with reward 79.0 in 79 steps
126
- 2023-05-15 22:19:48 - RayLog - INFO: - update_step: 2000, online_eval_reward: 138.000
127
- 2023-05-15 22:19:48 - RayLog - INFO: - current update step obtain a better online_eval_reward: 138.000, save the best model!
128
- 2023-05-15 22:19:48 - RayLog - INFO: - Worker 0 finished episode 74 with reward 124.0 in 124 steps
129
- 2023-05-15 22:19:49 - RayLog - INFO: - Worker 1 finished episode 75 with reward 133.0 in 133 steps
130
- 2023-05-15 22:19:52 - RayLog - INFO: - Worker 0 finished episode 76 with reward 200.0 in 200 steps
131
- 2023-05-15 22:19:53 - RayLog - INFO: - update_step: 2500, online_eval_reward: 200.000
132
- 2023-05-15 22:19:53 - RayLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
133
- 2023-05-15 22:19:53 - RayLog - INFO: - Worker 1 finished episode 77 with reward 200.0 in 200 steps
134
- 2023-05-15 22:19:56 - RayLog - INFO: - Worker 0 finished episode 78 with reward 187.0 in 187 steps
135
- 2023-05-15 22:19:57 - RayLog - INFO: - Worker 1 finished episode 79 with reward 200.0 in 200 steps
136
- 2023-05-15 22:19:58 - RayLog - INFO: - update_step: 3000, online_eval_reward: 200.000
137
- 2023-05-15 22:20:00 - RayLog - INFO: - Worker 0 finished episode 80 with reward 200.0 in 200 steps
138
- 2023-05-15 22:20:02 - RayLog - INFO: - Worker 1 finished episode 81 with reward 200.0 in 200 steps
139
- 2023-05-15 22:20:04 - RayLog - INFO: - update_step: 3500, online_eval_reward: 165.000
140
- 2023-05-15 22:20:04 - RayLog - INFO: - Worker 0 finished episode 82 with reward 200.0 in 200 steps
141
- 2023-05-15 22:20:06 - RayLog - INFO: - Worker 1 finished episode 83 with reward 200.0 in 200 steps
142
- 2023-05-15 22:20:08 - RayLog - INFO: - Worker 0 finished episode 84 with reward 200.0 in 200 steps
143
- 2023-05-15 22:20:09 - RayLog - INFO: - update_step: 4000, online_eval_reward: 200.000
144
- 2023-05-15 22:20:10 - RayLog - INFO: - Worker 1 finished episode 85 with reward 200.0 in 200 steps
145
- 2023-05-15 22:20:12 - RayLog - INFO: - Worker 0 finished episode 86 with reward 200.0 in 200 steps
146
- 2023-05-15 22:20:14 - RayLog - INFO: - update_step: 4500, online_eval_reward: 200.000
147
- 2023-05-15 22:20:14 - RayLog - INFO: - Worker 1 finished episode 87 with reward 200.0 in 200 steps
148
- 2023-05-15 22:20:16 - RayLog - INFO: - Worker 0 finished episode 88 with reward 200.0 in 200 steps
149
- 2023-05-15 22:20:18 - RayLog - INFO: - Worker 1 finished episode 89 with reward 200.0 in 200 steps
150
- 2023-05-15 22:20:19 - RayLog - INFO: - update_step: 5000, online_eval_reward: 200.000
151
- 2023-05-15 22:20:20 - RayLog - INFO: - Worker 0 finished episode 90 with reward 200.0 in 200 steps
152
- 2023-05-15 22:20:22 - RayLog - INFO: - Worker 1 finished episode 91 with reward 200.0 in 200 steps
153
- 2023-05-15 22:20:24 - RayLog - INFO: - update_step: 5500, online_eval_reward: 200.000
154
- 2023-05-15 22:20:24 - RayLog - INFO: - Worker 0 finished episode 92 with reward 200.0 in 200 steps
155
- 2023-05-15 22:20:26 - RayLog - INFO: - Worker 1 finished episode 93 with reward 200.0 in 200 steps
156
- 2023-05-15 22:20:28 - RayLog - INFO: - Worker 0 finished episode 94 with reward 200.0 in 200 steps
157
- 2023-05-15 22:20:29 - RayLog - INFO: - update_step: 6000, online_eval_reward: 200.000
158
- 2023-05-15 22:20:30 - RayLog - INFO: - Worker 1 finished episode 95 with reward 200.0 in 200 steps
159
- 2023-05-15 22:20:32 - RayLog - INFO: - Worker 0 finished episode 96 with reward 200.0 in 200 steps
160
- 2023-05-15 22:20:34 - RayLog - INFO: - update_step: 6500, online_eval_reward: 200.000
161
- 2023-05-15 22:20:34 - RayLog - INFO: - Worker 1 finished episode 97 with reward 200.0 in 200 steps
162
- 2023-05-15 22:20:37 - RayLog - INFO: - Worker 0 finished episode 98 with reward 200.0 in 200 steps
163
- 2023-05-15 22:20:38 - RayLog - INFO: - Worker 1 finished episode 99 with reward 200.0 in 200 steps
164
- 2023-05-15 22:20:40 - RayLog - INFO: - update_step: 7000, online_eval_reward: 200.000
165
- 2023-05-15 22:20:40 - RayLog - INFO: - Worker 0 finished episode 100 with reward 200.0 in 200 steps
166
- 2023-05-15 22:20:43 - SimpleLog - INFO: - Finish training! total time consumed: 87.42s
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1000 DELETED
Binary file (545 kB)
 
ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1500 DELETED
Binary file (545 kB)
 
ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2000 DELETED
Binary file (545 kB)
 
ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2500 DELETED
Binary file (545 kB)