johnjim0816 committed on
Commit
ccb908b
•
1 Parent(s): 989b5fc

update CartPole-v1 DoubleDQN

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/config.yaml +0 -40
  2. CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/logs/log.txt +0 -14
  3. CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/learning_curve.png +0 -0
  4. CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/res.csv +0 -11
  5. CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/logs/log.txt +0 -52
  6. CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/learning_curve.png +0 -0
  7. CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/res.csv +0 -11
  8. CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/config.yaml +0 -46
  9. CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/logs/log.txt +0 -52
  10. CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/learning_curve.png +0 -0
  11. CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/res.csv +0 -11
  12. CartPole-v1/{Test_CartPole-v1_DoubleDQN_mp_20230406-160410 → Test_single_CartPole-v1_DoubleDQN_20230516-115305}/config.yaml +20 -23
  13. CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/logs/log.txt +55 -0
  14. CartPole-v1/{Test_CartPole-v1_DoubleDQN_mp_20230406-160410/models/checkpoint.pth → Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/interact/events.out.tfevents.1684209185.JMac.local.52313.0} +2 -2
  15. CartPole-v1/{Test_CartPole-v1_DoubleDQN_ray_20230406-170348/models/checkpoint.pt → Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/model/events.out.tfevents.1684209185.JMac.local.52313.1} +2 -2
  16. CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/config.yaml +0 -40
  17. CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/logs/log.txt +0 -116
  18. CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/learning_curve.png +0 -0
  19. CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/res.csv +0 -101
  20. CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/logs/log.txt +0 -42
  21. CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/models/checkpoint.pth +0 -3
  22. CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/learning_curve.png +0 -0
  23. CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/res.csv +0 -402
  24. CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/logs/log.txt +0 -42
  25. CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/models/checkpoint.pt +0 -3
  26. CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/learning_curve.png +0 -0
  27. CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/res.csv +0 -401
  28. CartPole-v1/{Train_CartPole-v1_DoubleDQN_ray_20230406-162938 → Train_ray_CartPole-v1_DoubleDQN_20230516-115126}/config.yaml +17 -20
  29. CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/logs/log.txt +157 -0
  30. CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1000 +0 -0
  31. CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1500 +0 -0
  32. CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2000 +0 -0
  33. CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2500 +0 -0
  34. CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3000 +0 -0
  35. CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3500 +0 -0
  36. CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/500 +0 -0
  37. CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/best +0 -0
  38. CartPole-v1/{Test_CartPole-v1_DoubleDQN_20221122-125611/models/checkpoint.pth → Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209086.JMac.local.52110.0} +2 -2
  39. CartPole-v1/{Train_CartPole-v1_DoubleDQN_20221122-125516/models/checkpoint.pth → Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209096.JMac.local.52161.0} +2 -2
  40. CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209086.JMac.local.52110.1 +3 -0
  41. CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209096.JMac.local.52161.1 +3 -0
  42. CartPole-v1/{Train_CartPole-v1_DoubleDQN_mp_20230406-160028 → Train_single_CartPole-v1_DoubleDQN_20230516-114540}/config.yaml +18 -21
  43. CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/logs/log.txt +162 -0
  44. CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1000 +0 -0
  45. CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1500 +0 -0
  46. CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2000 +0 -0
  47. CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2500 +0 -0
  48. CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3000 +0 -0
  49. CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3500 +0 -0
  50. CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/4000 +0 -0
CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/config.yaml DELETED
@@ -1,40 +0,0 @@
1
- general_cfg:
2
- algo_name: DoubleDQN
3
- device: cuda
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_CartPole-v1_DoubleDQN_20221122-125516
9
- max_steps: 200
10
- mode: test
11
- save_fig: true
12
- seed: 1
13
- show_fig: false
14
- test_eps: 10
15
- train_eps: 100
16
- algo_cfg:
17
- batch_size: 64
18
- buffer_size: 100000
19
- epsilon_decay: 500
20
- epsilon_end: 0.01
21
- epsilon_start: 0.95
22
- gamma: 0.99
23
- lr: 0.0001
24
- target_update: 4
25
- value_layers:
26
- - activation: relu
27
- layer_dim:
28
- - n_states
29
- - 256
30
- layer_type: linear
31
- - activation: relu
32
- layer_dim:
33
- - 256
34
- - 256
35
- layer_type: linear
36
- - activation: none
37
- layer_dim:
38
- - 256
39
- - n_actions
40
- layer_type: linear
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/logs/log.txt DELETED
@@ -1,14 +0,0 @@
1
- 2022-11-22 12:56:12 - r - INFO: - n_states: 4, n_actions: 2
2
- 2022-11-22 12:56:14 - r - INFO: - Start testing!
3
- 2022-11-22 12:56:14 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cuda
4
- 2022-11-22 12:56:14 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
5
- 2022-11-22 12:56:15 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
6
- 2022-11-22 12:56:15 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
7
- 2022-11-22 12:56:15 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
8
- 2022-11-22 12:56:15 - r - INFO: - Episode: 5/10, Reward: 138.000, Step: 138
9
- 2022-11-22 12:56:15 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
10
- 2022-11-22 12:56:15 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
11
- 2022-11-22 12:56:15 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
12
- 2022-11-22 12:56:15 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
13
- 2022-11-22 12:56:15 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
14
- 2022-11-22 12:56:15 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/learning_curve.png DELETED
Binary file (31.4 kB)
 
CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,200.0,200
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,138.0,138
7
- 5,200.0,200
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,200.0,200
11
- 9,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/logs/log.txt DELETED
@@ -1,52 +0,0 @@
1
- 2023-04-06 16:04:10 - r - INFO: - Hyperparameters:
2
- 2023-04-06 16:04:10 - r - INFO: - ================================================================================
3
- 2023-04-06 16:04:10 - r - INFO: - Name Value Type
4
- 2023-04-06 16:04:10 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-04-06 16:04:10 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-06 16:04:10 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-06 16:04:10 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-06 16:04:10 - r - INFO: - render_mode human <class 'str'>
9
- 2023-04-06 16:04:10 - r - INFO: - algo_name DoubleDQN <class 'str'>
10
- 2023-04-06 16:04:10 - r - INFO: - mode test <class 'str'>
11
- 2023-04-06 16:04:10 - r - INFO: - mp_backend mp <class 'str'>
12
- 2023-04-06 16:04:10 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-06 16:04:10 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-06 16:04:10 - r - INFO: - train_eps 400 <class 'int'>
15
- 2023-04-06 16:04:10 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-06 16:04:10 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-06 16:04:10 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-06 16:04:10 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-06 16:04:10 - r - INFO: - load_checkpoint 1 <class 'bool'>
20
- 2023-04-06 16:04:10 - r - INFO: - load_path Train_CartPole-v1_DoubleDQN_mp_20230406-160028 <class 'str'>
21
- 2023-04-06 16:04:10 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-06 16:04:10 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-06 16:04:10 - r - INFO: - n_workers 1 <class 'int'>
24
- 2023-04-06 16:04:10 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-06 16:04:10 - r - INFO: - epsilon_end 0.01 <class 'float'>
26
- 2023-04-06 16:04:10 - r - INFO: - epsilon_decay 500 <class 'int'>
27
- 2023-04-06 16:04:10 - r - INFO: - gamma 0.95 <class 'float'>
28
- 2023-04-06 16:04:10 - r - INFO: - lr 0.0001 <class 'float'>
29
- 2023-04-06 16:04:10 - r - INFO: - buffer_size 100000 <class 'int'>
30
- 2023-04-06 16:04:10 - r - INFO: - batch_size 64 <class 'int'>
31
- 2023-04-06 16:04:10 - r - INFO: - target_update 4 <class 'int'>
32
- 2023-04-06 16:04:10 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
33
- 2023-04-06 16:04:10 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410 <class 'str'>
34
- 2023-04-06 16:04:10 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/results <class 'str'>
35
- 2023-04-06 16:04:10 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/logs <class 'str'>
36
- 2023-04-06 16:04:10 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/traj <class 'str'>
37
- 2023-04-06 16:04:10 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/videos <class 'str'>
38
- 2023-04-06 16:04:10 - r - INFO: - ================================================================================
39
- 2023-04-06 16:04:10 - r - INFO: - n_states: 4, n_actions: 2
40
- 2023-04-06 16:04:10 - r - INFO: - Start testing!
41
- 2023-04-06 16:04:10 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu
42
- 2023-04-06 16:04:10 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
43
- 2023-04-06 16:04:10 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
44
- 2023-04-06 16:04:10 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
45
- 2023-04-06 16:04:10 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
46
- 2023-04-06 16:04:10 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
47
- 2023-04-06 16:04:10 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
48
- 2023-04-06 16:04:10 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
49
- 2023-04-06 16:04:10 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
50
- 2023-04-06 16:04:10 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
51
- 2023-04-06 16:04:10 - r - INFO: - Episode: 10/10, Reward: 198.000, Step: 198
52
- 2023-04-06 16:04:10 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/learning_curve.png DELETED
Binary file (34.7 kB)
 
CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,200.0,200
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,200.0,200
7
- 5,200.0,200
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,200.0,200
11
- 9,198.0,198
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/config.yaml DELETED
@@ -1,46 +0,0 @@
1
- general_cfg:
2
- algo_name: DoubleDQN
3
- device: cpu
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_CartPole-v1_DoubleDQN_ray_20230406-162938
9
- max_steps: 200
10
- mode: test
11
- mp_backend: ray
12
- n_workers: 1
13
- new_step_api: true
14
- render: false
15
- render_mode: human
16
- save_fig: true
17
- seed: 1
18
- show_fig: false
19
- test_eps: 10
20
- train_eps: 400
21
- wrapper: null
22
- algo_cfg:
23
- batch_size: 64
24
- buffer_size: 100000
25
- epsilon_decay: 500
26
- epsilon_end: 0.01
27
- epsilon_start: 0.95
28
- gamma: 0.95
29
- lr: 0.0001
30
- target_update: 4
31
- value_layers:
32
- - activation: relu
33
- layer_dim:
34
- - n_states
35
- - 256
36
- layer_type: linear
37
- - activation: relu
38
- layer_dim:
39
- - 256
40
- - 256
41
- layer_type: linear
42
- - activation: none
43
- layer_dim:
44
- - 256
45
- - n_actions
46
- layer_type: linear
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/logs/log.txt DELETED
@@ -1,52 +0,0 @@
1
- 2023-04-06 17:03:48 - r - INFO: - Hyperparameters:
2
- 2023-04-06 17:03:48 - r - INFO: - ================================================================================
3
- 2023-04-06 17:03:48 - r - INFO: - Name Value Type
4
- 2023-04-06 17:03:48 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-04-06 17:03:48 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-06 17:03:48 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-06 17:03:48 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-06 17:03:48 - r - INFO: - render_mode human <class 'str'>
9
- 2023-04-06 17:03:48 - r - INFO: - algo_name DoubleDQN <class 'str'>
10
- 2023-04-06 17:03:48 - r - INFO: - mode test <class 'str'>
11
- 2023-04-06 17:03:48 - r - INFO: - mp_backend ray <class 'str'>
12
- 2023-04-06 17:03:48 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-06 17:03:48 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-06 17:03:48 - r - INFO: - train_eps 400 <class 'int'>
15
- 2023-04-06 17:03:48 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-06 17:03:48 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-06 17:03:48 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-06 17:03:48 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-06 17:03:48 - r - INFO: - load_checkpoint 1 <class 'bool'>
20
- 2023-04-06 17:03:48 - r - INFO: - load_path Train_CartPole-v1_DoubleDQN_ray_20230406-162938 <class 'str'>
21
- 2023-04-06 17:03:48 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-06 17:03:48 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-06 17:03:48 - r - INFO: - n_workers 1 <class 'int'>
24
- 2023-04-06 17:03:48 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-06 17:03:48 - r - INFO: - epsilon_end 0.01 <class 'float'>
26
- 2023-04-06 17:03:48 - r - INFO: - epsilon_decay 500 <class 'int'>
27
- 2023-04-06 17:03:48 - r - INFO: - gamma 0.95 <class 'float'>
28
- 2023-04-06 17:03:48 - r - INFO: - lr 0.0001 <class 'float'>
29
- 2023-04-06 17:03:48 - r - INFO: - buffer_size 100000 <class 'int'>
30
- 2023-04-06 17:03:48 - r - INFO: - batch_size 64 <class 'int'>
31
- 2023-04-06 17:03:48 - r - INFO: - target_update 4 <class 'int'>
32
- 2023-04-06 17:03:48 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
33
- 2023-04-06 17:03:48 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348 <class 'str'>
34
- 2023-04-06 17:03:48 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/results <class 'str'>
35
- 2023-04-06 17:03:48 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/logs <class 'str'>
36
- 2023-04-06 17:03:48 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/traj <class 'str'>
37
- 2023-04-06 17:03:48 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/videos <class 'str'>
38
- 2023-04-06 17:03:48 - r - INFO: - ================================================================================
39
- 2023-04-06 17:03:48 - r - INFO: - n_states: 4, n_actions: 2
40
- 2023-04-06 17:03:48 - r - INFO: - Start testing!
41
- 2023-04-06 17:03:48 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu
42
- 2023-04-06 17:03:48 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
43
- 2023-04-06 17:03:48 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
44
- 2023-04-06 17:03:48 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
45
- 2023-04-06 17:03:48 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
46
- 2023-04-06 17:03:48 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
47
- 2023-04-06 17:03:48 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
48
- 2023-04-06 17:03:48 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
49
- 2023-04-06 17:03:48 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
50
- 2023-04-06 17:03:48 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
51
- 2023-04-06 17:03:49 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
52
- 2023-04-06 17:03:49 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/learning_curve.png DELETED
Binary file (27.7 kB)
 
CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,200.0,200
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,200.0,200
7
- 5,200.0,200
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,200.0,200
11
- 9,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/{Test_CartPole-v1_DoubleDQN_mp_20230406-160410 → Test_single_CartPole-v1_DoubleDQN_20230516-115305}/config.yaml RENAMED
@@ -1,46 +1,43 @@
1
  general_cfg:
2
  algo_name: DoubleDQN
 
3
  device: cpu
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
  load_checkpoint: true
8
- load_path: Train_CartPole-v1_DoubleDQN_mp_20230406-160028
9
- max_steps: 200
 
 
10
  mode: test
11
- mp_backend: mp
12
- n_workers: 1
13
- new_step_api: true
14
- render: false
15
- render_mode: human
16
- save_fig: true
17
  seed: 1
18
- show_fig: false
19
- test_eps: 10
20
- train_eps: 400
21
- wrapper: null
22
  algo_cfg:
23
  batch_size: 64
24
  buffer_size: 100000
 
25
  epsilon_decay: 500
26
  epsilon_end: 0.01
27
  epsilon_start: 0.95
28
- gamma: 0.95
29
  lr: 0.0001
30
  target_update: 4
31
  value_layers:
32
  - activation: relu
33
  layer_dim:
34
- - n_states
35
  - 256
36
  layer_type: linear
37
  - activation: relu
38
  layer_dim:
39
  - 256
40
- - 256
41
- layer_type: linear
42
- - activation: none
43
- layer_dim:
44
- - 256
45
- - n_actions
46
  layer_type: linear
 
 
 
 
 
 
 
 
1
  general_cfg:
2
  algo_name: DoubleDQN
3
+ collect_traj: false
4
  device: cpu
5
+ env_name: gym
 
 
6
  load_checkpoint: true
7
+ load_model_step: best
8
+ load_path: Train_single_CartPole-v1_DoubleDQN_20230516-114540
9
+ max_episode: 10
10
+ max_step: 200
11
  mode: test
12
+ model_save_fre: 500
13
+ mp_backend: single
14
+ n_workers: 2
15
+ online_eval: true
16
+ online_eval_episode: 10
 
17
  seed: 1
 
 
 
 
18
  algo_cfg:
19
  batch_size: 64
20
  buffer_size: 100000
21
+ buffer_type: REPLAY_QUE
22
  epsilon_decay: 500
23
  epsilon_end: 0.01
24
  epsilon_start: 0.95
25
+ gamma: 0.99
26
  lr: 0.0001
27
  target_update: 4
28
  value_layers:
29
  - activation: relu
30
  layer_dim:
 
31
  - 256
32
  layer_type: linear
33
  - activation: relu
34
  layer_dim:
35
  - 256
 
 
 
 
 
 
36
  layer_type: linear
37
+ env_cfg:
38
+ id: CartPole-v1
39
+ ignore_params:
40
+ - wrapper
41
+ - ignore_params
42
+ render_mode: null
43
+ wrapper: null
CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/logs/log.txt ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - General Configs:
2
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
3
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - Name Value Type
4
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - env_name gym <class 'str'>
5
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - algo_name DoubleDQN <class 'str'>
6
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - mode test <class 'str'>
7
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - device cpu <class 'str'>
8
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - seed 1 <class 'int'>
9
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - max_episode 10 <class 'int'>
10
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - max_step 200 <class 'int'>
11
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
12
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - mp_backend single <class 'str'>
13
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - n_workers 2 <class 'int'>
14
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
15
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
16
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
17
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - load_checkpoint 1 <class 'bool'>
18
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DoubleDQN_20230516-114540 <class 'str'>
19
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - load_model_step best <class 'str'>
20
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
21
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - Algo Configs:
22
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
23
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - Name Value Type
24
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
25
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
26
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
27
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
28
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
29
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
30
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - batch_size 64 <class 'int'>
31
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - target_update 4 <class 'int'>
32
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
33
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
34
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
35
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - Env Configs:
36
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
37
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - Name Value Type
38
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
39
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - render_mode None <class 'str'>
40
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - wrapper None <class 'str'>
41
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
42
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
43
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
44
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - Start testing!
45
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 0, ep_reward: 200.0, ep_step: 200
46
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 1, ep_reward: 200.0, ep_step: 200
47
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 2, ep_reward: 200.0, ep_step: 200
48
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 3, ep_reward: 200.0, ep_step: 200
49
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 4, ep_reward: 200.0, ep_step: 200
50
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 5, ep_reward: 200.0, ep_step: 200
51
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 6, ep_reward: 200.0, ep_step: 200
52
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 7, ep_reward: 200.0, ep_step: 200
53
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 8, ep_reward: 200.0, ep_step: 200
54
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 9, ep_reward: 200.0, ep_step: 200
55
+ 2023-05-16 11:53:05 - SimpleLog - INFO: - Finish testing! total time consumed: 0.24s
CartPole-v1/{Test_CartPole-v1_DoubleDQN_mp_20230406-160410/models/checkpoint.pth → Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/interact/events.out.tfevents.1684209185.JMac.local.52313.0} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cd537f6ab4ac0ff27caa323076685e3a3fff04b064dbdfc509baae76e9a9406
3
- size 272407
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:350321a3436f2c600f7c9a0f8ba02ba28a6ad9c6e949481d6926ca5daf32d79e
3
+ size 1056
CartPole-v1/{Test_CartPole-v1_DoubleDQN_ray_20230406-170348/models/checkpoint.pt → Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/model/events.out.tfevents.1684209185.JMac.local.52313.1} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d3a724152cea263dec5b58d80bee101405e7b3268a34c265d414ebbd771c5ac
3
- size 272407
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25447bc5c9337e3d33f456f66eb8230e83dcc359ad3630edde9c63f21baefd4d
3
+ size 40
CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/config.yaml DELETED
@@ -1,40 +0,0 @@
1
- general_cfg:
2
- algo_name: DoubleDQN
3
- device: cuda
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: false
8
- load_path: Train_CartPole-v1_DQN_20221026-054757
9
- max_steps: 200
10
- mode: train
11
- save_fig: true
12
- seed: 1
13
- show_fig: false
14
- test_eps: 10
15
- train_eps: 100
16
- algo_cfg:
17
- batch_size: 64
18
- buffer_size: 100000
19
- epsilon_decay: 500
20
- epsilon_end: 0.01
21
- epsilon_start: 0.95
22
- gamma: 0.99
23
- lr: 0.0001
24
- target_update: 4
25
- value_layers:
26
- - activation: relu
27
- layer_dim:
28
- - n_states
29
- - 256
30
- layer_type: linear
31
- - activation: relu
32
- layer_dim:
33
- - 256
34
- - 256
35
- layer_type: linear
36
- - activation: none
37
- layer_dim:
38
- - 256
39
- - n_actions
40
- layer_type: linear
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/logs/log.txt DELETED
@@ -1,116 +0,0 @@
1
- 2022-11-22 12:55:16 - r - INFO: - n_states: 4, n_actions: 2
2
- 2022-11-22 12:55:19 - r - INFO: - Start training!
3
- 2022-11-22 12:55:19 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cuda
4
- 2022-11-22 12:55:19 - r - INFO: - Episode: 1/100, Reward: 18.000, Step: 18
5
- 2022-11-22 12:55:19 - r - INFO: - Episode: 2/100, Reward: 35.000, Step: 35
6
- 2022-11-22 12:55:19 - r - INFO: - Episode: 3/100, Reward: 13.000, Step: 13
7
- 2022-11-22 12:55:19 - r - INFO: - Episode: 4/100, Reward: 32.000, Step: 32
8
- 2022-11-22 12:55:19 - r - INFO: - Episode: 5/100, Reward: 16.000, Step: 16
9
- 2022-11-22 12:55:19 - r - INFO: - Current episode 5 has the best eval reward: 9.100
10
- 2022-11-22 12:55:19 - r - INFO: - Episode: 6/100, Reward: 9.000, Step: 9
11
- 2022-11-22 12:55:19 - r - INFO: - Episode: 7/100, Reward: 12.000, Step: 12
12
- 2022-11-22 12:55:19 - r - INFO: - Episode: 8/100, Reward: 16.000, Step: 16
13
- 2022-11-22 12:55:19 - r - INFO: - Episode: 9/100, Reward: 14.000, Step: 14
14
- 2022-11-22 12:55:19 - r - INFO: - Episode: 10/100, Reward: 12.000, Step: 12
15
- 2022-11-22 12:55:19 - r - INFO: - Current episode 10 has the best eval reward: 9.200
16
- 2022-11-22 12:55:19 - r - INFO: - Episode: 11/100, Reward: 13.000, Step: 13
17
- 2022-11-22 12:55:19 - r - INFO: - Episode: 12/100, Reward: 14.000, Step: 14
18
- 2022-11-22 12:55:19 - r - INFO: - Episode: 13/100, Reward: 19.000, Step: 19
19
- 2022-11-22 12:55:19 - r - INFO: - Episode: 14/100, Reward: 9.000, Step: 9
20
- 2022-11-22 12:55:19 - r - INFO: - Episode: 15/100, Reward: 15.000, Step: 15
21
- 2022-11-22 12:55:19 - r - INFO: - Current episode 15 has the best eval reward: 9.300
22
- 2022-11-22 12:55:19 - r - INFO: - Episode: 16/100, Reward: 12.000, Step: 12
23
- 2022-11-22 12:55:19 - r - INFO: - Episode: 17/100, Reward: 11.000, Step: 11
24
- 2022-11-22 12:55:19 - r - INFO: - Episode: 18/100, Reward: 9.000, Step: 9
25
- 2022-11-22 12:55:19 - r - INFO: - Episode: 19/100, Reward: 13.000, Step: 13
26
- 2022-11-22 12:55:19 - r - INFO: - Episode: 20/100, Reward: 17.000, Step: 17
27
- 2022-11-22 12:55:19 - r - INFO: - Episode: 21/100, Reward: 13.000, Step: 13
28
- 2022-11-22 12:55:19 - r - INFO: - Episode: 22/100, Reward: 15.000, Step: 15
29
- 2022-11-22 12:55:19 - r - INFO: - Episode: 23/100, Reward: 22.000, Step: 22
30
- 2022-11-22 12:55:20 - r - INFO: - Episode: 24/100, Reward: 26.000, Step: 26
31
- 2022-11-22 12:55:20 - r - INFO: - Episode: 25/100, Reward: 19.000, Step: 19
32
- 2022-11-22 12:55:20 - r - INFO: - Current episode 25 has the best eval reward: 9.800
33
- 2022-11-22 12:55:20 - r - INFO: - Episode: 26/100, Reward: 10.000, Step: 10
34
- 2022-11-22 12:55:20 - r - INFO: - Episode: 27/100, Reward: 10.000, Step: 10
35
- 2022-11-22 12:55:20 - r - INFO: - Episode: 28/100, Reward: 11.000, Step: 11
36
- 2022-11-22 12:55:20 - r - INFO: - Episode: 29/100, Reward: 13.000, Step: 13
37
- 2022-11-22 12:55:20 - r - INFO: - Episode: 30/100, Reward: 16.000, Step: 16
38
- 2022-11-22 12:55:20 - r - INFO: - Episode: 31/100, Reward: 13.000, Step: 13
39
- 2022-11-22 12:55:20 - r - INFO: - Episode: 32/100, Reward: 15.000, Step: 15
40
- 2022-11-22 12:55:20 - r - INFO: - Episode: 33/100, Reward: 12.000, Step: 12
41
- 2022-11-22 12:55:20 - r - INFO: - Episode: 34/100, Reward: 13.000, Step: 13
42
- 2022-11-22 12:55:20 - r - INFO: - Episode: 35/100, Reward: 13.000, Step: 13
43
- 2022-11-22 12:55:20 - r - INFO: - Episode: 36/100, Reward: 11.000, Step: 11
44
- 2022-11-22 12:55:20 - r - INFO: - Episode: 37/100, Reward: 9.000, Step: 9
45
- 2022-11-22 12:55:20 - r - INFO: - Episode: 38/100, Reward: 9.000, Step: 9
46
- 2022-11-22 12:55:20 - r - INFO: - Episode: 39/100, Reward: 10.000, Step: 10
47
- 2022-11-22 12:55:20 - r - INFO: - Episode: 40/100, Reward: 14.000, Step: 14
48
- 2022-11-22 12:55:20 - r - INFO: - Episode: 41/100, Reward: 9.000, Step: 9
49
- 2022-11-22 12:55:20 - r - INFO: - Episode: 42/100, Reward: 10.000, Step: 10
50
- 2022-11-22 12:55:20 - r - INFO: - Episode: 43/100, Reward: 9.000, Step: 9
51
- 2022-11-22 12:55:20 - r - INFO: - Episode: 44/100, Reward: 14.000, Step: 14
52
- 2022-11-22 12:55:20 - r - INFO: - Episode: 45/100, Reward: 10.000, Step: 10
53
- 2022-11-22 12:55:20 - r - INFO: - Episode: 46/100, Reward: 19.000, Step: 19
54
- 2022-11-22 12:55:20 - r - INFO: - Episode: 47/100, Reward: 10.000, Step: 10
55
- 2022-11-22 12:55:20 - r - INFO: - Episode: 48/100, Reward: 14.000, Step: 14
56
- 2022-11-22 12:55:20 - r - INFO: - Episode: 49/100, Reward: 18.000, Step: 18
57
- 2022-11-22 12:55:20 - r - INFO: - Episode: 50/100, Reward: 32.000, Step: 32
58
- 2022-11-22 12:55:20 - r - INFO: - Current episode 50 has the best eval reward: 24.300
59
- 2022-11-22 12:55:21 - r - INFO: - Episode: 51/100, Reward: 17.000, Step: 17
60
- 2022-11-22 12:55:21 - r - INFO: - Episode: 52/100, Reward: 15.000, Step: 15
61
- 2022-11-22 12:55:21 - r - INFO: - Episode: 53/100, Reward: 18.000, Step: 18
62
- 2022-11-22 12:55:21 - r - INFO: - Episode: 54/100, Reward: 14.000, Step: 14
63
- 2022-11-22 12:55:21 - r - INFO: - Episode: 55/100, Reward: 22.000, Step: 22
64
- 2022-11-22 12:55:21 - r - INFO: - Episode: 56/100, Reward: 14.000, Step: 14
65
- 2022-11-22 12:55:21 - r - INFO: - Episode: 57/100, Reward: 21.000, Step: 21
66
- 2022-11-22 12:55:21 - r - INFO: - Episode: 58/100, Reward: 21.000, Step: 21
67
- 2022-11-22 12:55:21 - r - INFO: - Episode: 59/100, Reward: 23.000, Step: 23
68
- 2022-11-22 12:55:21 - r - INFO: - Episode: 60/100, Reward: 21.000, Step: 21
69
- 2022-11-22 12:55:21 - r - INFO: - Episode: 61/100, Reward: 21.000, Step: 21
70
- 2022-11-22 12:55:21 - r - INFO: - Episode: 62/100, Reward: 35.000, Step: 35
71
- 2022-11-22 12:55:21 - r - INFO: - Episode: 63/100, Reward: 23.000, Step: 23
72
- 2022-11-22 12:55:21 - r - INFO: - Episode: 64/100, Reward: 27.000, Step: 27
73
- 2022-11-22 12:55:21 - r - INFO: - Episode: 65/100, Reward: 24.000, Step: 24
74
- 2022-11-22 12:55:21 - r - INFO: - Current episode 65 has the best eval reward: 29.700
75
- 2022-11-22 12:55:21 - r - INFO: - Episode: 66/100, Reward: 28.000, Step: 28
76
- 2022-11-22 12:55:21 - r - INFO: - Episode: 67/100, Reward: 30.000, Step: 30
77
- 2022-11-22 12:55:22 - r - INFO: - Episode: 68/100, Reward: 33.000, Step: 33
78
- 2022-11-22 12:55:22 - r - INFO: - Episode: 69/100, Reward: 33.000, Step: 33
79
- 2022-11-22 12:55:22 - r - INFO: - Episode: 70/100, Reward: 26.000, Step: 26
80
- 2022-11-22 12:55:22 - r - INFO: - Current episode 70 has the best eval reward: 34.400
81
- 2022-11-22 12:55:22 - r - INFO: - Episode: 71/100, Reward: 37.000, Step: 37
82
- 2022-11-22 12:55:22 - r - INFO: - Episode: 72/100, Reward: 28.000, Step: 28
83
- 2022-11-22 12:55:22 - r - INFO: - Episode: 73/100, Reward: 30.000, Step: 30
84
- 2022-11-22 12:55:22 - r - INFO: - Episode: 74/100, Reward: 41.000, Step: 41
85
- 2022-11-22 12:55:22 - r - INFO: - Episode: 75/100, Reward: 45.000, Step: 45
86
- 2022-11-22 12:55:22 - r - INFO: - Current episode 75 has the best eval reward: 35.600
87
- 2022-11-22 12:55:23 - r - INFO: - Episode: 76/100, Reward: 68.000, Step: 68
88
- 2022-11-22 12:55:23 - r - INFO: - Episode: 77/100, Reward: 33.000, Step: 33
89
- 2022-11-22 12:55:23 - r - INFO: - Episode: 78/100, Reward: 46.000, Step: 46
90
- 2022-11-22 12:55:23 - r - INFO: - Episode: 79/100, Reward: 54.000, Step: 54
91
- 2022-11-22 12:55:23 - r - INFO: - Episode: 80/100, Reward: 37.000, Step: 37
92
- 2022-11-22 12:55:23 - r - INFO: - Current episode 80 has the best eval reward: 42.800
93
- 2022-11-22 12:55:23 - r - INFO: - Episode: 81/100, Reward: 43.000, Step: 43
94
- 2022-11-22 12:55:23 - r - INFO: - Episode: 82/100, Reward: 79.000, Step: 79
95
- 2022-11-22 12:55:23 - r - INFO: - Episode: 83/100, Reward: 36.000, Step: 36
96
- 2022-11-22 12:55:24 - r - INFO: - Episode: 84/100, Reward: 58.000, Step: 58
97
- 2022-11-22 12:55:24 - r - INFO: - Episode: 85/100, Reward: 42.000, Step: 42
98
- 2022-11-22 12:55:24 - r - INFO: - Current episode 85 has the best eval reward: 62.100
99
- 2022-11-22 12:55:24 - r - INFO: - Episode: 86/100, Reward: 136.000, Step: 136
100
- 2022-11-22 12:55:24 - r - INFO: - Episode: 87/100, Reward: 57.000, Step: 57
101
- 2022-11-22 12:55:24 - r - INFO: - Episode: 88/100, Reward: 46.000, Step: 46
102
- 2022-11-22 12:55:25 - r - INFO: - Episode: 89/100, Reward: 105.000, Step: 105
103
- 2022-11-22 12:55:25 - r - INFO: - Episode: 90/100, Reward: 63.000, Step: 63
104
- 2022-11-22 12:55:25 - r - INFO: - Current episode 90 has the best eval reward: 76.600
105
- 2022-11-22 12:55:25 - r - INFO: - Episode: 91/100, Reward: 84.000, Step: 84
106
- 2022-11-22 12:55:26 - r - INFO: - Episode: 92/100, Reward: 136.000, Step: 136
107
- 2022-11-22 12:55:26 - r - INFO: - Episode: 93/100, Reward: 121.000, Step: 121
108
- 2022-11-22 12:55:26 - r - INFO: - Episode: 94/100, Reward: 96.000, Step: 96
109
- 2022-11-22 12:55:26 - r - INFO: - Episode: 95/100, Reward: 106.000, Step: 106
110
- 2022-11-22 12:55:27 - r - INFO: - Current episode 95 has the best eval reward: 187.300
111
- 2022-11-22 12:55:27 - r - INFO: - Episode: 96/100, Reward: 200.000, Step: 200
112
- 2022-11-22 12:55:28 - r - INFO: - Episode: 97/100, Reward: 200.000, Step: 200
113
- 2022-11-22 12:55:28 - r - INFO: - Episode: 98/100, Reward: 113.000, Step: 113
114
- 2022-11-22 12:55:28 - r - INFO: - Episode: 99/100, Reward: 113.000, Step: 113
115
- 2022-11-22 12:55:29 - r - INFO: - Episode: 100/100, Reward: 132.000, Step: 132
116
- 2022-11-22 12:55:29 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/learning_curve.png DELETED
Binary file (47.3 kB)
 
CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/res.csv DELETED
@@ -1,101 +0,0 @@
1
- episodes,rewards,steps
2
- 0,18.0,18
3
- 1,35.0,35
4
- 2,13.0,13
5
- 3,32.0,32
6
- 4,16.0,16
7
- 5,9.0,9
8
- 6,12.0,12
9
- 7,16.0,16
10
- 8,14.0,14
11
- 9,12.0,12
12
- 10,13.0,13
13
- 11,14.0,14
14
- 12,19.0,19
15
- 13,9.0,9
16
- 14,15.0,15
17
- 15,12.0,12
18
- 16,11.0,11
19
- 17,9.0,9
20
- 18,13.0,13
21
- 19,17.0,17
22
- 20,13.0,13
23
- 21,15.0,15
24
- 22,22.0,22
25
- 23,26.0,26
26
- 24,19.0,19
27
- 25,10.0,10
28
- 26,10.0,10
29
- 27,11.0,11
30
- 28,13.0,13
31
- 29,16.0,16
32
- 30,13.0,13
33
- 31,15.0,15
34
- 32,12.0,12
35
- 33,13.0,13
36
- 34,13.0,13
37
- 35,11.0,11
38
- 36,9.0,9
39
- 37,9.0,9
40
- 38,10.0,10
41
- 39,14.0,14
42
- 40,9.0,9
43
- 41,10.0,10
44
- 42,9.0,9
45
- 43,14.0,14
46
- 44,10.0,10
47
- 45,19.0,19
48
- 46,10.0,10
49
- 47,14.0,14
50
- 48,18.0,18
51
- 49,32.0,32
52
- 50,17.0,17
53
- 51,15.0,15
54
- 52,18.0,18
55
- 53,14.0,14
56
- 54,22.0,22
57
- 55,14.0,14
58
- 56,21.0,21
59
- 57,21.0,21
60
- 58,23.0,23
61
- 59,21.0,21
62
- 60,21.0,21
63
- 61,35.0,35
64
- 62,23.0,23
65
- 63,27.0,27
66
- 64,24.0,24
67
- 65,28.0,28
68
- 66,30.0,30
69
- 67,33.0,33
70
- 68,33.0,33
71
- 69,26.0,26
72
- 70,37.0,37
73
- 71,28.0,28
74
- 72,30.0,30
75
- 73,41.0,41
76
- 74,45.0,45
77
- 75,68.0,68
78
- 76,33.0,33
79
- 77,46.0,46
80
- 78,54.0,54
81
- 79,37.0,37
82
- 80,43.0,43
83
- 81,79.0,79
84
- 82,36.0,36
85
- 83,58.0,58
86
- 84,42.0,42
87
- 85,136.0,136
88
- 86,57.0,57
89
- 87,46.0,46
90
- 88,105.0,105
91
- 89,63.0,63
92
- 90,84.0,84
93
- 91,136.0,136
94
- 92,121.0,121
95
- 93,96.0,96
96
- 94,106.0,106
97
- 95,200.0,200
98
- 96,200.0,200
99
- 97,113.0,113
100
- 98,113.0,113
101
- 99,132.0,132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/logs/log.txt DELETED
@@ -1,42 +0,0 @@
1
- 2023-04-06 16:00:28 - r - INFO: - Hyperparameters:
2
- 2023-04-06 16:00:28 - r - INFO: - ================================================================================
3
- 2023-04-06 16:00:28 - r - INFO: - Name Value Type
4
- 2023-04-06 16:00:28 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-04-06 16:00:28 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-06 16:00:28 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-06 16:00:28 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-06 16:00:28 - r - INFO: - render_mode human <class 'str'>
9
- 2023-04-06 16:00:28 - r - INFO: - algo_name DoubleDQN <class 'str'>
10
- 2023-04-06 16:00:28 - r - INFO: - mode train <class 'str'>
11
- 2023-04-06 16:00:28 - r - INFO: - mp_backend mp <class 'str'>
12
- 2023-04-06 16:00:28 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-06 16:00:28 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-06 16:00:28 - r - INFO: - train_eps 400 <class 'int'>
15
- 2023-04-06 16:00:28 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-06 16:00:28 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-06 16:00:28 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-06 16:00:28 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-06 16:00:28 - r - INFO: - load_checkpoint 0 <class 'bool'>
20
- 2023-04-06 16:00:28 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
21
- 2023-04-06 16:00:28 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-06 16:00:28 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-06 16:00:28 - r - INFO: - n_workers 2 <class 'int'>
24
- 2023-04-06 16:00:28 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-06 16:00:28 - r - INFO: - epsilon_end 0.01 <class 'float'>
26
- 2023-04-06 16:00:28 - r - INFO: - epsilon_decay 500 <class 'int'>
27
- 2023-04-06 16:00:28 - r - INFO: - gamma 0.95 <class 'float'>
28
- 2023-04-06 16:00:28 - r - INFO: - lr 0.0001 <class 'float'>
29
- 2023-04-06 16:00:28 - r - INFO: - buffer_size 100000 <class 'int'>
30
- 2023-04-06 16:00:28 - r - INFO: - batch_size 64 <class 'int'>
31
- 2023-04-06 16:00:28 - r - INFO: - target_update 4 <class 'int'>
32
- 2023-04-06 16:00:28 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
33
- 2023-04-06 16:00:28 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028 <class 'str'>
34
- 2023-04-06 16:00:28 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/results <class 'str'>
35
- 2023-04-06 16:00:28 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/logs <class 'str'>
36
- 2023-04-06 16:00:28 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/traj <class 'str'>
37
- 2023-04-06 16:00:28 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/videos <class 'str'>
38
- 2023-04-06 16:00:28 - r - INFO: - ================================================================================
39
- 2023-04-06 16:00:28 - r - INFO: - n_states: 4, n_actions: 2
40
- 2023-04-06 16:00:28 - r - INFO: - Start training!
41
- 2023-04-06 16:00:28 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu
42
- 2023-04-06 16:01:56 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/models/checkpoint.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cd537f6ab4ac0ff27caa323076685e3a3fff04b064dbdfc509baae76e9a9406
3
- size 272407
 
 
 
 
CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/learning_curve.png DELETED
Binary file (44.9 kB)
 
CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/res.csv DELETED
@@ -1,402 +0,0 @@
1
- episodes,rewards
2
- 0,18.0
3
- 1,39.0
4
- 2,18.0
5
- 3,28.0
6
- 4,15.0
7
- 5,12.0
8
- 6,15.0
9
- 7,40.0
10
- 8,31.0
11
- 9,17.0
12
- 10,17.0
13
- 11,31.0
14
- 12,13.0
15
- 13,14.0
16
- 14,15.0
17
- 15,21.0
18
- 16,9.0
19
- 17,10.0
20
- 18,22.0
21
- 19,19.0
22
- 20,11.0
23
- 21,13.0
24
- 22,15.0
25
- 23,20.0
26
- 24,14.0
27
- 25,12.0
28
- 26,11.0
29
- 27,10.0
30
- 28,14.0
31
- 29,12.0
32
- 30,9.0
33
- 31,10.0
34
- 32,16.0
35
- 33,13.0
36
- 34,12.0
37
- 35,15.0
38
- 36,10.0
39
- 37,17.0
40
- 38,10.0
41
- 39,10.0
42
- 40,13.0
43
- 41,12.0
44
- 42,10.0
45
- 43,12.0
46
- 44,12.0
47
- 45,12.0
48
- 46,9.0
49
- 47,10.0
50
- 48,10.0
51
- 49,11.0
52
- 50,9.0
53
- 51,13.0
54
- 52,9.0
55
- 53,10.0
56
- 54,10.0
57
- 55,11.0
58
- 56,10.0
59
- 57,10.0
60
- 58,14.0
61
- 59,9.0
62
- 60,10.0
63
- 61,10.0
64
- 62,9.0
65
- 63,12.0
66
- 64,10.0
67
- 65,13.0
68
- 66,12.0
69
- 67,15.0
70
- 68,10.0
71
- 69,13.0
72
- 70,14.0
73
- 71,10.0
74
- 72,30.0
75
- 73,11.0
76
- 74,9.0
77
- 75,11.0
78
- 76,9.0
79
- 77,11.0
80
- 78,9.0
81
- 79,11.0
82
- 80,16.0
83
- 81,21.0
84
- 82,10.0
85
- 83,9.0
86
- 84,14.0
87
- 85,9.0
88
- 86,13.0
89
- 87,9.0
90
- 88,13.0
91
- 89,17.0
92
- 90,26.0
93
- 91,32.0
94
- 92,14.0
95
- 93,17.0
96
- 94,11.0
97
- 95,34.0
98
- 96,10.0
99
- 97,23.0
100
- 98,14.0
101
- 99,37.0
102
- 100,27.0
103
- 101,34.0
104
- 102,23.0
105
- 103,59.0
106
- 104,9.0
107
- 105,91.0
108
- 106,61.0
109
- 107,47.0
110
- 108,21.0
111
- 109,27.0
112
- 110,22.0
113
- 111,30.0
114
- 112,22.0
115
- 113,29.0
116
- 114,25.0
117
- 115,68.0
118
- 116,102.0
119
- 117,54.0
120
- 118,46.0
121
- 119,34.0
122
- 120,61.0
123
- 121,81.0
124
- 122,55.0
125
- 123,67.0
126
- 124,71.0
127
- 125,46.0
128
- 126,88.0
129
- 127,90.0
130
- 128,68.0
131
- 129,114.0
132
- 130,66.0
133
- 131,102.0
134
- 132,100.0
135
- 133,88.0
136
- 134,80.0
137
- 135,81.0
138
- 136,49.0
139
- 137,123.0
140
- 138,197.0
141
- 139,146.0
142
- 140,93.0
143
- 141,135.0
144
- 142,117.0
145
- 143,104.0
146
- 144,168.0
147
- 145,114.0
148
- 146,82.0
149
- 147,153.0
150
- 148,106.0
151
- 149,140.0
152
- 150,100.0
153
- 151,120.0
154
- 152,117.0
155
- 153,173.0
156
- 154,200.0
157
- 155,142.0
158
- 156,180.0
159
- 157,156.0
160
- 158,149.0
161
- 159,173.0
162
- 160,187.0
163
- 161,200.0
164
- 162,188.0
165
- 163,156.0
166
- 164,170.0
167
- 165,158.0
168
- 166,200.0
169
- 167,152.0
170
- 168,194.0
171
- 169,196.0
172
- 170,189.0
173
- 171,200.0
174
- 172,173.0
175
- 173,200.0
176
- 174,154.0
177
- 175,200.0
178
- 176,200.0
179
- 177,200.0
180
- 178,189.0
181
- 179,194.0
182
- 180,199.0
183
- 181,200.0
184
- 182,200.0
185
- 183,189.0
186
- 184,200.0
187
- 185,200.0
188
- 186,200.0
189
- 187,200.0
190
- 188,200.0
191
- 189,200.0
192
- 190,200.0
193
- 191,200.0
194
- 192,200.0
195
- 193,200.0
196
- 194,200.0
197
- 195,189.0
198
- 196,198.0
199
- 197,195.0
200
- 198,199.0
201
- 199,200.0
202
- 200,200.0
203
- 201,200.0
204
- 202,198.0
205
- 203,196.0
206
- 204,200.0
207
- 205,200.0
208
- 206,200.0
209
- 207,200.0
210
- 208,200.0
211
- 209,200.0
212
- 210,195.0
213
- 211,198.0
214
- 212,200.0
215
- 213,200.0
216
- 214,200.0
217
- 215,200.0
218
- 216,200.0
219
- 217,194.0
220
- 218,200.0
221
- 219,200.0
222
- 220,200.0
223
- 221,200.0
224
- 222,200.0
225
- 223,200.0
226
- 224,197.0
227
- 225,200.0
228
- 226,200.0
229
- 227,200.0
230
- 228,199.0
231
- 229,200.0
232
- 230,200.0
233
- 231,198.0
234
- 232,200.0
235
- 233,200.0
236
- 234,197.0
237
- 235,200.0
238
- 236,200.0
239
- 237,200.0
240
- 238,200.0
241
- 239,196.0
242
- 240,200.0
243
- 241,200.0
244
- 242,195.0
245
- 243,200.0
246
- 244,200.0
247
- 245,200.0
248
- 246,200.0
249
- 247,200.0
250
- 248,200.0
251
- 249,200.0
252
- 250,200.0
253
- 251,200.0
254
- 252,200.0
255
- 253,200.0
256
- 254,200.0
257
- 255,199.0
258
- 256,200.0
259
- 257,200.0
260
- 258,200.0
261
- 259,200.0
262
- 260,200.0
263
- 261,200.0
264
- 262,200.0
265
- 263,200.0
266
- 264,200.0
267
- 265,200.0
268
- 266,200.0
269
- 267,200.0
270
- 268,200.0
271
- 269,200.0
272
- 270,200.0
273
- 271,200.0
274
- 272,200.0
275
- 273,200.0
276
- 274,200.0
277
- 275,200.0
278
- 276,200.0
279
- 277,200.0
280
- 278,200.0
281
- 279,200.0
282
- 280,200.0
283
- 281,200.0
284
- 282,200.0
285
- 283,200.0
286
- 284,200.0
287
- 285,200.0
288
- 286,200.0
289
- 287,200.0
290
- 288,200.0
291
- 289,200.0
292
- 290,200.0
293
- 291,200.0
294
- 292,200.0
295
- 293,200.0
296
- 294,200.0
297
- 295,200.0
298
- 296,200.0
299
- 297,200.0
300
- 298,200.0
301
- 299,200.0
302
- 300,200.0
303
- 301,200.0
304
- 302,200.0
305
- 303,200.0
306
- 304,200.0
307
- 305,200.0
308
- 306,200.0
309
- 307,200.0
310
- 308,200.0
311
- 309,200.0
312
- 310,200.0
313
- 311,200.0
314
- 312,200.0
315
- 313,200.0
316
- 314,200.0
317
- 315,200.0
318
- 316,200.0
319
- 317,200.0
320
- 318,200.0
321
- 319,200.0
322
- 320,200.0
323
- 321,200.0
324
- 322,200.0
325
- 323,200.0
326
- 324,200.0
327
- 325,200.0
328
- 326,200.0
329
- 327,200.0
330
- 328,200.0
331
- 329,200.0
332
- 330,200.0
333
- 331,200.0
334
- 332,200.0
335
- 333,200.0
336
- 334,200.0
337
- 335,200.0
338
- 336,200.0
339
- 337,200.0
340
- 338,200.0
341
- 339,200.0
342
- 340,200.0
343
- 341,200.0
344
- 342,200.0
345
- 343,200.0
346
- 344,200.0
347
- 345,200.0
348
- 346,200.0
349
- 347,200.0
350
- 348,200.0
351
- 349,200.0
352
- 350,200.0
353
- 351,200.0
354
- 352,200.0
355
- 353,200.0
356
- 354,200.0
357
- 355,200.0
358
- 356,200.0
359
- 357,200.0
360
- 358,200.0
361
- 359,200.0
362
- 360,200.0
363
- 361,200.0
364
- 362,200.0
365
- 363,200.0
366
- 364,200.0
367
- 365,200.0
368
- 366,200.0
369
- 367,200.0
370
- 368,200.0
371
- 369,200.0
372
- 370,200.0
373
- 371,200.0
374
- 372,200.0
375
- 373,200.0
376
- 374,200.0
377
- 375,200.0
378
- 376,200.0
379
- 377,200.0
380
- 378,200.0
381
- 379,200.0
382
- 380,200.0
383
- 381,200.0
384
- 382,200.0
385
- 383,200.0
386
- 384,200.0
387
- 385,200.0
388
- 386,200.0
389
- 387,200.0
390
- 388,200.0
391
- 389,200.0
392
- 390,200.0
393
- 391,200.0
394
- 392,200.0
395
- 393,200.0
396
- 394,200.0
397
- 395,200.0
398
- 396,200.0
399
- 397,200.0
400
- 398,200.0
401
- 399,200.0
402
- 400,200.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/logs/log.txt DELETED
@@ -1,42 +0,0 @@
1
- 2023-04-06 16:29:38 - r - INFO: - Hyperparameters:
2
- 2023-04-06 16:29:38 - r - INFO: - ================================================================================
3
- 2023-04-06 16:29:38 - r - INFO: - Name Value Type
4
- 2023-04-06 16:29:38 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-04-06 16:29:38 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-06 16:29:38 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-06 16:29:38 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-06 16:29:38 - r - INFO: - render_mode human <class 'str'>
9
- 2023-04-06 16:29:38 - r - INFO: - algo_name DoubleDQN <class 'str'>
10
- 2023-04-06 16:29:38 - r - INFO: - mode train <class 'str'>
11
- 2023-04-06 16:29:38 - r - INFO: - mp_backend ray <class 'str'>
12
- 2023-04-06 16:29:38 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-06 16:29:38 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-06 16:29:38 - r - INFO: - train_eps 400 <class 'int'>
15
- 2023-04-06 16:29:38 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-06 16:29:38 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-06 16:29:38 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-06 16:29:38 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-06 16:29:38 - r - INFO: - load_checkpoint 0 <class 'bool'>
20
- 2023-04-06 16:29:38 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
21
- 2023-04-06 16:29:38 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-06 16:29:38 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-06 16:29:38 - r - INFO: - n_workers 2 <class 'int'>
24
- 2023-04-06 16:29:38 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-06 16:29:38 - r - INFO: - epsilon_end 0.01 <class 'float'>
26
- 2023-04-06 16:29:38 - r - INFO: - epsilon_decay 500 <class 'int'>
27
- 2023-04-06 16:29:38 - r - INFO: - gamma 0.95 <class 'float'>
28
- 2023-04-06 16:29:38 - r - INFO: - lr 0.0001 <class 'float'>
29
- 2023-04-06 16:29:38 - r - INFO: - buffer_size 100000 <class 'int'>
30
- 2023-04-06 16:29:38 - r - INFO: - batch_size 64 <class 'int'>
31
- 2023-04-06 16:29:38 - r - INFO: - target_update 4 <class 'int'>
32
- 2023-04-06 16:29:38 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
33
- 2023-04-06 16:29:38 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938 <class 'str'>
34
- 2023-04-06 16:29:38 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/results <class 'str'>
35
- 2023-04-06 16:29:38 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/logs <class 'str'>
36
- 2023-04-06 16:29:38 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/traj <class 'str'>
37
- 2023-04-06 16:29:38 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/videos <class 'str'>
38
- 2023-04-06 16:29:38 - r - INFO: - ================================================================================
39
- 2023-04-06 16:29:40 - r - INFO: - n_states: 4, n_actions: 2
40
- 2023-04-06 16:29:40 - r - INFO: - Start training!
41
- 2023-04-06 16:29:40 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu
42
- 2023-04-06 16:37:19 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/models/checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d3a724152cea263dec5b58d80bee101405e7b3268a34c265d414ebbd771c5ac
3
- size 272407
 
 
 
 
CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/learning_curve.png DELETED
Binary file (58.2 kB)
 
CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/res.csv DELETED
@@ -1,401 +0,0 @@
1
- episodes,rewards
2
- 0,16.0
3
- 1,18.0
4
- 2,18.0
5
- 3,28.0
6
- 4,16.0
7
- 5,15.0
8
- 6,14.0
9
- 7,15.0
10
- 8,11.0
11
- 9,49.0
12
- 10,31.0
13
- 11,31.0
14
- 12,11.0
15
- 13,14.0
16
- 14,13.0
17
- 15,21.0
18
- 16,9.0
19
- 17,28.0
20
- 18,10.0
21
- 19,19.0
22
- 20,15.0
23
- 21,12.0
24
- 22,13.0
25
- 23,18.0
26
- 24,15.0
27
- 25,14.0
28
- 26,13.0
29
- 27,11.0
30
- 28,12.0
31
- 29,14.0
32
- 30,12.0
33
- 31,10.0
34
- 32,9.0
35
- 33,13.0
36
- 34,16.0
37
- 35,17.0
38
- 36,15.0
39
- 37,12.0
40
- 38,10.0
41
- 39,18.0
42
- 40,10.0
43
- 41,11.0
44
- 42,21.0
45
- 43,16.0
46
- 44,15.0
47
- 45,19.0
48
- 46,15.0
49
- 47,20.0
50
- 48,15.0
51
- 49,14.0
52
- 50,12.0
53
- 51,10.0
54
- 52,12.0
55
- 53,11.0
56
- 54,12.0
57
- 55,10.0
58
- 56,11.0
59
- 57,9.0
60
- 58,15.0
61
- 59,10.0
62
- 60,15.0
63
- 61,11.0
64
- 62,12.0
65
- 63,15.0
66
- 64,13.0
67
- 65,11.0
68
- 66,12.0
69
- 67,12.0
70
- 68,10.0
71
- 69,11.0
72
- 70,11.0
73
- 71,9.0
74
- 72,9.0
75
- 73,11.0
76
- 74,9.0
77
- 75,10.0
78
- 76,9.0
79
- 77,10.0
80
- 78,9.0
81
- 79,10.0
82
- 80,10.0
83
- 81,9.0
84
- 82,15.0
85
- 83,10.0
86
- 84,10.0
87
- 85,11.0
88
- 86,11.0
89
- 87,13.0
90
- 88,13.0
91
- 89,9.0
92
- 90,16.0
93
- 91,12.0
94
- 92,15.0
95
- 93,9.0
96
- 94,10.0
97
- 95,10.0
98
- 96,11.0
99
- 97,11.0
100
- 98,9.0
101
- 99,12.0
102
- 100,16.0
103
- 101,10.0
104
- 102,15.0
105
- 103,9.0
106
- 104,9.0
107
- 105,10.0
108
- 106,11.0
109
- 107,10.0
110
- 108,13.0
111
- 109,11.0
112
- 110,9.0
113
- 111,14.0
114
- 112,10.0
115
- 113,12.0
116
- 114,10.0
117
- 115,10.0
118
- 116,10.0
119
- 117,13.0
120
- 118,10.0
121
- 119,11.0
122
- 120,9.0
123
- 121,11.0
124
- 122,10.0
125
- 123,10.0
126
- 124,12.0
127
- 125,41.0
128
- 126,9.0
129
- 127,31.0
130
- 128,14.0
131
- 129,14.0
132
- 130,14.0
133
- 131,11.0
134
- 132,14.0
135
- 133,12.0
136
- 134,16.0
137
- 135,11.0
138
- 136,12.0
139
- 137,16.0
140
- 138,12.0
141
- 139,14.0
142
- 140,12.0
143
- 141,18.0
144
- 142,15.0
145
- 143,18.0
146
- 144,14.0
147
- 145,14.0
148
- 146,18.0
149
- 147,17.0
150
- 148,24.0
151
- 149,13.0
152
- 150,18.0
153
- 151,15.0
154
- 152,20.0
155
- 153,17.0
156
- 154,16.0
157
- 155,17.0
158
- 156,14.0
159
- 157,26.0
160
- 158,26.0
161
- 159,42.0
162
- 160,25.0
163
- 161,58.0
164
- 162,48.0
165
- 163,48.0
166
- 164,61.0
167
- 165,115.0
168
- 166,156.0
169
- 167,56.0
170
- 168,61.0
171
- 169,83.0
172
- 170,36.0
173
- 171,47.0
174
- 172,31.0
175
- 173,27.0
176
- 174,50.0
177
- 175,34.0
178
- 176,32.0
179
- 177,49.0
180
- 178,30.0
181
- 179,50.0
182
- 180,34.0
183
- 181,27.0
184
- 182,49.0
185
- 183,35.0
186
- 184,52.0
187
- 185,35.0
188
- 186,47.0
189
- 187,50.0
190
- 188,35.0
191
- 189,54.0
192
- 190,33.0
193
- 191,50.0
194
- 192,63.0
195
- 193,121.0
196
- 194,86.0
197
- 195,46.0
198
- 196,54.0
199
- 197,42.0
200
- 198,73.0
201
- 199,45.0
202
- 200,48.0
203
- 201,72.0
204
- 202,60.0
205
- 203,96.0
206
- 204,40.0
207
- 205,46.0
208
- 206,65.0
209
- 207,84.0
210
- 208,115.0
211
- 209,78.0
212
- 210,33.0
213
- 211,40.0
214
- 212,32.0
215
- 213,39.0
216
- 214,47.0
217
- 215,37.0
218
- 216,53.0
219
- 217,37.0
220
- 218,56.0
221
- 219,36.0
222
- 220,101.0
223
- 221,105.0
224
- 222,172.0
225
- 223,116.0
226
- 224,200.0
227
- 225,162.0
228
- 226,200.0
229
- 227,200.0
230
- 228,200.0
231
- 229,200.0
232
- 230,200.0
233
- 231,200.0
234
- 232,200.0
235
- 233,200.0
236
- 234,200.0
237
- 235,200.0
238
- 236,200.0
239
- 237,200.0
240
- 238,200.0
241
- 239,200.0
242
- 240,200.0
243
- 241,200.0
244
- 242,200.0
245
- 243,200.0
246
- 244,200.0
247
- 245,200.0
248
- 246,200.0
249
- 247,200.0
250
- 248,200.0
251
- 249,200.0
252
- 250,200.0
253
- 251,200.0
254
- 252,200.0
255
- 253,200.0
256
- 254,200.0
257
- 255,200.0
258
- 256,200.0
259
- 257,200.0
260
- 258,200.0
261
- 259,200.0
262
- 260,200.0
263
- 261,200.0
264
- 262,200.0
265
- 263,200.0
266
- 264,200.0
267
- 265,200.0
268
- 266,200.0
269
- 267,200.0
270
- 268,200.0
271
- 269,200.0
272
- 270,200.0
273
- 271,200.0
274
- 272,200.0
275
- 273,200.0
276
- 274,200.0
277
- 275,200.0
278
- 276,200.0
279
- 277,200.0
280
- 278,200.0
281
- 279,200.0
282
- 280,200.0
283
- 281,200.0
284
- 282,200.0
285
- 283,200.0
286
- 284,200.0
287
- 285,200.0
288
- 286,200.0
289
- 287,200.0
290
- 288,200.0
291
- 289,199.0
292
- 290,200.0
293
- 291,190.0
294
- 292,179.0
295
- 293,189.0
296
- 294,193.0
297
- 295,200.0
298
- 296,200.0
299
- 297,200.0
300
- 298,195.0
301
- 299,200.0
302
- 300,186.0
303
- 301,175.0
304
- 302,177.0
305
- 303,185.0
306
- 304,167.0
307
- 305,172.0
308
- 306,164.0
309
- 307,146.0
310
- 308,187.0
311
- 309,150.0
312
- 310,146.0
313
- 311,165.0
314
- 312,200.0
315
- 313,200.0
316
- 314,200.0
317
- 315,200.0
318
- 316,200.0
319
- 317,158.0
320
- 318,181.0
321
- 319,174.0
322
- 320,175.0
323
- 321,176.0
324
- 322,170.0
325
- 323,161.0
326
- 324,180.0
327
- 325,200.0
328
- 326,198.0
329
- 327,179.0
330
- 328,192.0
331
- 329,157.0
332
- 330,151.0
333
- 331,198.0
334
- 332,154.0
335
- 333,165.0
336
- 334,200.0
337
- 335,179.0
338
- 336,200.0
339
- 337,191.0
340
- 338,177.0
341
- 339,200.0
342
- 340,171.0
343
- 341,200.0
344
- 342,200.0
345
- 343,200.0
346
- 344,200.0
347
- 345,200.0
348
- 346,200.0
349
- 347,163.0
350
- 348,134.0
351
- 349,200.0
352
- 350,140.0
353
- 351,200.0
354
- 352,200.0
355
- 353,139.0
356
- 354,152.0
357
- 355,136.0
358
- 356,200.0
359
- 357,200.0
360
- 358,173.0
361
- 359,200.0
362
- 360,155.0
363
- 361,134.0
364
- 362,200.0
365
- 363,186.0
366
- 364,142.0
367
- 365,200.0
368
- 366,200.0
369
- 367,128.0
370
- 368,200.0
371
- 369,200.0
372
- 370,200.0
373
- 371,200.0
374
- 372,200.0
375
- 373,200.0
376
- 374,200.0
377
- 375,142.0
378
- 376,162.0
379
- 377,180.0
380
- 378,120.0
381
- 379,190.0
382
- 380,169.0
383
- 381,125.0
384
- 382,189.0
385
- 383,158.0
386
- 384,197.0
387
- 385,200.0
388
- 386,200.0
389
- 387,139.0
390
- 388,158.0
391
- 389,165.0
392
- 390,200.0
393
- 391,200.0
394
- 392,113.0
395
- 393,115.0
396
- 394,117.0
397
- 395,119.0
398
- 396,110.0
399
- 397,119.0
400
- 398,200.0
401
- 399,133.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/{Train_CartPole-v1_DoubleDQN_ray_20230406-162938 β†’ Train_ray_CartPole-v1_DoubleDQN_20230516-115126}/config.yaml RENAMED
@@ -1,46 +1,43 @@
1
  general_cfg:
2
  algo_name: DoubleDQN
 
3
  device: cpu
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
  load_checkpoint: false
 
8
  load_path: Train_CartPole-v1_DQN_20221026-054757
9
- max_steps: 200
 
10
  mode: train
 
11
  mp_backend: ray
12
  n_workers: 2
13
- new_step_api: true
14
- render: false
15
- render_mode: human
16
- save_fig: true
17
  seed: 1
18
- show_fig: false
19
- test_eps: 10
20
- train_eps: 400
21
- wrapper: null
22
  algo_cfg:
23
  batch_size: 64
24
  buffer_size: 100000
 
25
  epsilon_decay: 500
26
  epsilon_end: 0.01
27
  epsilon_start: 0.95
28
- gamma: 0.95
29
  lr: 0.0001
30
  target_update: 4
31
  value_layers:
32
  - activation: relu
33
  layer_dim:
34
- - n_states
35
  - 256
36
  layer_type: linear
37
  - activation: relu
38
  layer_dim:
39
  - 256
40
- - 256
41
- layer_type: linear
42
- - activation: none
43
- layer_dim:
44
- - 256
45
- - n_actions
46
  layer_type: linear
 
 
 
 
 
 
 
 
1
  general_cfg:
2
  algo_name: DoubleDQN
3
+ collect_traj: false
4
  device: cpu
5
+ env_name: gym
 
 
6
  load_checkpoint: false
7
+ load_model_step: best
8
  load_path: Train_CartPole-v1_DQN_20221026-054757
9
+ max_episode: 100
10
+ max_step: 200
11
  mode: train
12
+ model_save_fre: 500
13
  mp_backend: ray
14
  n_workers: 2
15
+ online_eval: true
16
+ online_eval_episode: 10
 
 
17
  seed: 1
 
 
 
 
18
  algo_cfg:
19
  batch_size: 64
20
  buffer_size: 100000
21
+ buffer_type: REPLAY_QUE
22
  epsilon_decay: 500
23
  epsilon_end: 0.01
24
  epsilon_start: 0.95
25
+ gamma: 0.99
26
  lr: 0.0001
27
  target_update: 4
28
  value_layers:
29
  - activation: relu
30
  layer_dim:
 
31
  - 256
32
  layer_type: linear
33
  - activation: relu
34
  layer_dim:
35
  - 256
 
 
 
 
 
 
36
  layer_type: linear
37
+ env_cfg:
38
+ id: CartPole-v1
39
+ ignore_params:
40
+ - wrapper
41
+ - ignore_params
42
+ render_mode: null
43
+ wrapper: null
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/logs/log.txt ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - General Configs:
2
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
3
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - Name Value Type
4
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - env_name gym <class 'str'>
5
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - algo_name DoubleDQN <class 'str'>
6
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - mode train <class 'str'>
7
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - device cpu <class 'str'>
8
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - seed 1 <class 'int'>
9
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - max_episode 100 <class 'int'>
10
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - max_step 200 <class 'int'>
11
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
12
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - mp_backend ray <class 'str'>
13
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - n_workers 2 <class 'int'>
14
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
15
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
16
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
17
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
18
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
19
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - load_model_step best <class 'str'>
20
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
21
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - Algo Configs:
22
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
23
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - Name Value Type
24
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
25
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
26
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
27
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
28
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
29
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
30
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - batch_size 64 <class 'int'>
31
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - target_update 4 <class 'int'>
32
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
33
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
34
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
35
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - Env Configs:
36
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
37
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - Name Value Type
38
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
39
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - render_mode None <class 'str'>
40
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - wrapper None <class 'str'>
41
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
42
+ 2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
43
+ 2023-05-16 11:51:32 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
44
+ 2023-05-16 11:51:37 - RayLog - INFO: - Worker 0 finished episode 0 with reward 16.0 in 16 steps
45
+ 2023-05-16 11:51:37 - RayLog - INFO: - Worker 1 finished episode 0 with reward 20.0 in 20 steps
46
+ 2023-05-16 11:51:39 - RayLog - INFO: - Worker 0 finished episode 2 with reward 11.0 in 11 steps
47
+ 2023-05-16 11:51:39 - RayLog - INFO: - Worker 1 finished episode 2 with reward 15.0 in 15 steps
48
+ 2023-05-16 11:51:39 - RayLog - INFO: - Worker 1 finished episode 4 with reward 13.0 in 13 steps
49
+ 2023-05-16 11:51:39 - RayLog - INFO: - Worker 0 finished episode 3 with reward 22.0 in 22 steps
50
+ 2023-05-16 11:51:39 - RayLog - INFO: - Worker 1 finished episode 5 with reward 9.0 in 9 steps
51
+ 2023-05-16 11:51:40 - RayLog - INFO: - Worker 0 finished episode 6 with reward 14.0 in 14 steps
52
+ 2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 7 with reward 12.0 in 12 steps
53
+ 2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 9 with reward 13.0 in 13 steps
54
+ 2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 10 with reward 14.0 in 14 steps
55
+ 2023-05-16 11:51:40 - RayLog - INFO: - Worker 0 finished episode 8 with reward 35.0 in 35 steps
56
+ 2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 11 with reward 13.0 in 13 steps
57
+ 2023-05-16 11:51:40 - RayLog - INFO: - Worker 0 finished episode 12 with reward 15.0 in 15 steps
58
+ 2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 13 with reward 12.0 in 12 steps
59
+ 2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 14 with reward 10.0 in 10 steps
60
+ 2023-05-16 11:51:41 - RayLog - INFO: - Worker 1 finished episode 15 with reward 11.0 in 11 steps
61
+ 2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 16 with reward 11.0 in 11 steps
62
+ 2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 18 with reward 12.0 in 12 steps
63
+ 2023-05-16 11:51:41 - RayLog - INFO: - Worker 1 finished episode 17 with reward 21.0 in 21 steps
64
+ 2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 19 with reward 15.0 in 15 steps
65
+ 2023-05-16 11:51:41 - RayLog - INFO: - Worker 1 finished episode 20 with reward 18.0 in 18 steps
66
+ 2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 21 with reward 13.0 in 13 steps
67
+ 2023-05-16 11:51:42 - RayLog - INFO: - Worker 1 finished episode 22 with reward 10.0 in 10 steps
68
+ 2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 23 with reward 16.0 in 16 steps
69
+ 2023-05-16 11:51:42 - RayLog - INFO: - Worker 1 finished episode 24 with reward 19.0 in 19 steps
70
+ 2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 25 with reward 10.0 in 10 steps
71
+ 2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 27 with reward 10.0 in 10 steps
72
+ 2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 28 with reward 10.0 in 10 steps
73
+ 2023-05-16 11:51:42 - RayLog - INFO: - Worker 1 finished episode 26 with reward 26.0 in 26 steps
74
+ 2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 29 with reward 12.0 in 12 steps
75
+ 2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 30 with reward 15.0 in 15 steps
76
+ 2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 32 with reward 10.0 in 10 steps
77
+ 2023-05-16 11:51:43 - RayLog - INFO: - Worker 0 finished episode 31 with reward 18.0 in 18 steps
78
+ 2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 33 with reward 13.0 in 13 steps
79
+ 2023-05-16 11:51:43 - RayLog - INFO: - Worker 0 finished episode 34 with reward 10.0 in 10 steps
80
+ 2023-05-16 11:51:43 - RayLog - INFO: - Worker 0 finished episode 36 with reward 9.0 in 9 steps
81
+ 2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 35 with reward 12.0 in 12 steps
82
+ 2023-05-16 11:51:45 - RayLog - INFO: - update_step: 500, online_eval_reward: 9.000
83
+ 2023-05-16 11:51:45 - RayLog - INFO: - current update step obtain a better online_eval_reward: 9.000, save the best model!
84
+ 2023-05-16 11:51:45 - RayLog - INFO: - Worker 1 finished episode 38 with reward 10.0 in 10 steps
85
+ 2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 37 with reward 13.0 in 13 steps
86
+ 2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 40 with reward 9.0 in 9 steps
87
+ 2023-05-16 11:51:45 - RayLog - INFO: - Worker 1 finished episode 39 with reward 14.0 in 14 steps
88
+ 2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 41 with reward 14.0 in 14 steps
89
+ 2023-05-16 11:51:45 - RayLog - INFO: - Worker 1 finished episode 42 with reward 20.0 in 20 steps
90
+ 2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 43 with reward 15.0 in 15 steps
91
+ 2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 44 with reward 19.0 in 19 steps
92
+ 2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 45 with reward 17.0 in 17 steps
93
+ 2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 47 with reward 12.0 in 12 steps
94
+ 2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 46 with reward 15.0 in 15 steps
95
+ 2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 48 with reward 14.0 in 14 steps
96
+ 2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 49 with reward 16.0 in 16 steps
97
+ 2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 50 with reward 9.0 in 9 steps
98
+ 2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 51 with reward 13.0 in 13 steps
99
+ 2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 52 with reward 10.0 in 10 steps
100
+ 2023-05-16 11:51:47 - RayLog - INFO: - Worker 1 finished episode 53 with reward 13.0 in 13 steps
101
+ 2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 54 with reward 13.0 in 13 steps
102
+ 2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 56 with reward 14.0 in 14 steps
103
+ 2023-05-16 11:51:47 - RayLog - INFO: - Worker 1 finished episode 55 with reward 20.0 in 20 steps
104
+ 2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 57 with reward 14.0 in 14 steps
105
+ 2023-05-16 11:51:47 - RayLog - INFO: - Worker 1 finished episode 58 with reward 16.0 in 16 steps
106
+ 2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 59 with reward 11.0 in 11 steps
107
+ 2023-05-16 11:51:48 - RayLog - INFO: - Worker 1 finished episode 60 with reward 14.0 in 14 steps
108
+ 2023-05-16 11:51:48 - RayLog - INFO: - Worker 0 finished episode 61 with reward 9.0 in 9 steps
109
+ 2023-05-16 11:51:48 - RayLog - INFO: - Worker 0 finished episode 63 with reward 19.0 in 19 steps
110
+ 2023-05-16 11:51:48 - RayLog - INFO: - Worker 1 finished episode 62 with reward 23.0 in 23 steps
111
+ 2023-05-16 11:51:48 - RayLog - INFO: - Worker 0 finished episode 64 with reward 18.0 in 18 steps
112
+ 2023-05-16 11:51:49 - RayLog - INFO: - Worker 1 finished episode 65 with reward 26.0 in 26 steps
113
+ 2023-05-16 11:51:49 - RayLog - INFO: - update_step: 1000, online_eval_reward: 69.000
114
+ 2023-05-16 11:51:49 - RayLog - INFO: - current update step obtain a better online_eval_reward: 69.000, save the best model!
115
+ 2023-05-16 11:51:50 - RayLog - INFO: - Worker 1 finished episode 67 with reward 93.0 in 93 steps
116
+ 2023-05-16 11:51:51 - RayLog - INFO: - Worker 0 finished episode 66 with reward 127.0 in 127 steps
117
+ 2023-05-16 11:51:51 - RayLog - INFO: - Worker 1 finished episode 68 with reward 40.0 in 40 steps
118
+ 2023-05-16 11:51:52 - RayLog - INFO: - Worker 0 finished episode 69 with reward 54.0 in 54 steps
119
+ 2023-05-16 11:51:52 - RayLog - INFO: - Worker 1 finished episode 70 with reward 48.0 in 48 steps
120
+ 2023-05-16 11:51:53 - RayLog - INFO: - Worker 0 finished episode 71 with reward 62.0 in 62 steps
121
+ 2023-05-16 11:51:53 - RayLog - INFO: - Worker 1 finished episode 72 with reward 60.0 in 60 steps
122
+ 2023-05-16 11:51:54 - RayLog - INFO: - Worker 1 finished episode 74 with reward 35.0 in 35 steps
123
+ 2023-05-16 11:51:54 - RayLog - INFO: - Worker 0 finished episode 73 with reward 47.0 in 47 steps
124
+ 2023-05-16 11:51:54 - RayLog - INFO: - update_step: 1500, online_eval_reward: 63.000
125
+ 2023-05-16 11:51:54 - RayLog - INFO: - Worker 1 finished episode 75 with reward 38.0 in 38 steps
126
+ 2023-05-16 11:51:54 - RayLog - INFO: - Worker 0 finished episode 76 with reward 46.0 in 46 steps
127
+ 2023-05-16 11:51:55 - RayLog - INFO: - Worker 1 finished episode 77 with reward 40.0 in 40 steps
128
+ 2023-05-16 11:51:55 - RayLog - INFO: - Worker 0 finished episode 78 with reward 57.0 in 57 steps
129
+ 2023-05-16 11:51:56 - RayLog - INFO: - Worker 1 finished episode 79 with reward 38.0 in 38 steps
130
+ 2023-05-16 11:51:56 - RayLog - INFO: - Worker 1 finished episode 81 with reward 33.0 in 33 steps
131
+ 2023-05-16 11:51:56 - RayLog - INFO: - Worker 0 finished episode 80 with reward 51.0 in 51 steps
132
+ 2023-05-16 11:51:57 - RayLog - INFO: - Worker 1 finished episode 82 with reward 44.0 in 44 steps
133
+ 2023-05-16 11:51:58 - RayLog - INFO: - Worker 0 finished episode 83 with reward 70.0 in 70 steps
134
+ 2023-05-16 11:51:58 - RayLog - INFO: - Worker 1 finished episode 84 with reward 55.0 in 55 steps
135
+ 2023-05-16 11:51:58 - RayLog - INFO: - update_step: 2000, online_eval_reward: 82.000
136
+ 2023-05-16 11:51:58 - RayLog - INFO: - current update step obtain a better online_eval_reward: 82.000, save the best model!
137
+ 2023-05-16 11:51:59 - RayLog - INFO: - Worker 0 finished episode 85 with reward 66.0 in 66 steps
138
+ 2023-05-16 11:51:59 - RayLog - INFO: - Worker 1 finished episode 86 with reward 56.0 in 56 steps
139
+ 2023-05-16 11:52:00 - RayLog - INFO: - Worker 1 finished episode 88 with reward 45.0 in 45 steps
140
+ 2023-05-16 11:52:00 - RayLog - INFO: - Worker 0 finished episode 87 with reward 68.0 in 68 steps
141
+ 2023-05-16 11:52:01 - RayLog - INFO: - Worker 1 finished episode 89 with reward 50.0 in 50 steps
142
+ 2023-05-16 11:52:02 - RayLog - INFO: - Worker 0 finished episode 90 with reward 79.0 in 79 steps
143
+ 2023-05-16 11:52:02 - RayLog - INFO: - Worker 1 finished episode 91 with reward 57.0 in 57 steps
144
+ 2023-05-16 11:52:04 - RayLog - INFO: - update_step: 2500, online_eval_reward: 77.000
145
+ 2023-05-16 11:52:04 - RayLog - INFO: - Worker 1 finished episode 93 with reward 66.0 in 66 steps
146
+ 2023-05-16 11:52:04 - RayLog - INFO: - Worker 0 finished episode 92 with reward 84.0 in 84 steps
147
+ 2023-05-16 11:52:05 - RayLog - INFO: - Worker 1 finished episode 94 with reward 56.0 in 56 steps
148
+ 2023-05-16 11:52:07 - RayLog - INFO: - Worker 0 finished episode 95 with reward 134.0 in 134 steps
149
+ 2023-05-16 11:52:08 - RayLog - INFO: - Worker 1 finished episode 96 with reward 115.0 in 115 steps
150
+ 2023-05-16 11:52:10 - RayLog - INFO: - update_step: 3000, online_eval_reward: 200.000
151
+ 2023-05-16 11:52:10 - RayLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
152
+ 2023-05-16 11:52:12 - RayLog - INFO: - Worker 0 finished episode 97 with reward 200.0 in 200 steps
153
+ 2023-05-16 11:52:13 - RayLog - INFO: - Worker 1 finished episode 98 with reward 200.0 in 200 steps
154
+ 2023-05-16 11:52:15 - RayLog - INFO: - update_step: 3500, online_eval_reward: 200.000
155
+ 2023-05-16 11:52:16 - RayLog - INFO: - Worker 0 finished episode 99 with reward 200.0 in 200 steps
156
+ 2023-05-16 11:52:17 - RayLog - INFO: - Worker 1 finished episode 100 with reward 200.0 in 200 steps
157
+ 2023-05-16 11:52:19 - SimpleLog - INFO: - Finish training! total time consumed: 53.70s
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1000 ADDED
Binary file (545 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1500 ADDED
Binary file (545 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2000 ADDED
Binary file (545 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2500 ADDED
Binary file (545 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3000 ADDED
Binary file (545 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3500 ADDED
Binary file (545 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/500 ADDED
Binary file (545 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/best ADDED
Binary file (545 kB). View file
 
CartPole-v1/{Test_CartPole-v1_DoubleDQN_20221122-125611/models/checkpoint.pth β†’ Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209086.JMac.local.52110.0} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92c80e643e52ee0e109e55ba083247021287455374ec28f27c4f2705e51fee23
3
- size 272471
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be6be5f2b8ae4530630b850e07d2bbd6010678cb75c3d3050606cdfa0e1f6acd
3
+ size 40
CartPole-v1/{Train_CartPole-v1_DoubleDQN_20221122-125516/models/checkpoint.pth β†’ Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209096.JMac.local.52161.0} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92c80e643e52ee0e109e55ba083247021287455374ec28f27c4f2705e51fee23
3
- size 272471
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aa028324617b734607430bc18aa93daae8536fcfea762ed7cdd92c65a472dd0
3
+ size 10028
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209086.JMac.local.52110.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37344be608143375d6347aff7b1395cef1e5b52479a11b5faf17e1f631046d1d
3
+ size 40
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209096.JMac.local.52161.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c34b8e5d11e95894f9f70682c415e3475f765542af5aa31b961c6d0d8e11923
3
+ size 177587
CartPole-v1/{Train_CartPole-v1_DoubleDQN_mp_20230406-160028 β†’ Train_single_CartPole-v1_DoubleDQN_20230516-114540}/config.yaml RENAMED
@@ -1,46 +1,43 @@
1
  general_cfg:
2
  algo_name: DoubleDQN
 
3
  device: cpu
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
  load_checkpoint: false
 
8
  load_path: Train_CartPole-v1_DQN_20221026-054757
9
- max_steps: 200
 
10
  mode: train
11
- mp_backend: mp
 
12
  n_workers: 2
13
- new_step_api: true
14
- render: false
15
- render_mode: human
16
- save_fig: true
17
  seed: 1
18
- show_fig: false
19
- test_eps: 10
20
- train_eps: 400
21
- wrapper: null
22
  algo_cfg:
23
  batch_size: 64
24
  buffer_size: 100000
 
25
  epsilon_decay: 500
26
  epsilon_end: 0.01
27
  epsilon_start: 0.95
28
- gamma: 0.95
29
  lr: 0.0001
30
  target_update: 4
31
  value_layers:
32
  - activation: relu
33
  layer_dim:
34
- - n_states
35
  - 256
36
  layer_type: linear
37
  - activation: relu
38
  layer_dim:
39
  - 256
40
- - 256
41
- layer_type: linear
42
- - activation: none
43
- layer_dim:
44
- - 256
45
- - n_actions
46
  layer_type: linear
 
 
 
 
 
 
 
 
1
  general_cfg:
2
  algo_name: DoubleDQN
3
+ collect_traj: false
4
  device: cpu
5
+ env_name: gym
 
 
6
  load_checkpoint: false
7
+ load_model_step: best
8
  load_path: Train_CartPole-v1_DQN_20221026-054757
9
+ max_episode: 100
10
+ max_step: 200
11
  mode: train
12
+ model_save_fre: 500
13
+ mp_backend: single
14
  n_workers: 2
15
+ online_eval: true
16
+ online_eval_episode: 10
 
 
17
  seed: 1
 
 
 
 
18
  algo_cfg:
19
  batch_size: 64
20
  buffer_size: 100000
21
+ buffer_type: REPLAY_QUE
22
  epsilon_decay: 500
23
  epsilon_end: 0.01
24
  epsilon_start: 0.95
25
+ gamma: 0.99
26
  lr: 0.0001
27
  target_update: 4
28
  value_layers:
29
  - activation: relu
30
  layer_dim:
 
31
  - 256
32
  layer_type: linear
33
  - activation: relu
34
  layer_dim:
35
  - 256
 
 
 
 
 
 
36
  layer_type: linear
37
+ env_cfg:
38
+ id: CartPole-v1
39
+ ignore_params:
40
+ - wrapper
41
+ - ignore_params
42
+ render_mode: null
43
+ wrapper: null
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/logs/log.txt ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - General Configs:
2
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
3
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - Name Value Type
4
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - env_name gym <class 'str'>
5
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - algo_name DoubleDQN <class 'str'>
6
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - mode train <class 'str'>
7
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - device cpu <class 'str'>
8
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - seed 1 <class 'int'>
9
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - max_episode 100 <class 'int'>
10
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - max_step 200 <class 'int'>
11
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
12
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - mp_backend single <class 'str'>
13
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - n_workers 2 <class 'int'>
14
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
15
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
16
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
17
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
18
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
19
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - load_model_step best <class 'str'>
20
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
21
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - Algo Configs:
22
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
23
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - Name Value Type
24
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
25
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
26
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
27
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
28
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
29
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
30
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - batch_size 64 <class 'int'>
31
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - target_update 4 <class 'int'>
32
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
33
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
34
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
35
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - Env Configs:
36
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
37
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - Name Value Type
38
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
39
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - render_mode None <class 'str'>
40
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - wrapper None <class 'str'>
41
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
42
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
43
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
44
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - Start training!
45
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 0, ep_reward: 25.0, ep_step: 25
46
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 1, ep_reward: 17.0, ep_step: 17
47
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 2, ep_reward: 19.0, ep_step: 19
48
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 3, ep_reward: 14.0, ep_step: 14
49
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 4, ep_reward: 14.0, ep_step: 14
50
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 5, ep_reward: 21.0, ep_step: 21
51
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 6, ep_reward: 22.0, ep_step: 22
52
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 7, ep_reward: 13.0, ep_step: 13
53
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 8, ep_reward: 27.0, ep_step: 27
54
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 9, ep_reward: 11.0, ep_step: 11
55
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 10, ep_reward: 14.0, ep_step: 14
56
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 11, ep_reward: 24.0, ep_step: 24
57
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 12, ep_reward: 23.0, ep_step: 23
58
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 13, ep_reward: 12.0, ep_step: 12
59
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 14, ep_reward: 12.0, ep_step: 12
60
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 15, ep_reward: 13.0, ep_step: 13
61
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 16, ep_reward: 11.0, ep_step: 11
62
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 17, ep_reward: 15.0, ep_step: 15
63
+ 2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 18, ep_reward: 12.0, ep_step: 12
64
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 19, ep_reward: 27.0, ep_step: 27
65
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 20, ep_reward: 14.0, ep_step: 14
66
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 21, ep_reward: 19.0, ep_step: 19
67
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 22, ep_reward: 10.0, ep_step: 10
68
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 23, ep_reward: 10.0, ep_step: 10
69
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 24, ep_reward: 15.0, ep_step: 15
70
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 25, ep_reward: 15.0, ep_step: 15
71
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 26, ep_reward: 14.0, ep_step: 14
72
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 27, ep_reward: 11.0, ep_step: 11
73
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 28, ep_reward: 10.0, ep_step: 10
74
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 29, ep_reward: 12.0, ep_step: 12
75
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 30, ep_reward: 10.0, ep_step: 10
76
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 31, ep_reward: 9.0, ep_step: 9
77
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 32, ep_reward: 11.0, ep_step: 11
78
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 33, ep_reward: 9.0, ep_step: 9
79
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 34, ep_reward: 13.0, ep_step: 13
80
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 35, ep_reward: 10.0, ep_step: 10
81
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 36, ep_reward: 9.0, ep_step: 9
82
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 37, ep_reward: 10.0, ep_step: 10
83
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - update_step: 500, online_eval_reward: 9.000
84
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 9.000, save the best model!
85
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 38, ep_reward: 14.0, ep_step: 14
86
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 39, ep_reward: 11.0, ep_step: 11
87
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 40, ep_reward: 9.0, ep_step: 9
88
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 41, ep_reward: 9.0, ep_step: 9
89
+ 2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 42, ep_reward: 9.0, ep_step: 9
90
+ 2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 43, ep_reward: 11.0, ep_step: 11
91
+ 2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 44, ep_reward: 21.0, ep_step: 21
92
+ 2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 45, ep_reward: 13.0, ep_step: 13
93
+ 2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 46, ep_reward: 12.0, ep_step: 12
94
+ 2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 47, ep_reward: 30.0, ep_step: 30
95
+ 2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 48, ep_reward: 20.0, ep_step: 20
96
+ 2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 49, ep_reward: 28.0, ep_step: 28
97
+ 2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 50, ep_reward: 22.0, ep_step: 22
98
+ 2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 51, ep_reward: 20.0, ep_step: 20
99
+ 2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 52, ep_reward: 26.0, ep_step: 26
100
+ 2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 53, ep_reward: 24.0, ep_step: 24
101
+ 2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 54, ep_reward: 30.0, ep_step: 30
102
+ 2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 55, ep_reward: 26.0, ep_step: 26
103
+ 2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 56, ep_reward: 41.0, ep_step: 41
104
+ 2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 57, ep_reward: 58.0, ep_step: 58
105
+ 2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 58, ep_reward: 59.0, ep_step: 59
106
+ 2023-05-16 11:45:43 - SimpleLog - INFO: - update_step: 1000, online_eval_reward: 63.000
107
+ 2023-05-16 11:45:43 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 63.000, save the best model!
108
+ 2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 59, ep_reward: 58.0, ep_step: 58
109
+ 2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 60, ep_reward: 47.0, ep_step: 47
110
+ 2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 61, ep_reward: 84.0, ep_step: 84
111
+ 2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 62, ep_reward: 44.0, ep_step: 44
112
+ 2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 63, ep_reward: 59.0, ep_step: 59
113
+ 2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 64, ep_reward: 39.0, ep_step: 39
114
+ 2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 65, ep_reward: 53.0, ep_step: 53
115
+ 2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 66, ep_reward: 70.0, ep_step: 70
116
+ 2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 67, ep_reward: 58.0, ep_step: 58
117
+ 2023-05-16 11:45:45 - SimpleLog - INFO: - update_step: 1500, online_eval_reward: 65.000
118
+ 2023-05-16 11:45:45 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 65.000, save the best model!
119
+ 2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 68, ep_reward: 101.0, ep_step: 101
120
+ 2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 69, ep_reward: 52.0, ep_step: 52
121
+ 2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 70, ep_reward: 58.0, ep_step: 58
122
+ 2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 71, ep_reward: 61.0, ep_step: 61
123
+ 2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 72, ep_reward: 91.0, ep_step: 91
124
+ 2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 73, ep_reward: 54.0, ep_step: 54
125
+ 2023-05-16 11:45:46 - SimpleLog - INFO: - update_step: 2000, online_eval_reward: 65.000
126
+ 2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 74, ep_reward: 98.0, ep_step: 98
127
+ 2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 75, ep_reward: 67.0, ep_step: 67
128
+ 2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 76, ep_reward: 70.0, ep_step: 70
129
+ 2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 77, ep_reward: 74.0, ep_step: 74
130
+ 2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 78, ep_reward: 72.0, ep_step: 72
131
+ 2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 79, ep_reward: 81.0, ep_step: 81
132
+ 2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 80, ep_reward: 82.0, ep_step: 82
133
+ 2023-05-16 11:45:48 - SimpleLog - INFO: - update_step: 2500, online_eval_reward: 94.000
134
+ 2023-05-16 11:45:48 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 94.000, save the best model!
135
+ 2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 81, ep_reward: 97.0, ep_step: 97
136
+ 2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 82, ep_reward: 89.0, ep_step: 89
137
+ 2023-05-16 11:45:49 - SimpleLog - INFO: - episode: 83, ep_reward: 200.0, ep_step: 200
138
+ 2023-05-16 11:45:50 - SimpleLog - INFO: - episode: 84, ep_reward: 142.0, ep_step: 142
139
+ 2023-05-16 11:45:50 - SimpleLog - INFO: - update_step: 3000, online_eval_reward: 153.000
140
+ 2023-05-16 11:45:50 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 153.000, save the best model!
141
+ 2023-05-16 11:45:50 - SimpleLog - INFO: - episode: 85, ep_reward: 114.0, ep_step: 114
142
+ 2023-05-16 11:45:51 - SimpleLog - INFO: - episode: 86, ep_reward: 162.0, ep_step: 162
143
+ 2023-05-16 11:45:51 - SimpleLog - INFO: - episode: 87, ep_reward: 200.0, ep_step: 200
144
+ 2023-05-16 11:45:51 - SimpleLog - INFO: - update_step: 3500, online_eval_reward: 200.000
145
+ 2023-05-16 11:45:51 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
146
+ 2023-05-16 11:45:52 - SimpleLog - INFO: - episode: 88, ep_reward: 200.0, ep_step: 200
147
+ 2023-05-16 11:45:53 - SimpleLog - INFO: - episode: 89, ep_reward: 200.0, ep_step: 200
148
+ 2023-05-16 11:45:53 - SimpleLog - INFO: - update_step: 4000, online_eval_reward: 200.000
149
+ 2023-05-16 11:45:53 - SimpleLog - INFO: - episode: 90, ep_reward: 200.0, ep_step: 200
150
+ 2023-05-16 11:45:54 - SimpleLog - INFO: - episode: 91, ep_reward: 200.0, ep_step: 200
151
+ 2023-05-16 11:45:55 - SimpleLog - INFO: - episode: 92, ep_reward: 200.0, ep_step: 200
152
+ 2023-05-16 11:45:55 - SimpleLog - INFO: - update_step: 4500, online_eval_reward: 200.000
153
+ 2023-05-16 11:45:55 - SimpleLog - INFO: - episode: 93, ep_reward: 200.0, ep_step: 200
154
+ 2023-05-16 11:45:56 - SimpleLog - INFO: - episode: 94, ep_reward: 200.0, ep_step: 200
155
+ 2023-05-16 11:45:57 - SimpleLog - INFO: - update_step: 5000, online_eval_reward: 200.000
156
+ 2023-05-16 11:45:57 - SimpleLog - INFO: - episode: 95, ep_reward: 200.0, ep_step: 200
157
+ 2023-05-16 11:45:58 - SimpleLog - INFO: - episode: 96, ep_reward: 200.0, ep_step: 200
158
+ 2023-05-16 11:45:58 - SimpleLog - INFO: - episode: 97, ep_reward: 200.0, ep_step: 200
159
+ 2023-05-16 11:45:58 - SimpleLog - INFO: - update_step: 5500, online_eval_reward: 200.000
160
+ 2023-05-16 11:45:59 - SimpleLog - INFO: - episode: 98, ep_reward: 200.0, ep_step: 200
161
+ 2023-05-16 11:46:00 - SimpleLog - INFO: - episode: 99, ep_reward: 200.0, ep_step: 200
162
+ 2023-05-16 11:46:00 - SimpleLog - INFO: - Finish training! total time consumed: 20.03s
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1000 ADDED
Binary file (545 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1500 ADDED
Binary file (545 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2000 ADDED
Binary file (545 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2500 ADDED
Binary file (545 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3000 ADDED
Binary file (545 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3500 ADDED
Binary file (545 kB). View file
 
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/4000 ADDED
Binary file (545 kB). View file