johnjim0816 committed on
Commit
54d82df
•
1 Parent(s): 3a4b440

update CartPole-v1 NoisyDQN

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. CartPole-v1/Test_CartPole-v1_NoisyDQN_20230318-162630/config.yaml +0 -29
  2. CartPole-v1/Test_CartPole-v1_NoisyDQN_20230318-162630/logs/log.txt +0 -50
  3. CartPole-v1/Test_CartPole-v1_NoisyDQN_20230318-162630/results/learning_curve.png +0 -0
  4. CartPole-v1/Test_CartPole-v1_NoisyDQN_mp_20230413-221012/logs/log.txt +0 -54
  5. CartPole-v1/Test_CartPole-v1_NoisyDQN_mp_20230413-221012/results/learning_curve.png +0 -0
  6. CartPole-v1/Test_CartPole-v1_NoisyDQN_ray_20230414-101926/config.yaml +0 -36
  7. CartPole-v1/Test_CartPole-v1_NoisyDQN_ray_20230414-101926/logs/log.txt +0 -54
  8. CartPole-v1/Test_CartPole-v1_NoisyDQN_ray_20230414-101926/results/learning_curve.png +0 -0
  9. CartPole-v1/Test_CartPole-v1_REINFORCE_20221203-143430/results/res.csv +0 -11
  10. CartPole-v1/Test_CartPole-v1_RainbowDQN_20230114-222446/results/res.csv +0 -11
  11. CartPole-v1/Test_CartPole-v1_SAC_D_20230305-113238/results/res.csv +0 -11
  12. CartPole-v1/Test_CartPole-v1_mp_DQN_20230319-215806/config.yaml +0 -44
  13. CartPole-v1/Test_CartPole-v1_mp_DQN_20230319-215806/logs/log.txt +0 -60
  14. CartPole-v1/Test_CartPole-v1_mp_DQN_20230319-215806/models/checkpoint.pt +0 -3
  15. CartPole-v1/Test_CartPole-v1_mp_DQN_20230319-215806/results/learning_curve.png +0 -0
  16. CartPole-v1/Test_CartPole-v1_mp_DQN_20230319-215806/results/res.csv +0 -21
  17. CartPole-v1/Train_CartPole-v1_NoisyDQN_20230318-162139/config.yaml +0 -29
  18. CartPole-v1/Train_CartPole-v1_NoisyDQN_20230318-162139/logs/log.txt +0 -153
  19. CartPole-v1/Train_CartPole-v1_NoisyDQN_20230318-162139/models/checkpoint.pt +0 -3
  20. CartPole-v1/Train_CartPole-v1_NoisyDQN_20230318-162139/results/learning_curve.png +0 -0
  21. CartPole-v1/Train_CartPole-v1_NoisyDQN_20230318-162139/results/res.csv +0 -101
  22. CartPole-v1/Train_CartPole-v1_NoisyDQN_mp_20230413-220639/logs/log.txt +0 -44
  23. CartPole-v1/Train_CartPole-v1_NoisyDQN_mp_20230413-220639/models/checkpoint.pt +0 -3
  24. CartPole-v1/Train_CartPole-v1_NoisyDQN_mp_20230413-220639/results/learning_curve.png +0 -0
  25. CartPole-v1/Train_CartPole-v1_NoisyDQN_mp_20230413-220639/results/res.csv +0 -202
  26. CartPole-v1/Train_CartPole-v1_NoisyDQN_ray_20230414-101351/logs/log.txt +0 -44
  27. CartPole-v1/Train_CartPole-v1_NoisyDQN_ray_20230414-101351/models/checkpoint.pt +0 -3
  28. CartPole-v1/Train_CartPole-v1_NoisyDQN_ray_20230414-101351/results/learning_curve.png +0 -0
  29. CartPole-v1/Train_CartPole-v1_NoisyDQN_ray_20230414-101351/results/res.csv +0 -221
  30. {Acrobot-v1 → ClassControl/Acrobot-v1}/Test_Acrobot-v1_DQN_20221122-120610/config.yaml +0 -0
  31. {Acrobot-v1 → ClassControl/Acrobot-v1}/Test_Acrobot-v1_DQN_20221122-120610/logs/log.txt +0 -0
  32. {Acrobot-v1 → ClassControl/Acrobot-v1}/Test_Acrobot-v1_DQN_20221122-120610/models/checkpoint.pt +0 -0
  33. {Acrobot-v1 → ClassControl/Acrobot-v1}/Test_Acrobot-v1_DQN_20221122-120610/results/learning_curve.png +0 -0
  34. {Acrobot-v1 → ClassControl/Acrobot-v1}/Test_Acrobot-v1_DQN_20221122-120610/results/res.csv +0 -0
  35. {Acrobot-v1 → ClassControl/Acrobot-v1}/Train_Acrobot-v1_DQN_20221122-120436/config.yaml +0 -0
  36. {Acrobot-v1 → ClassControl/Acrobot-v1}/Train_Acrobot-v1_DQN_20221122-120436/logs/log.txt +0 -0
  37. {Acrobot-v1 → ClassControl/Acrobot-v1}/Train_Acrobot-v1_DQN_20221122-120436/models/checkpoint.pt +0 -0
  38. {Acrobot-v1 → ClassControl/Acrobot-v1}/Train_Acrobot-v1_DQN_20221122-120436/results/learning_curve.png +0 -0
  39. {Acrobot-v1 → ClassControl/Acrobot-v1}/Train_Acrobot-v1_DQN_20221122-120436/results/res.csv +0 -0
  40. ClassControl/CartPole-v1/.DS_Store +0 -0
  41. {CartPole-v1 → ClassControl/CartPole-v1}/Collect_CartPole-v1_PPO_20221206-173222/config.yaml +0 -0
  42. {CartPole-v1 → ClassControl/CartPole-v1}/Collect_CartPole-v1_PPO_20221206-173222/logs/log.txt +0 -0
  43. {CartPole-v1 → ClassControl/CartPole-v1}/Collect_CartPole-v1_PPO_20221206-173222/results/learning_curve.png +0 -0
  44. {CartPole-v1 → ClassControl/CartPole-v1}/Collect_CartPole-v1_PPO_20221206-173222/results/res.csv +0 -0
  45. {CartPole-v1 → ClassControl/CartPole-v1}/Collect_CartPole-v1_PPO_20221206-173222/traj/traj.pkl +0 -0
  46. {CartPole-v1 → ClassControl/CartPole-v1}/Test_CartPole-v1_A2C_20221204-003659/config.yaml +0 -0
  47. {CartPole-v1 → ClassControl/CartPole-v1}/Test_CartPole-v1_A2C_20221204-003659/logs/log.txt +0 -0
  48. {CartPole-v1 → ClassControl/CartPole-v1}/Test_CartPole-v1_A2C_20221204-003659/models/actor_checkpoint.pt +0 -0
  49. {CartPole-v1 → ClassControl/CartPole-v1}/Test_CartPole-v1_A2C_20221204-003659/models/critic_checkpoint.pt +0 -0
  50. {CartPole-v1 → ClassControl/CartPole-v1}/Test_CartPole-v1_A2C_20221204-003659/results/learning_curve.png +0 -0
CartPole-v1/Test_CartPole-v1_NoisyDQN_20230318-162630/config.yaml DELETED
@@ -1,29 +0,0 @@
1
- general_cfg:
2
- algo_name: NoisyDQN
3
- device: cpu
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_CartPole-v1_NoisyDQN_20230318-162139
9
- max_steps: 200
10
- mode: test
11
- new_step_api: true
12
- render: false
13
- save_fig: true
14
- seed: 1
15
- show_fig: false
16
- test_eps: 10
17
- train_eps: 100
18
- wrapper: null
19
- algo_cfg:
20
- batch_size: 64
21
- buffer_size: 100000
22
- epsilon_decay: 500
23
- epsilon_end: 0.01
24
- epsilon_start: 0.95
25
- gamma: 0.95
26
- hidden_dim: 256
27
- lr: 0.0001
28
- target_update: 4
29
- tau: 1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_NoisyDQN_20230318-162630/logs/log.txt DELETED
@@ -1,50 +0,0 @@
1
- 2023-03-18 16:26:30 - r - INFO: - Hyperparameters:
2
- 2023-03-18 16:26:30 - r - INFO: - ================================================================================
3
- 2023-03-18 16:26:30 - r - INFO: - Name Value Type
4
- 2023-03-18 16:26:30 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-03-18 16:26:30 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-03-18 16:26:30 - r - INFO: - wrapper None <class 'str'>
7
- 2023-03-18 16:26:30 - r - INFO: - render 0 <class 'bool'>
8
- 2023-03-18 16:26:30 - r - INFO: - algo_name NoisyDQN <class 'str'>
9
- 2023-03-18 16:26:30 - r - INFO: - mode test <class 'str'>
10
- 2023-03-18 16:26:30 - r - INFO: - seed 1 <class 'int'>
11
- 2023-03-18 16:26:30 - r - INFO: - device cpu <class 'str'>
12
- 2023-03-18 16:26:30 - r - INFO: - train_eps 100 <class 'int'>
13
- 2023-03-18 16:26:30 - r - INFO: - test_eps 10 <class 'int'>
14
- 2023-03-18 16:26:30 - r - INFO: - eval_eps 10 <class 'int'>
15
- 2023-03-18 16:26:30 - r - INFO: - eval_per_episode 5 <class 'int'>
16
- 2023-03-18 16:26:30 - r - INFO: - max_steps 200 <class 'int'>
17
- 2023-03-18 16:26:30 - r - INFO: - load_checkpoint 1 <class 'bool'>
18
- 2023-03-18 16:26:30 - r - INFO: - load_path Train_CartPole-v1_NoisyDQN_20230318-162139 <class 'str'>
19
- 2023-03-18 16:26:30 - r - INFO: - show_fig 0 <class 'bool'>
20
- 2023-03-18 16:26:30 - r - INFO: - save_fig 1 <class 'bool'>
21
- 2023-03-18 16:26:30 - r - INFO: - epsilon_start 0.95 <class 'float'>
22
- 2023-03-18 16:26:30 - r - INFO: - tau 1.0 <class 'float'>
23
- 2023-03-18 16:26:30 - r - INFO: - epsilon_end 0.01 <class 'float'>
24
- 2023-03-18 16:26:30 - r - INFO: - epsilon_decay 500 <class 'int'>
25
- 2023-03-18 16:26:30 - r - INFO: - hidden_dim 256 <class 'int'>
26
- 2023-03-18 16:26:30 - r - INFO: - gamma 0.95 <class 'float'>
27
- 2023-03-18 16:26:30 - r - INFO: - lr 0.0001 <class 'float'>
28
- 2023-03-18 16:26:30 - r - INFO: - buffer_size 100000 <class 'int'>
29
- 2023-03-18 16:26:30 - r - INFO: - batch_size 64 <class 'int'>
30
- 2023-03-18 16:26:30 - r - INFO: - target_update 4 <class 'int'>
31
- 2023-03-18 16:26:30 - r - INFO: - task_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_NoisyDQN_20230318-162630 <class 'str'>
32
- 2023-03-18 16:26:30 - r - INFO: - res_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_NoisyDQN_20230318-162630/results <class 'str'>
33
- 2023-03-18 16:26:30 - r - INFO: - log_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_NoisyDQN_20230318-162630/logs <class 'str'>
34
- 2023-03-18 16:26:30 - r - INFO: - traj_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_NoisyDQN_20230318-162630/traj <class 'str'>
35
- 2023-03-18 16:26:30 - r - INFO: - tb_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_NoisyDQN_20230318-162630/tb_logs <class 'str'>
36
- 2023-03-18 16:26:30 - r - INFO: - ================================================================================
37
- 2023-03-18 16:26:30 - r - INFO: - n_states: 4, n_actions: 2
38
- 2023-03-18 16:26:30 - r - INFO: - Start testing!
39
- 2023-03-18 16:26:30 - r - INFO: - Env: CartPole-v1, Algorithm: NoisyDQN, Device: cpu
40
- 2023-03-18 16:26:30 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
41
- 2023-03-18 16:26:30 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
42
- 2023-03-18 16:26:30 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
43
- 2023-03-18 16:26:30 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
44
- 2023-03-18 16:26:30 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
45
- 2023-03-18 16:26:30 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
46
- 2023-03-18 16:26:30 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
47
- 2023-03-18 16:26:30 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
48
- 2023-03-18 16:26:30 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
49
- 2023-03-18 16:26:30 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
50
- 2023-03-18 16:26:30 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_NoisyDQN_20230318-162630/results/learning_curve.png DELETED
Binary file (26.1 kB)
 
CartPole-v1/Test_CartPole-v1_NoisyDQN_mp_20230413-221012/logs/log.txt DELETED
@@ -1,54 +0,0 @@
1
- 2023-04-13 22:10:12 - r - INFO: - Hyperparameters:
2
- 2023-04-13 22:10:12 - r - INFO: - ================================================================================
3
- 2023-04-13 22:10:12 - r - INFO: - Name Value Type
4
- 2023-04-13 22:10:12 - r - INFO: - env_name gym <class 'str'>
5
- 2023-04-13 22:10:12 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-13 22:10:12 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-13 22:10:12 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-13 22:10:12 - r - INFO: - render_mode None <class 'str'>
9
- 2023-04-13 22:10:12 - r - INFO: - algo_name NoisyDQN <class 'str'>
10
- 2023-04-13 22:10:12 - r - INFO: - mode test <class 'str'>
11
- 2023-04-13 22:10:12 - r - INFO: - mp_backend mp <class 'str'>
12
- 2023-04-13 22:10:12 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-13 22:10:12 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-13 22:10:12 - r - INFO: - train_eps 200 <class 'int'>
15
- 2023-04-13 22:10:12 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-13 22:10:12 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-13 22:10:12 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-13 22:10:12 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-13 22:10:12 - r - INFO: - load_checkpoint 1 <class 'bool'>
20
- 2023-04-13 22:10:12 - r - INFO: - load_path Train_CartPole-v1_NoisyDQN_mp_20230413-220639 <class 'str'>
21
- 2023-04-13 22:10:12 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-13 22:10:12 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-13 22:10:12 - r - INFO: - n_workers 1 <class 'int'>
24
- 2023-04-13 22:10:12 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-13 22:10:12 - r - INFO: - tau 1.0 <class 'float'>
26
- 2023-04-13 22:10:12 - r - INFO: - epsilon_end 0.01 <class 'float'>
27
- 2023-04-13 22:10:12 - r - INFO: - epsilon_decay 500 <class 'int'>
28
- 2023-04-13 22:10:12 - r - INFO: - hidden_dim 256 <class 'int'>
29
- 2023-04-13 22:10:12 - r - INFO: - gamma 0.95 <class 'float'>
30
- 2023-04-13 22:10:12 - r - INFO: - lr 0.0001 <class 'float'>
31
- 2023-04-13 22:10:12 - r - INFO: - buffer_size 100000 <class 'int'>
32
- 2023-04-13 22:10:12 - r - INFO: - batch_size 64 <class 'int'>
33
- 2023-04-13 22:10:12 - r - INFO: - target_update 4 <class 'int'>
34
- 2023-04-13 22:10:12 - r - INFO: - id CartPole-v1 <class 'str'>
35
- 2023-04-13 22:10:12 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_NoisyDQN_20230413-221012 <class 'str'>
36
- 2023-04-13 22:10:12 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_NoisyDQN_20230413-221012/results <class 'str'>
37
- 2023-04-13 22:10:12 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_NoisyDQN_20230413-221012/logs <class 'str'>
38
- 2023-04-13 22:10:12 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_NoisyDQN_20230413-221012/traj <class 'str'>
39
- 2023-04-13 22:10:12 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_NoisyDQN_20230413-221012/videos <class 'str'>
40
- 2023-04-13 22:10:12 - r - INFO: - ================================================================================
41
- 2023-04-13 22:10:12 - r - INFO: - n_states: 4, n_actions: 2
42
- 2023-04-13 22:10:12 - r - INFO: - Start testing!
43
- 2023-04-13 22:10:12 - r - INFO: - Env: gym, Algorithm: NoisyDQN, Device: cpu
44
- 2023-04-13 22:10:12 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
45
- 2023-04-13 22:10:12 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
46
- 2023-04-13 22:10:12 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
47
- 2023-04-13 22:10:12 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
48
- 2023-04-13 22:10:12 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
49
- 2023-04-13 22:10:12 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
50
- 2023-04-13 22:10:12 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
51
- 2023-04-13 22:10:12 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
52
- 2023-04-13 22:10:12 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
53
- 2023-04-13 22:10:12 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
54
- 2023-04-13 22:10:12 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_NoisyDQN_mp_20230413-221012/results/learning_curve.png DELETED
Binary file (27.7 kB)
 
CartPole-v1/Test_CartPole-v1_NoisyDQN_ray_20230414-101926/config.yaml DELETED
@@ -1,36 +0,0 @@
1
- general_cfg:
2
- algo_name: NoisyDQN
3
- device: cpu
4
- env_name: gym
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_gym_NoisyDQN_20230414-101351
9
- max_steps: 200
10
- mode: test
11
- mp_backend: ray
12
- n_workers: 1
13
- new_step_api: true
14
- render: false
15
- render_mode: human
16
- save_fig: true
17
- seed: 1
18
- show_fig: false
19
- test_eps: 10
20
- train_eps: 200
21
- wrapper: null
22
- algo_cfg:
23
- batch_size: 64
24
- buffer_size: 100000
25
- epsilon_decay: 500
26
- epsilon_end: 0.01
27
- epsilon_start: 0.95
28
- gamma: 0.95
29
- hidden_dim: 256
30
- lr: 0.0001
31
- target_update: 4
32
- tau: 1.0
33
- env_cfg:
34
- id: CartPole-v1
35
- new_step_api: true
36
- render_mode: null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_NoisyDQN_ray_20230414-101926/logs/log.txt DELETED
@@ -1,54 +0,0 @@
1
- 2023-04-14 10:19:26 - r - INFO: - Hyperparameters:
2
- 2023-04-14 10:19:26 - r - INFO: - ================================================================================
3
- 2023-04-14 10:19:26 - r - INFO: - Name Value Type
4
- 2023-04-14 10:19:26 - r - INFO: - env_name gym <class 'str'>
5
- 2023-04-14 10:19:26 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-14 10:19:26 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-14 10:19:26 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-14 10:19:26 - r - INFO: - render_mode None <class 'str'>
9
- 2023-04-14 10:19:26 - r - INFO: - algo_name NoisyDQN <class 'str'>
10
- 2023-04-14 10:19:26 - r - INFO: - mode test <class 'str'>
11
- 2023-04-14 10:19:26 - r - INFO: - mp_backend ray <class 'str'>
12
- 2023-04-14 10:19:26 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-14 10:19:26 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-14 10:19:26 - r - INFO: - train_eps 200 <class 'int'>
15
- 2023-04-14 10:19:26 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-14 10:19:26 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-14 10:19:26 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-14 10:19:26 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-14 10:19:26 - r - INFO: - load_checkpoint 1 <class 'bool'>
20
- 2023-04-14 10:19:26 - r - INFO: - load_path Train_gym_NoisyDQN_20230414-101351 <class 'str'>
21
- 2023-04-14 10:19:26 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-14 10:19:26 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-14 10:19:26 - r - INFO: - n_workers 1 <class 'int'>
24
- 2023-04-14 10:19:26 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-14 10:19:26 - r - INFO: - tau 1.0 <class 'float'>
26
- 2023-04-14 10:19:26 - r - INFO: - epsilon_end 0.01 <class 'float'>
27
- 2023-04-14 10:19:26 - r - INFO: - epsilon_decay 500 <class 'int'>
28
- 2023-04-14 10:19:26 - r - INFO: - hidden_dim 256 <class 'int'>
29
- 2023-04-14 10:19:26 - r - INFO: - gamma 0.95 <class 'float'>
30
- 2023-04-14 10:19:26 - r - INFO: - lr 0.0001 <class 'float'>
31
- 2023-04-14 10:19:26 - r - INFO: - buffer_size 100000 <class 'int'>
32
- 2023-04-14 10:19:26 - r - INFO: - batch_size 64 <class 'int'>
33
- 2023-04-14 10:19:26 - r - INFO: - target_update 4 <class 'int'>
34
- 2023-04-14 10:19:26 - r - INFO: - id CartPole-v1 <class 'str'>
35
- 2023-04-14 10:19:26 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_NoisyDQN_20230414-101926 <class 'str'>
36
- 2023-04-14 10:19:26 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_NoisyDQN_20230414-101926/results <class 'str'>
37
- 2023-04-14 10:19:26 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_NoisyDQN_20230414-101926/logs <class 'str'>
38
- 2023-04-14 10:19:26 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_NoisyDQN_20230414-101926/traj <class 'str'>
39
- 2023-04-14 10:19:26 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_NoisyDQN_20230414-101926/videos <class 'str'>
40
- 2023-04-14 10:19:26 - r - INFO: - ================================================================================
41
- 2023-04-14 10:19:26 - r - INFO: - n_states: 4, n_actions: 2
42
- 2023-04-14 10:19:26 - r - INFO: - Start testing!
43
- 2023-04-14 10:19:26 - r - INFO: - Env: gym, Algorithm: NoisyDQN, Device: cpu
44
- 2023-04-14 10:19:26 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
45
- 2023-04-14 10:19:26 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
46
- 2023-04-14 10:19:26 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
47
- 2023-04-14 10:19:26 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
48
- 2023-04-14 10:19:26 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
49
- 2023-04-14 10:19:26 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
50
- 2023-04-14 10:19:26 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
51
- 2023-04-14 10:19:26 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
52
- 2023-04-14 10:19:27 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
53
- 2023-04-14 10:19:27 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
54
- 2023-04-14 10:19:27 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_NoisyDQN_ray_20230414-101926/results/learning_curve.png DELETED
Binary file (27.7 kB)
 
CartPole-v1/Test_CartPole-v1_REINFORCE_20221203-143430/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,200.0,200
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,200.0,200
7
- 5,200.0,200
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,200.0,200
11
- 9,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_RainbowDQN_20230114-222446/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,200.0,200
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,200.0,200
7
- 5,200.0,200
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,200.0,200
11
- 9,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_SAC_D_20230305-113238/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,200.0,200
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,200.0,200
7
- 5,200.0,200
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,200.0,200
11
- 9,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_mp_DQN_20230319-215806/config.yaml DELETED
@@ -1,44 +0,0 @@
1
- general_cfg:
2
- algo_name: DQN
3
- device: cpu
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_CartPole-v1_mp_DQN_20230319-215340
9
- max_steps: 200
10
- mode: test
11
- n_workers: 1
12
- new_step_api: true
13
- render: false
14
- save_fig: true
15
- seed: 1
16
- show_fig: false
17
- test_eps: 20
18
- train_eps: 300
19
- wrapper: null
20
- algo_cfg:
21
- batch_size: 64
22
- buffer_size: 100000
23
- epsilon_decay: 500
24
- epsilon_end: 0.01
25
- epsilon_start: 0.95
26
- gamma: 0.95
27
- lr: 0.0001
28
- target_update: 4
29
- value_layers:
30
- - activation: relu
31
- layer_dim:
32
- - n_states
33
- - 256
34
- layer_type: linear
35
- - activation: relu
36
- layer_dim:
37
- - 256
38
- - 256
39
- layer_type: linear
40
- - activation: none
41
- layer_dim:
42
- - 256
43
- - n_actions
44
- layer_type: linear
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_mp_DQN_20230319-215806/logs/log.txt DELETED
@@ -1,60 +0,0 @@
1
- 2023-03-19 21:58:06 - r - INFO: - Hyperparameters:
2
- 2023-03-19 21:58:06 - r - INFO: - ================================================================================
3
- 2023-03-19 21:58:06 - r - INFO: - Name Value Type
4
- 2023-03-19 21:58:06 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-03-19 21:58:06 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-03-19 21:58:06 - r - INFO: - wrapper None <class 'str'>
7
- 2023-03-19 21:58:06 - r - INFO: - render 0 <class 'bool'>
8
- 2023-03-19 21:58:06 - r - INFO: - algo_name DQN <class 'str'>
9
- 2023-03-19 21:58:06 - r - INFO: - mode test <class 'str'>
10
- 2023-03-19 21:58:06 - r - INFO: - seed 1 <class 'int'>
11
- 2023-03-19 21:58:06 - r - INFO: - device cpu <class 'str'>
12
- 2023-03-19 21:58:06 - r - INFO: - train_eps 300 <class 'int'>
13
- 2023-03-19 21:58:06 - r - INFO: - test_eps 20 <class 'int'>
14
- 2023-03-19 21:58:06 - r - INFO: - eval_eps 10 <class 'int'>
15
- 2023-03-19 21:58:06 - r - INFO: - eval_per_episode 5 <class 'int'>
16
- 2023-03-19 21:58:06 - r - INFO: - max_steps 200 <class 'int'>
17
- 2023-03-19 21:58:06 - r - INFO: - load_checkpoint 1 <class 'bool'>
18
- 2023-03-19 21:58:06 - r - INFO: - load_path Train_CartPole-v1_mp_DQN_20230319-215340 <class 'str'>
19
- 2023-03-19 21:58:06 - r - INFO: - show_fig 0 <class 'bool'>
20
- 2023-03-19 21:58:06 - r - INFO: - save_fig 1 <class 'bool'>
21
- 2023-03-19 21:58:06 - r - INFO: - n_workers 1 <class 'int'>
22
- 2023-03-19 21:58:06 - r - INFO: - epsilon_start 0.95 <class 'float'>
23
- 2023-03-19 21:58:06 - r - INFO: - epsilon_end 0.01 <class 'float'>
24
- 2023-03-19 21:58:06 - r - INFO: - epsilon_decay 500 <class 'int'>
25
- 2023-03-19 21:58:06 - r - INFO: - gamma 0.95 <class 'float'>
26
- 2023-03-19 21:58:06 - r - INFO: - lr 0.0001 <class 'float'>
27
- 2023-03-19 21:58:06 - r - INFO: - buffer_size 100000 <class 'int'>
28
- 2023-03-19 21:58:06 - r - INFO: - batch_size 64 <class 'int'>
29
- 2023-03-19 21:58:06 - r - INFO: - target_update 4 <class 'int'>
30
- 2023-03-19 21:58:06 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
31
- 2023-03-19 21:58:06 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DQN_20230319-215806 <class 'str'>
32
- 2023-03-19 21:58:06 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DQN_20230319-215806/results <class 'str'>
33
- 2023-03-19 21:58:06 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DQN_20230319-215806/logs <class 'str'>
34
- 2023-03-19 21:58:06 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DQN_20230319-215806/traj <class 'str'>
35
- 2023-03-19 21:58:06 - r - INFO: - tb_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DQN_20230319-215806/tb_logs <class 'str'>
36
- 2023-03-19 21:58:06 - r - INFO: - ================================================================================
37
- 2023-03-19 21:58:06 - r - INFO: - n_states: 4, n_actions: 2
38
- 2023-03-19 21:58:06 - r - INFO: - Start testing!
39
- 2023-03-19 21:58:06 - r - INFO: - Env: CartPole-v1, Algorithm: DQN, Device: cpu
40
- 2023-03-19 21:58:07 - r - INFO: - Episode: 1/20, Reward: 200.000, Step: 200
41
- 2023-03-19 21:58:07 - r - INFO: - Episode: 2/20, Reward: 200.000, Step: 200
42
- 2023-03-19 21:58:07 - r - INFO: - Episode: 3/20, Reward: 200.000, Step: 200
43
- 2023-03-19 21:58:07 - r - INFO: - Episode: 4/20, Reward: 200.000, Step: 200
44
- 2023-03-19 21:58:07 - r - INFO: - Episode: 5/20, Reward: 200.000, Step: 200
45
- 2023-03-19 21:58:07 - r - INFO: - Episode: 6/20, Reward: 200.000, Step: 200
46
- 2023-03-19 21:58:07 - r - INFO: - Episode: 7/20, Reward: 200.000, Step: 200
47
- 2023-03-19 21:58:07 - r - INFO: - Episode: 8/20, Reward: 200.000, Step: 200
48
- 2023-03-19 21:58:07 - r - INFO: - Episode: 9/20, Reward: 200.000, Step: 200
49
- 2023-03-19 21:58:07 - r - INFO: - Episode: 10/20, Reward: 200.000, Step: 200
50
- 2023-03-19 21:58:07 - r - INFO: - Episode: 11/20, Reward: 200.000, Step: 200
51
- 2023-03-19 21:58:07 - r - INFO: - Episode: 12/20, Reward: 200.000, Step: 200
52
- 2023-03-19 21:58:07 - r - INFO: - Episode: 13/20, Reward: 200.000, Step: 200
53
- 2023-03-19 21:58:07 - r - INFO: - Episode: 14/20, Reward: 200.000, Step: 200
54
- 2023-03-19 21:58:07 - r - INFO: - Episode: 15/20, Reward: 200.000, Step: 200
55
- 2023-03-19 21:58:07 - r - INFO: - Episode: 16/20, Reward: 200.000, Step: 200
56
- 2023-03-19 21:58:07 - r - INFO: - Episode: 17/20, Reward: 200.000, Step: 200
57
- 2023-03-19 21:58:07 - r - INFO: - Episode: 18/20, Reward: 200.000, Step: 200
58
- 2023-03-19 21:58:07 - r - INFO: - Episode: 19/20, Reward: 200.000, Step: 200
59
- 2023-03-19 21:58:07 - r - INFO: - Episode: 20/20, Reward: 200.000, Step: 200
60
- 2023-03-19 21:58:07 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_mp_DQN_20230319-215806/models/checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:308eccb972f5142d25c00579d63ecd9ae50df94fcdd9bb172d00217e9db3dd12
3
- size 272407
 
 
 
 
CartPole-v1/Test_CartPole-v1_mp_DQN_20230319-215806/results/learning_curve.png DELETED
Binary file (28.1 kB)
 
CartPole-v1/Test_CartPole-v1_mp_DQN_20230319-215806/results/res.csv DELETED
@@ -1,21 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,200.0,200
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,200.0,200
7
- 5,200.0,200
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,200.0,200
11
- 9,200.0,200
12
- 10,200.0,200
13
- 11,200.0,200
14
- 12,200.0,200
15
- 13,200.0,200
16
- 14,200.0,200
17
- 15,200.0,200
18
- 16,200.0,200
19
- 17,200.0,200
20
- 18,200.0,200
21
- 19,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_NoisyDQN_20230318-162139/config.yaml DELETED
@@ -1,29 +0,0 @@
1
- general_cfg:
2
- algo_name: NoisyDQN
3
- device: cpu
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: false
8
- load_path: Train_CartPole-v1_NoisyDQN
9
- max_steps: 200
10
- mode: train
11
- new_step_api: true
12
- render: false
13
- save_fig: true
14
- seed: 1
15
- show_fig: false
16
- test_eps: 10
17
- train_eps: 100
18
- wrapper: null
19
- algo_cfg:
20
- batch_size: 64
21
- buffer_size: 100000
22
- epsilon_decay: 500
23
- epsilon_end: 0.01
24
- epsilon_start: 0.95
25
- gamma: 0.95
26
- hidden_dim: 256
27
- lr: 0.0001
28
- target_update: 4
29
- tau: 1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_NoisyDQN_20230318-162139/logs/log.txt DELETED
@@ -1,153 +0,0 @@
1
- 2023-03-18 16:21:39 - r - INFO: - Hyperparameters:
2
- 2023-03-18 16:21:39 - r - INFO: - ================================================================================
3
- 2023-03-18 16:21:39 - r - INFO: - Name Value Type
4
- 2023-03-18 16:21:39 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-03-18 16:21:39 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-03-18 16:21:39 - r - INFO: - wrapper None <class 'str'>
7
- 2023-03-18 16:21:39 - r - INFO: - render 0 <class 'bool'>
8
- 2023-03-18 16:21:39 - r - INFO: - algo_name NoisyDQN <class 'str'>
9
- 2023-03-18 16:21:39 - r - INFO: - mode train <class 'str'>
10
- 2023-03-18 16:21:39 - r - INFO: - seed 1 <class 'int'>
11
- 2023-03-18 16:21:39 - r - INFO: - device cpu <class 'str'>
12
- 2023-03-18 16:21:39 - r - INFO: - train_eps 100 <class 'int'>
13
- 2023-03-18 16:21:39 - r - INFO: - test_eps 10 <class 'int'>
14
- 2023-03-18 16:21:39 - r - INFO: - eval_eps 10 <class 'int'>
15
- 2023-03-18 16:21:39 - r - INFO: - eval_per_episode 5 <class 'int'>
16
- 2023-03-18 16:21:39 - r - INFO: - max_steps 200 <class 'int'>
17
- 2023-03-18 16:21:39 - r - INFO: - load_checkpoint 0 <class 'bool'>
18
- 2023-03-18 16:21:39 - r - INFO: - load_path Train_CartPole-v1_NoisyDQN <class 'str'>
19
- 2023-03-18 16:21:39 - r - INFO: - show_fig 0 <class 'bool'>
20
- 2023-03-18 16:21:39 - r - INFO: - save_fig 1 <class 'bool'>
21
- 2023-03-18 16:21:39 - r - INFO: - epsilon_start 0.95 <class 'float'>
22
- 2023-03-18 16:21:39 - r - INFO: - tau 1.0 <class 'float'>
23
- 2023-03-18 16:21:39 - r - INFO: - epsilon_end 0.01 <class 'float'>
24
- 2023-03-18 16:21:39 - r - INFO: - epsilon_decay 500 <class 'int'>
25
- 2023-03-18 16:21:39 - r - INFO: - hidden_dim 256 <class 'int'>
26
- 2023-03-18 16:21:39 - r - INFO: - gamma 0.95 <class 'float'>
27
- 2023-03-18 16:21:39 - r - INFO: - lr 0.0001 <class 'float'>
28
- 2023-03-18 16:21:39 - r - INFO: - buffer_size 100000 <class 'int'>
29
- 2023-03-18 16:21:39 - r - INFO: - batch_size 64 <class 'int'>
30
- 2023-03-18 16:21:39 - r - INFO: - target_update 4 <class 'int'>
31
- 2023-03-18 16:21:39 - r - INFO: - task_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_NoisyDQN_20230318-162139 <class 'str'>
32
- 2023-03-18 16:21:39 - r - INFO: - res_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_NoisyDQN_20230318-162139/results <class 'str'>
33
- 2023-03-18 16:21:39 - r - INFO: - log_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_NoisyDQN_20230318-162139/logs <class 'str'>
34
- 2023-03-18 16:21:39 - r - INFO: - traj_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_NoisyDQN_20230318-162139/traj <class 'str'>
35
- 2023-03-18 16:21:39 - r - INFO: - tb_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_NoisyDQN_20230318-162139/tb_logs <class 'str'>
36
- 2023-03-18 16:21:39 - r - INFO: - ================================================================================
37
- 2023-03-18 16:21:39 - r - INFO: - n_states: 4, n_actions: 2
38
- 2023-03-18 16:21:39 - r - INFO: - Start training!
39
- 2023-03-18 16:21:39 - r - INFO: - Env: CartPole-v1, Algorithm: NoisyDQN, Device: cpu
40
- 2023-03-18 16:21:39 - r - INFO: - Episode: 1/100, Reward: 16.000, Step: 16
41
- 2023-03-18 16:21:39 - r - INFO: - Episode: 2/100, Reward: 16.000, Step: 16
42
- 2023-03-18 16:21:39 - r - INFO: - Episode: 3/100, Reward: 18.000, Step: 18
43
- 2023-03-18 16:21:39 - r - INFO: - Episode: 4/100, Reward: 14.000, Step: 14
44
- 2023-03-18 16:21:39 - r - INFO: - Episode: 5/100, Reward: 22.000, Step: 22
45
- 2023-03-18 16:21:39 - r - INFO: - Current episode 5 has the best eval reward: 9.300
46
- 2023-03-18 16:21:39 - r - INFO: - Episode: 6/100, Reward: 27.000, Step: 27
47
- 2023-03-18 16:21:40 - r - INFO: - Episode: 7/100, Reward: 9.000, Step: 9
48
- 2023-03-18 16:21:40 - r - INFO: - Episode: 8/100, Reward: 13.000, Step: 13
49
- 2023-03-18 16:21:40 - r - INFO: - Episode: 9/100, Reward: 17.000, Step: 17
50
- 2023-03-18 16:21:40 - r - INFO: - Episode: 10/100, Reward: 37.000, Step: 37
51
- 2023-03-18 16:21:40 - r - INFO: - Current episode 10 has the best eval reward: 9.500
52
- 2023-03-18 16:21:40 - r - INFO: - Episode: 11/100, Reward: 15.000, Step: 15
53
- 2023-03-18 16:21:40 - r - INFO: - Episode: 12/100, Reward: 22.000, Step: 22
54
- 2023-03-18 16:21:40 - r - INFO: - Episode: 13/100, Reward: 9.000, Step: 9
55
- 2023-03-18 16:21:40 - r - INFO: - Episode: 14/100, Reward: 14.000, Step: 14
56
- 2023-03-18 16:21:40 - r - INFO: - Episode: 15/100, Reward: 12.000, Step: 12
57
- 2023-03-18 16:21:40 - r - INFO: - Current episode 15 has the best eval reward: 9.700
58
- 2023-03-18 16:21:40 - r - INFO: - Episode: 16/100, Reward: 16.000, Step: 16
59
- 2023-03-18 16:21:40 - r - INFO: - Episode: 17/100, Reward: 16.000, Step: 16
60
- 2023-03-18 16:21:40 - r - INFO: - Episode: 18/100, Reward: 14.000, Step: 14
61
- 2023-03-18 16:21:40 - r - INFO: - Episode: 19/100, Reward: 11.000, Step: 11
62
- 2023-03-18 16:21:40 - r - INFO: - Episode: 20/100, Reward: 13.000, Step: 13
63
- 2023-03-18 16:21:40 - r - INFO: - Current episode 20 has the best eval reward: 9.700
64
- 2023-03-18 16:21:40 - r - INFO: - Episode: 21/100, Reward: 13.000, Step: 13
65
- 2023-03-18 16:21:42 - r - INFO: - Episode: 22/100, Reward: 14.000, Step: 14
66
- 2023-03-18 16:21:42 - r - INFO: - Episode: 23/100, Reward: 14.000, Step: 14
67
- 2023-03-18 16:21:42 - r - INFO: - Episode: 24/100, Reward: 37.000, Step: 37
68
- 2023-03-18 16:21:42 - r - INFO: - Episode: 25/100, Reward: 12.000, Step: 12
69
- 2023-03-18 16:21:42 - r - INFO: - Episode: 26/100, Reward: 18.000, Step: 18
70
- 2023-03-18 16:21:42 - r - INFO: - Episode: 27/100, Reward: 13.000, Step: 13
71
- 2023-03-18 16:21:42 - r - INFO: - Episode: 28/100, Reward: 20.000, Step: 20
72
- 2023-03-18 16:21:43 - r - INFO: - Episode: 29/100, Reward: 17.000, Step: 17
73
- 2023-03-18 16:21:43 - r - INFO: - Episode: 30/100, Reward: 10.000, Step: 10
74
- 2023-03-18 16:21:43 - r - INFO: - Current episode 30 has the best eval reward: 13.700
75
- 2023-03-18 16:21:43 - r - INFO: - Episode: 31/100, Reward: 10.000, Step: 10
76
- 2023-03-18 16:21:43 - r - INFO: - Episode: 32/100, Reward: 12.000, Step: 12
77
- 2023-03-18 16:21:43 - r - INFO: - Episode: 33/100, Reward: 11.000, Step: 11
78
- 2023-03-18 16:21:43 - r - INFO: - Episode: 34/100, Reward: 12.000, Step: 12
79
- 2023-03-18 16:21:43 - r - INFO: - Episode: 35/100, Reward: 17.000, Step: 17
80
- 2023-03-18 16:21:43 - r - INFO: - Current episode 35 has the best eval reward: 32.500
81
- 2023-03-18 16:21:43 - r - INFO: - Episode: 36/100, Reward: 17.000, Step: 17
82
- 2023-03-18 16:21:43 - r - INFO: - Episode: 37/100, Reward: 17.000, Step: 17
83
- 2023-03-18 16:21:43 - r - INFO: - Episode: 38/100, Reward: 23.000, Step: 23
84
- 2023-03-18 16:21:43 - r - INFO: - Episode: 39/100, Reward: 35.000, Step: 35
85
- 2023-03-18 16:21:43 - r - INFO: - Episode: 40/100, Reward: 46.000, Step: 46
86
- 2023-03-18 16:21:44 - r - INFO: - Episode: 41/100, Reward: 10.000, Step: 10
87
- 2023-03-18 16:21:44 - r - INFO: - Episode: 42/100, Reward: 13.000, Step: 13
88
- 2023-03-18 16:21:44 - r - INFO: - Episode: 43/100, Reward: 27.000, Step: 27
89
- 2023-03-18 16:21:44 - r - INFO: - Episode: 44/100, Reward: 43.000, Step: 43
90
- 2023-03-18 16:21:44 - r - INFO: - Episode: 45/100, Reward: 23.000, Step: 23
91
- 2023-03-18 16:21:44 - r - INFO: - Episode: 46/100, Reward: 31.000, Step: 31
92
- 2023-03-18 16:21:44 - r - INFO: - Episode: 47/100, Reward: 36.000, Step: 36
93
- 2023-03-18 16:21:44 - r - INFO: - Episode: 48/100, Reward: 27.000, Step: 27
94
- 2023-03-18 16:21:44 - r - INFO: - Episode: 49/100, Reward: 27.000, Step: 27
95
- 2023-03-18 16:21:44 - r - INFO: - Episode: 50/100, Reward: 40.000, Step: 40
96
- 2023-03-18 16:21:44 - r - INFO: - Current episode 50 has the best eval reward: 36.900
97
- 2023-03-18 16:21:45 - r - INFO: - Episode: 51/100, Reward: 47.000, Step: 47
98
- 2023-03-18 16:21:45 - r - INFO: - Episode: 52/100, Reward: 60.000, Step: 60
99
- 2023-03-18 16:21:45 - r - INFO: - Episode: 53/100, Reward: 104.000, Step: 104
100
- 2023-03-18 16:21:45 - r - INFO: - Episode: 54/100, Reward: 70.000, Step: 70
101
- 2023-03-18 16:21:45 - r - INFO: - Episode: 55/100, Reward: 65.000, Step: 65
102
- 2023-03-18 16:21:46 - r - INFO: - Episode: 56/100, Reward: 96.000, Step: 96
103
- 2023-03-18 16:21:46 - r - INFO: - Episode: 57/100, Reward: 34.000, Step: 34
104
- 2023-03-18 16:21:46 - r - INFO: - Episode: 58/100, Reward: 30.000, Step: 30
105
- 2023-03-18 16:21:46 - r - INFO: - Episode: 59/100, Reward: 63.000, Step: 63
106
- 2023-03-18 16:21:46 - r - INFO: - Episode: 60/100, Reward: 32.000, Step: 32
107
- 2023-03-18 16:21:46 - r - INFO: - Current episode 60 has the best eval reward: 104.900
108
- 2023-03-18 16:21:47 - r - INFO: - Episode: 61/100, Reward: 36.000, Step: 36
109
- 2023-03-18 16:21:47 - r - INFO: - Episode: 62/100, Reward: 26.000, Step: 26
110
- 2023-03-18 16:21:47 - r - INFO: - Episode: 63/100, Reward: 29.000, Step: 29
111
- 2023-03-18 16:21:47 - r - INFO: - Episode: 64/100, Reward: 58.000, Step: 58
112
- 2023-03-18 16:21:47 - r - INFO: - Episode: 65/100, Reward: 123.000, Step: 123
113
- 2023-03-18 16:21:47 - r - INFO: - Episode: 66/100, Reward: 74.000, Step: 74
114
- 2023-03-18 16:21:48 - r - INFO: - Episode: 67/100, Reward: 56.000, Step: 56
115
- 2023-03-18 16:21:48 - r - INFO: - Episode: 68/100, Reward: 76.000, Step: 76
116
- 2023-03-18 16:21:48 - r - INFO: - Episode: 69/100, Reward: 63.000, Step: 63
117
- 2023-03-18 16:21:48 - r - INFO: - Episode: 70/100, Reward: 55.000, Step: 55
118
- 2023-03-18 16:21:48 - r - INFO: - Episode: 71/100, Reward: 76.000, Step: 76
119
- 2023-03-18 16:21:49 - r - INFO: - Episode: 72/100, Reward: 59.000, Step: 59
120
- 2023-03-18 16:21:49 - r - INFO: - Episode: 73/100, Reward: 70.000, Step: 70
121
- 2023-03-18 16:21:49 - r - INFO: - Episode: 74/100, Reward: 98.000, Step: 98
122
- 2023-03-18 16:21:49 - r - INFO: - Episode: 75/100, Reward: 60.000, Step: 60
123
- 2023-03-18 16:21:50 - r - INFO: - Episode: 76/100, Reward: 114.000, Step: 114
124
- 2023-03-18 16:21:50 - r - INFO: - Episode: 77/100, Reward: 200.000, Step: 200
125
- 2023-03-18 16:21:51 - r - INFO: - Episode: 78/100, Reward: 199.000, Step: 199
126
- 2023-03-18 16:21:51 - r - INFO: - Episode: 79/100, Reward: 200.000, Step: 200
127
- 2023-03-18 16:21:52 - r - INFO: - Episode: 80/100, Reward: 200.000, Step: 200
128
- 2023-03-18 16:21:52 - r - INFO: - Current episode 80 has the best eval reward: 200.000
129
- 2023-03-18 16:21:53 - r - INFO: - Episode: 81/100, Reward: 200.000, Step: 200
130
- 2023-03-18 16:21:53 - r - INFO: - Episode: 82/100, Reward: 200.000, Step: 200
131
- 2023-03-18 16:21:54 - r - INFO: - Episode: 83/100, Reward: 200.000, Step: 200
132
- 2023-03-18 16:21:55 - r - INFO: - Episode: 84/100, Reward: 200.000, Step: 200
133
- 2023-03-18 16:21:55 - r - INFO: - Episode: 85/100, Reward: 200.000, Step: 200
134
- 2023-03-18 16:21:56 - r - INFO: - Current episode 85 has the best eval reward: 200.000
135
- 2023-03-18 16:21:56 - r - INFO: - Episode: 86/100, Reward: 200.000, Step: 200
136
- 2023-03-18 16:21:57 - r - INFO: - Episode: 87/100, Reward: 200.000, Step: 200
137
- 2023-03-18 16:21:57 - r - INFO: - Episode: 88/100, Reward: 200.000, Step: 200
138
- 2023-03-18 16:21:58 - r - INFO: - Episode: 89/100, Reward: 200.000, Step: 200
139
- 2023-03-18 16:21:58 - r - INFO: - Episode: 90/100, Reward: 200.000, Step: 200
140
- 2023-03-18 16:21:59 - r - INFO: - Current episode 90 has the best eval reward: 200.000
141
- 2023-03-18 16:21:59 - r - INFO: - Episode: 91/100, Reward: 200.000, Step: 200
142
- 2023-03-18 16:22:00 - r - INFO: - Episode: 92/100, Reward: 200.000, Step: 200
143
- 2023-03-18 16:22:01 - r - INFO: - Episode: 93/100, Reward: 200.000, Step: 200
144
- 2023-03-18 16:22:01 - r - INFO: - Episode: 94/100, Reward: 200.000, Step: 200
145
- 2023-03-18 16:22:02 - r - INFO: - Episode: 95/100, Reward: 200.000, Step: 200
146
- 2023-03-18 16:22:02 - r - INFO: - Current episode 95 has the best eval reward: 200.000
147
- 2023-03-18 16:22:03 - r - INFO: - Episode: 96/100, Reward: 200.000, Step: 200
148
- 2023-03-18 16:22:03 - r - INFO: - Episode: 97/100, Reward: 200.000, Step: 200
149
- 2023-03-18 16:22:04 - r - INFO: - Episode: 98/100, Reward: 200.000, Step: 200
150
- 2023-03-18 16:22:04 - r - INFO: - Episode: 99/100, Reward: 200.000, Step: 200
151
- 2023-03-18 16:22:05 - r - INFO: - Episode: 100/100, Reward: 200.000, Step: 200
152
- 2023-03-18 16:22:05 - r - INFO: - Current episode 100 has the best eval reward: 200.000
153
- 2023-03-18 16:22:05 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_NoisyDQN_20230318-162139/models/checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c018ce117a1834aec6f585e6be780865631d49a8a81fb42ee4db9602b49793f5
3
- size 804667
 
 
 
 
CartPole-v1/Train_CartPole-v1_NoisyDQN_20230318-162139/results/learning_curve.png DELETED
Binary file (45.5 kB)
 
CartPole-v1/Train_CartPole-v1_NoisyDQN_20230318-162139/results/res.csv DELETED
@@ -1,101 +0,0 @@
1
- episodes,rewards,steps
2
- 0,16.0,16
3
- 1,16.0,16
4
- 2,18.0,18
5
- 3,14.0,14
6
- 4,22.0,22
7
- 5,27.0,27
8
- 6,9.0,9
9
- 7,13.0,13
10
- 8,17.0,17
11
- 9,37.0,37
12
- 10,15.0,15
13
- 11,22.0,22
14
- 12,9.0,9
15
- 13,14.0,14
16
- 14,12.0,12
17
- 15,16.0,16
18
- 16,16.0,16
19
- 17,14.0,14
20
- 18,11.0,11
21
- 19,13.0,13
22
- 20,13.0,13
23
- 21,14.0,14
24
- 22,14.0,14
25
- 23,37.0,37
26
- 24,12.0,12
27
- 25,18.0,18
28
- 26,13.0,13
29
- 27,20.0,20
30
- 28,17.0,17
31
- 29,10.0,10
32
- 30,10.0,10
33
- 31,12.0,12
34
- 32,11.0,11
35
- 33,12.0,12
36
- 34,17.0,17
37
- 35,17.0,17
38
- 36,17.0,17
39
- 37,23.0,23
40
- 38,35.0,35
41
- 39,46.0,46
42
- 40,10.0,10
43
- 41,13.0,13
44
- 42,27.0,27
45
- 43,43.0,43
46
- 44,23.0,23
47
- 45,31.0,31
48
- 46,36.0,36
49
- 47,27.0,27
50
- 48,27.0,27
51
- 49,40.0,40
52
- 50,47.0,47
53
- 51,60.0,60
54
- 52,104.0,104
55
- 53,70.0,70
56
- 54,65.0,65
57
- 55,96.0,96
58
- 56,34.0,34
59
- 57,30.0,30
60
- 58,63.0,63
61
- 59,32.0,32
62
- 60,36.0,36
63
- 61,26.0,26
64
- 62,29.0,29
65
- 63,58.0,58
66
- 64,123.0,123
67
- 65,74.0,74
68
- 66,56.0,56
69
- 67,76.0,76
70
- 68,63.0,63
71
- 69,55.0,55
72
- 70,76.0,76
73
- 71,59.0,59
74
- 72,70.0,70
75
- 73,98.0,98
76
- 74,60.0,60
77
- 75,114.0,114
78
- 76,200.0,200
79
- 77,199.0,199
80
- 78,200.0,200
81
- 79,200.0,200
82
- 80,200.0,200
83
- 81,200.0,200
84
- 82,200.0,200
85
- 83,200.0,200
86
- 84,200.0,200
87
- 85,200.0,200
88
- 86,200.0,200
89
- 87,200.0,200
90
- 88,200.0,200
91
- 89,200.0,200
92
- 90,200.0,200
93
- 91,200.0,200
94
- 92,200.0,200
95
- 93,200.0,200
96
- 94,200.0,200
97
- 95,200.0,200
98
- 96,200.0,200
99
- 97,200.0,200
100
- 98,200.0,200
101
- 99,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_NoisyDQN_mp_20230413-220639/logs/log.txt DELETED
@@ -1,44 +0,0 @@
1
- 2023-04-13 22:06:39 - r - INFO: - Hyperparameters:
2
- 2023-04-13 22:06:39 - r - INFO: - ================================================================================
3
- 2023-04-13 22:06:39 - r - INFO: - Name Value Type
4
- 2023-04-13 22:06:39 - r - INFO: - env_name gym <class 'str'>
5
- 2023-04-13 22:06:39 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-13 22:06:39 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-13 22:06:39 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-13 22:06:39 - r - INFO: - render_mode None <class 'str'>
9
- 2023-04-13 22:06:39 - r - INFO: - algo_name NoisyDQN <class 'str'>
10
- 2023-04-13 22:06:39 - r - INFO: - mode train <class 'str'>
11
- 2023-04-13 22:06:39 - r - INFO: - mp_backend mp <class 'str'>
12
- 2023-04-13 22:06:39 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-13 22:06:39 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-13 22:06:39 - r - INFO: - train_eps 200 <class 'int'>
15
- 2023-04-13 22:06:39 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-13 22:06:39 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-13 22:06:39 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-13 22:06:39 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-13 22:06:39 - r - INFO: - load_checkpoint 0 <class 'bool'>
20
- 2023-04-13 22:06:39 - r - INFO: - load_path Train_CartPole-v1_NoisyDQN <class 'str'>
21
- 2023-04-13 22:06:39 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-13 22:06:39 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-13 22:06:39 - r - INFO: - n_workers 2 <class 'int'>
24
- 2023-04-13 22:06:39 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-13 22:06:39 - r - INFO: - tau 1.0 <class 'float'>
26
- 2023-04-13 22:06:39 - r - INFO: - epsilon_end 0.01 <class 'float'>
27
- 2023-04-13 22:06:39 - r - INFO: - epsilon_decay 500 <class 'int'>
28
- 2023-04-13 22:06:39 - r - INFO: - hidden_dim 256 <class 'int'>
29
- 2023-04-13 22:06:39 - r - INFO: - gamma 0.95 <class 'float'>
30
- 2023-04-13 22:06:39 - r - INFO: - lr 0.0001 <class 'float'>
31
- 2023-04-13 22:06:39 - r - INFO: - buffer_size 100000 <class 'int'>
32
- 2023-04-13 22:06:39 - r - INFO: - batch_size 64 <class 'int'>
33
- 2023-04-13 22:06:39 - r - INFO: - target_update 4 <class 'int'>
34
- 2023-04-13 22:06:39 - r - INFO: - id CartPole-v1 <class 'str'>
35
- 2023-04-13 22:06:39 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_NoisyDQN_20230413-220639 <class 'str'>
36
- 2023-04-13 22:06:39 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_NoisyDQN_20230413-220639/results <class 'str'>
37
- 2023-04-13 22:06:39 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_NoisyDQN_20230413-220639/logs <class 'str'>
38
- 2023-04-13 22:06:39 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_NoisyDQN_20230413-220639/traj <class 'str'>
39
- 2023-04-13 22:06:39 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_NoisyDQN_20230413-220639/videos <class 'str'>
40
- 2023-04-13 22:06:39 - r - INFO: - ================================================================================
41
- 2023-04-13 22:06:39 - r - INFO: - n_states: 4, n_actions: 2
42
- 2023-04-13 22:06:39 - r - INFO: - Start training!
43
- 2023-04-13 22:06:39 - r - INFO: - Env: gym, Algorithm: NoisyDQN, Device: cpu
44
- 2023-04-13 22:07:32 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_NoisyDQN_mp_20230413-220639/models/checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab846435c3ac88c2507622e5f1050d4be6bfc94e33845dc5d7de2fac6df2c92f
3
- size 804667
 
 
 
 
CartPole-v1/Train_CartPole-v1_NoisyDQN_mp_20230413-220639/results/learning_curve.png DELETED
Binary file (42.8 kB)
 
CartPole-v1/Train_CartPole-v1_NoisyDQN_mp_20230413-220639/results/res.csv DELETED
@@ -1,202 +0,0 @@
1
- episodes,rewards
2
- 0,24.0
3
- 1,18.0
4
- 2,18.0
5
- 3,19.0
6
- 4,27.0
7
- 5,17.0
8
- 6,15.0
9
- 7,13.0
10
- 8,21.0
11
- 9,29.0
12
- 10,16.0
13
- 11,33.0
14
- 12,9.0
15
- 13,13.0
16
- 14,11.0
17
- 15,17.0
18
- 16,10.0
19
- 17,25.0
20
- 18,13.0
21
- 19,19.0
22
- 20,11.0
23
- 21,11.0
24
- 22,10.0
25
- 23,12.0
26
- 24,14.0
27
- 25,11.0
28
- 26,17.0
29
- 27,15.0
30
- 28,10.0
31
- 29,17.0
32
- 30,10.0
33
- 31,11.0
34
- 32,10.0
35
- 33,18.0
36
- 34,12.0
37
- 35,11.0
38
- 36,18.0
39
- 37,13.0
40
- 38,11.0
41
- 39,35.0
42
- 40,9.0
43
- 41,12.0
44
- 42,13.0
45
- 43,9.0
46
- 44,9.0
47
- 45,28.0
48
- 46,10.0
49
- 47,10.0
50
- 48,9.0
51
- 49,10.0
52
- 50,10.0
53
- 51,9.0
54
- 52,11.0
55
- 53,13.0
56
- 54,13.0
57
- 55,14.0
58
- 56,9.0
59
- 57,31.0
60
- 58,64.0
61
- 59,55.0
62
- 60,25.0
63
- 61,56.0
64
- 62,40.0
65
- 63,91.0
66
- 64,39.0
67
- 65,33.0
68
- 66,53.0
69
- 67,55.0
70
- 68,43.0
71
- 69,96.0
72
- 70,50.0
73
- 71,56.0
74
- 72,55.0
75
- 73,50.0
76
- 74,64.0
77
- 75,102.0
78
- 76,63.0
79
- 77,41.0
80
- 78,88.0
81
- 79,69.0
82
- 80,186.0
83
- 81,161.0
84
- 82,106.0
85
- 83,129.0
86
- 84,200.0
87
- 85,200.0
88
- 86,200.0
89
- 87,200.0
90
- 88,200.0
91
- 89,200.0
92
- 90,200.0
93
- 91,200.0
94
- 92,200.0
95
- 93,200.0
96
- 94,200.0
97
- 95,200.0
98
- 96,200.0
99
- 97,200.0
100
- 98,200.0
101
- 99,200.0
102
- 100,200.0
103
- 101,200.0
104
- 102,200.0
105
- 103,200.0
106
- 104,200.0
107
- 105,200.0
108
- 106,200.0
109
- 107,200.0
110
- 108,200.0
111
- 109,200.0
112
- 110,200.0
113
- 111,200.0
114
- 112,200.0
115
- 113,200.0
116
- 114,200.0
117
- 115,200.0
118
- 116,200.0
119
- 117,200.0
120
- 118,200.0
121
- 119,200.0
122
- 120,200.0
123
- 121,200.0
124
- 122,200.0
125
- 123,200.0
126
- 124,200.0
127
- 125,200.0
128
- 126,200.0
129
- 127,200.0
130
- 128,200.0
131
- 129,200.0
132
- 130,200.0
133
- 131,200.0
134
- 132,200.0
135
- 133,200.0
136
- 134,200.0
137
- 135,200.0
138
- 136,200.0
139
- 137,200.0
140
- 138,200.0
141
- 139,200.0
142
- 140,200.0
143
- 141,200.0
144
- 142,200.0
145
- 143,200.0
146
- 144,200.0
147
- 145,200.0
148
- 146,200.0
149
- 147,200.0
150
- 148,200.0
151
- 149,200.0
152
- 150,200.0
153
- 151,200.0
154
- 152,200.0
155
- 153,200.0
156
- 154,200.0
157
- 155,200.0
158
- 156,200.0
159
- 157,200.0
160
- 158,200.0
161
- 159,200.0
162
- 160,200.0
163
- 161,200.0
164
- 162,200.0
165
- 163,200.0
166
- 164,200.0
167
- 165,200.0
168
- 166,200.0
169
- 167,200.0
170
- 168,200.0
171
- 169,200.0
172
- 170,200.0
173
- 171,200.0
174
- 172,200.0
175
- 173,200.0
176
- 174,200.0
177
- 175,200.0
178
- 176,200.0
179
- 177,200.0
180
- 178,200.0
181
- 179,200.0
182
- 180,200.0
183
- 181,200.0
184
- 182,200.0
185
- 183,200.0
186
- 184,200.0
187
- 185,200.0
188
- 186,200.0
189
- 187,200.0
190
- 188,200.0
191
- 189,200.0
192
- 190,200.0
193
- 191,200.0
194
- 192,200.0
195
- 193,200.0
196
- 194,200.0
197
- 195,200.0
198
- 196,200.0
199
- 197,200.0
200
- 198,200.0
201
- 199,200.0
202
- 200,200.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_NoisyDQN_ray_20230414-101351/logs/log.txt DELETED
@@ -1,44 +0,0 @@
1
- 2023-04-14 10:13:51 - r - INFO: - Hyperparameters:
2
- 2023-04-14 10:13:51 - r - INFO: - ================================================================================
3
- 2023-04-14 10:13:51 - r - INFO: - Name Value Type
4
- 2023-04-14 10:13:51 - r - INFO: - env_name gym <class 'str'>
5
- 2023-04-14 10:13:51 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-14 10:13:51 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-14 10:13:51 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-14 10:13:51 - r - INFO: - render_mode None <class 'str'>
9
- 2023-04-14 10:13:51 - r - INFO: - algo_name NoisyDQN <class 'str'>
10
- 2023-04-14 10:13:51 - r - INFO: - mode train <class 'str'>
11
- 2023-04-14 10:13:51 - r - INFO: - mp_backend ray <class 'str'>
12
- 2023-04-14 10:13:51 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-14 10:13:51 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-14 10:13:51 - r - INFO: - train_eps 220 <class 'int'>
15
- 2023-04-14 10:13:51 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-14 10:13:51 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-14 10:13:51 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-14 10:13:51 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-14 10:13:51 - r - INFO: - load_checkpoint 0 <class 'bool'>
20
- 2023-04-14 10:13:51 - r - INFO: - load_path Train_CartPole-v1_NoisyDQN <class 'str'>
21
- 2023-04-14 10:13:51 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-14 10:13:51 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-14 10:13:51 - r - INFO: - n_workers 2 <class 'int'>
24
- 2023-04-14 10:13:51 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-14 10:13:51 - r - INFO: - tau 1.0 <class 'float'>
26
- 2023-04-14 10:13:51 - r - INFO: - epsilon_end 0.01 <class 'float'>
27
- 2023-04-14 10:13:51 - r - INFO: - epsilon_decay 500 <class 'int'>
28
- 2023-04-14 10:13:51 - r - INFO: - hidden_dim 256 <class 'int'>
29
- 2023-04-14 10:13:51 - r - INFO: - gamma 0.95 <class 'float'>
30
- 2023-04-14 10:13:51 - r - INFO: - lr 0.0001 <class 'float'>
31
- 2023-04-14 10:13:51 - r - INFO: - buffer_size 100000 <class 'int'>
32
- 2023-04-14 10:13:51 - r - INFO: - batch_size 64 <class 'int'>
33
- 2023-04-14 10:13:51 - r - INFO: - target_update 4 <class 'int'>
34
- 2023-04-14 10:13:51 - r - INFO: - id CartPole-v1 <class 'str'>
35
- 2023-04-14 10:13:51 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_NoisyDQN_20230414-101351 <class 'str'>
36
- 2023-04-14 10:13:51 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_NoisyDQN_20230414-101351/results <class 'str'>
37
- 2023-04-14 10:13:51 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_NoisyDQN_20230414-101351/logs <class 'str'>
38
- 2023-04-14 10:13:51 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_NoisyDQN_20230414-101351/traj <class 'str'>
39
- 2023-04-14 10:13:51 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_NoisyDQN_20230414-101351/videos <class 'str'>
40
- 2023-04-14 10:13:51 - r - INFO: - ================================================================================
41
- 2023-04-14 10:13:54 - r - INFO: - n_states: 4, n_actions: 2
42
- 2023-04-14 10:13:54 - r - INFO: - Start training!
43
- 2023-04-14 10:13:54 - r - INFO: - Env: gym, Algorithm: NoisyDQN, Device: cpu
44
- 2023-04-14 10:18:26 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_NoisyDQN_ray_20230414-101351/models/checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5596bf84dd1a6b891f4a5790c45ae7c9f937bf6471c2218350adb67e6787b2d
3
- size 804667
 
 
 
 
CartPole-v1/Train_CartPole-v1_NoisyDQN_ray_20230414-101351/results/learning_curve.png DELETED
Binary file (44.6 kB)
 
CartPole-v1/Train_CartPole-v1_NoisyDQN_ray_20230414-101351/results/res.csv DELETED
@@ -1,221 +0,0 @@
1
- episodes,rewards
2
- 0,18.0
3
- 1,18.0
4
- 2,19.0
5
- 3,28.0
6
- 4,17.0
7
- 5,15.0
8
- 6,13.0
9
- 7,15.0
10
- 8,38.0
11
- 9,31.0
12
- 10,11.0
13
- 11,31.0
14
- 12,10.0
15
- 13,17.0
16
- 14,14.0
17
- 15,13.0
18
- 16,21.0
19
- 17,10.0
20
- 18,10.0
21
- 19,10.0
22
- 20,11.0
23
- 21,13.0
24
- 22,19.0
25
- 23,12.0
26
- 24,13.0
27
- 25,18.0
28
- 26,15.0
29
- 27,13.0
30
- 28,14.0
31
- 29,12.0
32
- 30,11.0
33
- 31,12.0
34
- 32,14.0
35
- 33,9.0
36
- 34,10.0
37
- 35,16.0
38
- 36,13.0
39
- 37,15.0
40
- 38,12.0
41
- 39,11.0
42
- 40,14.0
43
- 41,28.0
44
- 42,16.0
45
- 43,11.0
46
- 44,12.0
47
- 45,11.0
48
- 46,10.0
49
- 47,13.0
50
- 48,10.0
51
- 49,9.0
52
- 50,13.0
53
- 51,11.0
54
- 52,12.0
55
- 53,10.0
56
- 54,9.0
57
- 55,12.0
58
- 56,10.0
59
- 57,15.0
60
- 58,12.0
61
- 59,12.0
62
- 60,9.0
63
- 61,16.0
64
- 62,15.0
65
- 63,16.0
66
- 64,11.0
67
- 65,18.0
68
- 66,14.0
69
- 67,12.0
70
- 68,17.0
71
- 69,13.0
72
- 70,18.0
73
- 71,15.0
74
- 72,19.0
75
- 73,26.0
76
- 74,39.0
77
- 75,28.0
78
- 76,25.0
79
- 77,66.0
80
- 78,24.0
81
- 79,29.0
82
- 80,24.0
83
- 81,25.0
84
- 82,24.0
85
- 83,20.0
86
- 84,46.0
87
- 85,37.0
88
- 86,16.0
89
- 87,24.0
90
- 88,29.0
91
- 89,86.0
92
- 90,128.0
93
- 91,85.0
94
- 92,79.0
95
- 93,58.0
96
- 94,94.0
97
- 95,57.0
98
- 96,53.0
99
- 97,36.0
100
- 98,65.0
101
- 99,64.0
102
- 100,56.0
103
- 101,58.0
104
- 102,200.0
105
- 103,123.0
106
- 104,109.0
107
- 105,67.0
108
- 106,107.0
109
- 107,165.0
110
- 108,200.0
111
- 109,200.0
112
- 110,200.0
113
- 111,170.0
114
- 112,146.0
115
- 113,200.0
116
- 114,200.0
117
- 115,200.0
118
- 116,200.0
119
- 117,200.0
120
- 118,200.0
121
- 119,200.0
122
- 120,200.0
123
- 121,200.0
124
- 122,200.0
125
- 123,200.0
126
- 124,200.0
127
- 125,200.0
128
- 126,200.0
129
- 127,200.0
130
- 128,200.0
131
- 129,200.0
132
- 130,200.0
133
- 131,200.0
134
- 132,200.0
135
- 133,200.0
136
- 134,200.0
137
- 135,200.0
138
- 136,200.0
139
- 137,200.0
140
- 138,200.0
141
- 139,200.0
142
- 140,200.0
143
- 141,200.0
144
- 142,200.0
145
- 143,200.0
146
- 144,200.0
147
- 145,200.0
148
- 146,200.0
149
- 147,200.0
150
- 148,200.0
151
- 149,200.0
152
- 150,200.0
153
- 151,200.0
154
- 152,200.0
155
- 153,200.0
156
- 154,200.0
157
- 155,200.0
158
- 156,200.0
159
- 157,200.0
160
- 158,200.0
161
- 159,191.0
162
- 160,191.0
163
- 161,190.0
164
- 162,192.0
165
- 163,200.0
166
- 164,199.0
167
- 165,200.0
168
- 166,200.0
169
- 167,200.0
170
- 168,200.0
171
- 169,200.0
172
- 170,200.0
173
- 171,200.0
174
- 172,200.0
175
- 173,200.0
176
- 174,200.0
177
- 175,200.0
178
- 176,200.0
179
- 177,200.0
180
- 178,200.0
181
- 179,200.0
182
- 180,200.0
183
- 181,200.0
184
- 182,200.0
185
- 183,200.0
186
- 184,200.0
187
- 185,200.0
188
- 186,200.0
189
- 187,200.0
190
- 188,200.0
191
- 189,200.0
192
- 190,200.0
193
- 191,200.0
194
- 192,200.0
195
- 193,200.0
196
- 194,200.0
197
- 195,200.0
198
- 196,200.0
199
- 197,200.0
200
- 198,200.0
201
- 199,200.0
202
- 200,200.0
203
- 201,200.0
204
- 202,200.0
205
- 203,200.0
206
- 204,200.0
207
- 205,200.0
208
- 206,200.0
209
- 207,200.0
210
- 208,200.0
211
- 209,200.0
212
- 210,200.0
213
- 211,200.0
214
- 212,200.0
215
- 213,200.0
216
- 214,200.0
217
- 215,200.0
218
- 216,200.0
219
- 217,200.0
220
- 218,200.0
221
- 219,200.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{Acrobot-v1 β†’ ClassControl/Acrobot-v1}/Test_Acrobot-v1_DQN_20221122-120610/config.yaml RENAMED
File without changes
{Acrobot-v1 β†’ ClassControl/Acrobot-v1}/Test_Acrobot-v1_DQN_20221122-120610/logs/log.txt RENAMED
File without changes
{Acrobot-v1 β†’ ClassControl/Acrobot-v1}/Test_Acrobot-v1_DQN_20221122-120610/models/checkpoint.pt RENAMED
File without changes
{Acrobot-v1 β†’ ClassControl/Acrobot-v1}/Test_Acrobot-v1_DQN_20221122-120610/results/learning_curve.png RENAMED
File without changes
{Acrobot-v1 β†’ ClassControl/Acrobot-v1}/Test_Acrobot-v1_DQN_20221122-120610/results/res.csv RENAMED
File without changes
{Acrobot-v1 β†’ ClassControl/Acrobot-v1}/Train_Acrobot-v1_DQN_20221122-120436/config.yaml RENAMED
File without changes
{Acrobot-v1 β†’ ClassControl/Acrobot-v1}/Train_Acrobot-v1_DQN_20221122-120436/logs/log.txt RENAMED
File without changes
{Acrobot-v1 β†’ ClassControl/Acrobot-v1}/Train_Acrobot-v1_DQN_20221122-120436/models/checkpoint.pt RENAMED
File without changes
{Acrobot-v1 β†’ ClassControl/Acrobot-v1}/Train_Acrobot-v1_DQN_20221122-120436/results/learning_curve.png RENAMED
File without changes
{Acrobot-v1 β†’ ClassControl/Acrobot-v1}/Train_Acrobot-v1_DQN_20221122-120436/results/res.csv RENAMED
File without changes
ClassControl/CartPole-v1/.DS_Store ADDED
Binary file (6.15 kB). View file
 
{CartPole-v1 β†’ ClassControl/CartPole-v1}/Collect_CartPole-v1_PPO_20221206-173222/config.yaml RENAMED
File without changes
{CartPole-v1 β†’ ClassControl/CartPole-v1}/Collect_CartPole-v1_PPO_20221206-173222/logs/log.txt RENAMED
File without changes
{CartPole-v1 β†’ ClassControl/CartPole-v1}/Collect_CartPole-v1_PPO_20221206-173222/results/learning_curve.png RENAMED
File without changes
{CartPole-v1 β†’ ClassControl/CartPole-v1}/Collect_CartPole-v1_PPO_20221206-173222/results/res.csv RENAMED
File without changes
{CartPole-v1 β†’ ClassControl/CartPole-v1}/Collect_CartPole-v1_PPO_20221206-173222/traj/traj.pkl RENAMED
File without changes
{CartPole-v1 β†’ ClassControl/CartPole-v1}/Test_CartPole-v1_A2C_20221204-003659/config.yaml RENAMED
File without changes
{CartPole-v1 β†’ ClassControl/CartPole-v1}/Test_CartPole-v1_A2C_20221204-003659/logs/log.txt RENAMED
File without changes
{CartPole-v1 β†’ ClassControl/CartPole-v1}/Test_CartPole-v1_A2C_20221204-003659/models/actor_checkpoint.pt RENAMED
File without changes
{CartPole-v1 β†’ ClassControl/CartPole-v1}/Test_CartPole-v1_A2C_20221204-003659/models/critic_checkpoint.pt RENAMED
File without changes
{CartPole-v1 β†’ ClassControl/CartPole-v1}/Test_CartPole-v1_A2C_20221204-003659/results/learning_curve.png RENAMED
File without changes