Test_Pendulum-v1_A2C

#10
by gsc579 - opened
ClassControl/Pendulum-v1/Test_Pendulum-v1_A2C_20230624-152640/config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general_cfg:
2
+ algo_name: A2C
3
+ collect_traj: false
4
+ device: cpu
5
+ env_name: gym
6
+ interact_summary_fre: 1
7
+ load_checkpoint: true
8
+ load_model_step: best
9
+ load_path: Train_Pendulum-v1_A2C_20230623-232832
10
+ max_episode: 50
11
+ max_step: 200
12
+ mode: test
13
+ model_save_fre: 10
14
+ model_summary_fre: 1
15
+ mp_backend: single
16
+ n_learners: 1
17
+ n_workers: 2
18
+ online_eval: true
19
+ online_eval_episode: 10
20
+ seed: 1
21
+ share_buffer: true
22
+ algo_cfg:
23
+ action_type: continuous
24
+ actor_hidden_dim: 256
25
+ actor_layers:
26
+ - activation: relu
27
+ layer_size:
28
+ - 256
29
+ layer_type: linear
30
+ actor_lr: 0.0001
31
+ batch_size: 256
32
+ buffer_type: ONPOLICY_QUE
33
+ critic_hidden_dim: 256
34
+ critic_layers:
35
+ - activation: relu
36
+ layer_size:
37
+ - 256
38
+ layer_type: linear
39
+ critic_loss_coef: 0.5
40
+ critic_lr: 0.005
41
+ entropy_coef: 0.01
42
+ gamma: 0.9
43
+ independ_actor: true
44
+ k_epochs: 4
45
+ lr: 0.0001
46
+ min_policy: 0
47
+ n_steps_per_learn: 1
48
+ sgd_batch_size: 32
49
+ share_optimizer: false
50
+ env_cfg:
51
+ id: Pendulum-v1
52
+ ignore_params:
53
+ - wrapper
54
+ - ignore_params
55
+ render_mode: null
56
+ wrapper: null
ClassControl/Pendulum-v1/Test_Pendulum-v1_A2C_20230624-152640/logs/log.txt ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - General Configs:
2
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - ================================================================================
3
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - Name Value Type
4
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - env_name gym <class 'str'>
5
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - algo_name A2C <class 'str'>
6
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - mode test <class 'str'>
7
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - device cpu <class 'str'>
8
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - seed 1 <class 'int'>
9
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - max_episode 50 <class 'int'>
10
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - max_step 200 <class 'int'>
11
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
12
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - mp_backend single <class 'str'>
13
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - n_workers 2 <class 'int'>
14
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - n_learners 1 <class 'int'>
15
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - share_buffer 1 <class 'bool'>
16
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
17
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
18
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - model_save_fre 10 <class 'int'>
19
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - load_checkpoint 1 <class 'bool'>
20
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - load_path Train_Pendulum-v1_A2C_20230623-232832 <class 'str'>
21
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - load_model_step best <class 'str'>
22
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - interact_summary_fre 1 <class 'int'>
23
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - model_summary_fre 1 <class 'int'>
24
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - ================================================================================
25
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - Algo Configs:
26
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - ================================================================================
27
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - Name Value Type
28
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - independ_actor 1 <class 'bool'>
29
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - share_optimizer 0 <class 'bool'>
30
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - action_type continuous <class 'str'>
31
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - gamma 0.9 <class 'float'>
32
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - k_epochs 4 <class 'int'>
33
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
34
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - actor_lr 0.0001 <class 'float'>
35
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - critic_lr 0.005 <class 'float'>
36
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - critic_loss_coef 0.5 <class 'float'>
37
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - entropy_coef 0.01 <class 'float'>
38
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - buffer_type ONPOLICY_QUE <class 'str'>
39
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - batch_size 256 <class 'int'>
40
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - sgd_batch_size 32 <class 'int'>
41
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - actor_hidden_dim 256 <class 'int'>
42
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - critic_hidden_dim 256 <class 'int'>
43
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - min_policy 0 <class 'int'>
44
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - n_steps_per_learn 1 <class 'int'>
45
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - actor_layers [{'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}] <class 'str'>
46
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - critic_layers [{'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}] <class 'str'>
47
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - ================================================================================
48
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - Env Configs:
49
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - ================================================================================
50
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - Name Value Type
51
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - id Pendulum-v1 <class 'str'>
52
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - render_mode None <class 'str'>
53
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - wrapper None <class 'str'>
54
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
55
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - ================================================================================
56
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - Start testing!
57
+ 2023-06-24 15:26:40 - SimpleLog - INFO: - Interactor 0 finished episode 1 with reward -661.605 in 200 steps
58
+ 2023-06-24 15:26:41 - SimpleLog - INFO: - Interactor 1 finished episode 2 with reward -520.007 in 200 steps
59
+ 2023-06-24 15:26:41 - SimpleLog - INFO: - Interactor 0 finished episode 3 with reward -801.194 in 200 steps
60
+ 2023-06-24 15:26:41 - SimpleLog - INFO: - Interactor 1 finished episode 4 with reward -776.387 in 200 steps
61
+ 2023-06-24 15:26:41 - SimpleLog - INFO: - Interactor 0 finished episode 5 with reward -1018.592 in 200 steps
62
+ 2023-06-24 15:26:41 - SimpleLog - INFO: - Interactor 1 finished episode 6 with reward -924.237 in 200 steps
63
+ 2023-06-24 15:26:42 - SimpleLog - INFO: - Interactor 0 finished episode 7 with reward -696.645 in 200 steps
64
+ 2023-06-24 15:26:42 - SimpleLog - INFO: - Interactor 0 finished episode 8 with reward -1031.963 in 200 steps
65
+ 2023-06-24 15:26:42 - SimpleLog - INFO: - Interactor 1 finished episode 9 with reward -521.132 in 200 steps
66
+ 2023-06-24 15:26:42 - SimpleLog - INFO: - Interactor 1 finished episode 10 with reward -931.846 in 200 steps
67
+ 2023-06-24 15:26:42 - SimpleLog - INFO: - Interactor 0 finished episode 11 with reward -301.204 in 200 steps
68
+ 2023-06-24 15:26:42 - SimpleLog - INFO: - Interactor 1 finished episode 12 with reward -645.720 in 200 steps
69
+ 2023-06-24 15:26:43 - SimpleLog - INFO: - update_step: 10, online_eval_reward: -760.746
70
+ 2023-06-24 15:26:43 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -760.746, save the best model!
71
+ 2023-06-24 15:26:43 - SimpleLog - INFO: - Interactor 0 finished episode 13 with reward -264.046 in 200 steps
72
+ 2023-06-24 15:26:43 - SimpleLog - INFO: - Interactor 1 finished episode 14 with reward -517.979 in 200 steps
73
+ 2023-06-24 15:26:43 - SimpleLog - INFO: - Interactor 0 finished episode 15 with reward -527.257 in 200 steps
74
+ 2023-06-24 15:26:43 - SimpleLog - INFO: - Interactor 1 finished episode 16 with reward -401.069 in 200 steps
75
+ 2023-06-24 15:26:43 - SimpleLog - INFO: - Interactor 0 finished episode 17 with reward -693.433 in 200 steps
76
+ 2023-06-24 15:26:44 - SimpleLog - INFO: - Interactor 0 finished episode 18 with reward -266.325 in 200 steps
77
+ 2023-06-24 15:26:44 - SimpleLog - INFO: - Interactor 1 finished episode 19 with reward -704.755 in 200 steps
78
+ 2023-06-24 15:26:44 - SimpleLog - INFO: - Interactor 1 finished episode 20 with reward -916.228 in 200 steps
79
+ 2023-06-24 15:26:44 - SimpleLog - INFO: - Interactor 0 finished episode 21 with reward -654.939 in 200 steps
80
+ 2023-06-24 15:26:44 - SimpleLog - INFO: - Interactor 1 finished episode 22 with reward -518.691 in 200 steps
81
+ 2023-06-24 15:26:44 - SimpleLog - INFO: - Interactor 0 finished episode 23 with reward -438.668 in 200 steps
82
+ 2023-06-24 15:26:44 - SimpleLog - INFO: - Interactor 1 finished episode 24 with reward -521.045 in 200 steps
83
+ 2023-06-24 15:26:45 - SimpleLog - INFO: - update_step: 20, online_eval_reward: -775.715
84
+ 2023-06-24 15:26:45 - SimpleLog - INFO: - Interactor 0 finished episode 25 with reward -657.087 in 200 steps
85
+ 2023-06-24 15:26:45 - SimpleLog - INFO: - Interactor 0 finished episode 26 with reward -520.093 in 200 steps
86
+ 2023-06-24 15:26:45 - SimpleLog - INFO: - Interactor 1 finished episode 27 with reward -520.386 in 200 steps
87
+ 2023-06-24 15:26:45 - SimpleLog - INFO: - Interactor 1 finished episode 28 with reward -883.519 in 200 steps
88
+ 2023-06-24 15:26:46 - SimpleLog - INFO: - Interactor 0 finished episode 29 with reward -7.415 in 200 steps
89
+ 2023-06-24 15:26:46 - SimpleLog - INFO: - Interactor 1 finished episode 30 with reward -932.779 in 200 steps
90
+ 2023-06-24 15:26:46 - SimpleLog - INFO: - Interactor 0 finished episode 31 with reward -872.634 in 200 steps
91
+ 2023-06-24 15:26:46 - SimpleLog - INFO: - Interactor 1 finished episode 32 with reward -924.046 in 200 steps
92
+ 2023-06-24 15:26:46 - SimpleLog - INFO: - Interactor 0 finished episode 33 with reward -779.892 in 200 steps
93
+ 2023-06-24 15:26:46 - SimpleLog - INFO: - Interactor 1 finished episode 34 with reward -1026.092 in 200 steps
94
+ 2023-06-24 15:26:47 - SimpleLog - INFO: - Interactor 0 finished episode 35 with reward -692.214 in 200 steps
95
+ 2023-06-24 15:26:47 - SimpleLog - INFO: - Interactor 0 finished episode 36 with reward -656.551 in 200 steps
96
+ 2023-06-24 15:26:47 - SimpleLog - INFO: - Interactor 1 finished episode 37 with reward -915.715 in 200 steps
97
+ 2023-06-24 15:26:47 - SimpleLog - INFO: - Interactor 1 finished episode 38 with reward -1004.479 in 200 steps
98
+ 2023-06-24 15:26:47 - SimpleLog - INFO: - update_step: 30, online_eval_reward: -381.387
99
+ 2023-06-24 15:26:47 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -381.387, save the best model!
100
+ 2023-06-24 15:26:47 - SimpleLog - INFO: - Interactor 0 finished episode 39 with reward -653.117 in 200 steps
101
+ 2023-06-24 15:26:48 - SimpleLog - INFO: - Interactor 1 finished episode 40 with reward -748.897 in 200 steps
102
+ 2023-06-24 15:26:48 - SimpleLog - INFO: - Interactor 0 finished episode 41 with reward -522.462 in 200 steps
103
+ 2023-06-24 15:26:48 - SimpleLog - INFO: - Interactor 1 finished episode 42 with reward -648.596 in 200 steps
104
+ 2023-06-24 15:26:48 - SimpleLog - INFO: - Interactor 0 finished episode 43 with reward -132.224 in 200 steps
105
+ 2023-06-24 15:26:48 - SimpleLog - INFO: - Interactor 0 finished episode 44 with reward -394.009 in 200 steps
106
+ 2023-06-24 15:26:48 - SimpleLog - INFO: - Interactor 1 finished episode 45 with reward -389.747 in 200 steps
107
+ 2023-06-24 15:26:48 - SimpleLog - INFO: - Interactor 1 finished episode 46 with reward -388.791 in 200 steps
108
+ 2023-06-24 15:26:49 - SimpleLog - INFO: - Interactor 0 finished episode 47 with reward -531.517 in 200 steps
109
+ 2023-06-24 15:26:49 - SimpleLog - INFO: - Interactor 1 finished episode 48 with reward -649.490 in 200 steps
110
+ 2023-06-24 15:26:49 - SimpleLog - INFO: - Interactor 0 finished episode 49 with reward -394.079 in 200 steps
111
+ 2023-06-24 15:26:49 - SimpleLog - INFO: - Interactor 1 finished episode 50 with reward -648.858 in 200 steps
112
+ 2023-06-24 15:26:50 - SimpleLog - INFO: - update_step: 40, online_eval_reward: -631.289
113
+ 2023-06-24 15:26:50 - SimpleLog - INFO: - Finish testing! Time cost: 9.316 s
ClassControl/Pendulum-v1/Test_Pendulum-v1_A2C_20230624-152640/models/10 ADDED
Binary file (13.5 kB). View file
 
ClassControl/Pendulum-v1/Test_Pendulum-v1_A2C_20230624-152640/models/20 ADDED
Binary file (13.5 kB). View file
 
ClassControl/Pendulum-v1/Test_Pendulum-v1_A2C_20230624-152640/models/30 ADDED
Binary file (13.5 kB). View file
 
ClassControl/Pendulum-v1/Test_Pendulum-v1_A2C_20230624-152640/models/40 ADDED
Binary file (13.5 kB). View file
 
ClassControl/Pendulum-v1/Test_Pendulum-v1_A2C_20230624-152640/models/best ADDED
Binary file (13.5 kB). View file
 
ClassControl/Pendulum-v1/Test_Pendulum-v1_A2C_20230624-152640/tb_logs/interact/events.out.tfevents.1687591600.ML3090.1078689.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6b1996735bed932feac049dd225897dbbbc6090f25eb2bc991c5b5be85484db
3
+ size 4840
ClassControl/Pendulum-v1/Test_Pendulum-v1_A2C_20230624-152640/tb_logs/policy/events.out.tfevents.1687591600.ML3090.1078689.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d8927a4b1ceeab3f208cac60fa6563dbb8a89cfdfc5a82bf3cce3267b1ac6f3
3
+ size 6604