gsc579 commited on
Commit
1ffcc88
1 Parent(s): b179a57

Train_Pendulum-v1_SAC

Browse files
Files changed (41) hide show
  1. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/config.yaml +81 -0
  2. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/logs/log.txt +487 -0
  3. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/100 +3 -0
  4. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1000 +3 -0
  5. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10000 +3 -0
  6. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10100 +3 -0
  7. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10200 +3 -0
  8. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10300 +3 -0
  9. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10800 +3 -0
  10. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10900 +3 -0
  11. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1100 +3 -0
  12. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/11000 +3 -0
  13. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/11100 +3 -0
  14. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/11900 +3 -0
  15. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1200 +3 -0
  16. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/12500 +3 -0
  17. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/12700 +3 -0
  18. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/12800 +3 -0
  19. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1300 +3 -0
  20. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/13000 +3 -0
  21. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/13800 +3 -0
  22. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/13900 +3 -0
  23. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1400 +3 -0
  24. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/14000 +3 -0
  25. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/14900 +3 -0
  26. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1500 +3 -0
  27. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/15000 +3 -0
  28. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/15900 +3 -0
  29. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1600 +3 -0
  30. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/16000 +3 -0
  31. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/16700 +3 -0
  32. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1700 +3 -0
  33. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/17000 +3 -0
  34. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/17900 +3 -0
  35. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1800 +3 -0
  36. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18400 +3 -0
  37. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18700 +3 -0
  38. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18800 +3 -0
  39. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18900 +3 -0
  40. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/tb_logs/interact/events.out.tfevents.1687077551.ML3090.330549.0 +3 -0
  41. ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/tb_logs/policy/events.out.tfevents.1687077551.ML3090.330549.1 +3 -0
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/config.yaml ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general_cfg:
2
+ algo_name: SAC
3
+ collect_traj: false
4
+ device: cpu
5
+ env_name: gym
6
+ interact_summary_fre: 1
7
+ load_checkpoint: false
8
+ load_model_step: best
9
+ load_path: Train_CartPole-v1_SAC_20230618-162702
10
+ max_episode: 200
11
+ max_step: 200
12
+ mode: train
13
+ model_save_fre: 100
14
+ model_summary_fre: 1
15
+ mp_backend: single
16
+ n_learners: 1
17
+ n_workers: 2
18
+ online_eval: true
19
+ online_eval_episode: 10
20
+ seed: 1
21
+ share_buffer: true
22
+ algo_cfg:
23
+ action_type: continuous
24
+ actor_layers:
25
+ - activation: relu
26
+ layer_size:
27
+ - 256
28
+ layer_type: linear
29
+ - activation: relu
30
+ layer_size:
31
+ - 256
32
+ layer_type: linear
33
+ actor_lr: 0.0003
34
+ alpha: 0.1
35
+ alpha_lr: 0.0001
36
+ automatic_entropy_tuning: false
37
+ batch_size: 64
38
+ buffer_size: 1000000
39
+ buffer_type: REPLAY_QUE
40
+ critic1_lr: 0.001
41
+ critic2_lr: 0.001
42
+ critic_layers:
43
+ - activation: relu
44
+ layer_size:
45
+ - 256
46
+ layer_type: linear
47
+ - activation: relu
48
+ layer_size:
49
+ - 256
50
+ layer_type: linear
51
+ epsilon_decay: 500
52
+ epsilon_end: 0.01
53
+ epsilon_start: 0.95
54
+ gamma: 0.95
55
+ hidden_dim: 64
56
+ independ_actor: true
57
+ lr: 0.0001
58
+ min_policy: 0
59
+ n_epochs: 1
60
+ n_steps_per_learn: 1
61
+ share_optimizer: false
62
+ start_steps: 10000
63
+ target_update: 1
64
+ target_update_fre: 1
65
+ tau: 0.005
66
+ value_layers:
67
+ - activation: relu
68
+ layer_size:
69
+ - 256
70
+ layer_type: linear
71
+ - activation: relu
72
+ layer_size:
73
+ - 256
74
+ layer_type: linear
75
+ env_cfg:
76
+ id: Pendulum-v1
77
+ ignore_params:
78
+ - wrapper
79
+ - ignore_params
80
+ render_mode: null
81
+ wrapper: null
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/logs/log.txt ADDED
@@ -0,0 +1,487 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - General Configs:
2
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - ================================================================================
3
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - Name Value Type
4
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - env_name gym <class 'str'>
5
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - algo_name SAC <class 'str'>
6
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - mode train <class 'str'>
7
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - device cpu <class 'str'>
8
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - seed 1 <class 'int'>
9
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - max_episode 200 <class 'int'>
10
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - max_step 200 <class 'int'>
11
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
12
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - mp_backend single <class 'str'>
13
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - n_workers 2 <class 'int'>
14
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - n_learners 1 <class 'int'>
15
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - share_buffer 1 <class 'bool'>
16
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
17
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
18
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - model_save_fre 100 <class 'int'>
19
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
20
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - load_path Train_CartPole-v1_SAC_20230618-162702 <class 'str'>
21
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - load_model_step best <class 'str'>
22
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - interact_summary_fre 1 <class 'int'>
23
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - model_summary_fre 1 <class 'int'>
24
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - ================================================================================
25
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - Algo Configs:
26
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - ================================================================================
27
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - Name Value Type
28
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - critic1_lr 0.001 <class 'float'>
29
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - critic2_lr 0.001 <class 'float'>
30
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - actor_lr 0.0003 <class 'float'>
31
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - gamma 0.95 <class 'float'>
32
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - tau 0.005 <class 'float'>
33
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - alpha 0.1 <class 'float'>
34
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - automatic_entropy_tuning 0 <class 'bool'>
35
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - batch_size 64 <class 'int'>
36
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - hidden_dim 64 <class 'int'>
37
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - n_epochs 1 <class 'int'>
38
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - start_steps 10000 <class 'int'>
39
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - target_update_fre 1 <class 'int'>
40
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - buffer_size 1000000 <class 'int'>
41
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - min_policy 0 <class 'int'>
42
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - alpha_lr 0.0001 <class 'float'>
43
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - n_steps_per_learn 1 <class 'int'>
44
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - action_type continuous <class 'str'>
45
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - independ_actor 1 <class 'bool'>
46
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - share_optimizer 0 <class 'bool'>
47
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - actor_layers [{'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}] <class 'str'>
48
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - critic_layers [{'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}] <class 'str'>
49
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}] <class 'str'>
50
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
51
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
52
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
53
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
54
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
55
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - target_update 1 <class 'int'>
56
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - ================================================================================
57
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - Env Configs:
58
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - ================================================================================
59
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - Name Value Type
60
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - id Pendulum-v1 <class 'str'>
61
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - render_mode None <class 'str'>
62
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - wrapper None <class 'str'>
63
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
64
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - ================================================================================
65
+ 2023-06-18 16:39:11 - SimpleLog - INFO: - Start training!
66
+ 2023-06-18 16:39:13 - SimpleLog - INFO: - update_step: 100, online_eval_reward: -1380.018
67
+ 2023-06-18 16:39:13 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1380.018, save the best model!
68
+ 2023-06-18 16:39:13 - SimpleLog - INFO: - Interactor 0 finished episode 1 with reward -1247.240 in 200 steps
69
+ 2023-06-18 16:39:13 - SimpleLog - INFO: - Interactor 1 finished episode 2 with reward -1239.670 in 200 steps
70
+ 2023-06-18 16:39:14 - SimpleLog - INFO: - update_step: 200, online_eval_reward: -1594.462
71
+ 2023-06-18 16:39:15 - SimpleLog - INFO: - update_step: 300, online_eval_reward: -1575.933
72
+ 2023-06-18 16:39:15 - SimpleLog - INFO: - Interactor 0 finished episode 3 with reward -1465.696 in 200 steps
73
+ 2023-06-18 16:39:15 - SimpleLog - INFO: - Interactor 1 finished episode 4 with reward -1538.397 in 200 steps
74
+ 2023-06-18 16:39:16 - SimpleLog - INFO: - update_step: 400, online_eval_reward: -1550.200
75
+ 2023-06-18 16:39:17 - SimpleLog - INFO: - update_step: 500, online_eval_reward: -1517.290
76
+ 2023-06-18 16:39:18 - SimpleLog - INFO: - Interactor 0 finished episode 5 with reward -1516.613 in 200 steps
77
+ 2023-06-18 16:39:18 - SimpleLog - INFO: - Interactor 1 finished episode 6 with reward -1697.405 in 200 steps
78
+ 2023-06-18 16:39:18 - SimpleLog - INFO: - update_step: 600, online_eval_reward: -1474.664
79
+ 2023-06-18 16:39:19 - SimpleLog - INFO: - update_step: 700, online_eval_reward: -1480.853
80
+ 2023-06-18 16:39:20 - SimpleLog - INFO: - Interactor 0 finished episode 7 with reward -1479.039 in 200 steps
81
+ 2023-06-18 16:39:20 - SimpleLog - INFO: - Interactor 1 finished episode 8 with reward -1621.124 in 200 steps
82
+ 2023-06-18 16:39:21 - SimpleLog - INFO: - update_step: 800, online_eval_reward: -1444.541
83
+ 2023-06-18 16:39:22 - SimpleLog - INFO: - update_step: 900, online_eval_reward: -1432.778
84
+ 2023-06-18 16:39:22 - SimpleLog - INFO: - Interactor 0 finished episode 9 with reward -1375.822 in 200 steps
85
+ 2023-06-18 16:39:22 - SimpleLog - INFO: - Interactor 1 finished episode 10 with reward -1548.226 in 200 steps
86
+ 2023-06-18 16:39:23 - SimpleLog - INFO: - update_step: 1000, online_eval_reward: -1379.687
87
+ 2023-06-18 16:39:23 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1379.687, save the best model!
88
+ 2023-06-18 16:39:24 - SimpleLog - INFO: - update_step: 1100, online_eval_reward: -1365.993
89
+ 2023-06-18 16:39:24 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1365.993, save the best model!
90
+ 2023-06-18 16:39:25 - SimpleLog - INFO: - Interactor 0 finished episode 11 with reward -1417.818 in 200 steps
91
+ 2023-06-18 16:39:25 - SimpleLog - INFO: - Interactor 1 finished episode 12 with reward -1484.637 in 200 steps
92
+ 2023-06-18 16:39:26 - SimpleLog - INFO: - update_step: 1200, online_eval_reward: -1289.023
93
+ 2023-06-18 16:39:26 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1289.023, save the best model!
94
+ 2023-06-18 16:39:27 - SimpleLog - INFO: - update_step: 1300, online_eval_reward: -1274.835
95
+ 2023-06-18 16:39:27 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1274.835, save the best model!
96
+ 2023-06-18 16:39:27 - SimpleLog - INFO: - Interactor 0 finished episode 13 with reward -1299.286 in 200 steps
97
+ 2023-06-18 16:39:27 - SimpleLog - INFO: - Interactor 1 finished episode 14 with reward -1410.494 in 200 steps
98
+ 2023-06-18 16:39:28 - SimpleLog - INFO: - update_step: 1400, online_eval_reward: -1234.458
99
+ 2023-06-18 16:39:28 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1234.458, save the best model!
100
+ 2023-06-18 16:39:29 - SimpleLog - INFO: - update_step: 1500, online_eval_reward: -1167.915
101
+ 2023-06-18 16:39:29 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1167.915, save the best model!
102
+ 2023-06-18 16:39:30 - SimpleLog - INFO: - Interactor 0 finished episode 15 with reward -1114.071 in 200 steps
103
+ 2023-06-18 16:39:30 - SimpleLog - INFO: - Interactor 1 finished episode 16 with reward -1412.817 in 200 steps
104
+ 2023-06-18 16:39:30 - SimpleLog - INFO: - update_step: 1600, online_eval_reward: -1164.202
105
+ 2023-06-18 16:39:30 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1164.202, save the best model!
106
+ 2023-06-18 16:39:32 - SimpleLog - INFO: - update_step: 1700, online_eval_reward: -1135.373
107
+ 2023-06-18 16:39:32 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1135.373, save the best model!
108
+ 2023-06-18 16:39:32 - SimpleLog - INFO: - Interactor 0 finished episode 17 with reward -1036.607 in 200 steps
109
+ 2023-06-18 16:39:32 - SimpleLog - INFO: - Interactor 1 finished episode 18 with reward -1192.079 in 200 steps
110
+ 2023-06-18 16:39:33 - SimpleLog - INFO: - update_step: 1800, online_eval_reward: -1136.757
111
+ 2023-06-18 16:39:34 - SimpleLog - INFO: - update_step: 1900, online_eval_reward: -1096.014
112
+ 2023-06-18 16:39:34 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1096.014, save the best model!
113
+ 2023-06-18 16:39:35 - SimpleLog - INFO: - Interactor 0 finished episode 19 with reward -1018.449 in 200 steps
114
+ 2023-06-18 16:39:35 - SimpleLog - INFO: - Interactor 1 finished episode 20 with reward -1162.681 in 200 steps
115
+ 2023-06-18 16:39:35 - SimpleLog - INFO: - update_step: 2000, online_eval_reward: -1065.251
116
+ 2023-06-18 16:39:35 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1065.251, save the best model!
117
+ 2023-06-18 16:39:37 - SimpleLog - INFO: - update_step: 2100, online_eval_reward: -1124.240
118
+ 2023-06-18 16:39:37 - SimpleLog - INFO: - Interactor 0 finished episode 21 with reward -971.930 in 200 steps
119
+ 2023-06-18 16:39:37 - SimpleLog - INFO: - Interactor 1 finished episode 22 with reward -1129.776 in 200 steps
120
+ 2023-06-18 16:39:38 - SimpleLog - INFO: - update_step: 2200, online_eval_reward: -880.723
121
+ 2023-06-18 16:39:38 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -880.723, save the best model!
122
+ 2023-06-18 16:39:39 - SimpleLog - INFO: - update_step: 2300, online_eval_reward: -993.409
123
+ 2023-06-18 16:39:40 - SimpleLog - INFO: - Interactor 0 finished episode 23 with reward -940.286 in 200 steps
124
+ 2023-06-18 16:39:40 - SimpleLog - INFO: - Interactor 1 finished episode 24 with reward -1006.670 in 200 steps
125
+ 2023-06-18 16:39:40 - SimpleLog - INFO: - update_step: 2400, online_eval_reward: -874.374
126
+ 2023-06-18 16:39:40 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -874.374, save the best model!
127
+ 2023-06-18 16:39:42 - SimpleLog - INFO: - update_step: 2500, online_eval_reward: -1023.463
128
+ 2023-06-18 16:39:42 - SimpleLog - INFO: - Interactor 0 finished episode 25 with reward -854.571 in 200 steps
129
+ 2023-06-18 16:39:42 - SimpleLog - INFO: - Interactor 1 finished episode 26 with reward -995.997 in 200 steps
130
+ 2023-06-18 16:39:43 - SimpleLog - INFO: - update_step: 2600, online_eval_reward: -970.227
131
+ 2023-06-18 16:39:44 - SimpleLog - INFO: - update_step: 2700, online_eval_reward: -970.407
132
+ 2023-06-18 16:39:45 - SimpleLog - INFO: - Interactor 0 finished episode 27 with reward -891.371 in 200 steps
133
+ 2023-06-18 16:39:45 - SimpleLog - INFO: - Interactor 1 finished episode 28 with reward -884.375 in 200 steps
134
+ 2023-06-18 16:39:45 - SimpleLog - INFO: - update_step: 2800, online_eval_reward: -988.688
135
+ 2023-06-18 16:39:47 - SimpleLog - INFO: - update_step: 2900, online_eval_reward: -844.578
136
+ 2023-06-18 16:39:47 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -844.578, save the best model!
137
+ 2023-06-18 16:39:47 - SimpleLog - INFO: - Interactor 0 finished episode 29 with reward -874.244 in 200 steps
138
+ 2023-06-18 16:39:47 - SimpleLog - INFO: - Interactor 1 finished episode 30 with reward -950.054 in 200 steps
139
+ 2023-06-18 16:39:48 - SimpleLog - INFO: - update_step: 3000, online_eval_reward: -931.215
140
+ 2023-06-18 16:39:49 - SimpleLog - INFO: - update_step: 3100, online_eval_reward: -969.424
141
+ 2023-06-18 16:39:50 - SimpleLog - INFO: - Interactor 0 finished episode 31 with reward -761.690 in 200 steps
142
+ 2023-06-18 16:39:50 - SimpleLog - INFO: - Interactor 1 finished episode 32 with reward -810.092 in 200 steps
143
+ 2023-06-18 16:39:50 - SimpleLog - INFO: - update_step: 3200, online_eval_reward: -858.450
144
+ 2023-06-18 16:39:52 - SimpleLog - INFO: - update_step: 3300, online_eval_reward: -974.745
145
+ 2023-06-18 16:39:52 - SimpleLog - INFO: - Interactor 0 finished episode 33 with reward -720.616 in 200 steps
146
+ 2023-06-18 16:39:52 - SimpleLog - INFO: - Interactor 1 finished episode 34 with reward -763.590 in 200 steps
147
+ 2023-06-18 16:39:53 - SimpleLog - INFO: - update_step: 3400, online_eval_reward: -865.729
148
+ 2023-06-18 16:39:54 - SimpleLog - INFO: - update_step: 3500, online_eval_reward: -703.698
149
+ 2023-06-18 16:39:54 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -703.698, save the best model!
150
+ 2023-06-18 16:39:55 - SimpleLog - INFO: - Interactor 0 finished episode 35 with reward -697.547 in 200 steps
151
+ 2023-06-18 16:39:55 - SimpleLog - INFO: - Interactor 1 finished episode 36 with reward -750.590 in 200 steps
152
+ 2023-06-18 16:39:55 - SimpleLog - INFO: - update_step: 3600, online_eval_reward: -863.473
153
+ 2023-06-18 16:39:57 - SimpleLog - INFO: - update_step: 3700, online_eval_reward: -859.878
154
+ 2023-06-18 16:39:57 - SimpleLog - INFO: - Interactor 0 finished episode 37 with reward -731.446 in 200 steps
155
+ 2023-06-18 16:39:57 - SimpleLog - INFO: - Interactor 1 finished episode 38 with reward -874.953 in 200 steps
156
+ 2023-06-18 16:39:58 - SimpleLog - INFO: - update_step: 3800, online_eval_reward: -855.164
157
+ 2023-06-18 16:39:59 - SimpleLog - INFO: - update_step: 3900, online_eval_reward: -630.874
158
+ 2023-06-18 16:39:59 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -630.874, save the best model!
159
+ 2023-06-18 16:40:00 - SimpleLog - INFO: - Interactor 0 finished episode 39 with reward -545.310 in 200 steps
160
+ 2023-06-18 16:40:00 - SimpleLog - INFO: - Interactor 1 finished episode 40 with reward -751.373 in 200 steps
161
+ 2023-06-18 16:40:00 - SimpleLog - INFO: - update_step: 4000, online_eval_reward: -756.478
162
+ 2023-06-18 16:40:02 - SimpleLog - INFO: - update_step: 4100, online_eval_reward: -863.402
163
+ 2023-06-18 16:40:02 - SimpleLog - INFO: - Interactor 0 finished episode 41 with reward -646.219 in 200 steps
164
+ 2023-06-18 16:40:02 - SimpleLog - INFO: - Interactor 1 finished episode 42 with reward -753.741 in 200 steps
165
+ 2023-06-18 16:40:03 - SimpleLog - INFO: - update_step: 4200, online_eval_reward: -626.108
166
+ 2023-06-18 16:40:03 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -626.108, save the best model!
167
+ 2023-06-18 16:40:04 - SimpleLog - INFO: - update_step: 4300, online_eval_reward: -748.564
168
+ 2023-06-18 16:40:05 - SimpleLog - INFO: - Interactor 0 finished episode 43 with reward -632.094 in 200 steps
169
+ 2023-06-18 16:40:05 - SimpleLog - INFO: - Interactor 1 finished episode 44 with reward -751.567 in 200 steps
170
+ 2023-06-18 16:40:05 - SimpleLog - INFO: - update_step: 4400, online_eval_reward: -649.743
171
+ 2023-06-18 16:40:06 - SimpleLog - INFO: - update_step: 4500, online_eval_reward: -766.485
172
+ 2023-06-18 16:40:07 - SimpleLog - INFO: - Interactor 0 finished episode 45 with reward -627.878 in 200 steps
173
+ 2023-06-18 16:40:07 - SimpleLog - INFO: - Interactor 1 finished episode 46 with reward -628.071 in 200 steps
174
+ 2023-06-18 16:40:08 - SimpleLog - INFO: - update_step: 4600, online_eval_reward: -781.563
175
+ 2023-06-18 16:40:09 - SimpleLog - INFO: - update_step: 4700, online_eval_reward: -628.591
176
+ 2023-06-18 16:40:10 - SimpleLog - INFO: - Interactor 0 finished episode 47 with reward -509.268 in 200 steps
177
+ 2023-06-18 16:40:10 - SimpleLog - INFO: - Interactor 1 finished episode 48 with reward -377.213 in 200 steps
178
+ 2023-06-18 16:40:10 - SimpleLog - INFO: - update_step: 4800, online_eval_reward: -504.638
179
+ 2023-06-18 16:40:10 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -504.638, save the best model!
180
+ 2023-06-18 16:40:11 - SimpleLog - INFO: - update_step: 4900, online_eval_reward: -530.351
181
+ 2023-06-18 16:40:12 - SimpleLog - INFO: - Interactor 0 finished episode 49 with reward -383.350 in 200 steps
182
+ 2023-06-18 16:40:12 - SimpleLog - INFO: - Interactor 1 finished episode 50 with reward -501.978 in 200 steps
183
+ 2023-06-18 16:40:13 - SimpleLog - INFO: - update_step: 5000, online_eval_reward: -504.956
184
+ 2023-06-18 16:40:14 - SimpleLog - INFO: - update_step: 5100, online_eval_reward: -632.072
185
+ 2023-06-18 16:40:14 - SimpleLog - INFO: - Interactor 0 finished episode 51 with reward -255.278 in 200 steps
186
+ 2023-06-18 16:40:14 - SimpleLog - INFO: - Interactor 1 finished episode 52 with reward -500.879 in 200 steps
187
+ 2023-06-18 16:40:15 - SimpleLog - INFO: - update_step: 5200, online_eval_reward: -641.763
188
+ 2023-06-18 16:40:16 - SimpleLog - INFO: - update_step: 5300, online_eval_reward: -508.721
189
+ 2023-06-18 16:40:17 - SimpleLog - INFO: - Interactor 0 finished episode 53 with reward -257.886 in 200 steps
190
+ 2023-06-18 16:40:17 - SimpleLog - INFO: - Interactor 1 finished episode 54 with reward -500.677 in 200 steps
191
+ 2023-06-18 16:40:18 - SimpleLog - INFO: - update_step: 5400, online_eval_reward: -624.826
192
+ 2023-06-18 16:40:19 - SimpleLog - INFO: - update_step: 5500, online_eval_reward: -504.471
193
+ 2023-06-18 16:40:19 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -504.471, save the best model!
194
+ 2023-06-18 16:40:19 - SimpleLog - INFO: - Interactor 0 finished episode 55 with reward -259.141 in 200 steps
195
+ 2023-06-18 16:40:19 - SimpleLog - INFO: - Interactor 1 finished episode 56 with reward -501.679 in 200 steps
196
+ 2023-06-18 16:40:20 - SimpleLog - INFO: - update_step: 5600, online_eval_reward: -381.350
197
+ 2023-06-18 16:40:20 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -381.350, save the best model!
198
+ 2023-06-18 16:40:21 - SimpleLog - INFO: - update_step: 5700, online_eval_reward: -627.468
199
+ 2023-06-18 16:40:22 - SimpleLog - INFO: - Interactor 0 finished episode 57 with reward -384.113 in 200 steps
200
+ 2023-06-18 16:40:22 - SimpleLog - INFO: - Interactor 1 finished episode 58 with reward -401.693 in 200 steps
201
+ 2023-06-18 16:40:23 - SimpleLog - INFO: - update_step: 5800, online_eval_reward: -627.865
202
+ 2023-06-18 16:40:24 - SimpleLog - INFO: - update_step: 5900, online_eval_reward: -524.992
203
+ 2023-06-18 16:40:24 - SimpleLog - INFO: - Interactor 0 finished episode 59 with reward -386.282 in 200 steps
204
+ 2023-06-18 16:40:24 - SimpleLog - INFO: - Interactor 1 finished episode 60 with reward -377.906 in 200 steps
205
+ 2023-06-18 16:40:25 - SimpleLog - INFO: - update_step: 6000, online_eval_reward: -504.850
206
+ 2023-06-18 16:40:26 - SimpleLog - INFO: - update_step: 6100, online_eval_reward: -622.662
207
+ 2023-06-18 16:40:27 - SimpleLog - INFO: - Interactor 0 finished episode 61 with reward -264.980 in 200 steps
208
+ 2023-06-18 16:40:27 - SimpleLog - INFO: - Interactor 1 finished episode 62 with reward -376.024 in 200 steps
209
+ 2023-06-18 16:40:27 - SimpleLog - INFO: - update_step: 6200, online_eval_reward: -629.334
210
+ 2023-06-18 16:40:29 - SimpleLog - INFO: - update_step: 6300, online_eval_reward: -505.557
211
+ 2023-06-18 16:40:29 - SimpleLog - INFO: - Interactor 0 finished episode 63 with reward -258.428 in 200 steps
212
+ 2023-06-18 16:40:29 - SimpleLog - INFO: - Interactor 1 finished episode 64 with reward -377.041 in 200 steps
213
+ 2023-06-18 16:40:30 - SimpleLog - INFO: - update_step: 6400, online_eval_reward: -506.662
214
+ 2023-06-18 16:40:31 - SimpleLog - INFO: - update_step: 6500, online_eval_reward: -504.294
215
+ 2023-06-18 16:40:32 - SimpleLog - INFO: - Interactor 0 finished episode 65 with reward -144.788 in 200 steps
216
+ 2023-06-18 16:40:32 - SimpleLog - INFO: - Interactor 1 finished episode 66 with reward -250.861 in 200 steps
217
+ 2023-06-18 16:40:32 - SimpleLog - INFO: - update_step: 6600, online_eval_reward: -506.432
218
+ 2023-06-18 16:40:34 - SimpleLog - INFO: - update_step: 6700, online_eval_reward: -503.358
219
+ 2023-06-18 16:40:34 - SimpleLog - INFO: - Interactor 0 finished episode 67 with reward -384.900 in 200 steps
220
+ 2023-06-18 16:40:34 - SimpleLog - INFO: - Interactor 1 finished episode 68 with reward -503.686 in 200 steps
221
+ 2023-06-18 16:40:35 - SimpleLog - INFO: - update_step: 6800, online_eval_reward: -610.874
222
+ 2023-06-18 16:40:36 - SimpleLog - INFO: - update_step: 6900, online_eval_reward: -507.600
223
+ 2023-06-18 16:40:37 - SimpleLog - INFO: - Interactor 0 finished episode 69 with reward -6.160 in 200 steps
224
+ 2023-06-18 16:40:37 - SimpleLog - INFO: - Interactor 1 finished episode 70 with reward -249.738 in 200 steps
225
+ 2023-06-18 16:40:37 - SimpleLog - INFO: - update_step: 7000, online_eval_reward: -516.111
226
+ 2023-06-18 16:40:39 - SimpleLog - INFO: - update_step: 7100, online_eval_reward: -504.814
227
+ 2023-06-18 16:40:39 - SimpleLog - INFO: - Interactor 0 finished episode 71 with reward -130.383 in 200 steps
228
+ 2023-06-18 16:40:39 - SimpleLog - INFO: - Interactor 1 finished episode 72 with reward -248.035 in 200 steps
229
+ 2023-06-18 16:40:40 - SimpleLog - INFO: - update_step: 7200, online_eval_reward: -504.119
230
+ 2023-06-18 16:40:41 - SimpleLog - INFO: - update_step: 7300, online_eval_reward: -252.920
231
+ 2023-06-18 16:40:41 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -252.920, save the best model!
232
+ 2023-06-18 16:40:42 - SimpleLog - INFO: - Interactor 0 finished episode 73 with reward -129.843 in 200 steps
233
+ 2023-06-18 16:40:42 - SimpleLog - INFO: - Interactor 1 finished episode 74 with reward -123.091 in 200 steps
234
+ 2023-06-18 16:40:42 - SimpleLog - INFO: - update_step: 7400, online_eval_reward: -504.046
235
+ 2023-06-18 16:40:44 - SimpleLog - INFO: - update_step: 7500, online_eval_reward: -506.457
236
+ 2023-06-18 16:40:44 - SimpleLog - INFO: - Interactor 0 finished episode 75 with reward -9.244 in 200 steps
237
+ 2023-06-18 16:40:44 - SimpleLog - INFO: - Interactor 1 finished episode 76 with reward -374.890 in 200 steps
238
+ 2023-06-18 16:40:45 - SimpleLog - INFO: - update_step: 7600, online_eval_reward: -253.852
239
+ 2023-06-18 16:40:46 - SimpleLog - INFO: - update_step: 7700, online_eval_reward: -497.678
240
+ 2023-06-18 16:40:47 - SimpleLog - INFO: - Interactor 0 finished episode 77 with reward -257.436 in 200 steps
241
+ 2023-06-18 16:40:47 - SimpleLog - INFO: - Interactor 1 finished episode 78 with reward -248.164 in 200 steps
242
+ 2023-06-18 16:40:47 - SimpleLog - INFO: - update_step: 7800, online_eval_reward: -518.276
243
+ 2023-06-18 16:40:49 - SimpleLog - INFO: - update_step: 7900, online_eval_reward: -496.236
244
+ 2023-06-18 16:40:49 - SimpleLog - INFO: - Interactor 0 finished episode 79 with reward -129.262 in 200 steps
245
+ 2023-06-18 16:40:49 - SimpleLog - INFO: - Interactor 1 finished episode 80 with reward -378.180 in 200 steps
246
+ 2023-06-18 16:40:50 - SimpleLog - INFO: - update_step: 8000, online_eval_reward: -505.930
247
+ 2023-06-18 16:40:51 - SimpleLog - INFO: - update_step: 8100, online_eval_reward: -505.368
248
+ 2023-06-18 16:40:52 - SimpleLog - INFO: - Interactor 0 finished episode 81 with reward -382.849 in 200 steps
249
+ 2023-06-18 16:40:52 - SimpleLog - INFO: - Interactor 1 finished episode 82 with reward -501.592 in 200 steps
250
+ 2023-06-18 16:40:52 - SimpleLog - INFO: - update_step: 8200, online_eval_reward: -512.991
251
+ 2023-06-18 16:40:54 - SimpleLog - INFO: - update_step: 8300, online_eval_reward: -504.157
252
+ 2023-06-18 16:40:54 - SimpleLog - INFO: - Interactor 0 finished episode 83 with reward -289.496 in 200 steps
253
+ 2023-06-18 16:40:54 - SimpleLog - INFO: - Interactor 1 finished episode 84 with reward -379.101 in 200 steps
254
+ 2023-06-18 16:40:55 - SimpleLog - INFO: - update_step: 8400, online_eval_reward: -471.989
255
+ 2023-06-18 16:40:56 - SimpleLog - INFO: - update_step: 8500, online_eval_reward: -378.024
256
+ 2023-06-18 16:40:57 - SimpleLog - INFO: - Interactor 0 finished episode 85 with reward -383.413 in 200 steps
257
+ 2023-06-18 16:40:57 - SimpleLog - INFO: - Interactor 1 finished episode 86 with reward -377.348 in 200 steps
258
+ 2023-06-18 16:40:57 - SimpleLog - INFO: - update_step: 8600, online_eval_reward: -504.045
259
+ 2023-06-18 16:40:59 - SimpleLog - INFO: - update_step: 8700, online_eval_reward: -622.723
260
+ 2023-06-18 16:40:59 - SimpleLog - INFO: - Interactor 0 finished episode 87 with reward -258.051 in 200 steps
261
+ 2023-06-18 16:40:59 - SimpleLog - INFO: - Interactor 1 finished episode 88 with reward -378.418 in 200 steps
262
+ 2023-06-18 16:41:00 - SimpleLog - INFO: - update_step: 8800, online_eval_reward: -504.854
263
+ 2023-06-18 16:41:01 - SimpleLog - INFO: - update_step: 8900, online_eval_reward: -501.113
264
+ 2023-06-18 16:41:02 - SimpleLog - INFO: - Interactor 0 finished episode 89 with reward -265.978 in 200 steps
265
+ 2023-06-18 16:41:02 - SimpleLog - INFO: - Interactor 1 finished episode 90 with reward -377.551 in 200 steps
266
+ 2023-06-18 16:41:02 - SimpleLog - INFO: - update_step: 9000, online_eval_reward: -627.357
267
+ 2023-06-18 16:41:04 - SimpleLog - INFO: - update_step: 9100, online_eval_reward: -378.242
268
+ 2023-06-18 16:41:04 - SimpleLog - INFO: - Interactor 0 finished episode 91 with reward -376.627 in 200 steps
269
+ 2023-06-18 16:41:04 - SimpleLog - INFO: - Interactor 1 finished episode 92 with reward -379.475 in 200 steps
270
+ 2023-06-18 16:41:05 - SimpleLog - INFO: - update_step: 9200, online_eval_reward: -523.777
271
+ 2023-06-18 16:41:06 - SimpleLog - INFO: - update_step: 9300, online_eval_reward: -392.154
272
+ 2023-06-18 16:41:07 - SimpleLog - INFO: - Interactor 0 finished episode 93 with reward -257.879 in 200 steps
273
+ 2023-06-18 16:41:07 - SimpleLog - INFO: - Interactor 1 finished episode 94 with reward -364.066 in 200 steps
274
+ 2023-06-18 16:41:07 - SimpleLog - INFO: - update_step: 9400, online_eval_reward: -505.073
275
+ 2023-06-18 16:41:09 - SimpleLog - INFO: - update_step: 9500, online_eval_reward: -392.630
276
+ 2023-06-18 16:41:09 - SimpleLog - INFO: - Interactor 0 finished episode 95 with reward -256.403 in 200 steps
277
+ 2023-06-18 16:41:09 - SimpleLog - INFO: - Interactor 1 finished episode 96 with reward -378.919 in 200 steps
278
+ 2023-06-18 16:41:10 - SimpleLog - INFO: - update_step: 9600, online_eval_reward: -611.083
279
+ 2023-06-18 16:41:11 - SimpleLog - INFO: - update_step: 9700, online_eval_reward: -575.006
280
+ 2023-06-18 16:41:12 - SimpleLog - INFO: - Interactor 0 finished episode 97 with reward -257.988 in 200 steps
281
+ 2023-06-18 16:41:12 - SimpleLog - INFO: - Interactor 1 finished episode 98 with reward -375.953 in 200 steps
282
+ 2023-06-18 16:41:12 - SimpleLog - INFO: - update_step: 9800, online_eval_reward: -504.909
283
+ 2023-06-18 16:41:14 - SimpleLog - INFO: - update_step: 9900, online_eval_reward: -521.546
284
+ 2023-06-18 16:41:14 - SimpleLog - INFO: - Interactor 0 finished episode 99 with reward -257.814 in 200 steps
285
+ 2023-06-18 16:41:14 - SimpleLog - INFO: - Interactor 1 finished episode 100 with reward -376.679 in 200 steps
286
+ 2023-06-18 16:41:15 - SimpleLog - INFO: - update_step: 10000, online_eval_reward: -498.104
287
+ 2023-06-18 16:41:16 - SimpleLog - INFO: - update_step: 10100, online_eval_reward: -505.148
288
+ 2023-06-18 16:41:17 - SimpleLog - INFO: - Interactor 0 finished episode 101 with reward -383.380 in 200 steps
289
+ 2023-06-18 16:41:17 - SimpleLog - INFO: - Interactor 1 finished episode 102 with reward -376.594 in 200 steps
290
+ 2023-06-18 16:41:17 - SimpleLog - INFO: - update_step: 10200, online_eval_reward: -502.743
291
+ 2023-06-18 16:41:18 - SimpleLog - INFO: - update_step: 10300, online_eval_reward: -495.894
292
+ 2023-06-18 16:41:19 - SimpleLog - INFO: - Interactor 0 finished episode 103 with reward -258.215 in 200 steps
293
+ 2023-06-18 16:41:19 - SimpleLog - INFO: - Interactor 1 finished episode 104 with reward -252.459 in 200 steps
294
+ 2023-06-18 16:41:20 - SimpleLog - INFO: - update_step: 10400, online_eval_reward: -428.521
295
+ 2023-06-18 16:41:21 - SimpleLog - INFO: - update_step: 10500, online_eval_reward: -509.165
296
+ 2023-06-18 16:41:21 - SimpleLog - INFO: - Interactor 0 finished episode 105 with reward -237.254 in 200 steps
297
+ 2023-06-18 16:41:21 - SimpleLog - INFO: - Interactor 1 finished episode 106 with reward -249.686 in 200 steps
298
+ 2023-06-18 16:41:22 - SimpleLog - INFO: - update_step: 10600, online_eval_reward: -378.710
299
+ 2023-06-18 16:41:23 - SimpleLog - INFO: - update_step: 10700, online_eval_reward: -467.916
300
+ 2023-06-18 16:41:24 - SimpleLog - INFO: - Interactor 0 finished episode 107 with reward -380.161 in 200 steps
301
+ 2023-06-18 16:41:24 - SimpleLog - INFO: - Interactor 1 finished episode 108 with reward -377.340 in 200 steps
302
+ 2023-06-18 16:41:25 - SimpleLog - INFO: - update_step: 10800, online_eval_reward: -504.152
303
+ 2023-06-18 16:41:26 - SimpleLog - INFO: - update_step: 10900, online_eval_reward: -616.149
304
+ 2023-06-18 16:41:26 - SimpleLog - INFO: - Interactor 0 finished episode 109 with reward -256.526 in 200 steps
305
+ 2023-06-18 16:41:26 - SimpleLog - INFO: - Interactor 1 finished episode 110 with reward -250.052 in 200 steps
306
+ 2023-06-18 16:41:27 - SimpleLog - INFO: - update_step: 11000, online_eval_reward: -624.551
307
+ 2023-06-18 16:41:28 - SimpleLog - INFO: - update_step: 11100, online_eval_reward: -626.679
308
+ 2023-06-18 16:41:29 - SimpleLog - INFO: - Interactor 0 finished episode 111 with reward -255.673 in 200 steps
309
+ 2023-06-18 16:41:29 - SimpleLog - INFO: - Interactor 1 finished episode 112 with reward -249.592 in 200 steps
310
+ 2023-06-18 16:41:30 - SimpleLog - INFO: - update_step: 11200, online_eval_reward: -504.051
311
+ 2023-06-18 16:41:31 - SimpleLog - INFO: - update_step: 11300, online_eval_reward: -627.940
312
+ 2023-06-18 16:41:31 - SimpleLog - INFO: - Interactor 0 finished episode 113 with reward -257.611 in 200 steps
313
+ 2023-06-18 16:41:31 - SimpleLog - INFO: - Interactor 1 finished episode 114 with reward -375.881 in 200 steps
314
+ 2023-06-18 16:41:32 - SimpleLog - INFO: - update_step: 11400, online_eval_reward: -603.049
315
+ 2023-06-18 16:41:33 - SimpleLog - INFO: - update_step: 11500, online_eval_reward: -471.987
316
+ 2023-06-18 16:41:34 - SimpleLog - INFO: - Interactor 0 finished episode 115 with reward -382.501 in 200 steps
317
+ 2023-06-18 16:41:34 - SimpleLog - INFO: - Interactor 1 finished episode 116 with reward -252.038 in 200 steps
318
+ 2023-06-18 16:41:35 - SimpleLog - INFO: - update_step: 11600, online_eval_reward: -503.790
319
+ 2023-06-18 16:41:36 - SimpleLog - INFO: - update_step: 11700, online_eval_reward: -503.284
320
+ 2023-06-18 16:41:36 - SimpleLog - INFO: - Interactor 0 finished episode 117 with reward -264.405 in 200 steps
321
+ 2023-06-18 16:41:36 - SimpleLog - INFO: - Interactor 1 finished episode 118 with reward -502.513 in 200 steps
322
+ 2023-06-18 16:41:37 - SimpleLog - INFO: - update_step: 11800, online_eval_reward: -627.830
323
+ 2023-06-18 16:41:38 - SimpleLog - INFO: - update_step: 11900, online_eval_reward: -622.682
324
+ 2023-06-18 16:41:39 - SimpleLog - INFO: - Interactor 0 finished episode 119 with reward -344.907 in 200 steps
325
+ 2023-06-18 16:41:39 - SimpleLog - INFO: - Interactor 1 finished episode 120 with reward -405.190 in 200 steps
326
+ 2023-06-18 16:41:40 - SimpleLog - INFO: - update_step: 12000, online_eval_reward: -503.674
327
+ 2023-06-18 16:41:41 - SimpleLog - INFO: - update_step: 12100, online_eval_reward: -628.032
328
+ 2023-06-18 16:41:42 - SimpleLog - INFO: - Interactor 0 finished episode 121 with reward -384.215 in 200 steps
329
+ 2023-06-18 16:41:42 - SimpleLog - INFO: - Interactor 1 finished episode 122 with reward -432.613 in 200 steps
330
+ 2023-06-18 16:41:42 - SimpleLog - INFO: - update_step: 12200, online_eval_reward: -503.886
331
+ 2023-06-18 16:41:44 - SimpleLog - INFO: - update_step: 12300, online_eval_reward: -503.432
332
+ 2023-06-18 16:41:44 - SimpleLog - INFO: - Interactor 0 finished episode 123 with reward -383.556 in 200 steps
333
+ 2023-06-18 16:41:44 - SimpleLog - INFO: - Interactor 1 finished episode 124 with reward -415.292 in 200 steps
334
+ 2023-06-18 16:41:45 - SimpleLog - INFO: - update_step: 12400, online_eval_reward: -549.750
335
+ 2023-06-18 16:41:46 - SimpleLog - INFO: - update_step: 12500, online_eval_reward: -500.803
336
+ 2023-06-18 16:41:47 - SimpleLog - INFO: - Interactor 0 finished episode 125 with reward -384.129 in 200 steps
337
+ 2023-06-18 16:41:47 - SimpleLog - INFO: - Interactor 1 finished episode 126 with reward -378.644 in 200 steps
338
+ 2023-06-18 16:41:47 - SimpleLog - INFO: - update_step: 12600, online_eval_reward: -614.173
339
+ 2023-06-18 16:41:49 - SimpleLog - INFO: - update_step: 12700, online_eval_reward: -504.940
340
+ 2023-06-18 16:41:49 - SimpleLog - INFO: - Interactor 0 finished episode 127 with reward -384.349 in 200 steps
341
+ 2023-06-18 16:41:49 - SimpleLog - INFO: - Interactor 1 finished episode 128 with reward -378.360 in 200 steps
342
+ 2023-06-18 16:41:50 - SimpleLog - INFO: - update_step: 12800, online_eval_reward: -627.797
343
+ 2023-06-18 16:41:51 - SimpleLog - INFO: - update_step: 12900, online_eval_reward: -597.391
344
+ 2023-06-18 16:41:52 - SimpleLog - INFO: - Interactor 0 finished episode 129 with reward -383.978 in 200 steps
345
+ 2023-06-18 16:41:52 - SimpleLog - INFO: - Interactor 1 finished episode 130 with reward -380.662 in 200 steps
346
+ 2023-06-18 16:41:52 - SimpleLog - INFO: - update_step: 13000, online_eval_reward: -504.481
347
+ 2023-06-18 16:41:54 - SimpleLog - INFO: - update_step: 13100, online_eval_reward: -502.841
348
+ 2023-06-18 16:41:54 - SimpleLog - INFO: - Interactor 0 finished episode 131 with reward -383.082 in 200 steps
349
+ 2023-06-18 16:41:54 - SimpleLog - INFO: - Interactor 1 finished episode 132 with reward -377.854 in 200 steps
350
+ 2023-06-18 16:41:55 - SimpleLog - INFO: - update_step: 13200, online_eval_reward: -606.172
351
+ 2023-06-18 16:41:56 - SimpleLog - INFO: - update_step: 13300, online_eval_reward: -627.079
352
+ 2023-06-18 16:41:57 - SimpleLog - INFO: - Interactor 0 finished episode 133 with reward -261.498 in 200 steps
353
+ 2023-06-18 16:41:57 - SimpleLog - INFO: - Interactor 1 finished episode 134 with reward -378.427 in 200 steps
354
+ 2023-06-18 16:41:57 - SimpleLog - INFO: - update_step: 13400, online_eval_reward: -621.482
355
+ 2023-06-18 16:41:59 - SimpleLog - INFO: - update_step: 13500, online_eval_reward: -627.802
356
+ 2023-06-18 16:41:59 - SimpleLog - INFO: - Interactor 0 finished episode 135 with reward -259.911 in 200 steps
357
+ 2023-06-18 16:41:59 - SimpleLog - INFO: - Interactor 1 finished episode 136 with reward -254.313 in 200 steps
358
+ 2023-06-18 16:42:00 - SimpleLog - INFO: - update_step: 13600, online_eval_reward: -554.930
359
+ 2023-06-18 16:42:01 - SimpleLog - INFO: - update_step: 13700, online_eval_reward: -568.048
360
+ 2023-06-18 16:42:02 - SimpleLog - INFO: - Interactor 0 finished episode 137 with reward -292.799 in 200 steps
361
+ 2023-06-18 16:42:02 - SimpleLog - INFO: - Interactor 1 finished episode 138 with reward -378.388 in 200 steps
362
+ 2023-06-18 16:42:02 - SimpleLog - INFO: - update_step: 13800, online_eval_reward: -536.149
363
+ 2023-06-18 16:42:03 - SimpleLog - INFO: - update_step: 13900, online_eval_reward: -506.025
364
+ 2023-06-18 16:42:04 - SimpleLog - INFO: - Interactor 0 finished episode 139 with reward -259.192 in 200 steps
365
+ 2023-06-18 16:42:04 - SimpleLog - INFO: - Interactor 1 finished episode 140 with reward -378.459 in 200 steps
366
+ 2023-06-18 16:42:05 - SimpleLog - INFO: - update_step: 14000, online_eval_reward: -521.703
367
+ 2023-06-18 16:42:06 - SimpleLog - INFO: - update_step: 14100, online_eval_reward: -510.447
368
+ 2023-06-18 16:42:07 - SimpleLog - INFO: - Interactor 0 finished episode 141 with reward -261.268 in 200 steps
369
+ 2023-06-18 16:42:07 - SimpleLog - INFO: - Interactor 1 finished episode 142 with reward -378.171 in 200 steps
370
+ 2023-06-18 16:42:07 - SimpleLog - INFO: - update_step: 14200, online_eval_reward: -522.750
371
+ 2023-06-18 16:42:09 - SimpleLog - INFO: - update_step: 14300, online_eval_reward: -499.859
372
+ 2023-06-18 16:42:09 - SimpleLog - INFO: - Interactor 0 finished episode 143 with reward -258.966 in 200 steps
373
+ 2023-06-18 16:42:09 - SimpleLog - INFO: - Interactor 1 finished episode 144 with reward -377.578 in 200 steps
374
+ 2023-06-18 16:42:10 - SimpleLog - INFO: - update_step: 14400, online_eval_reward: -503.152
375
+ 2023-06-18 16:42:11 - SimpleLog - INFO: - update_step: 14500, online_eval_reward: -504.464
376
+ 2023-06-18 16:42:12 - SimpleLog - INFO: - Interactor 0 finished episode 145 with reward -259.010 in 200 steps
377
+ 2023-06-18 16:42:12 - SimpleLog - INFO: - Interactor 1 finished episode 146 with reward -378.754 in 200 steps
378
+ 2023-06-18 16:42:12 - SimpleLog - INFO: - update_step: 14600, online_eval_reward: -407.771
379
+ 2023-06-18 16:42:13 - SimpleLog - INFO: - update_step: 14700, online_eval_reward: -504.594
380
+ 2023-06-18 16:42:14 - SimpleLog - INFO: - Interactor 0 finished episode 147 with reward -265.800 in 200 steps
381
+ 2023-06-18 16:42:14 - SimpleLog - INFO: - Interactor 1 finished episode 148 with reward -379.020 in 200 steps
382
+ 2023-06-18 16:42:15 - SimpleLog - INFO: - update_step: 14800, online_eval_reward: -521.266
383
+ 2023-06-18 16:42:16 - SimpleLog - INFO: - update_step: 14900, online_eval_reward: -504.800
384
+ 2023-06-18 16:42:17 - SimpleLog - INFO: - Interactor 0 finished episode 149 with reward -259.717 in 200 steps
385
+ 2023-06-18 16:42:17 - SimpleLog - INFO: - Interactor 1 finished episode 150 with reward -371.442 in 200 steps
386
+ 2023-06-18 16:42:17 - SimpleLog - INFO: - update_step: 15000, online_eval_reward: -557.607
387
+ 2023-06-18 16:42:18 - SimpleLog - INFO: - update_step: 15100, online_eval_reward: -503.430
388
+ 2023-06-18 16:42:19 - SimpleLog - INFO: - Interactor 0 finished episode 151 with reward -259.884 in 200 steps
389
+ 2023-06-18 16:42:19 - SimpleLog - INFO: - Interactor 1 finished episode 152 with reward -378.490 in 200 steps
390
+ 2023-06-18 16:42:20 - SimpleLog - INFO: - update_step: 15200, online_eval_reward: -503.907
391
+ 2023-06-18 16:42:21 - SimpleLog - INFO: - update_step: 15300, online_eval_reward: -504.686
392
+ 2023-06-18 16:42:22 - SimpleLog - INFO: - Interactor 0 finished episode 153 with reward -137.131 in 200 steps
393
+ 2023-06-18 16:42:22 - SimpleLog - INFO: - Interactor 1 finished episode 154 with reward -256.148 in 200 steps
394
+ 2023-06-18 16:42:22 - SimpleLog - INFO: - update_step: 15400, online_eval_reward: -503.950
395
+ 2023-06-18 16:42:23 - SimpleLog - INFO: - update_step: 15500, online_eval_reward: -377.807
396
+ 2023-06-18 16:42:24 - SimpleLog - INFO: - Interactor 0 finished episode 155 with reward -182.937 in 200 steps
397
+ 2023-06-18 16:42:24 - SimpleLog - INFO: - Interactor 1 finished episode 156 with reward -228.954 in 200 steps
398
+ 2023-06-18 16:42:25 - SimpleLog - INFO: - update_step: 15600, online_eval_reward: -633.895
399
+ 2023-06-18 16:42:26 - SimpleLog - INFO: - update_step: 15700, online_eval_reward: -504.156
400
+ 2023-06-18 16:42:27 - SimpleLog - INFO: - Interactor 0 finished episode 157 with reward -135.201 in 200 steps
401
+ 2023-06-18 16:42:27 - SimpleLog - INFO: - Interactor 1 finished episode 158 with reward -127.702 in 200 steps
402
+ 2023-06-18 16:42:27 - SimpleLog - INFO: - update_step: 15800, online_eval_reward: -504.442
403
+ 2023-06-18 16:42:28 - SimpleLog - INFO: - update_step: 15900, online_eval_reward: -378.320
404
+ 2023-06-18 16:42:29 - SimpleLog - INFO: - Interactor 0 finished episode 159 with reward -140.012 in 200 steps
405
+ 2023-06-18 16:42:29 - SimpleLog - INFO: - Interactor 1 finished episode 160 with reward -254.871 in 200 steps
406
+ 2023-06-18 16:42:30 - SimpleLog - INFO: - update_step: 16000, online_eval_reward: -758.459
407
+ 2023-06-18 16:42:31 - SimpleLog - INFO: - update_step: 16100, online_eval_reward: -528.790
408
+ 2023-06-18 16:42:32 - SimpleLog - INFO: - Interactor 0 finished episode 161 with reward -260.244 in 200 steps
409
+ 2023-06-18 16:42:32 - SimpleLog - INFO: - Interactor 1 finished episode 162 with reward -378.559 in 200 steps
410
+ 2023-06-18 16:42:32 - SimpleLog - INFO: - update_step: 16200, online_eval_reward: -512.799
411
+ 2023-06-18 16:42:33 - SimpleLog - INFO: - update_step: 16300, online_eval_reward: -500.630
412
+ 2023-06-18 16:42:34 - SimpleLog - INFO: - Interactor 0 finished episode 163 with reward -135.817 in 200 steps
413
+ 2023-06-18 16:42:34 - SimpleLog - INFO: - Interactor 1 finished episode 164 with reward -260.366 in 200 steps
414
+ 2023-06-18 16:42:35 - SimpleLog - INFO: - update_step: 16400, online_eval_reward: -514.297
415
+ 2023-06-18 16:42:36 - SimpleLog - INFO: - update_step: 16500, online_eval_reward: -504.048
416
+ 2023-06-18 16:42:37 - SimpleLog - INFO: - Interactor 0 finished episode 165 with reward -13.423 in 200 steps
417
+ 2023-06-18 16:42:37 - SimpleLog - INFO: - Interactor 1 finished episode 166 with reward -132.449 in 200 steps
418
+ 2023-06-18 16:42:37 - SimpleLog - INFO: - update_step: 16600, online_eval_reward: -504.361
419
+ 2023-06-18 16:42:39 - SimpleLog - INFO: - update_step: 16700, online_eval_reward: -503.206
420
+ 2023-06-18 16:42:39 - SimpleLog - INFO: - Interactor 0 finished episode 167 with reward -139.457 in 200 steps
421
+ 2023-06-18 16:42:39 - SimpleLog - INFO: - Interactor 1 finished episode 168 with reward -374.078 in 200 steps
422
+ 2023-06-18 16:42:40 - SimpleLog - INFO: - update_step: 16800, online_eval_reward: -453.025
423
+ 2023-06-18 16:42:41 - SimpleLog - INFO: - update_step: 16900, online_eval_reward: -377.225
424
+ 2023-06-18 16:42:42 - SimpleLog - INFO: - Interactor 0 finished episode 169 with reward -277.210 in 200 steps
425
+ 2023-06-18 16:42:42 - SimpleLog - INFO: - Interactor 1 finished episode 170 with reward -252.749 in 200 steps
426
+ 2023-06-18 16:42:42 - SimpleLog - INFO: - update_step: 17000, online_eval_reward: -494.961
427
+ 2023-06-18 16:42:44 - SimpleLog - INFO: - update_step: 17100, online_eval_reward: -628.388
428
+ 2023-06-18 16:42:44 - SimpleLog - INFO: - Interactor 0 finished episode 171 with reward -186.962 in 200 steps
429
+ 2023-06-18 16:42:44 - SimpleLog - INFO: - Interactor 1 finished episode 172 with reward -279.426 in 200 steps
430
+ 2023-06-18 16:42:45 - SimpleLog - INFO: - update_step: 17200, online_eval_reward: -403.423
431
+ 2023-06-18 16:42:46 - SimpleLog - INFO: - update_step: 17300, online_eval_reward: -377.556
432
+ 2023-06-18 16:42:47 - SimpleLog - INFO: - Interactor 0 finished episode 173 with reward -129.997 in 200 steps
433
+ 2023-06-18 16:42:47 - SimpleLog - INFO: - Interactor 1 finished episode 174 with reward -249.593 in 200 steps
434
+ 2023-06-18 16:42:47 - SimpleLog - INFO: - update_step: 17400, online_eval_reward: -125.928
435
+ 2023-06-18 16:42:47 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -125.928, save the best model!
436
+ 2023-06-18 16:42:49 - SimpleLog - INFO: - update_step: 17500, online_eval_reward: -252.264
437
+ 2023-06-18 16:42:49 - SimpleLog - INFO: - Interactor 0 finished episode 175 with reward -333.461 in 200 steps
438
+ 2023-06-18 16:42:49 - SimpleLog - INFO: - Interactor 1 finished episode 176 with reward -380.684 in 200 steps
439
+ 2023-06-18 16:42:50 - SimpleLog - INFO: - update_step: 17600, online_eval_reward: -377.967
440
+ 2023-06-18 16:42:51 - SimpleLog - INFO: - update_step: 17700, online_eval_reward: -502.527
441
+ 2023-06-18 16:42:52 - SimpleLog - INFO: - Interactor 0 finished episode 177 with reward -382.695 in 200 steps
442
+ 2023-06-18 16:42:52 - SimpleLog - INFO: - Interactor 1 finished episode 178 with reward -377.542 in 200 steps
443
+ 2023-06-18 16:42:52 - SimpleLog - INFO: - update_step: 17800, online_eval_reward: -503.022
444
+ 2023-06-18 16:42:54 - SimpleLog - INFO: - update_step: 17900, online_eval_reward: -509.103
445
+ 2023-06-18 16:42:54 - SimpleLog - INFO: - Interactor 0 finished episode 179 with reward -382.172 in 200 steps
446
+ 2023-06-18 16:42:54 - SimpleLog - INFO: - Interactor 1 finished episode 180 with reward -500.410 in 200 steps
447
+ 2023-06-18 16:42:55 - SimpleLog - INFO: - update_step: 18000, online_eval_reward: -518.620
448
+ 2023-06-18 16:42:56 - SimpleLog - INFO: - update_step: 18100, online_eval_reward: -502.824
449
+ 2023-06-18 16:42:57 - SimpleLog - INFO: - Interactor 0 finished episode 181 with reward -381.368 in 200 steps
450
+ 2023-06-18 16:42:57 - SimpleLog - INFO: - Interactor 1 finished episode 182 with reward -500.005 in 200 steps
451
+ 2023-06-18 16:42:58 - SimpleLog - INFO: - update_step: 18200, online_eval_reward: -748.451
452
+ 2023-06-18 16:42:59 - SimpleLog - INFO: - update_step: 18300, online_eval_reward: -519.708
453
+ 2023-06-18 16:42:59 - SimpleLog - INFO: - Interactor 0 finished episode 183 with reward -382.336 in 200 steps
454
+ 2023-06-18 16:42:59 - SimpleLog - INFO: - Interactor 1 finished episode 184 with reward -377.370 in 200 steps
455
+ 2023-06-18 16:43:00 - SimpleLog - INFO: - update_step: 18400, online_eval_reward: -503.253
456
+ 2023-06-18 16:43:01 - SimpleLog - INFO: - update_step: 18500, online_eval_reward: -504.352
457
+ 2023-06-18 16:43:02 - SimpleLog - INFO: - Interactor 0 finished episode 185 with reward -381.589 in 200 steps
458
+ 2023-06-18 16:43:02 - SimpleLog - INFO: - Interactor 1 finished episode 186 with reward -498.094 in 200 steps
459
+ 2023-06-18 16:43:03 - SimpleLog - INFO: - update_step: 18600, online_eval_reward: -621.553
460
+ 2023-06-18 16:43:04 - SimpleLog - INFO: - update_step: 18700, online_eval_reward: -503.396
461
+ 2023-06-18 16:43:04 - SimpleLog - INFO: - Interactor 0 finished episode 187 with reward -381.901 in 200 steps
462
+ 2023-06-18 16:43:04 - SimpleLog - INFO: - Interactor 1 finished episode 188 with reward -500.203 in 200 steps
463
+ 2023-06-18 16:43:05 - SimpleLog - INFO: - update_step: 18800, online_eval_reward: -378.566
464
+ 2023-06-18 16:43:06 - SimpleLog - INFO: - update_step: 18900, online_eval_reward: -503.793
465
+ 2023-06-18 16:43:07 - SimpleLog - INFO: - Interactor 0 finished episode 189 with reward -381.694 in 200 steps
466
+ 2023-06-18 16:43:07 - SimpleLog - INFO: - Interactor 1 finished episode 190 with reward -500.428 in 200 steps
467
+ 2023-06-18 16:43:08 - SimpleLog - INFO: - update_step: 19000, online_eval_reward: -502.545
468
+ 2023-06-18 16:43:09 - SimpleLog - INFO: - update_step: 19100, online_eval_reward: -501.932
469
+ 2023-06-18 16:43:10 - SimpleLog - INFO: - Interactor 0 finished episode 191 with reward -268.149 in 200 steps
470
+ 2023-06-18 16:43:10 - SimpleLog - INFO: - Interactor 1 finished episode 192 with reward -376.541 in 200 steps
471
+ 2023-06-18 16:43:10 - SimpleLog - INFO: - update_step: 19200, online_eval_reward: -558.227
472
+ 2023-06-18 16:43:12 - SimpleLog - INFO: - update_step: 19300, online_eval_reward: -517.582
473
+ 2023-06-18 16:43:12 - SimpleLog - INFO: - Interactor 0 finished episode 193 with reward -259.277 in 200 steps
474
+ 2023-06-18 16:43:12 - SimpleLog - INFO: - Interactor 1 finished episode 194 with reward -377.712 in 200 steps
475
+ 2023-06-18 16:43:13 - SimpleLog - INFO: - update_step: 19400, online_eval_reward: -500.636
476
+ 2023-06-18 16:43:14 - SimpleLog - INFO: - update_step: 19500, online_eval_reward: -377.581
477
+ 2023-06-18 16:43:15 - SimpleLog - INFO: - Interactor 0 finished episode 195 with reward -258.805 in 200 steps
478
+ 2023-06-18 16:43:15 - SimpleLog - INFO: - Interactor 1 finished episode 196 with reward -374.762 in 200 steps
479
+ 2023-06-18 16:43:15 - SimpleLog - INFO: - update_step: 19600, online_eval_reward: -125.934
480
+ 2023-06-18 16:43:17 - SimpleLog - INFO: - update_step: 19700, online_eval_reward: -502.898
481
+ 2023-06-18 16:43:17 - SimpleLog - INFO: - Interactor 0 finished episode 197 with reward -134.881 in 200 steps
482
+ 2023-06-18 16:43:17 - SimpleLog - INFO: - Interactor 1 finished episode 198 with reward -274.250 in 200 steps
483
+ 2023-06-18 16:43:18 - SimpleLog - INFO: - update_step: 19800, online_eval_reward: -377.947
484
+ 2023-06-18 16:43:19 - SimpleLog - INFO: - update_step: 19900, online_eval_reward: -380.967
485
+ 2023-06-18 16:43:20 - SimpleLog - INFO: - Interactor 0 finished episode 199 with reward -131.490 in 200 steps
486
+ 2023-06-18 16:43:20 - SimpleLog - INFO: - Interactor 1 finished episode 200 with reward -254.782 in 200 steps
487
+ 2023-06-18 16:43:20 - SimpleLog - INFO: - Finish training! Time cost: 248.411 s
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/100 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6607973c1581ba4c3b8799d4376360708a60cd801b603549605dde4ac4d26aaa
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f343139d2775d5e39cc655b23e115f6180e6624cc12df75622fb1425a82a709
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7053581566e0070608f66e30f3b1a2806ff44a347e49bea615c53fb71fa80b7b
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10100 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:744e25f444f803c9287d9e4a1778778f566112f62a8730f705f72525c72e1647
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10200 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64688b841083f83a8b2c3998d3e3402f2cf928babf50cb4b4c8be1be6ee7c644
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10300 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bc1df6de020679ff0466c501b4dcbd3c3522f6a25b75d0bb4dc2b1a19f41dd7
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10800 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b938f5f4cb442cddc47aa55d7fceef27a593f8b7d3b60bea04cf9da6a3234a8d
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10900 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c645b696b549ff06072a9dc08e1478faee681db9dc566d69ca2a83f62475750c
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1100 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61ced2e7040447f293adafcb38f7ea7903cc9ece300709f70f440c115842fa99
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/11000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beb1075600544d7b6feb3fdd4703c3ea6db3f69724260eda201ccad904cc6009
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/11100 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14bed910238b9f159851d3bbc21a3fc9ddc23af43406ea918fd64c2907d2e265
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/11900 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20252803de7591bea432335559dc71a1b7ada510e680176edb733bc6dbbd582d
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1200 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c889b18b770bd8a886aa129a1628ed420ea454dfc2704787276cc937d4098d66
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/12500 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68aa800bba3c2802c5637067f57c9cca77e04058b5da96d25a7331ba49682b1b
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/12700 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:424478d6a4b86e391c98ac9b137586649aa95cabf0fac752ec1ee8685a3a3819
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/12800 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:371ed90e0af2ee87bbfa8fee59da5efcea081f075249dfab81ae409724eeded2
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1300 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bd3d70aa912612a5d055e6502f9cf009599074ac0b841ba71a3455c6d1f127d
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/13000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c0f609feb4677dfbe68264cbb61c49a87b8386b030969a6e4af1289e662a60c
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/13800 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0ddd9fd9d8a6bf55e63cc0cfb012aaaccc5848146d72c1f2c1c3ab8552823e9
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/13900 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c499fcd51c0f094d6a0e3b2887bcaa98ce0cac2e923a83a28af23340d84b9d2d
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1400 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e4c9dc95391267a927149672e1728991bd99e985939a2435138f7cc6c166d00
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/14000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f180ddc8d15f5412da6ee16d3de69f74befb87170ad2ceac2fcee4dcc5596276
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/14900 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c08c221756645d0de274659d2435f31946a4b7a24a0b355e2354b9022f7c09c
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1500 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a943247b25e7fbc314d3fd8e0ae37fcbcfb87c4696f32a6d66f8eebdab073756
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/15000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:408f74f6096fea01d95ae21f6c6c21dd785dded9e3bbaa8d4d0734a0ba3b5225
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/15900 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75f63f5c2abcca6135f121b189e30ead7a57f61d5b3812693a3bb49c59c46eb2
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1600 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d88a0f2d2a8778ac14e22b53f2e0d89802ad96f184afddac590b35e42234816
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/16000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a19455607f9eeb21f47874bc3f21010616e1b9c6b55d545f7e02ec889fc43830
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/16700 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fc08b72260f458bea0645814fe58029ea5c6bc8152a3bfcc6340660b9b0871d
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1700 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c4ce31ef505b8fe1bc4901f88563a581cbbe7589646aade15f98ac611452a3d
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/17000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d3c01b0b87bcf3ef87a2d3004ba9ce4c435f92260c7497311a710fbc5fa9c8d
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/17900 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9ef7679e4048b9bd2fad9d46e5eec731287f7417f728a50e82de637df645800
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1800 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d96b95eda647674a386e958056398a06d6e0914151fd5be14db66038e67eb42d
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18400 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16c5a09e7119bf7018a03c3ce02aa73f594d77f3c11c57231bd8ab793466a7c5
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18700 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df6c639431e9ffab0dc399adcc3a0efeacc59fc1623884e6e785d0660c164787
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18800 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:963ab3a1615a535fdce86220bf3dfb9a876e6b53fe8c0f66557a8f3dabc7d89f
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18900 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d765380a45d7704c4945a56bee144bd25693374b6b715f17610fc45e14304547
3
+ size 1356936
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/tb_logs/interact/events.out.tfevents.1687077551.ML3090.330549.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2fdc28d315e8a1ee8ff541b15b2faddec4cb3cc990850353dbc27d6aea5558d
3
+ size 19786
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/tb_logs/policy/events.out.tfevents.1687077551.ML3090.330549.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fdd019c428a762b1ca0b2a815d9daec2aa8b2682774e3c3236ef73182f1485b
3
+ size 5561148