johnjim0816
commited on
Commit
•
8ba2a32
1
Parent(s):
54d82df
update Cartpole-v1 PER DQN
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/config.yaml +0 -48
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/logs/log.txt +0 -54
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/models/checkpoint.pt +0 -3
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/learning_curve.png +0 -0
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/res.csv +0 -11
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/config.yaml +0 -55
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/logs/log.txt +0 -58
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/learning_curve.png +0 -0
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/res.csv +0 -11
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/config.yaml +0 -55
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/logs/log.txt +0 -58
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/models/checkpoint.pt +0 -3
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/learning_curve.png +0 -0
- ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/res.csv +0 -11
- ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/logs/log.txt +0 -57
- ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/results/trajs_0.pkl +0 -3
- ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/model/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.1 +0 -3
- ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/videos/video.gif +0 -3
- ClassControl/CartPole-v1/{Test_single_CartPole-v1_DQN_20230515-211926 → Test_single_CartPole-v1_PER_DQN_20230518-232330}/config.yaml +17 -13
- ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/logs/log.txt +61 -0
- ClassControl/CartPole-v1/{Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/interact/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.0 → Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/interact/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.0} +1 -1
- ClassControl/CartPole-v1/{Test_CartPole-v1_PER_DQN_20230331-231442/tb_logs/events.out.tfevents.1680275682.DESKTOP-H34HQIQ.290376.0 → Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/model/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.1} +1 -1
- ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/config.yaml +0 -48
- ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/logs/log.txt +0 -260
- ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/models/checkpoint.pt +0 -3
- ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/learning_curve.png +0 -0
- ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/res.csv +0 -201
- ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/tb_logs/events.out.tfevents.1680277069.DESKTOP-H34HQIQ.305216.0 +0 -3
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/config.yaml +0 -48
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/logs/log.txt +0 -267
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/models/checkpoint.pt +0 -3
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/learning_curve.png +0 -0
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/res.csv +0 -201
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/tb_logs/events.out.tfevents.1680274695.DESKTOP-H34HQIQ.317208.0 +0 -3
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/config.yaml +0 -55
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/logs/log.txt +0 -48
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/models/checkpoint.pt +0 -3
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/learning_curve.png +0 -0
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/res.csv +0 -202
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/config.yaml +0 -55
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/logs/log.txt +0 -48
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/models/checkpoint.pt +0 -3
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/learning_curve.png +0 -0
- ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/res.csv +0 -251
- ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/config.yaml +0 -45
- ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/logs/log.txt +0 -166
- ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1000 +0 -0
- ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1500 +0 -0
- ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2000 +0 -0
- ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2500 +0 -0
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/config.yaml
DELETED
@@ -1,48 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: PER_DQN
|
3 |
-
device: cuda
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: true
|
8 |
-
load_path: Train_CartPole-v1_PER_DQN_20230331-225815
|
9 |
-
max_steps: 200
|
10 |
-
mode: test
|
11 |
-
new_step_api: true
|
12 |
-
render: false
|
13 |
-
save_fig: true
|
14 |
-
seed: 1
|
15 |
-
show_fig: false
|
16 |
-
test_eps: 10
|
17 |
-
train_eps: 100
|
18 |
-
wrapper: null
|
19 |
-
algo_cfg:
|
20 |
-
batch_size: 64
|
21 |
-
buffer_size: 100000
|
22 |
-
epsilon_decay: 500
|
23 |
-
epsilon_end: 0.01
|
24 |
-
epsilon_start: 0.95
|
25 |
-
gamma: 0.99
|
26 |
-
hidden_dim: 256
|
27 |
-
lr: 0.0001
|
28 |
-
per_alpha: 0.6
|
29 |
-
per_beta: 0.4
|
30 |
-
per_beta_annealing: 0.001
|
31 |
-
per_epsilon: 0.01
|
32 |
-
target_update: 4
|
33 |
-
value_layers:
|
34 |
-
- activation: relu
|
35 |
-
layer_dim:
|
36 |
-
- n_states
|
37 |
-
- 256
|
38 |
-
layer_type: linear
|
39 |
-
- activation: relu
|
40 |
-
layer_dim:
|
41 |
-
- 256
|
42 |
-
- 256
|
43 |
-
layer_type: linear
|
44 |
-
- activation: none
|
45 |
-
layer_dim:
|
46 |
-
- 256
|
47 |
-
- n_actions
|
48 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/logs/log.txt
DELETED
@@ -1,54 +0,0 @@
|
|
1 |
-
2023-03-31 23:14:42 - r - INFO: - Hyperparameters:
|
2 |
-
2023-03-31 23:14:42 - r - INFO: - ================================================================================
|
3 |
-
2023-03-31 23:14:42 - r - INFO: - Name Value Type
|
4 |
-
2023-03-31 23:14:42 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-03-31 23:14:42 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-03-31 23:14:42 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-03-31 23:14:42 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-03-31 23:14:42 - r - INFO: - algo_name PER_DQN <class 'str'>
|
9 |
-
2023-03-31 23:14:42 - r - INFO: - mode test <class 'str'>
|
10 |
-
2023-03-31 23:14:42 - r - INFO: - seed 1 <class 'int'>
|
11 |
-
2023-03-31 23:14:42 - r - INFO: - device cuda <class 'str'>
|
12 |
-
2023-03-31 23:14:42 - r - INFO: - train_eps 100 <class 'int'>
|
13 |
-
2023-03-31 23:14:42 - r - INFO: - test_eps 10 <class 'int'>
|
14 |
-
2023-03-31 23:14:42 - r - INFO: - eval_eps 10 <class 'int'>
|
15 |
-
2023-03-31 23:14:42 - r - INFO: - eval_per_episode 5 <class 'int'>
|
16 |
-
2023-03-31 23:14:42 - r - INFO: - max_steps 200 <class 'int'>
|
17 |
-
2023-03-31 23:14:42 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
18 |
-
2023-03-31 23:14:42 - r - INFO: - load_path Train_CartPole-v1_PER_DQN_20230331-225815 <class 'str'>
|
19 |
-
2023-03-31 23:14:42 - r - INFO: - show_fig 0 <class 'bool'>
|
20 |
-
2023-03-31 23:14:42 - r - INFO: - save_fig 1 <class 'bool'>
|
21 |
-
2023-03-31 23:14:42 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
22 |
-
2023-03-31 23:14:42 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
23 |
-
2023-03-31 23:14:42 - r - INFO: - epsilon_decay 500 <class 'int'>
|
24 |
-
2023-03-31 23:14:42 - r - INFO: - hidden_dim 256 <class 'int'>
|
25 |
-
2023-03-31 23:14:42 - r - INFO: - gamma 0.99 <class 'float'>
|
26 |
-
2023-03-31 23:14:42 - r - INFO: - lr 0.0001 <class 'float'>
|
27 |
-
2023-03-31 23:14:42 - r - INFO: - buffer_size 100000 <class 'int'>
|
28 |
-
2023-03-31 23:14:42 - r - INFO: - per_alpha 0.6 <class 'float'>
|
29 |
-
2023-03-31 23:14:42 - r - INFO: - per_beta 0.4 <class 'float'>
|
30 |
-
2023-03-31 23:14:42 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
|
31 |
-
2023-03-31 23:14:42 - r - INFO: - per_epsilon 0.01 <class 'float'>
|
32 |
-
2023-03-31 23:14:42 - r - INFO: - batch_size 64 <class 'int'>
|
33 |
-
2023-03-31 23:14:42 - r - INFO: - target_update 4 <class 'int'>
|
34 |
-
2023-03-31 23:14:42 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
35 |
-
2023-03-31 23:14:42 - r - INFO: - task_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442 <class 'str'>
|
36 |
-
2023-03-31 23:14:42 - r - INFO: - res_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442/results <class 'str'>
|
37 |
-
2023-03-31 23:14:42 - r - INFO: - log_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442/logs <class 'str'>
|
38 |
-
2023-03-31 23:14:42 - r - INFO: - traj_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442/traj <class 'str'>
|
39 |
-
2023-03-31 23:14:42 - r - INFO: - tb_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442/tb_logs <class 'str'>
|
40 |
-
2023-03-31 23:14:42 - r - INFO: - ================================================================================
|
41 |
-
2023-03-31 23:14:42 - r - INFO: - n_states: 4, n_actions: 2
|
42 |
-
2023-03-31 23:14:43 - r - INFO: - Start testing!
|
43 |
-
2023-03-31 23:14:43 - r - INFO: - Env: CartPole-v1, Algorithm: PER_DQN, Device: cuda
|
44 |
-
2023-03-31 23:14:44 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
45 |
-
2023-03-31 23:14:44 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
46 |
-
2023-03-31 23:14:44 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
47 |
-
2023-03-31 23:14:44 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
48 |
-
2023-03-31 23:14:44 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
|
49 |
-
2023-03-31 23:14:44 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
|
50 |
-
2023-03-31 23:14:44 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
51 |
-
2023-03-31 23:14:44 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
52 |
-
2023-03-31 23:14:44 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
|
53 |
-
2023-03-31 23:14:44 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
54 |
-
2023-03-31 23:14:44 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/models/checkpoint.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c438616b97ca890557a9e9b1cd42decfc5decc64e5aee660d89158290e92683d
|
3 |
-
size 272471
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/learning_curve.png
DELETED
Binary file (26.1 kB)
|
|
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/res.csv
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,200.0,200
|
3 |
-
1,200.0,200
|
4 |
-
2,200.0,200
|
5 |
-
3,200.0,200
|
6 |
-
4,200.0,200
|
7 |
-
5,200.0,200
|
8 |
-
6,200.0,200
|
9 |
-
7,200.0,200
|
10 |
-
8,200.0,200
|
11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/config.yaml
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: PER_DQN
|
3 |
-
device: cpu
|
4 |
-
env_name: gym
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: true
|
8 |
-
load_path: Train_gym_PER_DQN_20230415-215002
|
9 |
-
max_steps: 200
|
10 |
-
mode: test
|
11 |
-
mp_backend: mp
|
12 |
-
n_workers: 1
|
13 |
-
new_step_api: true
|
14 |
-
render: false
|
15 |
-
render_mode: human
|
16 |
-
save_fig: true
|
17 |
-
seed: 1
|
18 |
-
show_fig: false
|
19 |
-
test_eps: 10
|
20 |
-
train_eps: 200
|
21 |
-
wrapper: null
|
22 |
-
algo_cfg:
|
23 |
-
batch_size: 64
|
24 |
-
buffer_size: 100000
|
25 |
-
epsilon_decay: 500
|
26 |
-
epsilon_end: 0.01
|
27 |
-
epsilon_start: 0.95
|
28 |
-
gamma: 0.99
|
29 |
-
hidden_dim: 256
|
30 |
-
lr: 0.0001
|
31 |
-
per_alpha: 0.6
|
32 |
-
per_beta: 0.4
|
33 |
-
per_beta_annealing: 0.001
|
34 |
-
per_epsilon: 0.01
|
35 |
-
target_update: 4
|
36 |
-
value_layers:
|
37 |
-
- activation: relu
|
38 |
-
layer_dim:
|
39 |
-
- n_states
|
40 |
-
- 256
|
41 |
-
layer_type: linear
|
42 |
-
- activation: relu
|
43 |
-
layer_dim:
|
44 |
-
- 256
|
45 |
-
- 256
|
46 |
-
layer_type: linear
|
47 |
-
- activation: none
|
48 |
-
layer_dim:
|
49 |
-
- 256
|
50 |
-
- n_actions
|
51 |
-
layer_type: linear
|
52 |
-
env_cfg:
|
53 |
-
id: CartPole-v1
|
54 |
-
new_step_api: true
|
55 |
-
render_mode: null
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/logs/log.txt
DELETED
@@ -1,58 +0,0 @@
|
|
1 |
-
2023-04-15 21:51:47 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-15 21:51:47 - r - INFO: - ================================================================================
|
3 |
-
2023-04-15 21:51:47 - r - INFO: - Name Value Type
|
4 |
-
2023-04-15 21:51:47 - r - INFO: - env_name gym <class 'str'>
|
5 |
-
2023-04-15 21:51:47 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-15 21:51:47 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-15 21:51:47 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-15 21:51:47 - r - INFO: - render_mode None <class 'str'>
|
9 |
-
2023-04-15 21:51:47 - r - INFO: - algo_name PER_DQN <class 'str'>
|
10 |
-
2023-04-15 21:51:47 - r - INFO: - mode test <class 'str'>
|
11 |
-
2023-04-15 21:51:47 - r - INFO: - mp_backend mp <class 'str'>
|
12 |
-
2023-04-15 21:51:47 - r - INFO: - seed 1 <class 'int'>
|
13 |
-
2023-04-15 21:51:47 - r - INFO: - device cpu <class 'str'>
|
14 |
-
2023-04-15 21:51:47 - r - INFO: - train_eps 200 <class 'int'>
|
15 |
-
2023-04-15 21:51:47 - r - INFO: - test_eps 10 <class 'int'>
|
16 |
-
2023-04-15 21:51:47 - r - INFO: - eval_eps 10 <class 'int'>
|
17 |
-
2023-04-15 21:51:47 - r - INFO: - eval_per_episode 5 <class 'int'>
|
18 |
-
2023-04-15 21:51:47 - r - INFO: - max_steps 200 <class 'int'>
|
19 |
-
2023-04-15 21:51:47 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
20 |
-
2023-04-15 21:51:47 - r - INFO: - load_path Train_gym_PER_DQN_20230415-215002 <class 'str'>
|
21 |
-
2023-04-15 21:51:47 - r - INFO: - show_fig 0 <class 'bool'>
|
22 |
-
2023-04-15 21:51:47 - r - INFO: - save_fig 1 <class 'bool'>
|
23 |
-
2023-04-15 21:51:47 - r - INFO: - n_workers 1 <class 'int'>
|
24 |
-
2023-04-15 21:51:47 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
-
2023-04-15 21:51:47 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
-
2023-04-15 21:51:47 - r - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
-
2023-04-15 21:51:47 - r - INFO: - hidden_dim 256 <class 'int'>
|
28 |
-
2023-04-15 21:51:47 - r - INFO: - gamma 0.99 <class 'float'>
|
29 |
-
2023-04-15 21:51:47 - r - INFO: - lr 0.0001 <class 'float'>
|
30 |
-
2023-04-15 21:51:47 - r - INFO: - buffer_size 100000 <class 'int'>
|
31 |
-
2023-04-15 21:51:47 - r - INFO: - per_alpha 0.6 <class 'float'>
|
32 |
-
2023-04-15 21:51:47 - r - INFO: - per_beta 0.4 <class 'float'>
|
33 |
-
2023-04-15 21:51:47 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
|
34 |
-
2023-04-15 21:51:47 - r - INFO: - per_epsilon 0.01 <class 'float'>
|
35 |
-
2023-04-15 21:51:47 - r - INFO: - batch_size 64 <class 'int'>
|
36 |
-
2023-04-15 21:51:47 - r - INFO: - target_update 4 <class 'int'>
|
37 |
-
2023-04-15 21:51:47 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
38 |
-
2023-04-15 21:51:47 - r - INFO: - id CartPole-v1 <class 'str'>
|
39 |
-
2023-04-15 21:51:47 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147 <class 'str'>
|
40 |
-
2023-04-15 21:51:47 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147/results <class 'str'>
|
41 |
-
2023-04-15 21:51:47 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147/logs <class 'str'>
|
42 |
-
2023-04-15 21:51:47 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147/traj <class 'str'>
|
43 |
-
2023-04-15 21:51:47 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147/videos <class 'str'>
|
44 |
-
2023-04-15 21:51:47 - r - INFO: - ================================================================================
|
45 |
-
2023-04-15 21:51:47 - r - INFO: - n_states: 4, n_actions: 2
|
46 |
-
2023-04-15 21:51:47 - r - INFO: - Start testing!
|
47 |
-
2023-04-15 21:51:47 - r - INFO: - Env: gym, Algorithm: PER_DQN, Device: cpu
|
48 |
-
2023-04-15 21:51:47 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
49 |
-
2023-04-15 21:51:47 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
50 |
-
2023-04-15 21:51:47 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
51 |
-
2023-04-15 21:51:47 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
52 |
-
2023-04-15 21:51:47 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
|
53 |
-
2023-04-15 21:51:47 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
|
54 |
-
2023-04-15 21:51:47 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
55 |
-
2023-04-15 21:51:47 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
56 |
-
2023-04-15 21:51:47 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
|
57 |
-
2023-04-15 21:51:47 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
58 |
-
2023-04-15 21:51:47 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/learning_curve.png
DELETED
Binary file (27.1 kB)
|
|
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/res.csv
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,200.0,200
|
3 |
-
1,200.0,200
|
4 |
-
2,200.0,200
|
5 |
-
3,200.0,200
|
6 |
-
4,200.0,200
|
7 |
-
5,200.0,200
|
8 |
-
6,200.0,200
|
9 |
-
7,200.0,200
|
10 |
-
8,200.0,200
|
11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/config.yaml
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: PER_DQN
|
3 |
-
device: cpu
|
4 |
-
env_name: gym
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: true
|
8 |
-
load_path: Train_CartPole-v1_PER_DQN_ray_20230415-215738
|
9 |
-
max_steps: 200
|
10 |
-
mode: test
|
11 |
-
mp_backend: ray
|
12 |
-
n_workers: 1
|
13 |
-
new_step_api: true
|
14 |
-
render: false
|
15 |
-
render_mode: human
|
16 |
-
save_fig: true
|
17 |
-
seed: 1
|
18 |
-
show_fig: false
|
19 |
-
test_eps: 10
|
20 |
-
train_eps: 250
|
21 |
-
wrapper: null
|
22 |
-
algo_cfg:
|
23 |
-
batch_size: 64
|
24 |
-
buffer_size: 100000
|
25 |
-
epsilon_decay: 500
|
26 |
-
epsilon_end: 0.01
|
27 |
-
epsilon_start: 0.95
|
28 |
-
gamma: 0.99
|
29 |
-
hidden_dim: 256
|
30 |
-
lr: 0.0001
|
31 |
-
per_alpha: 0.6
|
32 |
-
per_beta: 0.4
|
33 |
-
per_beta_annealing: 0.001
|
34 |
-
per_epsilon: 0.01
|
35 |
-
target_update: 4
|
36 |
-
value_layers:
|
37 |
-
- activation: relu
|
38 |
-
layer_dim:
|
39 |
-
- n_states
|
40 |
-
- 256
|
41 |
-
layer_type: linear
|
42 |
-
- activation: relu
|
43 |
-
layer_dim:
|
44 |
-
- 256
|
45 |
-
- 256
|
46 |
-
layer_type: linear
|
47 |
-
- activation: none
|
48 |
-
layer_dim:
|
49 |
-
- 256
|
50 |
-
- n_actions
|
51 |
-
layer_type: linear
|
52 |
-
env_cfg:
|
53 |
-
id: CartPole-v1
|
54 |
-
new_step_api: true
|
55 |
-
render_mode: null
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/logs/log.txt
DELETED
@@ -1,58 +0,0 @@
|
|
1 |
-
2023-04-15 22:05:40 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-15 22:05:40 - r - INFO: - ================================================================================
|
3 |
-
2023-04-15 22:05:40 - r - INFO: - Name Value Type
|
4 |
-
2023-04-15 22:05:40 - r - INFO: - env_name gym <class 'str'>
|
5 |
-
2023-04-15 22:05:40 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-15 22:05:40 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-15 22:05:40 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-15 22:05:40 - r - INFO: - render_mode None <class 'str'>
|
9 |
-
2023-04-15 22:05:40 - r - INFO: - algo_name PER_DQN <class 'str'>
|
10 |
-
2023-04-15 22:05:40 - r - INFO: - mode test <class 'str'>
|
11 |
-
2023-04-15 22:05:40 - r - INFO: - mp_backend ray <class 'str'>
|
12 |
-
2023-04-15 22:05:40 - r - INFO: - seed 1 <class 'int'>
|
13 |
-
2023-04-15 22:05:40 - r - INFO: - device cpu <class 'str'>
|
14 |
-
2023-04-15 22:05:40 - r - INFO: - train_eps 250 <class 'int'>
|
15 |
-
2023-04-15 22:05:40 - r - INFO: - test_eps 10 <class 'int'>
|
16 |
-
2023-04-15 22:05:40 - r - INFO: - eval_eps 10 <class 'int'>
|
17 |
-
2023-04-15 22:05:40 - r - INFO: - eval_per_episode 5 <class 'int'>
|
18 |
-
2023-04-15 22:05:40 - r - INFO: - max_steps 200 <class 'int'>
|
19 |
-
2023-04-15 22:05:40 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
20 |
-
2023-04-15 22:05:40 - r - INFO: - load_path Train_CartPole-v1_PER_DQN_ray_20230415-215738 <class 'str'>
|
21 |
-
2023-04-15 22:05:40 - r - INFO: - show_fig 0 <class 'bool'>
|
22 |
-
2023-04-15 22:05:40 - r - INFO: - save_fig 1 <class 'bool'>
|
23 |
-
2023-04-15 22:05:40 - r - INFO: - n_workers 1 <class 'int'>
|
24 |
-
2023-04-15 22:05:40 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
-
2023-04-15 22:05:40 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
-
2023-04-15 22:05:40 - r - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
-
2023-04-15 22:05:40 - r - INFO: - hidden_dim 256 <class 'int'>
|
28 |
-
2023-04-15 22:05:40 - r - INFO: - gamma 0.99 <class 'float'>
|
29 |
-
2023-04-15 22:05:40 - r - INFO: - lr 0.0001 <class 'float'>
|
30 |
-
2023-04-15 22:05:40 - r - INFO: - buffer_size 100000 <class 'int'>
|
31 |
-
2023-04-15 22:05:40 - r - INFO: - per_alpha 0.6 <class 'float'>
|
32 |
-
2023-04-15 22:05:40 - r - INFO: - per_beta 0.4 <class 'float'>
|
33 |
-
2023-04-15 22:05:40 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
|
34 |
-
2023-04-15 22:05:40 - r - INFO: - per_epsilon 0.01 <class 'float'>
|
35 |
-
2023-04-15 22:05:40 - r - INFO: - batch_size 64 <class 'int'>
|
36 |
-
2023-04-15 22:05:40 - r - INFO: - target_update 4 <class 'int'>
|
37 |
-
2023-04-15 22:05:40 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
38 |
-
2023-04-15 22:05:40 - r - INFO: - id CartPole-v1 <class 'str'>
|
39 |
-
2023-04-15 22:05:40 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540 <class 'str'>
|
40 |
-
2023-04-15 22:05:40 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540/results <class 'str'>
|
41 |
-
2023-04-15 22:05:40 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540/logs <class 'str'>
|
42 |
-
2023-04-15 22:05:40 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540/traj <class 'str'>
|
43 |
-
2023-04-15 22:05:40 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540/videos <class 'str'>
|
44 |
-
2023-04-15 22:05:40 - r - INFO: - ================================================================================
|
45 |
-
2023-04-15 22:05:40 - r - INFO: - n_states: 4, n_actions: 2
|
46 |
-
2023-04-15 22:05:40 - r - INFO: - Start testing!
|
47 |
-
2023-04-15 22:05:40 - r - INFO: - Env: gym, Algorithm: PER_DQN, Device: cpu
|
48 |
-
2023-04-15 22:05:40 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
49 |
-
2023-04-15 22:05:40 - r - INFO: - Episode: 2/10, Reward: 199.000, Step: 199
|
50 |
-
2023-04-15 22:05:40 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
51 |
-
2023-04-15 22:05:40 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
52 |
-
2023-04-15 22:05:40 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
|
53 |
-
2023-04-15 22:05:40 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
|
54 |
-
2023-04-15 22:05:40 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
55 |
-
2023-04-15 22:05:40 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
56 |
-
2023-04-15 22:05:41 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
|
57 |
-
2023-04-15 22:05:41 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
58 |
-
2023-04-15 22:05:41 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/models/checkpoint.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:0efe3ec576afef2311748067e61af0fe6c939f7a2c2a1500001987a5d0092ce3
|
3 |
-
size 272407
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/learning_curve.png
DELETED
Binary file (32.6 kB)
|
|
ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/res.csv
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,200.0,200
|
3 |
-
1,199.0,199
|
4 |
-
2,200.0,200
|
5 |
-
3,200.0,200
|
6 |
-
4,200.0,200
|
7 |
-
5,200.0,200
|
8 |
-
6,200.0,200
|
9 |
-
7,200.0,200
|
10 |
-
8,200.0,200
|
11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/logs/log.txt
DELETED
@@ -1,57 +0,0 @@
|
|
1 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - General Configs:
|
2 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================
|
3 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - Name Value Type
|
4 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - env_name gym <class 'str'>
|
5 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - algo_name DQN <class 'str'>
|
6 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - mode test <class 'str'>
|
7 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - collect_traj 1 <class 'bool'>
|
8 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - mp_backend single <class 'str'>
|
9 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - n_workers 1 <class 'int'>
|
10 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - seed 1 <class 'int'>
|
11 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - device cpu <class 'str'>
|
12 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - max_episode 10 <class 'int'>
|
13 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
14 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
15 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
16 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - load_checkpoint 1 <class 'bool'>
|
17 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 <class 'str'>
|
18 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - show_fig 0 <class 'bool'>
|
19 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - save_fig 1 <class 'bool'>
|
20 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
21 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
|
22 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================
|
23 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - Algo Configs:
|
24 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================
|
25 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - Name Value Type
|
26 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
|
27 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
|
28 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
|
29 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - gamma 0.95 <class 'float'>
|
30 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
31 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
|
32 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - batch_size 64 <class 'int'>
|
33 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - target_update 4 <class 'int'>
|
34 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
35 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
|
36 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================
|
37 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - Env Configs:
|
38 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================
|
39 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - Name Value Type
|
40 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
|
41 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - render_mode rgb_array <class 'str'>
|
42 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - wrapper None <class 'str'>
|
43 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
44 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================
|
45 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
|
46 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - Start testing!
|
47 |
-
2023-05-15 21:19:26 - SimpleLog - INFO: - episode: 0, ep_reward: 200.0, ep_step: 200
|
48 |
-
2023-05-15 21:19:30 - SimpleLog - INFO: - episode: 1, ep_reward: 200.0, ep_step: 200
|
49 |
-
2023-05-15 21:19:30 - SimpleLog - INFO: - episode: 2, ep_reward: 200.0, ep_step: 200
|
50 |
-
2023-05-15 21:19:31 - SimpleLog - INFO: - episode: 3, ep_reward: 200.0, ep_step: 200
|
51 |
-
2023-05-15 21:19:31 - SimpleLog - INFO: - episode: 4, ep_reward: 200.0, ep_step: 200
|
52 |
-
2023-05-15 21:19:31 - SimpleLog - INFO: - episode: 5, ep_reward: 200.0, ep_step: 200
|
53 |
-
2023-05-15 21:19:32 - SimpleLog - INFO: - episode: 6, ep_reward: 200.0, ep_step: 200
|
54 |
-
2023-05-15 21:19:32 - SimpleLog - INFO: - episode: 7, ep_reward: 200.0, ep_step: 200
|
55 |
-
2023-05-15 21:19:33 - SimpleLog - INFO: - episode: 8, ep_reward: 200.0, ep_step: 200
|
56 |
-
2023-05-15 21:19:33 - SimpleLog - INFO: - episode: 9, ep_reward: 200.0, ep_step: 200
|
57 |
-
2023-05-15 21:19:33 - SimpleLog - INFO: - Finish testing! total time consumed: 7.28s
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/results/trajs_0.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:0fae5aa5ceb51833f761621229159f743bbc8e8a6766007136b3f2af48a1a001
|
3 |
-
size 130746
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/model/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.1
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:778bf4752bfe3bce34855fa51be3e7fdeb15c8d13d02779f6ba433435fa2fdf4
|
3 |
-
size 40
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/videos/video.gif
DELETED
Git LFS Details
|
ClassControl/CartPole-v1/{Test_single_CartPole-v1_DQN_20230515-211926 → Test_single_CartPole-v1_PER_DQN_20230518-232330}/config.yaml
RENAMED
@@ -1,39 +1,43 @@
|
|
1 |
general_cfg:
|
2 |
-
algo_name:
|
3 |
-
collect_traj:
|
4 |
-
device:
|
5 |
env_name: gym
|
6 |
load_checkpoint: true
|
7 |
load_model_step: best
|
8 |
-
load_path: Train_single_CartPole-
|
9 |
max_episode: 10
|
10 |
max_step: 200
|
11 |
mode: test
|
12 |
model_save_fre: 500
|
13 |
mp_backend: single
|
14 |
-
|
|
|
15 |
online_eval: true
|
16 |
online_eval_episode: 10
|
17 |
-
save_fig: true
|
18 |
seed: 1
|
19 |
-
|
20 |
algo_cfg:
|
21 |
batch_size: 64
|
22 |
buffer_size: 100000
|
23 |
-
buffer_type:
|
24 |
-
epsilon_decay:
|
25 |
epsilon_end: 0.01
|
26 |
epsilon_start: 0.95
|
27 |
-
gamma: 0.
|
28 |
lr: 0.0001
|
|
|
|
|
|
|
|
|
29 |
target_update: 4
|
30 |
value_layers:
|
31 |
- activation: relu
|
32 |
-
|
33 |
- 256
|
34 |
layer_type: linear
|
35 |
- activation: relu
|
36 |
-
|
37 |
- 256
|
38 |
layer_type: linear
|
39 |
env_cfg:
|
@@ -41,5 +45,5 @@ env_cfg:
|
|
41 |
ignore_params:
|
42 |
- wrapper
|
43 |
- ignore_params
|
44 |
-
render_mode:
|
45 |
wrapper: null
|
|
|
1 |
general_cfg:
|
2 |
+
algo_name: PER_DQN
|
3 |
+
collect_traj: false
|
4 |
+
device: cuda
|
5 |
env_name: gym
|
6 |
load_checkpoint: true
|
7 |
load_model_step: best
|
8 |
+
load_path: Train_single_CartPole-v1_PER_DQN_20230518-232215
|
9 |
max_episode: 10
|
10 |
max_step: 200
|
11 |
mode: test
|
12 |
model_save_fre: 500
|
13 |
mp_backend: single
|
14 |
+
n_learners: 1
|
15 |
+
n_workers: 2
|
16 |
online_eval: true
|
17 |
online_eval_episode: 10
|
|
|
18 |
seed: 1
|
19 |
+
share_buffer: true
|
20 |
algo_cfg:
|
21 |
batch_size: 64
|
22 |
buffer_size: 100000
|
23 |
+
buffer_type: PER_QUE
|
24 |
+
epsilon_decay: 1000
|
25 |
epsilon_end: 0.01
|
26 |
epsilon_start: 0.95
|
27 |
+
gamma: 0.99
|
28 |
lr: 0.0001
|
29 |
+
per_alpha: 0.6
|
30 |
+
per_beta: 0.4
|
31 |
+
per_beta_annealing: 0.001
|
32 |
+
per_epsilon: 0.01
|
33 |
target_update: 4
|
34 |
value_layers:
|
35 |
- activation: relu
|
36 |
+
layer_size:
|
37 |
- 256
|
38 |
layer_type: linear
|
39 |
- activation: relu
|
40 |
+
layer_size:
|
41 |
- 256
|
42 |
layer_type: linear
|
43 |
env_cfg:
|
|
|
45 |
ignore_params:
|
46 |
- wrapper
|
47 |
- ignore_params
|
48 |
+
render_mode: null
|
49 |
wrapper: null
|
ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/logs/log.txt
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - General Configs:
|
2 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================
|
3 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - Name Value Type
|
4 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - env_name gym <class 'str'>
|
5 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - algo_name PER_DQN <class 'str'>
|
6 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - mode test <class 'str'>
|
7 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - device cuda <class 'str'>
|
8 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - seed 1 <class 'int'>
|
9 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - max_episode 10 <class 'int'>
|
10 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
11 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
|
12 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - mp_backend single <class 'str'>
|
13 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - n_workers 2 <class 'int'>
|
14 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - n_learners 1 <class 'int'>
|
15 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - share_buffer 1 <class 'bool'>
|
16 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
17 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
18 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
|
19 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - load_checkpoint 1 <class 'bool'>
|
20 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_PER_DQN_20230518-232215 <class 'str'>
|
21 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
22 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================
|
23 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - Algo Configs:
|
24 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================
|
25 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - Name Value Type
|
26 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
|
27 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
|
28 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - epsilon_decay 1000 <class 'int'>
|
29 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
|
30 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
31 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - buffer_type PER_QUE <class 'str'>
|
32 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
|
33 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - per_alpha 0.6 <class 'float'>
|
34 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - per_beta 0.4 <class 'float'>
|
35 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - per_beta_annealing 0.001 <class 'float'>
|
36 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - per_epsilon 0.01 <class 'float'>
|
37 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - batch_size 64 <class 'int'>
|
38 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - target_update 4 <class 'int'>
|
39 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}] <class 'str'>
|
40 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================
|
41 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - Env Configs:
|
42 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================
|
43 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - Name Value Type
|
44 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
|
45 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - render_mode None <class 'str'>
|
46 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - wrapper None <class 'str'>
|
47 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
48 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================
|
49 |
+
2023-05-18 23:23:30 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
|
50 |
+
2023-05-18 23:23:31 - SimpleLog - INFO: - Start testing!
|
51 |
+
2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 0, ep_reward: 200.0, ep_step: 200
|
52 |
+
2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 1, ep_reward: 200.0, ep_step: 200
|
53 |
+
2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 2, ep_reward: 200.0, ep_step: 200
|
54 |
+
2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 3, ep_reward: 200.0, ep_step: 200
|
55 |
+
2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 4, ep_reward: 200.0, ep_step: 200
|
56 |
+
2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 5, ep_reward: 200.0, ep_step: 200
|
57 |
+
2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 6, ep_reward: 200.0, ep_step: 200
|
58 |
+
2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 7, ep_reward: 200.0, ep_step: 200
|
59 |
+
2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 8, ep_reward: 200.0, ep_step: 200
|
60 |
+
2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 9, ep_reward: 200.0, ep_step: 200
|
61 |
+
2023-05-18 23:23:33 - SimpleLog - INFO: - Finish testing! total time consumed: 2.60s
|
ClassControl/CartPole-v1/{Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/interact/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.0 → Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/interact/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.0}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1056
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc3ae7a23e9f72498a6b6190433f9925fc02af039e03defef47a6fda915a140c
|
3 |
size 1056
|
ClassControl/CartPole-v1/{Test_CartPole-v1_PER_DQN_20230331-231442/tb_logs/events.out.tfevents.1680275682.DESKTOP-H34HQIQ.290376.0 → Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/model/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.1}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 40
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70fea5313fdd99c138f919e97e5556cb1d9e4370727b560fe1eeb6469d023588
|
3 |
size 40
|
ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/config.yaml
DELETED
@@ -1,48 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: PER_DQN
|
3 |
-
device: cuda
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: false
|
8 |
-
load_path: Train_CartPole-v1_PER_DQN
|
9 |
-
max_steps: 200
|
10 |
-
mode: train
|
11 |
-
new_step_api: true
|
12 |
-
render: false
|
13 |
-
save_fig: true
|
14 |
-
seed: 1
|
15 |
-
show_fig: false
|
16 |
-
test_eps: 10
|
17 |
-
train_eps: 200
|
18 |
-
wrapper: null
|
19 |
-
algo_cfg:
|
20 |
-
batch_size: 64
|
21 |
-
buffer_size: 100000
|
22 |
-
epsilon_decay: 500
|
23 |
-
epsilon_end: 0.01
|
24 |
-
epsilon_start: 0.95
|
25 |
-
gamma: 0.99
|
26 |
-
hidden_dim: 256
|
27 |
-
lr: 0.0001
|
28 |
-
per_alpha: 0.6
|
29 |
-
per_beta: 0.4
|
30 |
-
per_beta_annealing: 0.001
|
31 |
-
per_epsilon: 0.01
|
32 |
-
target_update: 4
|
33 |
-
value_layers:
|
34 |
-
- activation: relu
|
35 |
-
layer_dim:
|
36 |
-
- n_states
|
37 |
-
- 256
|
38 |
-
layer_type: linear
|
39 |
-
- activation: relu
|
40 |
-
layer_dim:
|
41 |
-
- 256
|
42 |
-
- 256
|
43 |
-
layer_type: linear
|
44 |
-
- activation: none
|
45 |
-
layer_dim:
|
46 |
-
- 256
|
47 |
-
- n_actions
|
48 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/logs/log.txt
DELETED
@@ -1,260 +0,0 @@
|
|
1 |
-
2023-03-31 23:37:49 - r - INFO: - Hyperparameters:
|
2 |
-
2023-03-31 23:37:49 - r - INFO: - ================================================================================
|
3 |
-
2023-03-31 23:37:49 - r - INFO: - Name Value Type
|
4 |
-
2023-03-31 23:37:49 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-03-31 23:37:49 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-03-31 23:37:49 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-03-31 23:37:49 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-03-31 23:37:49 - r - INFO: - algo_name PER_DQN <class 'str'>
|
9 |
-
2023-03-31 23:37:49 - r - INFO: - mode train <class 'str'>
|
10 |
-
2023-03-31 23:37:49 - r - INFO: - seed 1 <class 'int'>
|
11 |
-
2023-03-31 23:37:49 - r - INFO: - device cuda <class 'str'>
|
12 |
-
2023-03-31 23:37:49 - r - INFO: - train_eps 200 <class 'int'>
|
13 |
-
2023-03-31 23:37:49 - r - INFO: - test_eps 10 <class 'int'>
|
14 |
-
2023-03-31 23:37:49 - r - INFO: - eval_eps 10 <class 'int'>
|
15 |
-
2023-03-31 23:37:49 - r - INFO: - eval_per_episode 5 <class 'int'>
|
16 |
-
2023-03-31 23:37:49 - r - INFO: - max_steps 200 <class 'int'>
|
17 |
-
2023-03-31 23:37:49 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
18 |
-
2023-03-31 23:37:49 - r - INFO: - load_path Train_CartPole-v1_PER_DQN <class 'str'>
|
19 |
-
2023-03-31 23:37:49 - r - INFO: - show_fig 0 <class 'bool'>
|
20 |
-
2023-03-31 23:37:49 - r - INFO: - save_fig 1 <class 'bool'>
|
21 |
-
2023-03-31 23:37:49 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
22 |
-
2023-03-31 23:37:49 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
23 |
-
2023-03-31 23:37:49 - r - INFO: - epsilon_decay 500 <class 'int'>
|
24 |
-
2023-03-31 23:37:49 - r - INFO: - hidden_dim 256 <class 'int'>
|
25 |
-
2023-03-31 23:37:49 - r - INFO: - gamma 0.99 <class 'float'>
|
26 |
-
2023-03-31 23:37:49 - r - INFO: - lr 0.0001 <class 'float'>
|
27 |
-
2023-03-31 23:37:49 - r - INFO: - buffer_size 100000 <class 'int'>
|
28 |
-
2023-03-31 23:37:49 - r - INFO: - per_alpha 0.6 <class 'float'>
|
29 |
-
2023-03-31 23:37:49 - r - INFO: - per_beta 0.4 <class 'float'>
|
30 |
-
2023-03-31 23:37:49 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
|
31 |
-
2023-03-31 23:37:49 - r - INFO: - per_epsilon 0.01 <class 'float'>
|
32 |
-
2023-03-31 23:37:49 - r - INFO: - batch_size 64 <class 'int'>
|
33 |
-
2023-03-31 23:37:49 - r - INFO: - target_update 4 <class 'int'>
|
34 |
-
2023-03-31 23:37:49 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
35 |
-
2023-03-31 23:37:49 - r - INFO: - task_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749 <class 'str'>
|
36 |
-
2023-03-31 23:37:49 - r - INFO: - res_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749/results <class 'str'>
|
37 |
-
2023-03-31 23:37:49 - r - INFO: - log_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749/logs <class 'str'>
|
38 |
-
2023-03-31 23:37:49 - r - INFO: - traj_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749/traj <class 'str'>
|
39 |
-
2023-03-31 23:37:49 - r - INFO: - tb_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749/tb_logs <class 'str'>
|
40 |
-
2023-03-31 23:37:49 - r - INFO: - ================================================================================
|
41 |
-
2023-03-31 23:37:49 - r - INFO: - n_states: 4, n_actions: 2
|
42 |
-
2023-03-31 23:37:50 - r - INFO: - Start training!
|
43 |
-
2023-03-31 23:37:50 - r - INFO: - Env: CartPole-v1, Algorithm: PER_DQN, Device: cuda
|
44 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 1/200, Reward: 16.000, Step: 16
|
45 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 2/200, Reward: 15.000, Step: 15
|
46 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 3/200, Reward: 25.000, Step: 25
|
47 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 4/200, Reward: 16.000, Step: 16
|
48 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 5/200, Reward: 20.000, Step: 20
|
49 |
-
2023-03-31 23:37:51 - r - INFO: - Current episode 5 has the best eval reward: 9.000
|
50 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 6/200, Reward: 10.000, Step: 10
|
51 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 7/200, Reward: 24.000, Step: 24
|
52 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 8/200, Reward: 20.000, Step: 20
|
53 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 9/200, Reward: 20.000, Step: 20
|
54 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 10/200, Reward: 25.000, Step: 25
|
55 |
-
2023-03-31 23:37:51 - r - INFO: - Current episode 10 has the best eval reward: 9.100
|
56 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 11/200, Reward: 9.000, Step: 9
|
57 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 12/200, Reward: 23.000, Step: 23
|
58 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 13/200, Reward: 14.000, Step: 14
|
59 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 14/200, Reward: 12.000, Step: 12
|
60 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 15/200, Reward: 11.000, Step: 11
|
61 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 16/200, Reward: 17.000, Step: 17
|
62 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 17/200, Reward: 10.000, Step: 10
|
63 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 18/200, Reward: 17.000, Step: 17
|
64 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 19/200, Reward: 10.000, Step: 10
|
65 |
-
2023-03-31 23:37:51 - r - INFO: - Episode: 20/200, Reward: 10.000, Step: 10
|
66 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 21/200, Reward: 22.000, Step: 22
|
67 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 22/200, Reward: 18.000, Step: 18
|
68 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 23/200, Reward: 13.000, Step: 13
|
69 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 24/200, Reward: 13.000, Step: 13
|
70 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 25/200, Reward: 9.000, Step: 9
|
71 |
-
2023-03-31 23:37:52 - r - INFO: - Current episode 25 has the best eval reward: 9.600
|
72 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 26/200, Reward: 10.000, Step: 10
|
73 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 27/200, Reward: 13.000, Step: 13
|
74 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 28/200, Reward: 11.000, Step: 11
|
75 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 29/200, Reward: 10.000, Step: 10
|
76 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 30/200, Reward: 12.000, Step: 12
|
77 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 31/200, Reward: 14.000, Step: 14
|
78 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 32/200, Reward: 11.000, Step: 11
|
79 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 33/200, Reward: 18.000, Step: 18
|
80 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 34/200, Reward: 10.000, Step: 10
|
81 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 35/200, Reward: 10.000, Step: 10
|
82 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 36/200, Reward: 8.000, Step: 8
|
83 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 37/200, Reward: 12.000, Step: 12
|
84 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 38/200, Reward: 10.000, Step: 10
|
85 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 39/200, Reward: 11.000, Step: 11
|
86 |
-
2023-03-31 23:37:52 - r - INFO: - Episode: 40/200, Reward: 10.000, Step: 10
|
87 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 41/200, Reward: 9.000, Step: 9
|
88 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 42/200, Reward: 12.000, Step: 12
|
89 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 43/200, Reward: 9.000, Step: 9
|
90 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 44/200, Reward: 13.000, Step: 13
|
91 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 45/200, Reward: 13.000, Step: 13
|
92 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 46/200, Reward: 12.000, Step: 12
|
93 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 47/200, Reward: 10.000, Step: 10
|
94 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 48/200, Reward: 10.000, Step: 10
|
95 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 49/200, Reward: 10.000, Step: 10
|
96 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 50/200, Reward: 13.000, Step: 13
|
97 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 51/200, Reward: 10.000, Step: 10
|
98 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 52/200, Reward: 15.000, Step: 15
|
99 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 53/200, Reward: 18.000, Step: 18
|
100 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 54/200, Reward: 18.000, Step: 18
|
101 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 55/200, Reward: 16.000, Step: 16
|
102 |
-
2023-03-31 23:37:53 - r - INFO: - Current episode 55 has the best eval reward: 28.000
|
103 |
-
2023-03-31 23:37:53 - r - INFO: - Episode: 56/200, Reward: 47.000, Step: 47
|
104 |
-
2023-03-31 23:37:54 - r - INFO: - Episode: 57/200, Reward: 87.000, Step: 87
|
105 |
-
2023-03-31 23:37:54 - r - INFO: - Episode: 58/200, Reward: 20.000, Step: 20
|
106 |
-
2023-03-31 23:37:54 - r - INFO: - Episode: 59/200, Reward: 47.000, Step: 47
|
107 |
-
2023-03-31 23:37:54 - r - INFO: - Episode: 60/200, Reward: 17.000, Step: 17
|
108 |
-
2023-03-31 23:37:54 - r - INFO: - Episode: 61/200, Reward: 37.000, Step: 37
|
109 |
-
2023-03-31 23:37:54 - r - INFO: - Episode: 62/200, Reward: 43.000, Step: 43
|
110 |
-
2023-03-31 23:37:54 - r - INFO: - Episode: 63/200, Reward: 33.000, Step: 33
|
111 |
-
2023-03-31 23:37:55 - r - INFO: - Episode: 64/200, Reward: 18.000, Step: 18
|
112 |
-
2023-03-31 23:37:55 - r - INFO: - Episode: 65/200, Reward: 29.000, Step: 29
|
113 |
-
2023-03-31 23:37:55 - r - INFO: - Current episode 65 has the best eval reward: 30.700
|
114 |
-
2023-03-31 23:37:55 - r - INFO: - Episode: 66/200, Reward: 30.000, Step: 30
|
115 |
-
2023-03-31 23:37:55 - r - INFO: - Episode: 67/200, Reward: 23.000, Step: 23
|
116 |
-
2023-03-31 23:37:55 - r - INFO: - Episode: 68/200, Reward: 26.000, Step: 26
|
117 |
-
2023-03-31 23:37:55 - r - INFO: - Episode: 69/200, Reward: 18.000, Step: 18
|
118 |
-
2023-03-31 23:37:55 - r - INFO: - Episode: 70/200, Reward: 20.000, Step: 20
|
119 |
-
2023-03-31 23:37:55 - r - INFO: - Episode: 71/200, Reward: 26.000, Step: 26
|
120 |
-
2023-03-31 23:37:55 - r - INFO: - Episode: 72/200, Reward: 16.000, Step: 16
|
121 |
-
2023-03-31 23:37:55 - r - INFO: - Episode: 73/200, Reward: 23.000, Step: 23
|
122 |
-
2023-03-31 23:37:56 - r - INFO: - Episode: 74/200, Reward: 30.000, Step: 30
|
123 |
-
2023-03-31 23:37:56 - r - INFO: - Episode: 75/200, Reward: 23.000, Step: 23
|
124 |
-
2023-03-31 23:37:56 - r - INFO: - Episode: 76/200, Reward: 26.000, Step: 26
|
125 |
-
2023-03-31 23:37:56 - r - INFO: - Episode: 77/200, Reward: 34.000, Step: 34
|
126 |
-
2023-03-31 23:37:56 - r - INFO: - Episode: 78/200, Reward: 29.000, Step: 29
|
127 |
-
2023-03-31 23:37:56 - r - INFO: - Episode: 79/200, Reward: 32.000, Step: 32
|
128 |
-
2023-03-31 23:37:56 - r - INFO: - Episode: 80/200, Reward: 23.000, Step: 23
|
129 |
-
2023-03-31 23:37:57 - r - INFO: - Episode: 81/200, Reward: 32.000, Step: 32
|
130 |
-
2023-03-31 23:37:57 - r - INFO: - Episode: 82/200, Reward: 72.000, Step: 72
|
131 |
-
2023-03-31 23:37:57 - r - INFO: - Episode: 83/200, Reward: 105.000, Step: 105
|
132 |
-
2023-03-31 23:37:58 - r - INFO: - Episode: 84/200, Reward: 63.000, Step: 63
|
133 |
-
2023-03-31 23:37:58 - r - INFO: - Episode: 85/200, Reward: 119.000, Step: 119
|
134 |
-
2023-03-31 23:37:59 - r - INFO: - Current episode 85 has the best eval reward: 86.500
|
135 |
-
2023-03-31 23:37:59 - r - INFO: - Episode: 86/200, Reward: 52.000, Step: 52
|
136 |
-
2023-03-31 23:37:59 - r - INFO: - Episode: 87/200, Reward: 155.000, Step: 155
|
137 |
-
2023-03-31 23:38:00 - r - INFO: - Episode: 88/200, Reward: 79.000, Step: 79
|
138 |
-
2023-03-31 23:38:00 - r - INFO: - Episode: 89/200, Reward: 44.000, Step: 44
|
139 |
-
2023-03-31 23:38:00 - r - INFO: - Episode: 90/200, Reward: 140.000, Step: 140
|
140 |
-
2023-03-31 23:38:01 - r - INFO: - Episode: 91/200, Reward: 86.000, Step: 86
|
141 |
-
2023-03-31 23:38:01 - r - INFO: - Episode: 92/200, Reward: 183.000, Step: 183
|
142 |
-
2023-03-31 23:38:02 - r - INFO: - Episode: 93/200, Reward: 112.000, Step: 112
|
143 |
-
2023-03-31 23:38:03 - r - INFO: - Episode: 94/200, Reward: 190.000, Step: 190
|
144 |
-
2023-03-31 23:38:03 - r - INFO: - Episode: 95/200, Reward: 200.000, Step: 200
|
145 |
-
2023-03-31 23:38:04 - r - INFO: - Current episode 95 has the best eval reward: 164.200
|
146 |
-
2023-03-31 23:38:05 - r - INFO: - Episode: 96/200, Reward: 157.000, Step: 157
|
147 |
-
2023-03-31 23:38:05 - r - INFO: - Episode: 97/200, Reward: 200.000, Step: 200
|
148 |
-
2023-03-31 23:38:06 - r - INFO: - Episode: 98/200, Reward: 200.000, Step: 200
|
149 |
-
2023-03-31 23:38:07 - r - INFO: - Episode: 99/200, Reward: 200.000, Step: 200
|
150 |
-
2023-03-31 23:38:08 - r - INFO: - Episode: 100/200, Reward: 200.000, Step: 200
|
151 |
-
2023-03-31 23:38:08 - r - INFO: - Current episode 100 has the best eval reward: 200.000
|
152 |
-
2023-03-31 23:38:09 - r - INFO: - Episode: 101/200, Reward: 200.000, Step: 200
|
153 |
-
2023-03-31 23:38:10 - r - INFO: - Episode: 102/200, Reward: 200.000, Step: 200
|
154 |
-
2023-03-31 23:38:11 - r - INFO: - Episode: 103/200, Reward: 200.000, Step: 200
|
155 |
-
2023-03-31 23:38:12 - r - INFO: - Episode: 104/200, Reward: 200.000, Step: 200
|
156 |
-
2023-03-31 23:38:12 - r - INFO: - Episode: 105/200, Reward: 200.000, Step: 200
|
157 |
-
2023-03-31 23:38:13 - r - INFO: - Current episode 105 has the best eval reward: 200.000
|
158 |
-
2023-03-31 23:38:14 - r - INFO: - Episode: 106/200, Reward: 200.000, Step: 200
|
159 |
-
2023-03-31 23:38:15 - r - INFO: - Episode: 107/200, Reward: 200.000, Step: 200
|
160 |
-
2023-03-31 23:38:16 - r - INFO: - Episode: 108/200, Reward: 200.000, Step: 200
|
161 |
-
2023-03-31 23:38:17 - r - INFO: - Episode: 109/200, Reward: 200.000, Step: 200
|
162 |
-
2023-03-31 23:38:17 - r - INFO: - Episode: 110/200, Reward: 200.000, Step: 200
|
163 |
-
2023-03-31 23:38:19 - r - INFO: - Episode: 111/200, Reward: 200.000, Step: 200
|
164 |
-
2023-03-31 23:38:20 - r - INFO: - Episode: 112/200, Reward: 200.000, Step: 200
|
165 |
-
2023-03-31 23:38:21 - r - INFO: - Episode: 113/200, Reward: 200.000, Step: 200
|
166 |
-
2023-03-31 23:38:22 - r - INFO: - Episode: 114/200, Reward: 200.000, Step: 200
|
167 |
-
2023-03-31 23:38:23 - r - INFO: - Episode: 115/200, Reward: 190.000, Step: 190
|
168 |
-
2023-03-31 23:38:24 - r - INFO: - Episode: 116/200, Reward: 200.000, Step: 200
|
169 |
-
2023-03-31 23:38:25 - r - INFO: - Episode: 117/200, Reward: 200.000, Step: 200
|
170 |
-
2023-03-31 23:38:26 - r - INFO: - Episode: 118/200, Reward: 200.000, Step: 200
|
171 |
-
2023-03-31 23:38:27 - r - INFO: - Episode: 119/200, Reward: 200.000, Step: 200
|
172 |
-
2023-03-31 23:38:28 - r - INFO: - Episode: 120/200, Reward: 200.000, Step: 200
|
173 |
-
2023-03-31 23:38:28 - r - INFO: - Current episode 120 has the best eval reward: 200.000
|
174 |
-
2023-03-31 23:38:29 - r - INFO: - Episode: 121/200, Reward: 200.000, Step: 200
|
175 |
-
2023-03-31 23:38:30 - r - INFO: - Episode: 122/200, Reward: 200.000, Step: 200
|
176 |
-
2023-03-31 23:38:31 - r - INFO: - Episode: 123/200, Reward: 200.000, Step: 200
|
177 |
-
2023-03-31 23:38:32 - r - INFO: - Episode: 124/200, Reward: 198.000, Step: 198
|
178 |
-
2023-03-31 23:38:33 - r - INFO: - Episode: 125/200, Reward: 200.000, Step: 200
|
179 |
-
2023-03-31 23:38:35 - r - INFO: - Episode: 126/200, Reward: 188.000, Step: 188
|
180 |
-
2023-03-31 23:38:36 - r - INFO: - Episode: 127/200, Reward: 200.000, Step: 200
|
181 |
-
2023-03-31 23:38:37 - r - INFO: - Episode: 128/200, Reward: 200.000, Step: 200
|
182 |
-
2023-03-31 23:38:38 - r - INFO: - Episode: 129/200, Reward: 175.000, Step: 175
|
183 |
-
2023-03-31 23:38:39 - r - INFO: - Episode: 130/200, Reward: 200.000, Step: 200
|
184 |
-
2023-03-31 23:38:41 - r - INFO: - Episode: 131/200, Reward: 200.000, Step: 200
|
185 |
-
2023-03-31 23:38:42 - r - INFO: - Episode: 132/200, Reward: 172.000, Step: 172
|
186 |
-
2023-03-31 23:38:43 - r - INFO: - Episode: 133/200, Reward: 200.000, Step: 200
|
187 |
-
2023-03-31 23:38:44 - r - INFO: - Episode: 134/200, Reward: 200.000, Step: 200
|
188 |
-
2023-03-31 23:38:45 - r - INFO: - Episode: 135/200, Reward: 179.000, Step: 179
|
189 |
-
2023-03-31 23:38:46 - r - INFO: - Episode: 136/200, Reward: 200.000, Step: 200
|
190 |
-
2023-03-31 23:38:47 - r - INFO: - Episode: 137/200, Reward: 200.000, Step: 200
|
191 |
-
2023-03-31 23:38:49 - r - INFO: - Episode: 138/200, Reward: 200.000, Step: 200
|
192 |
-
2023-03-31 23:38:49 - r - INFO: - Episode: 139/200, Reward: 161.000, Step: 161
|
193 |
-
2023-03-31 23:38:51 - r - INFO: - Episode: 140/200, Reward: 200.000, Step: 200
|
194 |
-
2023-03-31 23:38:52 - r - INFO: - Episode: 141/200, Reward: 150.000, Step: 150
|
195 |
-
2023-03-31 23:38:53 - r - INFO: - Episode: 142/200, Reward: 200.000, Step: 200
|
196 |
-
2023-03-31 23:38:54 - r - INFO: - Episode: 143/200, Reward: 200.000, Step: 200
|
197 |
-
2023-03-31 23:38:55 - r - INFO: - Episode: 144/200, Reward: 170.000, Step: 170
|
198 |
-
2023-03-31 23:38:56 - r - INFO: - Episode: 145/200, Reward: 200.000, Step: 200
|
199 |
-
2023-03-31 23:38:58 - r - INFO: - Episode: 146/200, Reward: 200.000, Step: 200
|
200 |
-
2023-03-31 23:38:59 - r - INFO: - Episode: 147/200, Reward: 160.000, Step: 160
|
201 |
-
2023-03-31 23:39:00 - r - INFO: - Episode: 148/200, Reward: 160.000, Step: 160
|
202 |
-
2023-03-31 23:39:01 - r - INFO: - Episode: 149/200, Reward: 200.000, Step: 200
|
203 |
-
2023-03-31 23:39:02 - r - INFO: - Episode: 150/200, Reward: 200.000, Step: 200
|
204 |
-
2023-03-31 23:39:04 - r - INFO: - Episode: 151/200, Reward: 177.000, Step: 177
|
205 |
-
2023-03-31 23:39:05 - r - INFO: - Episode: 152/200, Reward: 193.000, Step: 193
|
206 |
-
2023-03-31 23:39:06 - r - INFO: - Episode: 153/200, Reward: 182.000, Step: 182
|
207 |
-
2023-03-31 23:39:08 - r - INFO: - Episode: 154/200, Reward: 176.000, Step: 176
|
208 |
-
2023-03-31 23:39:09 - r - INFO: - Episode: 155/200, Reward: 200.000, Step: 200
|
209 |
-
2023-03-31 23:39:11 - r - INFO: - Episode: 156/200, Reward: 200.000, Step: 200
|
210 |
-
2023-03-31 23:39:12 - r - INFO: - Episode: 157/200, Reward: 171.000, Step: 171
|
211 |
-
2023-03-31 23:39:13 - r - INFO: - Episode: 158/200, Reward: 192.000, Step: 192
|
212 |
-
2023-03-31 23:39:14 - r - INFO: - Episode: 159/200, Reward: 200.000, Step: 200
|
213 |
-
2023-03-31 23:39:15 - r - INFO: - Episode: 160/200, Reward: 179.000, Step: 179
|
214 |
-
2023-03-31 23:39:17 - r - INFO: - Episode: 161/200, Reward: 177.000, Step: 177
|
215 |
-
2023-03-31 23:39:18 - r - INFO: - Episode: 162/200, Reward: 199.000, Step: 199
|
216 |
-
2023-03-31 23:39:19 - r - INFO: - Episode: 163/200, Reward: 200.000, Step: 200
|
217 |
-
2023-03-31 23:39:21 - r - INFO: - Episode: 164/200, Reward: 186.000, Step: 186
|
218 |
-
2023-03-31 23:39:22 - r - INFO: - Episode: 165/200, Reward: 178.000, Step: 178
|
219 |
-
2023-03-31 23:39:23 - r - INFO: - Episode: 166/200, Reward: 200.000, Step: 200
|
220 |
-
2023-03-31 23:39:25 - r - INFO: - Episode: 167/200, Reward: 200.000, Step: 200
|
221 |
-
2023-03-31 23:39:26 - r - INFO: - Episode: 168/200, Reward: 200.000, Step: 200
|
222 |
-
2023-03-31 23:39:27 - r - INFO: - Episode: 169/200, Reward: 179.000, Step: 179
|
223 |
-
2023-03-31 23:39:29 - r - INFO: - Episode: 170/200, Reward: 200.000, Step: 200
|
224 |
-
2023-03-31 23:39:31 - r - INFO: - Episode: 171/200, Reward: 200.000, Step: 200
|
225 |
-
2023-03-31 23:39:32 - r - INFO: - Episode: 172/200, Reward: 200.000, Step: 200
|
226 |
-
2023-03-31 23:39:34 - r - INFO: - Episode: 173/200, Reward: 200.000, Step: 200
|
227 |
-
2023-03-31 23:39:35 - r - INFO: - Episode: 174/200, Reward: 200.000, Step: 200
|
228 |
-
2023-03-31 23:39:36 - r - INFO: - Episode: 175/200, Reward: 200.000, Step: 200
|
229 |
-
2023-03-31 23:39:37 - r - INFO: - Current episode 175 has the best eval reward: 200.000
|
230 |
-
2023-03-31 23:39:38 - r - INFO: - Episode: 176/200, Reward: 200.000, Step: 200
|
231 |
-
2023-03-31 23:39:40 - r - INFO: - Episode: 177/200, Reward: 200.000, Step: 200
|
232 |
-
2023-03-31 23:39:41 - r - INFO: - Episode: 178/200, Reward: 200.000, Step: 200
|
233 |
-
2023-03-31 23:39:43 - r - INFO: - Episode: 179/200, Reward: 200.000, Step: 200
|
234 |
-
2023-03-31 23:39:44 - r - INFO: - Episode: 180/200, Reward: 200.000, Step: 200
|
235 |
-
2023-03-31 23:39:45 - r - INFO: - Current episode 180 has the best eval reward: 200.000
|
236 |
-
2023-03-31 23:39:46 - r - INFO: - Episode: 181/200, Reward: 200.000, Step: 200
|
237 |
-
2023-03-31 23:39:47 - r - INFO: - Episode: 182/200, Reward: 200.000, Step: 200
|
238 |
-
2023-03-31 23:39:49 - r - INFO: - Episode: 183/200, Reward: 200.000, Step: 200
|
239 |
-
2023-03-31 23:39:50 - r - INFO: - Episode: 184/200, Reward: 200.000, Step: 200
|
240 |
-
2023-03-31 23:39:52 - r - INFO: - Episode: 185/200, Reward: 200.000, Step: 200
|
241 |
-
2023-03-31 23:39:52 - r - INFO: - Current episode 185 has the best eval reward: 200.000
|
242 |
-
2023-03-31 23:39:54 - r - INFO: - Episode: 186/200, Reward: 200.000, Step: 200
|
243 |
-
2023-03-31 23:39:55 - r - INFO: - Episode: 187/200, Reward: 200.000, Step: 200
|
244 |
-
2023-03-31 23:39:57 - r - INFO: - Episode: 188/200, Reward: 200.000, Step: 200
|
245 |
-
2023-03-31 23:39:58 - r - INFO: - Episode: 189/200, Reward: 200.000, Step: 200
|
246 |
-
2023-03-31 23:40:00 - r - INFO: - Episode: 190/200, Reward: 200.000, Step: 200
|
247 |
-
2023-03-31 23:40:00 - r - INFO: - Current episode 190 has the best eval reward: 200.000
|
248 |
-
2023-03-31 23:40:02 - r - INFO: - Episode: 191/200, Reward: 200.000, Step: 200
|
249 |
-
2023-03-31 23:40:03 - r - INFO: - Episode: 192/200, Reward: 200.000, Step: 200
|
250 |
-
2023-03-31 23:40:05 - r - INFO: - Episode: 193/200, Reward: 200.000, Step: 200
|
251 |
-
2023-03-31 23:40:06 - r - INFO: - Episode: 194/200, Reward: 200.000, Step: 200
|
252 |
-
2023-03-31 23:40:08 - r - INFO: - Episode: 195/200, Reward: 200.000, Step: 200
|
253 |
-
2023-03-31 23:40:09 - r - INFO: - Current episode 195 has the best eval reward: 200.000
|
254 |
-
2023-03-31 23:40:10 - r - INFO: - Episode: 196/200, Reward: 200.000, Step: 200
|
255 |
-
2023-03-31 23:40:13 - r - INFO: - Episode: 197/200, Reward: 200.000, Step: 200
|
256 |
-
2023-03-31 23:40:17 - r - INFO: - Episode: 198/200, Reward: 200.000, Step: 200
|
257 |
-
2023-03-31 23:40:24 - r - INFO: - Episode: 199/200, Reward: 200.000, Step: 200
|
258 |
-
2023-03-31 23:40:29 - r - INFO: - Episode: 200/200, Reward: 200.000, Step: 200
|
259 |
-
2023-03-31 23:40:32 - r - INFO: - Current episode 200 has the best eval reward: 200.000
|
260 |
-
2023-03-31 23:40:32 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/models/checkpoint.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c4ba22dbfbe3211e48c45027f9c4efb9981cdf6ddbd972b57201fb68ca90d2fd
|
3 |
-
size 272471
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/learning_curve.png
DELETED
Binary file (50.7 kB)
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/res.csv
DELETED
@@ -1,201 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,16.0,16
|
3 |
-
1,15.0,15
|
4 |
-
2,25.0,25
|
5 |
-
3,16.0,16
|
6 |
-
4,20.0,20
|
7 |
-
5,10.0,10
|
8 |
-
6,24.0,24
|
9 |
-
7,20.0,20
|
10 |
-
8,20.0,20
|
11 |
-
9,25.0,25
|
12 |
-
10,9.0,9
|
13 |
-
11,23.0,23
|
14 |
-
12,14.0,14
|
15 |
-
13,12.0,12
|
16 |
-
14,11.0,11
|
17 |
-
15,17.0,17
|
18 |
-
16,10.0,10
|
19 |
-
17,17.0,17
|
20 |
-
18,10.0,10
|
21 |
-
19,10.0,10
|
22 |
-
20,22.0,22
|
23 |
-
21,18.0,18
|
24 |
-
22,13.0,13
|
25 |
-
23,13.0,13
|
26 |
-
24,9.0,9
|
27 |
-
25,10.0,10
|
28 |
-
26,13.0,13
|
29 |
-
27,11.0,11
|
30 |
-
28,10.0,10
|
31 |
-
29,12.0,12
|
32 |
-
30,14.0,14
|
33 |
-
31,11.0,11
|
34 |
-
32,18.0,18
|
35 |
-
33,10.0,10
|
36 |
-
34,10.0,10
|
37 |
-
35,8.0,8
|
38 |
-
36,12.0,12
|
39 |
-
37,10.0,10
|
40 |
-
38,11.0,11
|
41 |
-
39,10.0,10
|
42 |
-
40,9.0,9
|
43 |
-
41,12.0,12
|
44 |
-
42,9.0,9
|
45 |
-
43,13.0,13
|
46 |
-
44,13.0,13
|
47 |
-
45,12.0,12
|
48 |
-
46,10.0,10
|
49 |
-
47,10.0,10
|
50 |
-
48,10.0,10
|
51 |
-
49,13.0,13
|
52 |
-
50,10.0,10
|
53 |
-
51,15.0,15
|
54 |
-
52,18.0,18
|
55 |
-
53,18.0,18
|
56 |
-
54,16.0,16
|
57 |
-
55,47.0,47
|
58 |
-
56,87.0,87
|
59 |
-
57,20.0,20
|
60 |
-
58,47.0,47
|
61 |
-
59,17.0,17
|
62 |
-
60,37.0,37
|
63 |
-
61,43.0,43
|
64 |
-
62,33.0,33
|
65 |
-
63,18.0,18
|
66 |
-
64,29.0,29
|
67 |
-
65,30.0,30
|
68 |
-
66,23.0,23
|
69 |
-
67,26.0,26
|
70 |
-
68,18.0,18
|
71 |
-
69,20.0,20
|
72 |
-
70,26.0,26
|
73 |
-
71,16.0,16
|
74 |
-
72,23.0,23
|
75 |
-
73,30.0,30
|
76 |
-
74,23.0,23
|
77 |
-
75,26.0,26
|
78 |
-
76,34.0,34
|
79 |
-
77,29.0,29
|
80 |
-
78,32.0,32
|
81 |
-
79,23.0,23
|
82 |
-
80,32.0,32
|
83 |
-
81,72.0,72
|
84 |
-
82,105.0,105
|
85 |
-
83,63.0,63
|
86 |
-
84,119.0,119
|
87 |
-
85,52.0,52
|
88 |
-
86,155.0,155
|
89 |
-
87,79.0,79
|
90 |
-
88,44.0,44
|
91 |
-
89,140.0,140
|
92 |
-
90,86.0,86
|
93 |
-
91,183.0,183
|
94 |
-
92,112.0,112
|
95 |
-
93,190.0,190
|
96 |
-
94,200.0,200
|
97 |
-
95,157.0,157
|
98 |
-
96,200.0,200
|
99 |
-
97,200.0,200
|
100 |
-
98,200.0,200
|
101 |
-
99,200.0,200
|
102 |
-
100,200.0,200
|
103 |
-
101,200.0,200
|
104 |
-
102,200.0,200
|
105 |
-
103,200.0,200
|
106 |
-
104,200.0,200
|
107 |
-
105,200.0,200
|
108 |
-
106,200.0,200
|
109 |
-
107,200.0,200
|
110 |
-
108,200.0,200
|
111 |
-
109,200.0,200
|
112 |
-
110,200.0,200
|
113 |
-
111,200.0,200
|
114 |
-
112,200.0,200
|
115 |
-
113,200.0,200
|
116 |
-
114,190.0,190
|
117 |
-
115,200.0,200
|
118 |
-
116,200.0,200
|
119 |
-
117,200.0,200
|
120 |
-
118,200.0,200
|
121 |
-
119,200.0,200
|
122 |
-
120,200.0,200
|
123 |
-
121,200.0,200
|
124 |
-
122,200.0,200
|
125 |
-
123,198.0,198
|
126 |
-
124,200.0,200
|
127 |
-
125,188.0,188
|
128 |
-
126,200.0,200
|
129 |
-
127,200.0,200
|
130 |
-
128,175.0,175
|
131 |
-
129,200.0,200
|
132 |
-
130,200.0,200
|
133 |
-
131,172.0,172
|
134 |
-
132,200.0,200
|
135 |
-
133,200.0,200
|
136 |
-
134,179.0,179
|
137 |
-
135,200.0,200
|
138 |
-
136,200.0,200
|
139 |
-
137,200.0,200
|
140 |
-
138,161.0,161
|
141 |
-
139,200.0,200
|
142 |
-
140,150.0,150
|
143 |
-
141,200.0,200
|
144 |
-
142,200.0,200
|
145 |
-
143,170.0,170
|
146 |
-
144,200.0,200
|
147 |
-
145,200.0,200
|
148 |
-
146,160.0,160
|
149 |
-
147,160.0,160
|
150 |
-
148,200.0,200
|
151 |
-
149,200.0,200
|
152 |
-
150,177.0,177
|
153 |
-
151,193.0,193
|
154 |
-
152,182.0,182
|
155 |
-
153,176.0,176
|
156 |
-
154,200.0,200
|
157 |
-
155,200.0,200
|
158 |
-
156,171.0,171
|
159 |
-
157,192.0,192
|
160 |
-
158,200.0,200
|
161 |
-
159,179.0,179
|
162 |
-
160,177.0,177
|
163 |
-
161,199.0,199
|
164 |
-
162,200.0,200
|
165 |
-
163,186.0,186
|
166 |
-
164,178.0,178
|
167 |
-
165,200.0,200
|
168 |
-
166,200.0,200
|
169 |
-
167,200.0,200
|
170 |
-
168,179.0,179
|
171 |
-
169,200.0,200
|
172 |
-
170,200.0,200
|
173 |
-
171,200.0,200
|
174 |
-
172,200.0,200
|
175 |
-
173,200.0,200
|
176 |
-
174,200.0,200
|
177 |
-
175,200.0,200
|
178 |
-
176,200.0,200
|
179 |
-
177,200.0,200
|
180 |
-
178,200.0,200
|
181 |
-
179,200.0,200
|
182 |
-
180,200.0,200
|
183 |
-
181,200.0,200
|
184 |
-
182,200.0,200
|
185 |
-
183,200.0,200
|
186 |
-
184,200.0,200
|
187 |
-
185,200.0,200
|
188 |
-
186,200.0,200
|
189 |
-
187,200.0,200
|
190 |
-
188,200.0,200
|
191 |
-
189,200.0,200
|
192 |
-
190,200.0,200
|
193 |
-
191,200.0,200
|
194 |
-
192,200.0,200
|
195 |
-
193,200.0,200
|
196 |
-
194,200.0,200
|
197 |
-
195,200.0,200
|
198 |
-
196,200.0,200
|
199 |
-
197,200.0,200
|
200 |
-
198,200.0,200
|
201 |
-
199,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/tb_logs/events.out.tfevents.1680277069.DESKTOP-H34HQIQ.305216.0
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1ead3d7b1b3efd92eecfb7f314b1922f372c92614db7819dbfa6e06770b12d37
|
3 |
-
size 40
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/config.yaml
DELETED
@@ -1,48 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: PER_DQN
|
3 |
-
device: cuda
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: false
|
8 |
-
load_path: Train_CartPole-v1_PER_DQN
|
9 |
-
max_steps: 200
|
10 |
-
mode: train
|
11 |
-
new_step_api: true
|
12 |
-
render: false
|
13 |
-
save_fig: true
|
14 |
-
seed: 1
|
15 |
-
show_fig: false
|
16 |
-
test_eps: 10
|
17 |
-
train_eps: 200
|
18 |
-
wrapper: null
|
19 |
-
algo_cfg:
|
20 |
-
batch_size: 64
|
21 |
-
buffer_size: 100000
|
22 |
-
epsilon_decay: 500
|
23 |
-
epsilon_end: 0.01
|
24 |
-
epsilon_start: 0.95
|
25 |
-
gamma: 0.99
|
26 |
-
hidden_dim: 256
|
27 |
-
lr: 0.0001
|
28 |
-
per_alpha: 0.6
|
29 |
-
per_beta: 0.4
|
30 |
-
per_beta_annealing: 0.001
|
31 |
-
per_epsilon: 0.01
|
32 |
-
target_update: 4
|
33 |
-
value_layers:
|
34 |
-
- activation: relu
|
35 |
-
layer_dim:
|
36 |
-
- n_states
|
37 |
-
- 256
|
38 |
-
layer_type: linear
|
39 |
-
- activation: relu
|
40 |
-
layer_dim:
|
41 |
-
- 256
|
42 |
-
- 256
|
43 |
-
layer_type: linear
|
44 |
-
- activation: none
|
45 |
-
layer_dim:
|
46 |
-
- 256
|
47 |
-
- n_actions
|
48 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/logs/log.txt
DELETED
@@ -1,267 +0,0 @@
|
|
1 |
-
2023-03-31 22:58:15 - r - INFO: - Hyperparameters:
|
2 |
-
2023-03-31 22:58:15 - r - INFO: - ================================================================================
|
3 |
-
2023-03-31 22:58:15 - r - INFO: - Name Value Type
|
4 |
-
2023-03-31 22:58:15 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-03-31 22:58:15 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-03-31 22:58:15 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-03-31 22:58:15 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-03-31 22:58:15 - r - INFO: - algo_name PER_DQN <class 'str'>
|
9 |
-
2023-03-31 22:58:15 - r - INFO: - mode train <class 'str'>
|
10 |
-
2023-03-31 22:58:15 - r - INFO: - seed 1 <class 'int'>
|
11 |
-
2023-03-31 22:58:15 - r - INFO: - device cuda <class 'str'>
|
12 |
-
2023-03-31 22:58:15 - r - INFO: - train_eps 200 <class 'int'>
|
13 |
-
2023-03-31 22:58:15 - r - INFO: - test_eps 10 <class 'int'>
|
14 |
-
2023-03-31 22:58:15 - r - INFO: - eval_eps 10 <class 'int'>
|
15 |
-
2023-03-31 22:58:15 - r - INFO: - eval_per_episode 5 <class 'int'>
|
16 |
-
2023-03-31 22:58:15 - r - INFO: - max_steps 200 <class 'int'>
|
17 |
-
2023-03-31 22:58:15 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
18 |
-
2023-03-31 22:58:15 - r - INFO: - load_path Train_CartPole-v1_PER_DQN <class 'str'>
|
19 |
-
2023-03-31 22:58:15 - r - INFO: - show_fig 0 <class 'bool'>
|
20 |
-
2023-03-31 22:58:15 - r - INFO: - save_fig 1 <class 'bool'>
|
21 |
-
2023-03-31 22:58:15 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
22 |
-
2023-03-31 22:58:15 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
23 |
-
2023-03-31 22:58:15 - r - INFO: - epsilon_decay 500 <class 'int'>
|
24 |
-
2023-03-31 22:58:15 - r - INFO: - hidden_dim 256 <class 'int'>
|
25 |
-
2023-03-31 22:58:15 - r - INFO: - gamma 0.99 <class 'float'>
|
26 |
-
2023-03-31 22:58:15 - r - INFO: - lr 0.0001 <class 'float'>
|
27 |
-
2023-03-31 22:58:15 - r - INFO: - buffer_size 100000 <class 'int'>
|
28 |
-
2023-03-31 22:58:15 - r - INFO: - per_alpha 0.6 <class 'float'>
|
29 |
-
2023-03-31 22:58:15 - r - INFO: - per_beta 0.4 <class 'float'>
|
30 |
-
2023-03-31 22:58:15 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
|
31 |
-
2023-03-31 22:58:15 - r - INFO: - per_epsilon 0.01 <class 'float'>
|
32 |
-
2023-03-31 22:58:15 - r - INFO: - batch_size 64 <class 'int'>
|
33 |
-
2023-03-31 22:58:15 - r - INFO: - target_update 4 <class 'int'>
|
34 |
-
2023-03-31 22:58:15 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
35 |
-
2023-03-31 22:58:15 - r - INFO: - task_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815 <class 'str'>
|
36 |
-
2023-03-31 22:58:15 - r - INFO: - res_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815/results <class 'str'>
|
37 |
-
2023-03-31 22:58:15 - r - INFO: - log_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815/logs <class 'str'>
|
38 |
-
2023-03-31 22:58:15 - r - INFO: - traj_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815/traj <class 'str'>
|
39 |
-
2023-03-31 22:58:15 - r - INFO: - tb_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815/tb_logs <class 'str'>
|
40 |
-
2023-03-31 22:58:15 - r - INFO: - ================================================================================
|
41 |
-
2023-03-31 22:58:15 - r - INFO: - n_states: 4, n_actions: 2
|
42 |
-
2023-03-31 22:58:16 - r - INFO: - Start training!
|
43 |
-
2023-03-31 22:58:16 - r - INFO: - Env: CartPole-v1, Algorithm: PER_DQN, Device: cuda
|
44 |
-
2023-03-31 22:58:17 - r - INFO: - Episode: 1/200, Reward: 15.000, Step: 15
|
45 |
-
2023-03-31 22:58:17 - r - INFO: - Episode: 2/200, Reward: 29.000, Step: 29
|
46 |
-
2023-03-31 22:58:17 - r - INFO: - Episode: 3/200, Reward: 13.000, Step: 13
|
47 |
-
2023-03-31 22:58:17 - r - INFO: - Episode: 4/200, Reward: 14.000, Step: 14
|
48 |
-
2023-03-31 22:58:17 - r - INFO: - Episode: 5/200, Reward: 11.000, Step: 11
|
49 |
-
2023-03-31 22:58:17 - r - INFO: - Current episode 5 has the best eval reward: 9.600
|
50 |
-
2023-03-31 22:58:17 - r - INFO: - Episode: 6/200, Reward: 39.000, Step: 39
|
51 |
-
2023-03-31 22:58:18 - r - INFO: - Episode: 7/200, Reward: 35.000, Step: 35
|
52 |
-
2023-03-31 22:58:18 - r - INFO: - Episode: 8/200, Reward: 16.000, Step: 16
|
53 |
-
2023-03-31 22:58:18 - r - INFO: - Episode: 9/200, Reward: 13.000, Step: 13
|
54 |
-
2023-03-31 22:58:18 - r - INFO: - Episode: 10/200, Reward: 12.000, Step: 12
|
55 |
-
2023-03-31 22:58:18 - r - INFO: - Episode: 11/200, Reward: 11.000, Step: 11
|
56 |
-
2023-03-31 22:58:18 - r - INFO: - Episode: 12/200, Reward: 34.000, Step: 34
|
57 |
-
2023-03-31 22:58:18 - r - INFO: - Episode: 13/200, Reward: 15.000, Step: 15
|
58 |
-
2023-03-31 22:58:18 - r - INFO: - Episode: 14/200, Reward: 23.000, Step: 23
|
59 |
-
2023-03-31 22:58:18 - r - INFO: - Episode: 15/200, Reward: 9.000, Step: 9
|
60 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 16/200, Reward: 19.000, Step: 19
|
61 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 17/200, Reward: 9.000, Step: 9
|
62 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 18/200, Reward: 10.000, Step: 10
|
63 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 19/200, Reward: 16.000, Step: 16
|
64 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 20/200, Reward: 19.000, Step: 19
|
65 |
-
2023-03-31 22:58:19 - r - INFO: - Current episode 20 has the best eval reward: 9.700
|
66 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 21/200, Reward: 11.000, Step: 11
|
67 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 22/200, Reward: 10.000, Step: 10
|
68 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 23/200, Reward: 14.000, Step: 14
|
69 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 24/200, Reward: 12.000, Step: 12
|
70 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 25/200, Reward: 16.000, Step: 16
|
71 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 26/200, Reward: 11.000, Step: 11
|
72 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 27/200, Reward: 10.000, Step: 10
|
73 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 28/200, Reward: 16.000, Step: 16
|
74 |
-
2023-03-31 22:58:19 - r - INFO: - Episode: 29/200, Reward: 12.000, Step: 12
|
75 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 30/200, Reward: 16.000, Step: 16
|
76 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 31/200, Reward: 11.000, Step: 11
|
77 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 32/200, Reward: 8.000, Step: 8
|
78 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 33/200, Reward: 8.000, Step: 8
|
79 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 34/200, Reward: 12.000, Step: 12
|
80 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 35/200, Reward: 10.000, Step: 10
|
81 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 36/200, Reward: 9.000, Step: 9
|
82 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 37/200, Reward: 11.000, Step: 11
|
83 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 38/200, Reward: 10.000, Step: 10
|
84 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 39/200, Reward: 11.000, Step: 11
|
85 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 40/200, Reward: 10.000, Step: 10
|
86 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 41/200, Reward: 10.000, Step: 10
|
87 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 42/200, Reward: 10.000, Step: 10
|
88 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 43/200, Reward: 10.000, Step: 10
|
89 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 44/200, Reward: 9.000, Step: 9
|
90 |
-
2023-03-31 22:58:20 - r - INFO: - Episode: 45/200, Reward: 11.000, Step: 11
|
91 |
-
2023-03-31 22:58:21 - r - INFO: - Current episode 45 has the best eval reward: 10.600
|
92 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 46/200, Reward: 10.000, Step: 10
|
93 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 47/200, Reward: 10.000, Step: 10
|
94 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 48/200, Reward: 11.000, Step: 11
|
95 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 49/200, Reward: 10.000, Step: 10
|
96 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 50/200, Reward: 13.000, Step: 13
|
97 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 51/200, Reward: 18.000, Step: 18
|
98 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 52/200, Reward: 12.000, Step: 12
|
99 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 53/200, Reward: 10.000, Step: 10
|
100 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 54/200, Reward: 10.000, Step: 10
|
101 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 55/200, Reward: 11.000, Step: 11
|
102 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 56/200, Reward: 8.000, Step: 8
|
103 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 57/200, Reward: 16.000, Step: 16
|
104 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 58/200, Reward: 11.000, Step: 11
|
105 |
-
2023-03-31 22:58:21 - r - INFO: - Episode: 59/200, Reward: 9.000, Step: 9
|
106 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 60/200, Reward: 9.000, Step: 9
|
107 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 61/200, Reward: 10.000, Step: 10
|
108 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 62/200, Reward: 10.000, Step: 10
|
109 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 63/200, Reward: 9.000, Step: 9
|
110 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 64/200, Reward: 8.000, Step: 8
|
111 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 65/200, Reward: 10.000, Step: 10
|
112 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 66/200, Reward: 9.000, Step: 9
|
113 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 67/200, Reward: 10.000, Step: 10
|
114 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 68/200, Reward: 12.000, Step: 12
|
115 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 69/200, Reward: 12.000, Step: 12
|
116 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 70/200, Reward: 12.000, Step: 12
|
117 |
-
2023-03-31 22:58:22 - r - INFO: - Current episode 70 has the best eval reward: 12.500
|
118 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 71/200, Reward: 10.000, Step: 10
|
119 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 72/200, Reward: 13.000, Step: 13
|
120 |
-
2023-03-31 22:58:22 - r - INFO: - Episode: 73/200, Reward: 20.000, Step: 20
|
121 |
-
2023-03-31 22:58:23 - r - INFO: - Episode: 74/200, Reward: 12.000, Step: 12
|
122 |
-
2023-03-31 22:58:23 - r - INFO: - Episode: 75/200, Reward: 13.000, Step: 13
|
123 |
-
2023-03-31 22:58:23 - r - INFO: - Current episode 75 has the best eval reward: 13.000
|
124 |
-
2023-03-31 22:58:23 - r - INFO: - Episode: 76/200, Reward: 15.000, Step: 15
|
125 |
-
2023-03-31 22:58:23 - r - INFO: - Episode: 77/200, Reward: 13.000, Step: 13
|
126 |
-
2023-03-31 22:58:23 - r - INFO: - Episode: 78/200, Reward: 19.000, Step: 19
|
127 |
-
2023-03-31 22:58:23 - r - INFO: - Episode: 79/200, Reward: 14.000, Step: 14
|
128 |
-
2023-03-31 22:58:23 - r - INFO: - Episode: 80/200, Reward: 12.000, Step: 12
|
129 |
-
2023-03-31 22:58:23 - r - INFO: - Current episode 80 has the best eval reward: 15.400
|
130 |
-
2023-03-31 22:58:23 - r - INFO: - Episode: 81/200, Reward: 13.000, Step: 13
|
131 |
-
2023-03-31 22:58:24 - r - INFO: - Episode: 82/200, Reward: 14.000, Step: 14
|
132 |
-
2023-03-31 22:58:24 - r - INFO: - Episode: 83/200, Reward: 13.000, Step: 13
|
133 |
-
2023-03-31 22:58:24 - r - INFO: - Episode: 84/200, Reward: 13.000, Step: 13
|
134 |
-
2023-03-31 22:58:24 - r - INFO: - Episode: 85/200, Reward: 14.000, Step: 14
|
135 |
-
2023-03-31 22:58:24 - r - INFO: - Current episode 85 has the best eval reward: 16.000
|
136 |
-
2023-03-31 22:58:24 - r - INFO: - Episode: 86/200, Reward: 18.000, Step: 18
|
137 |
-
2023-03-31 22:58:24 - r - INFO: - Episode: 87/200, Reward: 23.000, Step: 23
|
138 |
-
2023-03-31 22:58:24 - r - INFO: - Episode: 88/200, Reward: 13.000, Step: 13
|
139 |
-
2023-03-31 22:58:24 - r - INFO: - Episode: 89/200, Reward: 13.000, Step: 13
|
140 |
-
2023-03-31 22:58:25 - r - INFO: - Episode: 90/200, Reward: 21.000, Step: 21
|
141 |
-
2023-03-31 22:58:25 - r - INFO: - Current episode 90 has the best eval reward: 18.800
|
142 |
-
2023-03-31 22:58:25 - r - INFO: - Episode: 91/200, Reward: 17.000, Step: 17
|
143 |
-
2023-03-31 22:58:25 - r - INFO: - Episode: 92/200, Reward: 23.000, Step: 23
|
144 |
-
2023-03-31 22:58:25 - r - INFO: - Episode: 93/200, Reward: 16.000, Step: 16
|
145 |
-
2023-03-31 22:58:25 - r - INFO: - Episode: 94/200, Reward: 22.000, Step: 22
|
146 |
-
2023-03-31 22:58:25 - r - INFO: - Episode: 95/200, Reward: 23.000, Step: 23
|
147 |
-
2023-03-31 22:58:26 - r - INFO: - Current episode 95 has the best eval reward: 22.000
|
148 |
-
2023-03-31 22:58:26 - r - INFO: - Episode: 96/200, Reward: 14.000, Step: 14
|
149 |
-
2023-03-31 22:58:26 - r - INFO: - Episode: 97/200, Reward: 20.000, Step: 20
|
150 |
-
2023-03-31 22:58:26 - r - INFO: - Episode: 98/200, Reward: 24.000, Step: 24
|
151 |
-
2023-03-31 22:58:26 - r - INFO: - Episode: 99/200, Reward: 21.000, Step: 21
|
152 |
-
2023-03-31 22:58:26 - r - INFO: - Episode: 100/200, Reward: 22.000, Step: 22
|
153 |
-
2023-03-31 22:58:27 - r - INFO: - Episode: 101/200, Reward: 21.000, Step: 21
|
154 |
-
2023-03-31 22:58:27 - r - INFO: - Episode: 102/200, Reward: 19.000, Step: 19
|
155 |
-
2023-03-31 22:58:27 - r - INFO: - Episode: 103/200, Reward: 18.000, Step: 18
|
156 |
-
2023-03-31 22:58:27 - r - INFO: - Episode: 104/200, Reward: 18.000, Step: 18
|
157 |
-
2023-03-31 22:58:27 - r - INFO: - Episode: 105/200, Reward: 23.000, Step: 23
|
158 |
-
2023-03-31 22:58:27 - r - INFO: - Episode: 106/200, Reward: 16.000, Step: 16
|
159 |
-
2023-03-31 22:58:27 - r - INFO: - Episode: 107/200, Reward: 19.000, Step: 19
|
160 |
-
2023-03-31 22:58:28 - r - INFO: - Episode: 108/200, Reward: 18.000, Step: 18
|
161 |
-
2023-03-31 22:58:28 - r - INFO: - Episode: 109/200, Reward: 21.000, Step: 21
|
162 |
-
2023-03-31 22:58:28 - r - INFO: - Episode: 110/200, Reward: 24.000, Step: 24
|
163 |
-
2023-03-31 22:58:28 - r - INFO: - Current episode 110 has the best eval reward: 23.300
|
164 |
-
2023-03-31 22:58:28 - r - INFO: - Episode: 111/200, Reward: 24.000, Step: 24
|
165 |
-
2023-03-31 22:58:28 - r - INFO: - Episode: 112/200, Reward: 27.000, Step: 27
|
166 |
-
2023-03-31 22:58:29 - r - INFO: - Episode: 113/200, Reward: 35.000, Step: 35
|
167 |
-
2023-03-31 22:58:29 - r - INFO: - Episode: 114/200, Reward: 23.000, Step: 23
|
168 |
-
2023-03-31 22:58:29 - r - INFO: - Episode: 115/200, Reward: 29.000, Step: 29
|
169 |
-
2023-03-31 22:58:29 - r - INFO: - Current episode 115 has the best eval reward: 24.100
|
170 |
-
2023-03-31 22:58:29 - r - INFO: - Episode: 116/200, Reward: 25.000, Step: 25
|
171 |
-
2023-03-31 22:58:29 - r - INFO: - Episode: 117/200, Reward: 20.000, Step: 20
|
172 |
-
2023-03-31 22:58:30 - r - INFO: - Episode: 118/200, Reward: 23.000, Step: 23
|
173 |
-
2023-03-31 22:58:30 - r - INFO: - Episode: 119/200, Reward: 21.000, Step: 21
|
174 |
-
2023-03-31 22:58:30 - r - INFO: - Episode: 120/200, Reward: 23.000, Step: 23
|
175 |
-
2023-03-31 22:58:30 - r - INFO: - Current episode 120 has the best eval reward: 24.500
|
176 |
-
2023-03-31 22:58:30 - r - INFO: - Episode: 121/200, Reward: 17.000, Step: 17
|
177 |
-
2023-03-31 22:58:30 - r - INFO: - Episode: 122/200, Reward: 19.000, Step: 19
|
178 |
-
2023-03-31 22:58:30 - r - INFO: - Episode: 123/200, Reward: 19.000, Step: 19
|
179 |
-
2023-03-31 22:58:31 - r - INFO: - Episode: 124/200, Reward: 21.000, Step: 21
|
180 |
-
2023-03-31 22:58:31 - r - INFO: - Episode: 125/200, Reward: 24.000, Step: 24
|
181 |
-
2023-03-31 22:58:31 - r - INFO: - Current episode 125 has the best eval reward: 25.600
|
182 |
-
2023-03-31 22:58:31 - r - INFO: - Episode: 126/200, Reward: 23.000, Step: 23
|
183 |
-
2023-03-31 22:58:31 - r - INFO: - Episode: 127/200, Reward: 22.000, Step: 22
|
184 |
-
2023-03-31 22:58:31 - r - INFO: - Episode: 128/200, Reward: 23.000, Step: 23
|
185 |
-
2023-03-31 22:58:31 - r - INFO: - Episode: 129/200, Reward: 22.000, Step: 22
|
186 |
-
2023-03-31 22:58:32 - r - INFO: - Episode: 130/200, Reward: 28.000, Step: 28
|
187 |
-
2023-03-31 22:58:32 - r - INFO: - Current episode 130 has the best eval reward: 29.800
|
188 |
-
2023-03-31 22:58:32 - r - INFO: - Episode: 131/200, Reward: 32.000, Step: 32
|
189 |
-
2023-03-31 22:58:32 - r - INFO: - Episode: 132/200, Reward: 35.000, Step: 35
|
190 |
-
2023-03-31 22:58:32 - r - INFO: - Episode: 133/200, Reward: 27.000, Step: 27
|
191 |
-
2023-03-31 22:58:33 - r - INFO: - Episode: 134/200, Reward: 24.000, Step: 24
|
192 |
-
2023-03-31 22:58:33 - r - INFO: - Episode: 135/200, Reward: 37.000, Step: 37
|
193 |
-
2023-03-31 22:58:33 - r - INFO: - Current episode 135 has the best eval reward: 35.700
|
194 |
-
2023-03-31 22:58:33 - r - INFO: - Episode: 136/200, Reward: 33.000, Step: 33
|
195 |
-
2023-03-31 22:58:34 - r - INFO: - Episode: 137/200, Reward: 39.000, Step: 39
|
196 |
-
2023-03-31 22:58:34 - r - INFO: - Episode: 138/200, Reward: 24.000, Step: 24
|
197 |
-
2023-03-31 22:58:34 - r - INFO: - Episode: 139/200, Reward: 24.000, Step: 24
|
198 |
-
2023-03-31 22:58:34 - r - INFO: - Episode: 140/200, Reward: 40.000, Step: 40
|
199 |
-
2023-03-31 22:58:35 - r - INFO: - Current episode 140 has the best eval reward: 40.200
|
200 |
-
2023-03-31 22:58:35 - r - INFO: - Episode: 141/200, Reward: 31.000, Step: 31
|
201 |
-
2023-03-31 22:58:35 - r - INFO: - Episode: 142/200, Reward: 30.000, Step: 30
|
202 |
-
2023-03-31 22:58:35 - r - INFO: - Episode: 143/200, Reward: 25.000, Step: 25
|
203 |
-
2023-03-31 22:58:35 - r - INFO: - Episode: 144/200, Reward: 23.000, Step: 23
|
204 |
-
2023-03-31 22:58:35 - r - INFO: - Episode: 145/200, Reward: 29.000, Step: 29
|
205 |
-
2023-03-31 22:58:36 - r - INFO: - Current episode 145 has the best eval reward: 58.500
|
206 |
-
2023-03-31 22:58:36 - r - INFO: - Episode: 146/200, Reward: 51.000, Step: 51
|
207 |
-
2023-03-31 22:58:37 - r - INFO: - Episode: 147/200, Reward: 73.000, Step: 73
|
208 |
-
2023-03-31 22:58:37 - r - INFO: - Episode: 148/200, Reward: 38.000, Step: 38
|
209 |
-
2023-03-31 22:58:37 - r - INFO: - Episode: 149/200, Reward: 37.000, Step: 37
|
210 |
-
2023-03-31 22:58:37 - r - INFO: - Episode: 150/200, Reward: 32.000, Step: 32
|
211 |
-
2023-03-31 22:58:38 - r - INFO: - Episode: 151/200, Reward: 43.000, Step: 43
|
212 |
-
2023-03-31 22:58:38 - r - INFO: - Episode: 152/200, Reward: 29.000, Step: 29
|
213 |
-
2023-03-31 22:58:38 - r - INFO: - Episode: 153/200, Reward: 33.000, Step: 33
|
214 |
-
2023-03-31 22:58:38 - r - INFO: - Episode: 154/200, Reward: 31.000, Step: 31
|
215 |
-
2023-03-31 22:58:39 - r - INFO: - Episode: 155/200, Reward: 41.000, Step: 41
|
216 |
-
2023-03-31 22:58:39 - r - INFO: - Episode: 156/200, Reward: 79.000, Step: 79
|
217 |
-
2023-03-31 22:58:40 - r - INFO: - Episode: 157/200, Reward: 47.000, Step: 47
|
218 |
-
2023-03-31 22:58:40 - r - INFO: - Episode: 158/200, Reward: 32.000, Step: 32
|
219 |
-
2023-03-31 22:58:40 - r - INFO: - Episode: 159/200, Reward: 36.000, Step: 36
|
220 |
-
2023-03-31 22:58:41 - r - INFO: - Episode: 160/200, Reward: 76.000, Step: 76
|
221 |
-
2023-03-31 22:58:41 - r - INFO: - Current episode 160 has the best eval reward: 75.000
|
222 |
-
2023-03-31 22:58:41 - r - INFO: - Episode: 161/200, Reward: 73.000, Step: 73
|
223 |
-
2023-03-31 22:58:42 - r - INFO: - Episode: 162/200, Reward: 59.000, Step: 59
|
224 |
-
2023-03-31 22:58:42 - r - INFO: - Episode: 163/200, Reward: 102.000, Step: 102
|
225 |
-
2023-03-31 22:58:43 - r - INFO: - Episode: 164/200, Reward: 87.000, Step: 87
|
226 |
-
2023-03-31 22:58:44 - r - INFO: - Episode: 165/200, Reward: 94.000, Step: 94
|
227 |
-
2023-03-31 22:58:44 - r - INFO: - Current episode 165 has the best eval reward: 143.300
|
228 |
-
2023-03-31 22:58:45 - r - INFO: - Episode: 166/200, Reward: 116.000, Step: 116
|
229 |
-
2023-03-31 22:58:46 - r - INFO: - Episode: 167/200, Reward: 135.000, Step: 135
|
230 |
-
2023-03-31 22:58:47 - r - INFO: - Episode: 168/200, Reward: 140.000, Step: 140
|
231 |
-
2023-03-31 22:58:48 - r - INFO: - Episode: 169/200, Reward: 167.000, Step: 167
|
232 |
-
2023-03-31 22:58:49 - r - INFO: - Episode: 170/200, Reward: 128.000, Step: 128
|
233 |
-
2023-03-31 22:58:50 - r - INFO: - Current episode 170 has the best eval reward: 157.400
|
234 |
-
2023-03-31 22:58:51 - r - INFO: - Episode: 171/200, Reward: 200.000, Step: 200
|
235 |
-
2023-03-31 22:58:52 - r - INFO: - Episode: 172/200, Reward: 135.000, Step: 135
|
236 |
-
2023-03-31 22:58:53 - r - INFO: - Episode: 173/200, Reward: 163.000, Step: 163
|
237 |
-
2023-03-31 22:58:54 - r - INFO: - Episode: 174/200, Reward: 180.000, Step: 180
|
238 |
-
2023-03-31 22:58:56 - r - INFO: - Episode: 175/200, Reward: 185.000, Step: 185
|
239 |
-
2023-03-31 22:58:56 - r - INFO: - Current episode 175 has the best eval reward: 165.700
|
240 |
-
2023-03-31 22:58:57 - r - INFO: - Episode: 176/200, Reward: 200.000, Step: 200
|
241 |
-
2023-03-31 22:58:59 - r - INFO: - Episode: 177/200, Reward: 200.000, Step: 200
|
242 |
-
2023-03-31 22:59:00 - r - INFO: - Episode: 178/200, Reward: 200.000, Step: 200
|
243 |
-
2023-03-31 22:59:01 - r - INFO: - Episode: 179/200, Reward: 200.000, Step: 200
|
244 |
-
2023-03-31 22:59:03 - r - INFO: - Episode: 180/200, Reward: 200.000, Step: 200
|
245 |
-
2023-03-31 22:59:04 - r - INFO: - Current episode 180 has the best eval reward: 200.000
|
246 |
-
2023-03-31 22:59:05 - r - INFO: - Episode: 181/200, Reward: 200.000, Step: 200
|
247 |
-
2023-03-31 22:59:06 - r - INFO: - Episode: 182/200, Reward: 200.000, Step: 200
|
248 |
-
2023-03-31 22:59:08 - r - INFO: - Episode: 183/200, Reward: 200.000, Step: 200
|
249 |
-
2023-03-31 22:59:09 - r - INFO: - Episode: 184/200, Reward: 200.000, Step: 200
|
250 |
-
2023-03-31 22:59:10 - r - INFO: - Episode: 185/200, Reward: 200.000, Step: 200
|
251 |
-
2023-03-31 22:59:12 - r - INFO: - Episode: 186/200, Reward: 200.000, Step: 200
|
252 |
-
2023-03-31 22:59:18 - r - INFO: - Episode: 187/200, Reward: 200.000, Step: 200
|
253 |
-
2023-03-31 22:59:22 - r - INFO: - Episode: 188/200, Reward: 200.000, Step: 200
|
254 |
-
2023-03-31 22:59:24 - r - INFO: - Episode: 189/200, Reward: 200.000, Step: 200
|
255 |
-
2023-03-31 22:59:26 - r - INFO: - Episode: 190/200, Reward: 200.000, Step: 200
|
256 |
-
2023-03-31 22:59:28 - r - INFO: - Episode: 191/200, Reward: 200.000, Step: 200
|
257 |
-
2023-03-31 22:59:29 - r - INFO: - Episode: 192/200, Reward: 200.000, Step: 200
|
258 |
-
2023-03-31 22:59:30 - r - INFO: - Episode: 193/200, Reward: 200.000, Step: 200
|
259 |
-
2023-03-31 22:59:32 - r - INFO: - Episode: 194/200, Reward: 200.000, Step: 200
|
260 |
-
2023-03-31 22:59:33 - r - INFO: - Episode: 195/200, Reward: 200.000, Step: 200
|
261 |
-
2023-03-31 22:59:35 - r - INFO: - Episode: 196/200, Reward: 200.000, Step: 200
|
262 |
-
2023-03-31 22:59:37 - r - INFO: - Episode: 197/200, Reward: 200.000, Step: 200
|
263 |
-
2023-03-31 22:59:38 - r - INFO: - Episode: 198/200, Reward: 200.000, Step: 200
|
264 |
-
2023-03-31 22:59:39 - r - INFO: - Episode: 199/200, Reward: 200.000, Step: 200
|
265 |
-
2023-03-31 22:59:40 - r - INFO: - Episode: 200/200, Reward: 200.000, Step: 200
|
266 |
-
2023-03-31 22:59:41 - r - INFO: - Current episode 200 has the best eval reward: 200.000
|
267 |
-
2023-03-31 22:59:41 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/models/checkpoint.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c438616b97ca890557a9e9b1cd42decfc5decc64e5aee660d89158290e92683d
|
3 |
-
size 272471
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/learning_curve.png
DELETED
Binary file (46.2 kB)
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/res.csv
DELETED
@@ -1,201 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,15.0,15
|
3 |
-
1,29.0,29
|
4 |
-
2,13.0,13
|
5 |
-
3,14.0,14
|
6 |
-
4,11.0,11
|
7 |
-
5,39.0,39
|
8 |
-
6,35.0,35
|
9 |
-
7,16.0,16
|
10 |
-
8,13.0,13
|
11 |
-
9,12.0,12
|
12 |
-
10,11.0,11
|
13 |
-
11,34.0,34
|
14 |
-
12,15.0,15
|
15 |
-
13,23.0,23
|
16 |
-
14,9.0,9
|
17 |
-
15,19.0,19
|
18 |
-
16,9.0,9
|
19 |
-
17,10.0,10
|
20 |
-
18,16.0,16
|
21 |
-
19,19.0,19
|
22 |
-
20,11.0,11
|
23 |
-
21,10.0,10
|
24 |
-
22,14.0,14
|
25 |
-
23,12.0,12
|
26 |
-
24,16.0,16
|
27 |
-
25,11.0,11
|
28 |
-
26,10.0,10
|
29 |
-
27,16.0,16
|
30 |
-
28,12.0,12
|
31 |
-
29,16.0,16
|
32 |
-
30,11.0,11
|
33 |
-
31,8.0,8
|
34 |
-
32,8.0,8
|
35 |
-
33,12.0,12
|
36 |
-
34,10.0,10
|
37 |
-
35,9.0,9
|
38 |
-
36,11.0,11
|
39 |
-
37,10.0,10
|
40 |
-
38,11.0,11
|
41 |
-
39,10.0,10
|
42 |
-
40,10.0,10
|
43 |
-
41,10.0,10
|
44 |
-
42,10.0,10
|
45 |
-
43,9.0,9
|
46 |
-
44,11.0,11
|
47 |
-
45,10.0,10
|
48 |
-
46,10.0,10
|
49 |
-
47,11.0,11
|
50 |
-
48,10.0,10
|
51 |
-
49,13.0,13
|
52 |
-
50,18.0,18
|
53 |
-
51,12.0,12
|
54 |
-
52,10.0,10
|
55 |
-
53,10.0,10
|
56 |
-
54,11.0,11
|
57 |
-
55,8.0,8
|
58 |
-
56,16.0,16
|
59 |
-
57,11.0,11
|
60 |
-
58,9.0,9
|
61 |
-
59,9.0,9
|
62 |
-
60,10.0,10
|
63 |
-
61,10.0,10
|
64 |
-
62,9.0,9
|
65 |
-
63,8.0,8
|
66 |
-
64,10.0,10
|
67 |
-
65,9.0,9
|
68 |
-
66,10.0,10
|
69 |
-
67,12.0,12
|
70 |
-
68,12.0,12
|
71 |
-
69,12.0,12
|
72 |
-
70,10.0,10
|
73 |
-
71,13.0,13
|
74 |
-
72,20.0,20
|
75 |
-
73,12.0,12
|
76 |
-
74,13.0,13
|
77 |
-
75,15.0,15
|
78 |
-
76,13.0,13
|
79 |
-
77,19.0,19
|
80 |
-
78,14.0,14
|
81 |
-
79,12.0,12
|
82 |
-
80,13.0,13
|
83 |
-
81,14.0,14
|
84 |
-
82,13.0,13
|
85 |
-
83,13.0,13
|
86 |
-
84,14.0,14
|
87 |
-
85,18.0,18
|
88 |
-
86,23.0,23
|
89 |
-
87,13.0,13
|
90 |
-
88,13.0,13
|
91 |
-
89,21.0,21
|
92 |
-
90,17.0,17
|
93 |
-
91,23.0,23
|
94 |
-
92,16.0,16
|
95 |
-
93,22.0,22
|
96 |
-
94,23.0,23
|
97 |
-
95,14.0,14
|
98 |
-
96,20.0,20
|
99 |
-
97,24.0,24
|
100 |
-
98,21.0,21
|
101 |
-
99,22.0,22
|
102 |
-
100,21.0,21
|
103 |
-
101,19.0,19
|
104 |
-
102,18.0,18
|
105 |
-
103,18.0,18
|
106 |
-
104,23.0,23
|
107 |
-
105,16.0,16
|
108 |
-
106,19.0,19
|
109 |
-
107,18.0,18
|
110 |
-
108,21.0,21
|
111 |
-
109,24.0,24
|
112 |
-
110,24.0,24
|
113 |
-
111,27.0,27
|
114 |
-
112,35.0,35
|
115 |
-
113,23.0,23
|
116 |
-
114,29.0,29
|
117 |
-
115,25.0,25
|
118 |
-
116,20.0,20
|
119 |
-
117,23.0,23
|
120 |
-
118,21.0,21
|
121 |
-
119,23.0,23
|
122 |
-
120,17.0,17
|
123 |
-
121,19.0,19
|
124 |
-
122,19.0,19
|
125 |
-
123,21.0,21
|
126 |
-
124,24.0,24
|
127 |
-
125,23.0,23
|
128 |
-
126,22.0,22
|
129 |
-
127,23.0,23
|
130 |
-
128,22.0,22
|
131 |
-
129,28.0,28
|
132 |
-
130,32.0,32
|
133 |
-
131,35.0,35
|
134 |
-
132,27.0,27
|
135 |
-
133,24.0,24
|
136 |
-
134,37.0,37
|
137 |
-
135,33.0,33
|
138 |
-
136,39.0,39
|
139 |
-
137,24.0,24
|
140 |
-
138,24.0,24
|
141 |
-
139,40.0,40
|
142 |
-
140,31.0,31
|
143 |
-
141,30.0,30
|
144 |
-
142,25.0,25
|
145 |
-
143,23.0,23
|
146 |
-
144,29.0,29
|
147 |
-
145,51.0,51
|
148 |
-
146,73.0,73
|
149 |
-
147,38.0,38
|
150 |
-
148,37.0,37
|
151 |
-
149,32.0,32
|
152 |
-
150,43.0,43
|
153 |
-
151,29.0,29
|
154 |
-
152,33.0,33
|
155 |
-
153,31.0,31
|
156 |
-
154,41.0,41
|
157 |
-
155,79.0,79
|
158 |
-
156,47.0,47
|
159 |
-
157,32.0,32
|
160 |
-
158,36.0,36
|
161 |
-
159,76.0,76
|
162 |
-
160,73.0,73
|
163 |
-
161,59.0,59
|
164 |
-
162,102.0,102
|
165 |
-
163,87.0,87
|
166 |
-
164,94.0,94
|
167 |
-
165,116.0,116
|
168 |
-
166,135.0,135
|
169 |
-
167,140.0,140
|
170 |
-
168,167.0,167
|
171 |
-
169,128.0,128
|
172 |
-
170,200.0,200
|
173 |
-
171,135.0,135
|
174 |
-
172,163.0,163
|
175 |
-
173,180.0,180
|
176 |
-
174,185.0,185
|
177 |
-
175,200.0,200
|
178 |
-
176,200.0,200
|
179 |
-
177,200.0,200
|
180 |
-
178,200.0,200
|
181 |
-
179,200.0,200
|
182 |
-
180,200.0,200
|
183 |
-
181,200.0,200
|
184 |
-
182,200.0,200
|
185 |
-
183,200.0,200
|
186 |
-
184,200.0,200
|
187 |
-
185,200.0,200
|
188 |
-
186,200.0,200
|
189 |
-
187,200.0,200
|
190 |
-
188,200.0,200
|
191 |
-
189,200.0,200
|
192 |
-
190,200.0,200
|
193 |
-
191,200.0,200
|
194 |
-
192,200.0,200
|
195 |
-
193,200.0,200
|
196 |
-
194,200.0,200
|
197 |
-
195,200.0,200
|
198 |
-
196,200.0,200
|
199 |
-
197,200.0,200
|
200 |
-
198,200.0,200
|
201 |
-
199,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/tb_logs/events.out.tfevents.1680274695.DESKTOP-H34HQIQ.317208.0
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:e4daaaaabe093b8f9d6baf9504a0c5b9e14d2ea89477d20323c5eacbf5942b64
|
3 |
-
size 40
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/config.yaml
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: PER_DQN
|
3 |
-
device: cpu
|
4 |
-
env_name: gym
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: false
|
8 |
-
load_path: Train_CartPole-v1_PER_DQN
|
9 |
-
max_steps: 200
|
10 |
-
mode: train
|
11 |
-
mp_backend: mp
|
12 |
-
n_workers: 2
|
13 |
-
new_step_api: true
|
14 |
-
render: false
|
15 |
-
render_mode: human
|
16 |
-
save_fig: true
|
17 |
-
seed: 1
|
18 |
-
show_fig: false
|
19 |
-
test_eps: 10
|
20 |
-
train_eps: 200
|
21 |
-
wrapper: null
|
22 |
-
algo_cfg:
|
23 |
-
batch_size: 64
|
24 |
-
buffer_size: 100000
|
25 |
-
epsilon_decay: 500
|
26 |
-
epsilon_end: 0.01
|
27 |
-
epsilon_start: 0.95
|
28 |
-
gamma: 0.99
|
29 |
-
hidden_dim: 256
|
30 |
-
lr: 0.0001
|
31 |
-
per_alpha: 0.6
|
32 |
-
per_beta: 0.4
|
33 |
-
per_beta_annealing: 0.001
|
34 |
-
per_epsilon: 0.01
|
35 |
-
target_update: 4
|
36 |
-
value_layers:
|
37 |
-
- activation: relu
|
38 |
-
layer_dim:
|
39 |
-
- n_states
|
40 |
-
- 256
|
41 |
-
layer_type: linear
|
42 |
-
- activation: relu
|
43 |
-
layer_dim:
|
44 |
-
- 256
|
45 |
-
- 256
|
46 |
-
layer_type: linear
|
47 |
-
- activation: none
|
48 |
-
layer_dim:
|
49 |
-
- 256
|
50 |
-
- n_actions
|
51 |
-
layer_type: linear
|
52 |
-
env_cfg:
|
53 |
-
id: CartPole-v1
|
54 |
-
new_step_api: true
|
55 |
-
render_mode: null
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/logs/log.txt
DELETED
@@ -1,48 +0,0 @@
|
|
1 |
-
2023-04-15 21:50:02 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-15 21:50:02 - r - INFO: - ================================================================================
|
3 |
-
2023-04-15 21:50:02 - r - INFO: - Name Value Type
|
4 |
-
2023-04-15 21:50:02 - r - INFO: - env_name gym <class 'str'>
|
5 |
-
2023-04-15 21:50:02 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-15 21:50:02 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-15 21:50:02 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-15 21:50:02 - r - INFO: - render_mode None <class 'str'>
|
9 |
-
2023-04-15 21:50:02 - r - INFO: - algo_name PER_DQN <class 'str'>
|
10 |
-
2023-04-15 21:50:02 - r - INFO: - mode train <class 'str'>
|
11 |
-
2023-04-15 21:50:02 - r - INFO: - mp_backend mp <class 'str'>
|
12 |
-
2023-04-15 21:50:02 - r - INFO: - seed 1 <class 'int'>
|
13 |
-
2023-04-15 21:50:02 - r - INFO: - device cpu <class 'str'>
|
14 |
-
2023-04-15 21:50:02 - r - INFO: - train_eps 200 <class 'int'>
|
15 |
-
2023-04-15 21:50:02 - r - INFO: - test_eps 10 <class 'int'>
|
16 |
-
2023-04-15 21:50:02 - r - INFO: - eval_eps 10 <class 'int'>
|
17 |
-
2023-04-15 21:50:02 - r - INFO: - eval_per_episode 5 <class 'int'>
|
18 |
-
2023-04-15 21:50:02 - r - INFO: - max_steps 200 <class 'int'>
|
19 |
-
2023-04-15 21:50:02 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
20 |
-
2023-04-15 21:50:02 - r - INFO: - load_path Train_CartPole-v1_PER_DQN <class 'str'>
|
21 |
-
2023-04-15 21:50:02 - r - INFO: - show_fig 0 <class 'bool'>
|
22 |
-
2023-04-15 21:50:02 - r - INFO: - save_fig 1 <class 'bool'>
|
23 |
-
2023-04-15 21:50:02 - r - INFO: - n_workers 2 <class 'int'>
|
24 |
-
2023-04-15 21:50:02 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
-
2023-04-15 21:50:02 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
-
2023-04-15 21:50:02 - r - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
-
2023-04-15 21:50:02 - r - INFO: - hidden_dim 256 <class 'int'>
|
28 |
-
2023-04-15 21:50:02 - r - INFO: - gamma 0.99 <class 'float'>
|
29 |
-
2023-04-15 21:50:02 - r - INFO: - lr 0.0001 <class 'float'>
|
30 |
-
2023-04-15 21:50:02 - r - INFO: - buffer_size 100000 <class 'int'>
|
31 |
-
2023-04-15 21:50:02 - r - INFO: - per_alpha 0.6 <class 'float'>
|
32 |
-
2023-04-15 21:50:02 - r - INFO: - per_beta 0.4 <class 'float'>
|
33 |
-
2023-04-15 21:50:02 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
|
34 |
-
2023-04-15 21:50:02 - r - INFO: - per_epsilon 0.01 <class 'float'>
|
35 |
-
2023-04-15 21:50:02 - r - INFO: - batch_size 64 <class 'int'>
|
36 |
-
2023-04-15 21:50:02 - r - INFO: - target_update 4 <class 'int'>
|
37 |
-
2023-04-15 21:50:02 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
38 |
-
2023-04-15 21:50:02 - r - INFO: - id CartPole-v1 <class 'str'>
|
39 |
-
2023-04-15 21:50:02 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002 <class 'str'>
|
40 |
-
2023-04-15 21:50:02 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002/results <class 'str'>
|
41 |
-
2023-04-15 21:50:02 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002/logs <class 'str'>
|
42 |
-
2023-04-15 21:50:02 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002/traj <class 'str'>
|
43 |
-
2023-04-15 21:50:02 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002/videos <class 'str'>
|
44 |
-
2023-04-15 21:50:02 - r - INFO: - ================================================================================
|
45 |
-
2023-04-15 21:50:02 - r - INFO: - n_states: 4, n_actions: 2
|
46 |
-
2023-04-15 21:50:02 - r - INFO: - Start training!
|
47 |
-
2023-04-15 21:50:02 - r - INFO: - Env: gym, Algorithm: PER_DQN, Device: cpu
|
48 |
-
2023-04-15 21:51:00 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/models/checkpoint.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:d4251c7f141686d5391c5c933b493b27a184102ccf1596bead1dccaa6cc0bd9a
|
3 |
-
size 272407
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/learning_curve.png
DELETED
Binary file (45.2 kB)
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/res.csv
DELETED
@@ -1,202 +0,0 @@
|
|
1 |
-
episodes,rewards
|
2 |
-
0,18.0
|
3 |
-
1,19.0
|
4 |
-
2,17.0
|
5 |
-
3,24.0
|
6 |
-
4,18.0
|
7 |
-
5,17.0
|
8 |
-
6,13.0
|
9 |
-
7,16.0
|
10 |
-
8,21.0
|
11 |
-
9,30.0
|
12 |
-
10,17.0
|
13 |
-
11,18.0
|
14 |
-
12,11.0
|
15 |
-
13,13.0
|
16 |
-
14,16.0
|
17 |
-
15,14.0
|
18 |
-
16,28.0
|
19 |
-
17,12.0
|
20 |
-
18,14.0
|
21 |
-
19,19.0
|
22 |
-
20,11.0
|
23 |
-
21,10.0
|
24 |
-
22,31.0
|
25 |
-
23,23.0
|
26 |
-
24,22.0
|
27 |
-
25,11.0
|
28 |
-
26,16.0
|
29 |
-
27,12.0
|
30 |
-
28,12.0
|
31 |
-
29,16.0
|
32 |
-
30,12.0
|
33 |
-
31,16.0
|
34 |
-
32,14.0
|
35 |
-
33,21.0
|
36 |
-
34,12.0
|
37 |
-
35,9.0
|
38 |
-
36,9.0
|
39 |
-
37,26.0
|
40 |
-
38,11.0
|
41 |
-
39,22.0
|
42 |
-
40,17.0
|
43 |
-
41,21.0
|
44 |
-
42,16.0
|
45 |
-
43,27.0
|
46 |
-
44,13.0
|
47 |
-
45,18.0
|
48 |
-
46,19.0
|
49 |
-
47,11.0
|
50 |
-
48,11.0
|
51 |
-
49,16.0
|
52 |
-
50,10.0
|
53 |
-
51,9.0
|
54 |
-
52,9.0
|
55 |
-
53,16.0
|
56 |
-
54,9.0
|
57 |
-
55,12.0
|
58 |
-
56,11.0
|
59 |
-
57,11.0
|
60 |
-
58,10.0
|
61 |
-
59,12.0
|
62 |
-
60,10.0
|
63 |
-
61,14.0
|
64 |
-
62,11.0
|
65 |
-
63,12.0
|
66 |
-
64,12.0
|
67 |
-
65,18.0
|
68 |
-
66,12.0
|
69 |
-
67,16.0
|
70 |
-
68,14.0
|
71 |
-
69,23.0
|
72 |
-
70,20.0
|
73 |
-
71,23.0
|
74 |
-
72,17.0
|
75 |
-
73,18.0
|
76 |
-
74,22.0
|
77 |
-
75,22.0
|
78 |
-
76,49.0
|
79 |
-
77,24.0
|
80 |
-
78,60.0
|
81 |
-
79,35.0
|
82 |
-
80,51.0
|
83 |
-
81,78.0
|
84 |
-
82,49.0
|
85 |
-
83,75.0
|
86 |
-
84,100.0
|
87 |
-
85,78.0
|
88 |
-
86,61.0
|
89 |
-
87,65.0
|
90 |
-
88,86.0
|
91 |
-
89,105.0
|
92 |
-
90,54.0
|
93 |
-
91,60.0
|
94 |
-
92,37.0
|
95 |
-
93,149.0
|
96 |
-
94,44.0
|
97 |
-
95,104.0
|
98 |
-
96,200.0
|
99 |
-
97,112.0
|
100 |
-
98,163.0
|
101 |
-
99,167.0
|
102 |
-
100,113.0
|
103 |
-
101,152.0
|
104 |
-
102,200.0
|
105 |
-
103,200.0
|
106 |
-
104,200.0
|
107 |
-
105,200.0
|
108 |
-
106,200.0
|
109 |
-
107,200.0
|
110 |
-
108,200.0
|
111 |
-
109,200.0
|
112 |
-
110,200.0
|
113 |
-
111,200.0
|
114 |
-
112,200.0
|
115 |
-
113,200.0
|
116 |
-
114,200.0
|
117 |
-
115,200.0
|
118 |
-
116,200.0
|
119 |
-
117,200.0
|
120 |
-
118,200.0
|
121 |
-
119,200.0
|
122 |
-
120,200.0
|
123 |
-
121,200.0
|
124 |
-
122,200.0
|
125 |
-
123,200.0
|
126 |
-
124,200.0
|
127 |
-
125,200.0
|
128 |
-
126,200.0
|
129 |
-
127,200.0
|
130 |
-
128,200.0
|
131 |
-
129,200.0
|
132 |
-
130,191.0
|
133 |
-
131,200.0
|
134 |
-
132,189.0
|
135 |
-
133,200.0
|
136 |
-
134,200.0
|
137 |
-
135,200.0
|
138 |
-
136,185.0
|
139 |
-
137,200.0
|
140 |
-
138,197.0
|
141 |
-
139,200.0
|
142 |
-
140,188.0
|
143 |
-
141,200.0
|
144 |
-
142,199.0
|
145 |
-
143,200.0
|
146 |
-
144,200.0
|
147 |
-
145,200.0
|
148 |
-
146,200.0
|
149 |
-
147,200.0
|
150 |
-
148,200.0
|
151 |
-
149,200.0
|
152 |
-
150,200.0
|
153 |
-
151,200.0
|
154 |
-
152,200.0
|
155 |
-
153,200.0
|
156 |
-
154,200.0
|
157 |
-
155,200.0
|
158 |
-
156,200.0
|
159 |
-
157,200.0
|
160 |
-
158,200.0
|
161 |
-
159,200.0
|
162 |
-
160,200.0
|
163 |
-
161,200.0
|
164 |
-
162,200.0
|
165 |
-
163,200.0
|
166 |
-
164,200.0
|
167 |
-
165,200.0
|
168 |
-
166,200.0
|
169 |
-
167,200.0
|
170 |
-
168,200.0
|
171 |
-
169,200.0
|
172 |
-
170,200.0
|
173 |
-
171,200.0
|
174 |
-
172,200.0
|
175 |
-
173,200.0
|
176 |
-
174,200.0
|
177 |
-
175,200.0
|
178 |
-
176,200.0
|
179 |
-
177,200.0
|
180 |
-
178,200.0
|
181 |
-
179,200.0
|
182 |
-
180,200.0
|
183 |
-
181,200.0
|
184 |
-
182,200.0
|
185 |
-
183,200.0
|
186 |
-
184,200.0
|
187 |
-
185,200.0
|
188 |
-
186,200.0
|
189 |
-
187,200.0
|
190 |
-
188,200.0
|
191 |
-
189,200.0
|
192 |
-
190,200.0
|
193 |
-
191,200.0
|
194 |
-
192,200.0
|
195 |
-
193,200.0
|
196 |
-
194,200.0
|
197 |
-
195,200.0
|
198 |
-
196,200.0
|
199 |
-
197,200.0
|
200 |
-
198,200.0
|
201 |
-
199,200.0
|
202 |
-
200,200.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/config.yaml
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: PER_DQN
|
3 |
-
device: cpu
|
4 |
-
env_name: gym
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: false
|
8 |
-
load_path: Train_CartPole-v1_PER_DQN
|
9 |
-
max_steps: 200
|
10 |
-
mode: train
|
11 |
-
mp_backend: ray
|
12 |
-
n_workers: 2
|
13 |
-
new_step_api: true
|
14 |
-
render: false
|
15 |
-
render_mode: human
|
16 |
-
save_fig: true
|
17 |
-
seed: 1
|
18 |
-
show_fig: false
|
19 |
-
test_eps: 10
|
20 |
-
train_eps: 250
|
21 |
-
wrapper: null
|
22 |
-
algo_cfg:
|
23 |
-
batch_size: 64
|
24 |
-
buffer_size: 100000
|
25 |
-
epsilon_decay: 500
|
26 |
-
epsilon_end: 0.01
|
27 |
-
epsilon_start: 0.95
|
28 |
-
gamma: 0.99
|
29 |
-
hidden_dim: 256
|
30 |
-
lr: 0.0001
|
31 |
-
per_alpha: 0.6
|
32 |
-
per_beta: 0.4
|
33 |
-
per_beta_annealing: 0.001
|
34 |
-
per_epsilon: 0.01
|
35 |
-
target_update: 4
|
36 |
-
value_layers:
|
37 |
-
- activation: relu
|
38 |
-
layer_dim:
|
39 |
-
- n_states
|
40 |
-
- 256
|
41 |
-
layer_type: linear
|
42 |
-
- activation: relu
|
43 |
-
layer_dim:
|
44 |
-
- 256
|
45 |
-
- 256
|
46 |
-
layer_type: linear
|
47 |
-
- activation: none
|
48 |
-
layer_dim:
|
49 |
-
- 256
|
50 |
-
- n_actions
|
51 |
-
layer_type: linear
|
52 |
-
env_cfg:
|
53 |
-
id: CartPole-v1
|
54 |
-
new_step_api: true
|
55 |
-
render_mode: null
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/logs/log.txt
DELETED
@@ -1,48 +0,0 @@
|
|
1 |
-
2023-04-15 21:57:38 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-15 21:57:38 - r - INFO: - ================================================================================
|
3 |
-
2023-04-15 21:57:38 - r - INFO: - Name Value Type
|
4 |
-
2023-04-15 21:57:38 - r - INFO: - env_name gym <class 'str'>
|
5 |
-
2023-04-15 21:57:38 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-15 21:57:38 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-15 21:57:38 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-15 21:57:38 - r - INFO: - render_mode None <class 'str'>
|
9 |
-
2023-04-15 21:57:38 - r - INFO: - algo_name PER_DQN <class 'str'>
|
10 |
-
2023-04-15 21:57:38 - r - INFO: - mode train <class 'str'>
|
11 |
-
2023-04-15 21:57:38 - r - INFO: - mp_backend ray <class 'str'>
|
12 |
-
2023-04-15 21:57:38 - r - INFO: - seed 1 <class 'int'>
|
13 |
-
2023-04-15 21:57:38 - r - INFO: - device cpu <class 'str'>
|
14 |
-
2023-04-15 21:57:38 - r - INFO: - train_eps 250 <class 'int'>
|
15 |
-
2023-04-15 21:57:38 - r - INFO: - test_eps 10 <class 'int'>
|
16 |
-
2023-04-15 21:57:38 - r - INFO: - eval_eps 10 <class 'int'>
|
17 |
-
2023-04-15 21:57:38 - r - INFO: - eval_per_episode 5 <class 'int'>
|
18 |
-
2023-04-15 21:57:38 - r - INFO: - max_steps 200 <class 'int'>
|
19 |
-
2023-04-15 21:57:38 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
20 |
-
2023-04-15 21:57:38 - r - INFO: - load_path Train_CartPole-v1_PER_DQN <class 'str'>
|
21 |
-
2023-04-15 21:57:38 - r - INFO: - show_fig 0 <class 'bool'>
|
22 |
-
2023-04-15 21:57:38 - r - INFO: - save_fig 1 <class 'bool'>
|
23 |
-
2023-04-15 21:57:38 - r - INFO: - n_workers 2 <class 'int'>
|
24 |
-
2023-04-15 21:57:38 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
-
2023-04-15 21:57:38 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
-
2023-04-15 21:57:38 - r - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
-
2023-04-15 21:57:38 - r - INFO: - hidden_dim 256 <class 'int'>
|
28 |
-
2023-04-15 21:57:38 - r - INFO: - gamma 0.99 <class 'float'>
|
29 |
-
2023-04-15 21:57:38 - r - INFO: - lr 0.0001 <class 'float'>
|
30 |
-
2023-04-15 21:57:38 - r - INFO: - buffer_size 100000 <class 'int'>
|
31 |
-
2023-04-15 21:57:38 - r - INFO: - per_alpha 0.6 <class 'float'>
|
32 |
-
2023-04-15 21:57:38 - r - INFO: - per_beta 0.4 <class 'float'>
|
33 |
-
2023-04-15 21:57:38 - r - INFO: - per_beta_annealing 0.001 <class 'float'>
|
34 |
-
2023-04-15 21:57:38 - r - INFO: - per_epsilon 0.01 <class 'float'>
|
35 |
-
2023-04-15 21:57:38 - r - INFO: - batch_size 64 <class 'int'>
|
36 |
-
2023-04-15 21:57:38 - r - INFO: - target_update 4 <class 'int'>
|
37 |
-
2023-04-15 21:57:38 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
38 |
-
2023-04-15 21:57:38 - r - INFO: - id CartPole-v1 <class 'str'>
|
39 |
-
2023-04-15 21:57:38 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738 <class 'str'>
|
40 |
-
2023-04-15 21:57:38 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738/results <class 'str'>
|
41 |
-
2023-04-15 21:57:38 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738/logs <class 'str'>
|
42 |
-
2023-04-15 21:57:38 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738/traj <class 'str'>
|
43 |
-
2023-04-15 21:57:38 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738/videos <class 'str'>
|
44 |
-
2023-04-15 21:57:38 - r - INFO: - ================================================================================
|
45 |
-
2023-04-15 21:57:40 - r - INFO: - n_states: 4, n_actions: 2
|
46 |
-
2023-04-15 21:57:40 - r - INFO: - Start training!
|
47 |
-
2023-04-15 21:57:40 - r - INFO: - Env: gym, Algorithm: PER_DQN, Device: cpu
|
48 |
-
2023-04-15 22:00:44 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/models/checkpoint.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:0efe3ec576afef2311748067e61af0fe6c939f7a2c2a1500001987a5d0092ce3
|
3 |
-
size 272407
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/learning_curve.png
DELETED
Binary file (52.8 kB)
|
|
ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/res.csv
DELETED
@@ -1,251 +0,0 @@
|
|
1 |
-
episodes,rewards
|
2 |
-
0,18.0
|
3 |
-
1,18.0
|
4 |
-
2,39.0
|
5 |
-
3,28.0
|
6 |
-
4,15.0
|
7 |
-
5,14.0
|
8 |
-
6,39.0
|
9 |
-
7,52.0
|
10 |
-
8,36.0
|
11 |
-
9,28.0
|
12 |
-
10,13.0
|
13 |
-
11,16.0
|
14 |
-
12,20.0
|
15 |
-
13,19.0
|
16 |
-
14,31.0
|
17 |
-
15,11.0
|
18 |
-
16,10.0
|
19 |
-
17,22.0
|
20 |
-
18,23.0
|
21 |
-
19,16.0
|
22 |
-
20,11.0
|
23 |
-
21,12.0
|
24 |
-
22,12.0
|
25 |
-
23,12.0
|
26 |
-
24,16.0
|
27 |
-
25,14.0
|
28 |
-
26,16.0
|
29 |
-
27,12.0
|
30 |
-
28,21.0
|
31 |
-
29,25.0
|
32 |
-
30,9.0
|
33 |
-
31,10.0
|
34 |
-
32,9.0
|
35 |
-
33,41.0
|
36 |
-
34,22.0
|
37 |
-
35,19.0
|
38 |
-
36,13.0
|
39 |
-
37,12.0
|
40 |
-
38,16.0
|
41 |
-
39,13.0
|
42 |
-
40,13.0
|
43 |
-
41,9.0
|
44 |
-
42,11.0
|
45 |
-
43,13.0
|
46 |
-
44,11.0
|
47 |
-
45,11.0
|
48 |
-
46,11.0
|
49 |
-
47,11.0
|
50 |
-
48,10.0
|
51 |
-
49,11.0
|
52 |
-
50,10.0
|
53 |
-
51,14.0
|
54 |
-
52,12.0
|
55 |
-
53,9.0
|
56 |
-
54,10.0
|
57 |
-
55,9.0
|
58 |
-
56,10.0
|
59 |
-
57,10.0
|
60 |
-
58,12.0
|
61 |
-
59,9.0
|
62 |
-
60,10.0
|
63 |
-
61,9.0
|
64 |
-
62,11.0
|
65 |
-
63,13.0
|
66 |
-
64,10.0
|
67 |
-
65,12.0
|
68 |
-
66,15.0
|
69 |
-
67,9.0
|
70 |
-
68,11.0
|
71 |
-
69,10.0
|
72 |
-
70,10.0
|
73 |
-
71,9.0
|
74 |
-
72,10.0
|
75 |
-
73,9.0
|
76 |
-
74,11.0
|
77 |
-
75,9.0
|
78 |
-
76,10.0
|
79 |
-
77,9.0
|
80 |
-
78,9.0
|
81 |
-
79,11.0
|
82 |
-
80,11.0
|
83 |
-
81,10.0
|
84 |
-
82,12.0
|
85 |
-
83,29.0
|
86 |
-
84,14.0
|
87 |
-
85,11.0
|
88 |
-
86,14.0
|
89 |
-
87,10.0
|
90 |
-
88,10.0
|
91 |
-
89,15.0
|
92 |
-
90,18.0
|
93 |
-
91,16.0
|
94 |
-
92,15.0
|
95 |
-
93,17.0
|
96 |
-
94,12.0
|
97 |
-
95,70.0
|
98 |
-
96,27.0
|
99 |
-
97,23.0
|
100 |
-
98,115.0
|
101 |
-
99,77.0
|
102 |
-
100,34.0
|
103 |
-
101,25.0
|
104 |
-
102,18.0
|
105 |
-
103,24.0
|
106 |
-
104,19.0
|
107 |
-
105,29.0
|
108 |
-
106,33.0
|
109 |
-
107,77.0
|
110 |
-
108,44.0
|
111 |
-
109,35.0
|
112 |
-
110,51.0
|
113 |
-
111,31.0
|
114 |
-
112,53.0
|
115 |
-
113,28.0
|
116 |
-
114,33.0
|
117 |
-
115,47.0
|
118 |
-
116,69.0
|
119 |
-
117,30.0
|
120 |
-
118,30.0
|
121 |
-
119,59.0
|
122 |
-
120,41.0
|
123 |
-
121,33.0
|
124 |
-
122,82.0
|
125 |
-
123,58.0
|
126 |
-
124,31.0
|
127 |
-
125,40.0
|
128 |
-
126,38.0
|
129 |
-
127,57.0
|
130 |
-
128,34.0
|
131 |
-
129,47.0
|
132 |
-
130,36.0
|
133 |
-
131,32.0
|
134 |
-
132,38.0
|
135 |
-
133,37.0
|
136 |
-
134,57.0
|
137 |
-
135,33.0
|
138 |
-
136,52.0
|
139 |
-
137,72.0
|
140 |
-
138,55.0
|
141 |
-
139,88.0
|
142 |
-
140,50.0
|
143 |
-
141,35.0
|
144 |
-
142,49.0
|
145 |
-
143,35.0
|
146 |
-
144,54.0
|
147 |
-
145,39.0
|
148 |
-
146,34.0
|
149 |
-
147,47.0
|
150 |
-
148,34.0
|
151 |
-
149,61.0
|
152 |
-
150,39.0
|
153 |
-
151,54.0
|
154 |
-
152,69.0
|
155 |
-
153,72.0
|
156 |
-
154,65.0
|
157 |
-
155,51.0
|
158 |
-
156,101.0
|
159 |
-
157,40.0
|
160 |
-
158,49.0
|
161 |
-
159,65.0
|
162 |
-
160,43.0
|
163 |
-
161,47.0
|
164 |
-
162,154.0
|
165 |
-
163,88.0
|
166 |
-
164,99.0
|
167 |
-
165,72.0
|
168 |
-
166,152.0
|
169 |
-
167,53.0
|
170 |
-
168,74.0
|
171 |
-
169,87.0
|
172 |
-
170,62.0
|
173 |
-
171,104.0
|
174 |
-
172,80.0
|
175 |
-
173,113.0
|
176 |
-
174,75.0
|
177 |
-
175,200.0
|
178 |
-
176,69.0
|
179 |
-
177,200.0
|
180 |
-
178,200.0
|
181 |
-
179,200.0
|
182 |
-
180,130.0
|
183 |
-
181,200.0
|
184 |
-
182,150.0
|
185 |
-
183,191.0
|
186 |
-
184,200.0
|
187 |
-
185,200.0
|
188 |
-
186,200.0
|
189 |
-
187,196.0
|
190 |
-
188,175.0
|
191 |
-
189,200.0
|
192 |
-
190,200.0
|
193 |
-
191,200.0
|
194 |
-
192,200.0
|
195 |
-
193,200.0
|
196 |
-
194,200.0
|
197 |
-
195,200.0
|
198 |
-
196,200.0
|
199 |
-
197,200.0
|
200 |
-
198,200.0
|
201 |
-
199,200.0
|
202 |
-
200,197.0
|
203 |
-
201,200.0
|
204 |
-
202,200.0
|
205 |
-
203,200.0
|
206 |
-
204,200.0
|
207 |
-
205,200.0
|
208 |
-
206,200.0
|
209 |
-
207,200.0
|
210 |
-
208,200.0
|
211 |
-
209,200.0
|
212 |
-
210,200.0
|
213 |
-
211,200.0
|
214 |
-
212,200.0
|
215 |
-
213,200.0
|
216 |
-
214,200.0
|
217 |
-
215,200.0
|
218 |
-
216,200.0
|
219 |
-
217,200.0
|
220 |
-
218,200.0
|
221 |
-
219,200.0
|
222 |
-
220,200.0
|
223 |
-
221,200.0
|
224 |
-
222,200.0
|
225 |
-
223,200.0
|
226 |
-
224,200.0
|
227 |
-
225,200.0
|
228 |
-
226,200.0
|
229 |
-
227,200.0
|
230 |
-
228,200.0
|
231 |
-
229,200.0
|
232 |
-
230,200.0
|
233 |
-
231,200.0
|
234 |
-
232,200.0
|
235 |
-
233,200.0
|
236 |
-
234,200.0
|
237 |
-
235,200.0
|
238 |
-
236,200.0
|
239 |
-
237,200.0
|
240 |
-
238,200.0
|
241 |
-
239,200.0
|
242 |
-
240,200.0
|
243 |
-
241,200.0
|
244 |
-
242,200.0
|
245 |
-
243,200.0
|
246 |
-
244,200.0
|
247 |
-
245,200.0
|
248 |
-
246,200.0
|
249 |
-
247,200.0
|
250 |
-
248,200.0
|
251 |
-
249,200.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/config.yaml
DELETED
@@ -1,45 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: DQN
|
3 |
-
collect_traj: true
|
4 |
-
device: cpu
|
5 |
-
env_name: gym
|
6 |
-
load_checkpoint: false
|
7 |
-
load_model_step: best
|
8 |
-
load_path: Train_single_CartPole-v1_DQN_20230515-211721
|
9 |
-
max_episode: 100
|
10 |
-
max_step: 200
|
11 |
-
mode: train
|
12 |
-
model_save_fre: 500
|
13 |
-
mp_backend: ray
|
14 |
-
n_workers: 2
|
15 |
-
online_eval: true
|
16 |
-
online_eval_episode: 10
|
17 |
-
save_fig: true
|
18 |
-
seed: 1
|
19 |
-
show_fig: false
|
20 |
-
algo_cfg:
|
21 |
-
batch_size: 64
|
22 |
-
buffer_size: 100000
|
23 |
-
buffer_type: REPLAY_QUE
|
24 |
-
epsilon_decay: 500
|
25 |
-
epsilon_end: 0.01
|
26 |
-
epsilon_start: 0.95
|
27 |
-
gamma: 0.95
|
28 |
-
lr: 0.0001
|
29 |
-
target_update: 4
|
30 |
-
value_layers:
|
31 |
-
- activation: relu
|
32 |
-
layer_dim:
|
33 |
-
- 256
|
34 |
-
layer_type: linear
|
35 |
-
- activation: relu
|
36 |
-
layer_dim:
|
37 |
-
- 256
|
38 |
-
layer_type: linear
|
39 |
-
env_cfg:
|
40 |
-
id: CartPole-v1
|
41 |
-
ignore_params:
|
42 |
-
- wrapper
|
43 |
-
- ignore_params
|
44 |
-
render_mode: null
|
45 |
-
wrapper: null
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/logs/log.txt
DELETED
@@ -1,166 +0,0 @@
|
|
1 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - General Configs:
|
2 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================
|
3 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - Name Value Type
|
4 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - env_name gym <class 'str'>
|
5 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - algo_name DQN <class 'str'>
|
6 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - mode train <class 'str'>
|
7 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - collect_traj 1 <class 'bool'>
|
8 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - mp_backend ray <class 'str'>
|
9 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - n_workers 2 <class 'int'>
|
10 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - seed 1 <class 'int'>
|
11 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - device cpu <class 'str'>
|
12 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - max_episode 100 <class 'int'>
|
13 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
14 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
15 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
16 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
|
17 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 <class 'str'>
|
18 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - show_fig 0 <class 'bool'>
|
19 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - save_fig 1 <class 'bool'>
|
20 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
21 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
|
22 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================
|
23 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - Algo Configs:
|
24 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================
|
25 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - Name Value Type
|
26 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
|
27 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
|
28 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
|
29 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - gamma 0.95 <class 'float'>
|
30 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
31 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
|
32 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - batch_size 64 <class 'int'>
|
33 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - target_update 4 <class 'int'>
|
34 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
35 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
|
36 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================
|
37 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - Env Configs:
|
38 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================
|
39 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - Name Value Type
|
40 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
|
41 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - render_mode None <class 'str'>
|
42 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - wrapper None <class 'str'>
|
43 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
44 |
-
2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================
|
45 |
-
2023-05-15 22:19:21 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
|
46 |
-
2023-05-15 22:19:24 - RayLog - INFO: - Worker 0 finished episode 0 with reward 12.0 in 12 steps
|
47 |
-
2023-05-15 22:19:24 - RayLog - INFO: - Worker 1 finished episode 0 with reward 22.0 in 22 steps
|
48 |
-
2023-05-15 22:19:24 - RayLog - INFO: - Worker 0 finished episode 1 with reward 21.0 in 21 steps
|
49 |
-
2023-05-15 22:19:25 - RayLog - INFO: - Worker 0 finished episode 3 with reward 18.0 in 18 steps
|
50 |
-
2023-05-15 22:19:25 - RayLog - INFO: - Worker 1 finished episode 2 with reward 32.0 in 32 steps
|
51 |
-
2023-05-15 22:19:25 - RayLog - INFO: - Worker 1 finished episode 5 with reward 13.0 in 13 steps
|
52 |
-
2023-05-15 22:19:25 - RayLog - INFO: - Worker 0 finished episode 4 with reward 23.0 in 23 steps
|
53 |
-
2023-05-15 22:19:25 - RayLog - INFO: - Worker 1 finished episode 6 with reward 9.0 in 9 steps
|
54 |
-
2023-05-15 22:19:25 - RayLog - INFO: - Worker 0 finished episode 7 with reward 12.0 in 12 steps
|
55 |
-
2023-05-15 22:19:25 - RayLog - INFO: - Worker 1 finished episode 8 with reward 11.0 in 11 steps
|
56 |
-
2023-05-15 22:19:26 - RayLog - INFO: - Worker 1 finished episode 10 with reward 17.0 in 17 steps
|
57 |
-
2023-05-15 22:19:26 - RayLog - INFO: - Worker 0 finished episode 9 with reward 19.0 in 19 steps
|
58 |
-
2023-05-15 22:19:26 - RayLog - INFO: - Worker 1 finished episode 11 with reward 9.0 in 9 steps
|
59 |
-
2023-05-15 22:19:26 - RayLog - INFO: - Worker 1 finished episode 13 with reward 14.0 in 14 steps
|
60 |
-
2023-05-15 22:19:26 - RayLog - INFO: - Worker 0 finished episode 12 with reward 25.0 in 25 steps
|
61 |
-
2023-05-15 22:19:26 - RayLog - INFO: - Worker 1 finished episode 14 with reward 13.0 in 13 steps
|
62 |
-
2023-05-15 22:19:27 - RayLog - INFO: - Worker 0 finished episode 15 with reward 12.0 in 12 steps
|
63 |
-
2023-05-15 22:19:27 - RayLog - INFO: - Worker 1 finished episode 16 with reward 13.0 in 13 steps
|
64 |
-
2023-05-15 22:19:27 - RayLog - INFO: - Worker 0 finished episode 17 with reward 17.0 in 17 steps
|
65 |
-
2023-05-15 22:19:27 - RayLog - INFO: - Worker 1 finished episode 18 with reward 9.0 in 9 steps
|
66 |
-
2023-05-15 22:19:27 - RayLog - INFO: - Worker 0 finished episode 19 with reward 13.0 in 13 steps
|
67 |
-
2023-05-15 22:19:27 - RayLog - INFO: - Worker 1 finished episode 20 with reward 11.0 in 11 steps
|
68 |
-
2023-05-15 22:19:27 - RayLog - INFO: - Worker 0 finished episode 21 with reward 11.0 in 11 steps
|
69 |
-
2023-05-15 22:19:27 - RayLog - INFO: - Worker 1 finished episode 22 with reward 11.0 in 11 steps
|
70 |
-
2023-05-15 22:19:28 - RayLog - INFO: - Worker 1 finished episode 24 with reward 11.0 in 11 steps
|
71 |
-
2023-05-15 22:19:28 - RayLog - INFO: - Worker 1 finished episode 25 with reward 9.0 in 9 steps
|
72 |
-
2023-05-15 22:19:28 - RayLog - INFO: - Worker 0 finished episode 23 with reward 23.0 in 23 steps
|
73 |
-
2023-05-15 22:19:29 - RayLog - INFO: - Worker 1 finished episode 26 with reward 15.0 in 15 steps
|
74 |
-
2023-05-15 22:19:29 - RayLog - INFO: - Worker 1 finished episode 28 with reward 16.0 in 16 steps
|
75 |
-
2023-05-15 22:19:29 - RayLog - INFO: - Worker 0 finished episode 27 with reward 38.0 in 38 steps
|
76 |
-
2023-05-15 22:19:29 - RayLog - INFO: - Worker 1 finished episode 29 with reward 11.0 in 11 steps
|
77 |
-
2023-05-15 22:19:29 - RayLog - INFO: - Worker 0 finished episode 30 with reward 13.0 in 13 steps
|
78 |
-
2023-05-15 22:19:29 - RayLog - INFO: - Worker 1 finished episode 31 with reward 12.0 in 12 steps
|
79 |
-
2023-05-15 22:19:30 - RayLog - INFO: - Worker 1 finished episode 33 with reward 12.0 in 12 steps
|
80 |
-
2023-05-15 22:19:30 - RayLog - INFO: - Worker 0 finished episode 32 with reward 14.0 in 14 steps
|
81 |
-
2023-05-15 22:19:30 - RayLog - INFO: - Worker 0 finished episode 35 with reward 9.0 in 9 steps
|
82 |
-
2023-05-15 22:19:30 - RayLog - INFO: - Worker 1 finished episode 34 with reward 11.0 in 11 steps
|
83 |
-
2023-05-15 22:19:32 - RayLog - INFO: - update_step: 500, online_eval_reward: 9.000
|
84 |
-
2023-05-15 22:19:32 - RayLog - INFO: - current update step obtain a better online_eval_reward: 9.000, save the best model!
|
85 |
-
2023-05-15 22:19:32 - RayLog - INFO: - Worker 0 finished episode 36 with reward 11.0 in 11 steps
|
86 |
-
2023-05-15 22:19:32 - RayLog - INFO: - Worker 1 finished episode 37 with reward 14.0 in 14 steps
|
87 |
-
2023-05-15 22:19:32 - RayLog - INFO: - Worker 0 finished episode 38 with reward 12.0 in 12 steps
|
88 |
-
2023-05-15 22:19:32 - RayLog - INFO: - Worker 1 finished episode 39 with reward 13.0 in 13 steps
|
89 |
-
2023-05-15 22:19:32 - RayLog - INFO: - Worker 0 finished episode 40 with reward 11.0 in 11 steps
|
90 |
-
2023-05-15 22:19:32 - RayLog - INFO: - Worker 1 finished episode 41 with reward 10.0 in 10 steps
|
91 |
-
2023-05-15 22:19:33 - RayLog - INFO: - Worker 0 finished episode 42 with reward 11.0 in 11 steps
|
92 |
-
2023-05-15 22:19:33 - RayLog - INFO: - Worker 1 finished episode 43 with reward 10.0 in 10 steps
|
93 |
-
2023-05-15 22:19:33 - RayLog - INFO: - Worker 0 finished episode 44 with reward 10.0 in 10 steps
|
94 |
-
2023-05-15 22:19:33 - RayLog - INFO: - Worker 1 finished episode 45 with reward 21.0 in 21 steps
|
95 |
-
2023-05-15 22:19:34 - RayLog - INFO: - Worker 0 finished episode 46 with reward 36.0 in 36 steps
|
96 |
-
2023-05-15 22:19:34 - RayLog - INFO: - Worker 1 finished episode 47 with reward 30.0 in 30 steps
|
97 |
-
2023-05-15 22:19:34 - RayLog - INFO: - Worker 1 finished episode 49 with reward 19.0 in 19 steps
|
98 |
-
2023-05-15 22:19:34 - RayLog - INFO: - Worker 0 finished episode 48 with reward 28.0 in 28 steps
|
99 |
-
2023-05-15 22:19:35 - RayLog - INFO: - Worker 1 finished episode 50 with reward 17.0 in 17 steps
|
100 |
-
2023-05-15 22:19:35 - RayLog - INFO: - Worker 0 finished episode 51 with reward 28.0 in 28 steps
|
101 |
-
2023-05-15 22:19:35 - RayLog - INFO: - Worker 1 finished episode 52 with reward 23.0 in 23 steps
|
102 |
-
2023-05-15 22:19:36 - RayLog - INFO: - Worker 0 finished episode 53 with reward 46.0 in 46 steps
|
103 |
-
2023-05-15 22:19:36 - RayLog - INFO: - Worker 1 finished episode 54 with reward 37.0 in 37 steps
|
104 |
-
2023-05-15 22:19:36 - RayLog - INFO: - Worker 1 finished episode 56 with reward 27.0 in 27 steps
|
105 |
-
2023-05-15 22:19:37 - RayLog - INFO: - Worker 0 finished episode 55 with reward 56.0 in 56 steps
|
106 |
-
2023-05-15 22:19:37 - RayLog - INFO: - update_step: 1000, online_eval_reward: 94.000
|
107 |
-
2023-05-15 22:19:37 - RayLog - INFO: - current update step obtain a better online_eval_reward: 94.000, save the best model!
|
108 |
-
2023-05-15 22:19:37 - RayLog - INFO: - Worker 1 finished episode 57 with reward 35.0 in 35 steps
|
109 |
-
2023-05-15 22:19:38 - RayLog - INFO: - Worker 1 finished episode 59 with reward 29.0 in 29 steps
|
110 |
-
2023-05-15 22:19:38 - RayLog - INFO: - Worker 0 finished episode 58 with reward 65.0 in 65 steps
|
111 |
-
2023-05-15 22:19:39 - RayLog - INFO: - Worker 1 finished episode 60 with reward 37.0 in 37 steps
|
112 |
-
2023-05-15 22:19:39 - RayLog - INFO: - Worker 1 finished episode 62 with reward 34.0 in 34 steps
|
113 |
-
2023-05-15 22:19:40 - RayLog - INFO: - Worker 0 finished episode 61 with reward 70.0 in 70 steps
|
114 |
-
2023-05-15 22:19:40 - RayLog - INFO: - Worker 1 finished episode 63 with reward 39.0 in 39 steps
|
115 |
-
2023-05-15 22:19:41 - RayLog - INFO: - Worker 1 finished episode 65 with reward 35.0 in 35 steps
|
116 |
-
2023-05-15 22:19:41 - RayLog - INFO: - Worker 0 finished episode 64 with reward 55.0 in 55 steps
|
117 |
-
2023-05-15 22:19:42 - RayLog - INFO: - Worker 1 finished episode 66 with reward 37.0 in 37 steps
|
118 |
-
2023-05-15 22:19:42 - RayLog - INFO: - Worker 0 finished episode 67 with reward 53.0 in 53 steps
|
119 |
-
2023-05-15 22:19:42 - RayLog - INFO: - Worker 1 finished episode 68 with reward 32.0 in 32 steps
|
120 |
-
2023-05-15 22:19:42 - RayLog - INFO: - update_step: 1500, online_eval_reward: 57.000
|
121 |
-
2023-05-15 22:19:43 - RayLog - INFO: - Worker 1 finished episode 70 with reward 51.0 in 51 steps
|
122 |
-
2023-05-15 22:19:44 - RayLog - INFO: - Worker 0 finished episode 69 with reward 67.0 in 67 steps
|
123 |
-
2023-05-15 22:19:44 - RayLog - INFO: - Worker 1 finished episode 71 with reward 40.0 in 40 steps
|
124 |
-
2023-05-15 22:19:45 - RayLog - INFO: - Worker 0 finished episode 72 with reward 68.0 in 68 steps
|
125 |
-
2023-05-15 22:19:46 - RayLog - INFO: - Worker 1 finished episode 73 with reward 79.0 in 79 steps
|
126 |
-
2023-05-15 22:19:48 - RayLog - INFO: - update_step: 2000, online_eval_reward: 138.000
|
127 |
-
2023-05-15 22:19:48 - RayLog - INFO: - current update step obtain a better online_eval_reward: 138.000, save the best model!
|
128 |
-
2023-05-15 22:19:48 - RayLog - INFO: - Worker 0 finished episode 74 with reward 124.0 in 124 steps
|
129 |
-
2023-05-15 22:19:49 - RayLog - INFO: - Worker 1 finished episode 75 with reward 133.0 in 133 steps
|
130 |
-
2023-05-15 22:19:52 - RayLog - INFO: - Worker 0 finished episode 76 with reward 200.0 in 200 steps
|
131 |
-
2023-05-15 22:19:53 - RayLog - INFO: - update_step: 2500, online_eval_reward: 200.000
|
132 |
-
2023-05-15 22:19:53 - RayLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
|
133 |
-
2023-05-15 22:19:53 - RayLog - INFO: - Worker 1 finished episode 77 with reward 200.0 in 200 steps
|
134 |
-
2023-05-15 22:19:56 - RayLog - INFO: - Worker 0 finished episode 78 with reward 187.0 in 187 steps
|
135 |
-
2023-05-15 22:19:57 - RayLog - INFO: - Worker 1 finished episode 79 with reward 200.0 in 200 steps
|
136 |
-
2023-05-15 22:19:58 - RayLog - INFO: - update_step: 3000, online_eval_reward: 200.000
|
137 |
-
2023-05-15 22:20:00 - RayLog - INFO: - Worker 0 finished episode 80 with reward 200.0 in 200 steps
|
138 |
-
2023-05-15 22:20:02 - RayLog - INFO: - Worker 1 finished episode 81 with reward 200.0 in 200 steps
|
139 |
-
2023-05-15 22:20:04 - RayLog - INFO: - update_step: 3500, online_eval_reward: 165.000
|
140 |
-
2023-05-15 22:20:04 - RayLog - INFO: - Worker 0 finished episode 82 with reward 200.0 in 200 steps
|
141 |
-
2023-05-15 22:20:06 - RayLog - INFO: - Worker 1 finished episode 83 with reward 200.0 in 200 steps
|
142 |
-
2023-05-15 22:20:08 - RayLog - INFO: - Worker 0 finished episode 84 with reward 200.0 in 200 steps
|
143 |
-
2023-05-15 22:20:09 - RayLog - INFO: - update_step: 4000, online_eval_reward: 200.000
|
144 |
-
2023-05-15 22:20:10 - RayLog - INFO: - Worker 1 finished episode 85 with reward 200.0 in 200 steps
|
145 |
-
2023-05-15 22:20:12 - RayLog - INFO: - Worker 0 finished episode 86 with reward 200.0 in 200 steps
|
146 |
-
2023-05-15 22:20:14 - RayLog - INFO: - update_step: 4500, online_eval_reward: 200.000
|
147 |
-
2023-05-15 22:20:14 - RayLog - INFO: - Worker 1 finished episode 87 with reward 200.0 in 200 steps
|
148 |
-
2023-05-15 22:20:16 - RayLog - INFO: - Worker 0 finished episode 88 with reward 200.0 in 200 steps
|
149 |
-
2023-05-15 22:20:18 - RayLog - INFO: - Worker 1 finished episode 89 with reward 200.0 in 200 steps
|
150 |
-
2023-05-15 22:20:19 - RayLog - INFO: - update_step: 5000, online_eval_reward: 200.000
|
151 |
-
2023-05-15 22:20:20 - RayLog - INFO: - Worker 0 finished episode 90 with reward 200.0 in 200 steps
|
152 |
-
2023-05-15 22:20:22 - RayLog - INFO: - Worker 1 finished episode 91 with reward 200.0 in 200 steps
|
153 |
-
2023-05-15 22:20:24 - RayLog - INFO: - update_step: 5500, online_eval_reward: 200.000
|
154 |
-
2023-05-15 22:20:24 - RayLog - INFO: - Worker 0 finished episode 92 with reward 200.0 in 200 steps
|
155 |
-
2023-05-15 22:20:26 - RayLog - INFO: - Worker 1 finished episode 93 with reward 200.0 in 200 steps
|
156 |
-
2023-05-15 22:20:28 - RayLog - INFO: - Worker 0 finished episode 94 with reward 200.0 in 200 steps
|
157 |
-
2023-05-15 22:20:29 - RayLog - INFO: - update_step: 6000, online_eval_reward: 200.000
|
158 |
-
2023-05-15 22:20:30 - RayLog - INFO: - Worker 1 finished episode 95 with reward 200.0 in 200 steps
|
159 |
-
2023-05-15 22:20:32 - RayLog - INFO: - Worker 0 finished episode 96 with reward 200.0 in 200 steps
|
160 |
-
2023-05-15 22:20:34 - RayLog - INFO: - update_step: 6500, online_eval_reward: 200.000
|
161 |
-
2023-05-15 22:20:34 - RayLog - INFO: - Worker 1 finished episode 97 with reward 200.0 in 200 steps
|
162 |
-
2023-05-15 22:20:37 - RayLog - INFO: - Worker 0 finished episode 98 with reward 200.0 in 200 steps
|
163 |
-
2023-05-15 22:20:38 - RayLog - INFO: - Worker 1 finished episode 99 with reward 200.0 in 200 steps
|
164 |
-
2023-05-15 22:20:40 - RayLog - INFO: - update_step: 7000, online_eval_reward: 200.000
|
165 |
-
2023-05-15 22:20:40 - RayLog - INFO: - Worker 0 finished episode 100 with reward 200.0 in 200 steps
|
166 |
-
2023-05-15 22:20:43 - SimpleLog - INFO: - Finish training! total time consumed: 87.42s
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1000
DELETED
Binary file (545 kB)
|
|
ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1500
DELETED
Binary file (545 kB)
|
|
ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2000
DELETED
Binary file (545 kB)
|
|
ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2500
DELETED
Binary file (545 kB)
|
|