diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/config.yaml b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/config.yaml deleted file mode 100644 index 91654883827265098bcc856e73636f35001bd9f4..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/config.yaml +++ /dev/null @@ -1,48 +0,0 @@ -general_cfg: - algo_name: PER_DQN - device: cuda - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: true - load_path: Train_CartPole-v1_PER_DQN_20230331-225815 - max_steps: 200 - mode: test - new_step_api: true - render: false - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 100 - wrapper: null -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - hidden_dim: 256 - lr: 0.0001 - per_alpha: 0.6 - per_beta: 0.4 - per_beta_annealing: 0.001 - per_epsilon: 0.01 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/logs/log.txt b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/logs/log.txt deleted file mode 100644 index 2c2203b84e5cd69cc152d08d11073728d338cb4b..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/logs/log.txt +++ /dev/null @@ -1,54 +0,0 @@ -2023-03-31 23:14:42 - r - INFO: - Hyperparameters: -2023-03-31 23:14:42 - r - INFO: - ================================================================================ -2023-03-31 23:14:42 - r - INFO: - Name Value Type -2023-03-31 23:14:42 - r - INFO: - env_name CartPole-v1 -2023-03-31 23:14:42 - r - INFO: - new_step_api 1 -2023-03-31 23:14:42 - r - INFO: - wrapper None -2023-03-31 23:14:42 - r - INFO: - render 0 -2023-03-31 23:14:42 - r - INFO: - algo_name PER_DQN -2023-03-31 23:14:42 - r - INFO: - mode test -2023-03-31 23:14:42 - r - INFO: - seed 1 -2023-03-31 23:14:42 - r - INFO: - device cuda -2023-03-31 23:14:42 - r - INFO: - train_eps 100 -2023-03-31 23:14:42 - r - INFO: - test_eps 10 -2023-03-31 23:14:42 - r - INFO: - eval_eps 10 -2023-03-31 23:14:42 - r - INFO: - eval_per_episode 5 -2023-03-31 23:14:42 - r - INFO: - max_steps 200 -2023-03-31 23:14:42 - r - INFO: - load_checkpoint 1 -2023-03-31 23:14:42 - r - INFO: - load_path Train_CartPole-v1_PER_DQN_20230331-225815 -2023-03-31 23:14:42 - r - INFO: - show_fig 0 -2023-03-31 23:14:42 - r - INFO: - save_fig 1 -2023-03-31 23:14:42 - r - INFO: - epsilon_start 0.95 -2023-03-31 23:14:42 - r - INFO: - epsilon_end 0.01 -2023-03-31 23:14:42 - r - INFO: - epsilon_decay 500 -2023-03-31 23:14:42 - r - INFO: - hidden_dim 256 -2023-03-31 23:14:42 - r - INFO: - gamma 0.99 -2023-03-31 23:14:42 - r - INFO: - lr 0.0001 -2023-03-31 23:14:42 - r - INFO: - buffer_size 100000 -2023-03-31 23:14:42 - r - INFO: - per_alpha 0.6 -2023-03-31 23:14:42 - r - INFO: - per_beta 0.4 -2023-03-31 23:14:42 - r - INFO: - per_beta_annealing 0.001 -2023-03-31 23:14:42 - r - INFO: - per_epsilon 0.01 -2023-03-31 23:14:42 - r - INFO: - batch_size 64 -2023-03-31 23:14:42 - r - INFO: - target_update 4 -2023-03-31 23:14:42 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-03-31 23:14:42 - r - INFO: - task_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442 -2023-03-31 23:14:42 - r - INFO: - res_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442/results -2023-03-31 23:14:42 - r - INFO: - log_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442/logs -2023-03-31 23:14:42 - r - INFO: - traj_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442/traj -2023-03-31 23:14:42 - r - INFO: - tb_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Test_CartPole-v1_PER_DQN_20230331-231442/tb_logs -2023-03-31 23:14:42 - r - INFO: - ================================================================================ -2023-03-31 23:14:42 - r - INFO: - n_states: 4, n_actions: 2 -2023-03-31 23:14:43 - r - INFO: - Start testing! -2023-03-31 23:14:43 - r - INFO: - Env: CartPole-v1, Algorithm: PER_DQN, Device: cuda -2023-03-31 23:14:44 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200 -2023-03-31 23:14:44 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200 -2023-03-31 23:14:44 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200 -2023-03-31 23:14:44 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200 -2023-03-31 23:14:44 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200 -2023-03-31 23:14:44 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200 -2023-03-31 23:14:44 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200 -2023-03-31 23:14:44 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200 -2023-03-31 23:14:44 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200 -2023-03-31 23:14:44 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200 -2023-03-31 23:14:44 - r - INFO: - Finish testing! diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/models/checkpoint.pt b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/models/checkpoint.pt deleted file mode 100644 index 663b12f22263098d243896df93ba28f600d9a5fe..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c438616b97ca890557a9e9b1cd42decfc5decc64e5aee660d89158290e92683d -size 272471 diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/learning_curve.png b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/learning_curve.png deleted file mode 100644 index e6669f6e45a8470b88f8a09b7e3e7cee16f30d2c..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/learning_curve.png and /dev/null differ diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/res.csv b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/res.csv deleted file mode 100644 index cbbcf2eb2cccfce2f3060e96b3484890fe578ac1..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/results/res.csv +++ /dev/null @@ -1,11 +0,0 @@ -episodes,rewards,steps -0,200.0,200 -1,200.0,200 -2,200.0,200 -3,200.0,200 -4,200.0,200 -5,200.0,200 -6,200.0,200 -7,200.0,200 -8,200.0,200 -9,200.0,200 diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/tb_logs/events.out.tfevents.1680275682.DESKTOP-H34HQIQ.290376.0 b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/tb_logs/events.out.tfevents.1680275682.DESKTOP-H34HQIQ.290376.0 deleted file mode 100644 index dca8765e122e7c8281d95e93e0c43e2faede67f4..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_20230331-231442/tb_logs/events.out.tfevents.1680275682.DESKTOP-H34HQIQ.290376.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5157a74a9c33c3bb078d1509c6bd0013251490c4413c68decd709494f31a8d60 -size 40 diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/config.yaml b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/config.yaml deleted file mode 100644 index eb754572a76f62b2f98f257e5b3d70cbfdd2bc31..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/config.yaml +++ /dev/null @@ -1,55 +0,0 @@ -general_cfg: - algo_name: PER_DQN - device: cpu - env_name: gym - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: true - load_path: Train_gym_PER_DQN_20230415-215002 - max_steps: 200 - mode: test - mp_backend: mp - n_workers: 1 - new_step_api: true - render: false - render_mode: human - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 200 - wrapper: null -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - hidden_dim: 256 - lr: 0.0001 - per_alpha: 0.6 - per_beta: 0.4 - per_beta_annealing: 0.001 - per_epsilon: 0.01 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear -env_cfg: - id: CartPole-v1 - new_step_api: true - render_mode: null diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/logs/log.txt b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/logs/log.txt deleted file mode 100644 index d1217d2d110ec32b297fe04b77ca5858db432dbb..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/logs/log.txt +++ /dev/null @@ -1,58 +0,0 @@ -2023-04-15 21:51:47 - r - INFO: - Hyperparameters: -2023-04-15 21:51:47 - r - INFO: - ================================================================================ -2023-04-15 21:51:47 - r - INFO: - Name Value Type -2023-04-15 21:51:47 - r - INFO: - env_name gym -2023-04-15 21:51:47 - r - INFO: - new_step_api 1 -2023-04-15 21:51:47 - r - INFO: - wrapper None -2023-04-15 21:51:47 - r - INFO: - render 0 -2023-04-15 21:51:47 - r - INFO: - render_mode None -2023-04-15 21:51:47 - r - INFO: - algo_name PER_DQN -2023-04-15 21:51:47 - r - INFO: - mode test -2023-04-15 21:51:47 - r - INFO: - mp_backend mp -2023-04-15 21:51:47 - r - INFO: - seed 1 -2023-04-15 21:51:47 - r - INFO: - device cpu -2023-04-15 21:51:47 - r - INFO: - train_eps 200 -2023-04-15 21:51:47 - r - INFO: - test_eps 10 -2023-04-15 21:51:47 - r - INFO: - eval_eps 10 -2023-04-15 21:51:47 - r - INFO: - eval_per_episode 5 -2023-04-15 21:51:47 - r - INFO: - max_steps 200 -2023-04-15 21:51:47 - r - INFO: - load_checkpoint 1 -2023-04-15 21:51:47 - r - INFO: - load_path Train_gym_PER_DQN_20230415-215002 -2023-04-15 21:51:47 - r - INFO: - show_fig 0 -2023-04-15 21:51:47 - r - INFO: - save_fig 1 -2023-04-15 21:51:47 - r - INFO: - n_workers 1 -2023-04-15 21:51:47 - r - INFO: - epsilon_start 0.95 -2023-04-15 21:51:47 - r - INFO: - epsilon_end 0.01 -2023-04-15 21:51:47 - r - INFO: - epsilon_decay 500 -2023-04-15 21:51:47 - r - INFO: - hidden_dim 256 -2023-04-15 21:51:47 - r - INFO: - gamma 0.99 -2023-04-15 21:51:47 - r - INFO: - lr 0.0001 -2023-04-15 21:51:47 - r - INFO: - buffer_size 100000 -2023-04-15 21:51:47 - r - INFO: - per_alpha 0.6 -2023-04-15 21:51:47 - r - INFO: - per_beta 0.4 -2023-04-15 21:51:47 - r - INFO: - per_beta_annealing 0.001 -2023-04-15 21:51:47 - r - INFO: - per_epsilon 0.01 -2023-04-15 21:51:47 - r - INFO: - batch_size 64 -2023-04-15 21:51:47 - r - INFO: - target_update 4 -2023-04-15 21:51:47 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-04-15 21:51:47 - r - INFO: - id CartPole-v1 -2023-04-15 21:51:47 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147 -2023-04-15 21:51:47 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147/results -2023-04-15 21:51:47 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147/logs -2023-04-15 21:51:47 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147/traj -2023-04-15 21:51:47 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-215147/videos -2023-04-15 21:51:47 - r - INFO: - ================================================================================ -2023-04-15 21:51:47 - r - INFO: - n_states: 4, n_actions: 2 -2023-04-15 21:51:47 - r - INFO: - Start testing! -2023-04-15 21:51:47 - r - INFO: - Env: gym, Algorithm: PER_DQN, Device: cpu -2023-04-15 21:51:47 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200 -2023-04-15 21:51:47 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200 -2023-04-15 21:51:47 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200 -2023-04-15 21:51:47 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200 -2023-04-15 21:51:47 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200 -2023-04-15 21:51:47 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200 -2023-04-15 21:51:47 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200 -2023-04-15 21:51:47 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200 -2023-04-15 21:51:47 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200 -2023-04-15 21:51:47 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200 -2023-04-15 21:51:47 - r - INFO: - Finish testing! diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/models/checkpoint.pt b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/models/checkpoint.pt deleted file mode 100644 index 1f68746694a6ae30f06b2351e23e70ef9deafc34..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d4251c7f141686d5391c5c933b493b27a184102ccf1596bead1dccaa6cc0bd9a -size 272407 diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/learning_curve.png b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/learning_curve.png deleted file mode 100644 index 3d1599fb5f661c59b34fc753553b016cbc1f9e75..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/learning_curve.png and /dev/null differ diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/res.csv b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/res.csv deleted file mode 100644 index cbbcf2eb2cccfce2f3060e96b3484890fe578ac1..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_mp_20230415-215147/results/res.csv +++ /dev/null @@ -1,11 +0,0 @@ -episodes,rewards,steps -0,200.0,200 -1,200.0,200 -2,200.0,200 -3,200.0,200 -4,200.0,200 -5,200.0,200 -6,200.0,200 -7,200.0,200 -8,200.0,200 -9,200.0,200 diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/config.yaml b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/config.yaml deleted file mode 100644 index 51a2e800e946e5822c90c42de6eb847f8db744cc..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/config.yaml +++ /dev/null @@ -1,55 +0,0 @@ -general_cfg: - algo_name: PER_DQN - device: cpu - env_name: gym - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: true - load_path: Train_CartPole-v1_PER_DQN_ray_20230415-215738 - max_steps: 200 - mode: test - mp_backend: ray - n_workers: 1 - new_step_api: true - render: false - render_mode: human - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 250 - wrapper: null -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - hidden_dim: 256 - lr: 0.0001 - per_alpha: 0.6 - per_beta: 0.4 - per_beta_annealing: 0.001 - per_epsilon: 0.01 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear -env_cfg: - id: CartPole-v1 - new_step_api: true - render_mode: null diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/logs/log.txt b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/logs/log.txt deleted file mode 100644 index d545c207f7126a8b9d0b68b035501e24fbc4c325..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/logs/log.txt +++ /dev/null @@ -1,58 +0,0 @@ -2023-04-15 22:05:40 - r - INFO: - Hyperparameters: -2023-04-15 22:05:40 - r - INFO: - ================================================================================ -2023-04-15 22:05:40 - r - INFO: - Name Value Type -2023-04-15 22:05:40 - r - INFO: - env_name gym -2023-04-15 22:05:40 - r - INFO: - new_step_api 1 -2023-04-15 22:05:40 - r - INFO: - wrapper None -2023-04-15 22:05:40 - r - INFO: - render 0 -2023-04-15 22:05:40 - r - INFO: - render_mode None -2023-04-15 22:05:40 - r - INFO: - algo_name PER_DQN -2023-04-15 22:05:40 - r - INFO: - mode test -2023-04-15 22:05:40 - r - INFO: - mp_backend ray -2023-04-15 22:05:40 - r - INFO: - seed 1 -2023-04-15 22:05:40 - r - INFO: - device cpu -2023-04-15 22:05:40 - r - INFO: - train_eps 250 -2023-04-15 22:05:40 - r - INFO: - test_eps 10 -2023-04-15 22:05:40 - r - INFO: - eval_eps 10 -2023-04-15 22:05:40 - r - INFO: - eval_per_episode 5 -2023-04-15 22:05:40 - r - INFO: - max_steps 200 -2023-04-15 22:05:40 - r - INFO: - load_checkpoint 1 -2023-04-15 22:05:40 - r - INFO: - load_path Train_CartPole-v1_PER_DQN_ray_20230415-215738 -2023-04-15 22:05:40 - r - INFO: - show_fig 0 -2023-04-15 22:05:40 - r - INFO: - save_fig 1 -2023-04-15 22:05:40 - r - INFO: - n_workers 1 -2023-04-15 22:05:40 - r - INFO: - epsilon_start 0.95 -2023-04-15 22:05:40 - r - INFO: - epsilon_end 0.01 -2023-04-15 22:05:40 - r - INFO: - epsilon_decay 500 -2023-04-15 22:05:40 - r - INFO: - hidden_dim 256 -2023-04-15 22:05:40 - r - INFO: - gamma 0.99 -2023-04-15 22:05:40 - r - INFO: - lr 0.0001 -2023-04-15 22:05:40 - r - INFO: - buffer_size 100000 -2023-04-15 22:05:40 - r - INFO: - per_alpha 0.6 -2023-04-15 22:05:40 - r - INFO: - per_beta 0.4 -2023-04-15 22:05:40 - r - INFO: - per_beta_annealing 0.001 -2023-04-15 22:05:40 - r - INFO: - per_epsilon 0.01 -2023-04-15 22:05:40 - r - INFO: - batch_size 64 -2023-04-15 22:05:40 - r - INFO: - target_update 4 -2023-04-15 22:05:40 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-04-15 22:05:40 - r - INFO: - id CartPole-v1 -2023-04-15 22:05:40 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540 -2023-04-15 22:05:40 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540/results -2023-04-15 22:05:40 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540/logs -2023-04-15 22:05:40 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540/traj -2023-04-15 22:05:40 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_gym_PER_DQN_20230415-220540/videos -2023-04-15 22:05:40 - r - INFO: - ================================================================================ -2023-04-15 22:05:40 - r - INFO: - n_states: 4, n_actions: 2 -2023-04-15 22:05:40 - r - INFO: - Start testing! -2023-04-15 22:05:40 - r - INFO: - Env: gym, Algorithm: PER_DQN, Device: cpu -2023-04-15 22:05:40 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200 -2023-04-15 22:05:40 - r - INFO: - Episode: 2/10, Reward: 199.000, Step: 199 -2023-04-15 22:05:40 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200 -2023-04-15 22:05:40 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200 -2023-04-15 22:05:40 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200 -2023-04-15 22:05:40 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200 -2023-04-15 22:05:40 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200 -2023-04-15 22:05:40 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200 -2023-04-15 22:05:41 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200 -2023-04-15 22:05:41 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200 -2023-04-15 22:05:41 - r - INFO: - Finish testing! diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/models/checkpoint.pt b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/models/checkpoint.pt deleted file mode 100644 index 75946dbb92769075d2500d272c9ae471ca3675eb..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0efe3ec576afef2311748067e61af0fe6c939f7a2c2a1500001987a5d0092ce3 -size 272407 diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/learning_curve.png b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/learning_curve.png deleted file mode 100644 index 04f725dd5c226a7baa0918e354e668324718445f..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/learning_curve.png and /dev/null differ diff --git a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/res.csv b/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/res.csv deleted file mode 100644 index 08489d2627812baa1fb01515831f31b697056794..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_CartPole-v1_PER_DQN_ray_20230415-220540/results/res.csv +++ /dev/null @@ -1,11 +0,0 @@ -episodes,rewards,steps -0,200.0,200 -1,199.0,199 -2,200.0,200 -3,200.0,200 -4,200.0,200 -5,200.0,200 -6,200.0,200 -7,200.0,200 -8,200.0,200 -9,200.0,200 diff --git a/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/logs/log.txt b/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/logs/log.txt deleted file mode 100644 index 57a99180430ba1e6fe64fd3bd09dfb7e904e299f..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/logs/log.txt +++ /dev/null @@ -1,57 +0,0 @@ -2023-05-15 21:19:26 - SimpleLog - INFO: - General Configs: -2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================ -2023-05-15 21:19:26 - SimpleLog - INFO: - Name Value Type -2023-05-15 21:19:26 - SimpleLog - INFO: - env_name gym -2023-05-15 21:19:26 - SimpleLog - INFO: - algo_name DQN -2023-05-15 21:19:26 - SimpleLog - INFO: - mode test -2023-05-15 21:19:26 - SimpleLog - INFO: - collect_traj 1 -2023-05-15 21:19:26 - SimpleLog - INFO: - mp_backend single -2023-05-15 21:19:26 - SimpleLog - INFO: - n_workers 1 -2023-05-15 21:19:26 - SimpleLog - INFO: - seed 1 -2023-05-15 21:19:26 - SimpleLog - INFO: - device cpu -2023-05-15 21:19:26 - SimpleLog - INFO: - max_episode 10 -2023-05-15 21:19:26 - SimpleLog - INFO: - max_step 200 -2023-05-15 21:19:26 - SimpleLog - INFO: - online_eval 1 -2023-05-15 21:19:26 - SimpleLog - INFO: - online_eval_episode 10 -2023-05-15 21:19:26 - SimpleLog - INFO: - load_checkpoint 1 -2023-05-15 21:19:26 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 -2023-05-15 21:19:26 - SimpleLog - INFO: - show_fig 0 -2023-05-15 21:19:26 - SimpleLog - INFO: - save_fig 1 -2023-05-15 21:19:26 - SimpleLog - INFO: - load_model_step best -2023-05-15 21:19:26 - SimpleLog - INFO: - model_save_fre 500 -2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================ -2023-05-15 21:19:26 - SimpleLog - INFO: - Algo Configs: -2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================ -2023-05-15 21:19:26 - SimpleLog - INFO: - Name Value Type -2023-05-15 21:19:26 - SimpleLog - INFO: - epsilon_start 0.95 -2023-05-15 21:19:26 - SimpleLog - INFO: - epsilon_end 0.01 -2023-05-15 21:19:26 - SimpleLog - INFO: - epsilon_decay 500 -2023-05-15 21:19:26 - SimpleLog - INFO: - gamma 0.95 -2023-05-15 21:19:26 - SimpleLog - INFO: - lr 0.0001 -2023-05-15 21:19:26 - SimpleLog - INFO: - buffer_size 100000 -2023-05-15 21:19:26 - SimpleLog - INFO: - batch_size 64 -2023-05-15 21:19:26 - SimpleLog - INFO: - target_update 4 -2023-05-15 21:19:26 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] -2023-05-15 21:19:26 - SimpleLog - INFO: - buffer_type REPLAY_QUE -2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================ -2023-05-15 21:19:26 - SimpleLog - INFO: - Env Configs: -2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================ -2023-05-15 21:19:26 - SimpleLog - INFO: - Name Value Type -2023-05-15 21:19:26 - SimpleLog - INFO: - id CartPole-v1 -2023-05-15 21:19:26 - SimpleLog - INFO: - render_mode rgb_array -2023-05-15 21:19:26 - SimpleLog - INFO: - wrapper None -2023-05-15 21:19:26 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] -2023-05-15 21:19:26 - SimpleLog - INFO: - ================================================================================ -2023-05-15 21:19:26 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2) -2023-05-15 21:19:26 - SimpleLog - INFO: - Start testing! -2023-05-15 21:19:26 - SimpleLog - INFO: - episode: 0, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:19:30 - SimpleLog - INFO: - episode: 1, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:19:30 - SimpleLog - INFO: - episode: 2, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:19:31 - SimpleLog - INFO: - episode: 3, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:19:31 - SimpleLog - INFO: - episode: 4, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:19:31 - SimpleLog - INFO: - episode: 5, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:19:32 - SimpleLog - INFO: - episode: 6, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:19:32 - SimpleLog - INFO: - episode: 7, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:19:33 - SimpleLog - INFO: - episode: 8, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:19:33 - SimpleLog - INFO: - episode: 9, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:19:33 - SimpleLog - INFO: - Finish testing! total time consumed: 7.28s diff --git a/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/results/trajs_0.pkl b/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/results/trajs_0.pkl deleted file mode 100644 index 8d7e2a9ab7a8025ba357a61f7c7ba89c0cacb387..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/results/trajs_0.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0fae5aa5ceb51833f761621229159f743bbc8e8a6766007136b3f2af48a1a001 -size 130746 diff --git a/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/interact/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.0 b/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/interact/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.0 deleted file mode 100644 index a5eb29cf540657759cc2b373b27e7a5ce08946a0..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/interact/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0f5348239a851416ae5ead991b86721f16f71d5541ce3e49671133bb408edec2 -size 1056 diff --git a/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/model/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.1 b/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/model/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.1 deleted file mode 100644 index 06fc4e645766b00692487094cb074c5d3dead96d..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/tb_logs/model/events.out.tfevents.1684156766.DESKTOP-H34HQIQ.16348.1 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:778bf4752bfe3bce34855fa51be3e7fdeb15c8d13d02779f6ba433435fa2fdf4 -size 40 diff --git a/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/videos/video.gif b/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/videos/video.gif deleted file mode 100644 index 2c5a427744aa816b8fe87013db7f1df59bf0a040..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/videos/video.gif +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a417ad8def4a6907872c3de9cd2883536a4b41e1c0d36bb98af3830d6eb76739 -size 131807 diff --git a/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/config.yaml b/ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/config.yaml similarity index 60% rename from ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/config.yaml rename to ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/config.yaml index f21a1ee07dea61df45e6e3f90ed256afb9bca707..49449a60c4a3d17cbf4f539978166a30467ae663 100644 --- a/ClassControl/CartPole-v1/Test_single_CartPole-v1_DQN_20230515-211926/config.yaml +++ b/ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/config.yaml @@ -1,39 +1,43 @@ general_cfg: - algo_name: DQN - collect_traj: true - device: cpu + algo_name: PER_DQN + collect_traj: false + device: cuda env_name: gym load_checkpoint: true load_model_step: best - load_path: Train_single_CartPole-v1_DQN_20230515-211721 + load_path: Train_single_CartPole-v1_PER_DQN_20230518-232215 max_episode: 10 max_step: 200 mode: test model_save_fre: 500 mp_backend: single - n_workers: 1 + n_learners: 1 + n_workers: 2 online_eval: true online_eval_episode: 10 - save_fig: true seed: 1 - show_fig: false + share_buffer: true algo_cfg: batch_size: 64 buffer_size: 100000 - buffer_type: REPLAY_QUE - epsilon_decay: 500 + buffer_type: PER_QUE + epsilon_decay: 1000 epsilon_end: 0.01 epsilon_start: 0.95 - gamma: 0.95 + gamma: 0.99 lr: 0.0001 + per_alpha: 0.6 + per_beta: 0.4 + per_beta_annealing: 0.001 + per_epsilon: 0.01 target_update: 4 value_layers: - activation: relu - layer_dim: + layer_size: - 256 layer_type: linear - activation: relu - layer_dim: + layer_size: - 256 layer_type: linear env_cfg: @@ -41,5 +45,5 @@ env_cfg: ignore_params: - wrapper - ignore_params - render_mode: rgb_array + render_mode: null wrapper: null diff --git a/ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/logs/log.txt b/ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/logs/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d8f165aad3a00cb3c1c45007d06be5df3c0e6d92 --- /dev/null +++ b/ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/logs/log.txt @@ -0,0 +1,61 @@ +2023-05-18 23:23:30 - SimpleLog - INFO: - General Configs: +2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================ +2023-05-18 23:23:30 - SimpleLog - INFO: - Name Value Type +2023-05-18 23:23:30 - SimpleLog - INFO: - env_name gym +2023-05-18 23:23:30 - SimpleLog - INFO: - algo_name PER_DQN +2023-05-18 23:23:30 - SimpleLog - INFO: - mode test +2023-05-18 23:23:30 - SimpleLog - INFO: - device cuda +2023-05-18 23:23:30 - SimpleLog - INFO: - seed 1 +2023-05-18 23:23:30 - SimpleLog - INFO: - max_episode 10 +2023-05-18 23:23:30 - SimpleLog - INFO: - max_step 200 +2023-05-18 23:23:30 - SimpleLog - INFO: - collect_traj 0 +2023-05-18 23:23:30 - SimpleLog - INFO: - mp_backend single +2023-05-18 23:23:30 - SimpleLog - INFO: - n_workers 2 +2023-05-18 23:23:30 - SimpleLog - INFO: - n_learners 1 +2023-05-18 23:23:30 - SimpleLog - INFO: - share_buffer 1 +2023-05-18 23:23:30 - SimpleLog - INFO: - online_eval 1 +2023-05-18 23:23:30 - SimpleLog - INFO: - online_eval_episode 10 +2023-05-18 23:23:30 - SimpleLog - INFO: - model_save_fre 500 +2023-05-18 23:23:30 - SimpleLog - INFO: - load_checkpoint 1 +2023-05-18 23:23:30 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_PER_DQN_20230518-232215 +2023-05-18 23:23:30 - SimpleLog - INFO: - load_model_step best +2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================ +2023-05-18 23:23:30 - SimpleLog - INFO: - Algo Configs: +2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================ +2023-05-18 23:23:30 - SimpleLog - INFO: - Name Value Type +2023-05-18 23:23:30 - SimpleLog - INFO: - epsilon_start 0.95 +2023-05-18 23:23:30 - SimpleLog - INFO: - epsilon_end 0.01 +2023-05-18 23:23:30 - SimpleLog - INFO: - epsilon_decay 1000 +2023-05-18 23:23:30 - SimpleLog - INFO: - gamma 0.99 +2023-05-18 23:23:30 - SimpleLog - INFO: - lr 0.0001 +2023-05-18 23:23:30 - SimpleLog - INFO: - buffer_type PER_QUE +2023-05-18 23:23:30 - SimpleLog - INFO: - buffer_size 100000 +2023-05-18 23:23:30 - SimpleLog - INFO: - per_alpha 0.6 +2023-05-18 23:23:30 - SimpleLog - INFO: - per_beta 0.4 +2023-05-18 23:23:30 - SimpleLog - INFO: - per_beta_annealing 0.001 +2023-05-18 23:23:30 - SimpleLog - INFO: - per_epsilon 0.01 +2023-05-18 23:23:30 - SimpleLog - INFO: - batch_size 64 +2023-05-18 23:23:30 - SimpleLog - INFO: - target_update 4 +2023-05-18 23:23:30 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}] +2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================ +2023-05-18 23:23:30 - SimpleLog - INFO: - Env Configs: +2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================ +2023-05-18 23:23:30 - SimpleLog - INFO: - Name Value Type +2023-05-18 23:23:30 - SimpleLog - INFO: - id CartPole-v1 +2023-05-18 23:23:30 - SimpleLog - INFO: - render_mode None +2023-05-18 23:23:30 - SimpleLog - INFO: - wrapper None +2023-05-18 23:23:30 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] +2023-05-18 23:23:30 - SimpleLog - INFO: - ================================================================================ +2023-05-18 23:23:30 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2) +2023-05-18 23:23:31 - SimpleLog - INFO: - Start testing! +2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 0, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 1, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 2, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 3, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:23:32 - SimpleLog - INFO: - episode: 4, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 5, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 6, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 7, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 8, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:23:33 - SimpleLog - INFO: - episode: 9, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:23:33 - SimpleLog - INFO: - Finish testing! total time consumed: 2.60s diff --git a/ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/interact/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.0 b/ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/interact/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.0 new file mode 100644 index 0000000000000000000000000000000000000000..7c5188e5f0d4708bd5d221252eb44fee880a46e6 --- /dev/null +++ b/ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/interact/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc3ae7a23e9f72498a6b6190433f9925fc02af039e03defef47a6fda915a140c +size 1056 diff --git a/ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/model/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.1 b/ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/model/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.1 new file mode 100644 index 0000000000000000000000000000000000000000..d1c43bba7969b7680aaec8466912caca288be18e --- /dev/null +++ b/ClassControl/CartPole-v1/Test_single_CartPole-v1_PER_DQN_20230518-232330/tb_logs/model/events.out.tfevents.1684423410.DESKTOP-H34HQIQ.83344.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70fea5313fdd99c138f919e97e5556cb1d9e4370727b560fe1eeb6469d023588 +size 40 diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/config.yaml b/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/config.yaml deleted file mode 100644 index f9959e23bb897e5ce0aa6bc94d1c20dc032aed88..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/config.yaml +++ /dev/null @@ -1,48 +0,0 @@ -general_cfg: - algo_name: PER_DQN - device: cuda - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: false - load_path: Train_CartPole-v1_PER_DQN - max_steps: 200 - mode: train - new_step_api: true - render: false - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 200 - wrapper: null -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - hidden_dim: 256 - lr: 0.0001 - per_alpha: 0.6 - per_beta: 0.4 - per_beta_annealing: 0.001 - per_epsilon: 0.01 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/logs/log.txt b/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/logs/log.txt deleted file mode 100644 index 03b684d4aa892c1fea6f70f2282235b147994d89..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/logs/log.txt +++ /dev/null @@ -1,260 +0,0 @@ -2023-03-31 23:37:49 - r - INFO: - Hyperparameters: -2023-03-31 23:37:49 - r - INFO: - ================================================================================ -2023-03-31 23:37:49 - r - INFO: - Name Value Type -2023-03-31 23:37:49 - r - INFO: - env_name CartPole-v1 -2023-03-31 23:37:49 - r - INFO: - new_step_api 1 -2023-03-31 23:37:49 - r - INFO: - wrapper None -2023-03-31 23:37:49 - r - INFO: - render 0 -2023-03-31 23:37:49 - r - INFO: - algo_name PER_DQN -2023-03-31 23:37:49 - r - INFO: - mode train -2023-03-31 23:37:49 - r - INFO: - seed 1 -2023-03-31 23:37:49 - r - INFO: - device cuda -2023-03-31 23:37:49 - r - INFO: - train_eps 200 -2023-03-31 23:37:49 - r - INFO: - test_eps 10 -2023-03-31 23:37:49 - r - INFO: - eval_eps 10 -2023-03-31 23:37:49 - r - INFO: - eval_per_episode 5 -2023-03-31 23:37:49 - r - INFO: - max_steps 200 -2023-03-31 23:37:49 - r - INFO: - load_checkpoint 0 -2023-03-31 23:37:49 - r - INFO: - load_path Train_CartPole-v1_PER_DQN -2023-03-31 23:37:49 - r - INFO: - show_fig 0 -2023-03-31 23:37:49 - r - INFO: - save_fig 1 -2023-03-31 23:37:49 - r - INFO: - epsilon_start 0.95 -2023-03-31 23:37:49 - r - INFO: - epsilon_end 0.01 -2023-03-31 23:37:49 - r - INFO: - epsilon_decay 500 -2023-03-31 23:37:49 - r - INFO: - hidden_dim 256 -2023-03-31 23:37:49 - r - INFO: - gamma 0.99 -2023-03-31 23:37:49 - r - INFO: - lr 0.0001 -2023-03-31 23:37:49 - r - INFO: - buffer_size 100000 -2023-03-31 23:37:49 - r - INFO: - per_alpha 0.6 -2023-03-31 23:37:49 - r - INFO: - per_beta 0.4 -2023-03-31 23:37:49 - r - INFO: - per_beta_annealing 0.001 -2023-03-31 23:37:49 - r - INFO: - per_epsilon 0.01 -2023-03-31 23:37:49 - r - INFO: - batch_size 64 -2023-03-31 23:37:49 - r - INFO: - target_update 4 -2023-03-31 23:37:49 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-03-31 23:37:49 - r - INFO: - task_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749 -2023-03-31 23:37:49 - r - INFO: - res_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749/results -2023-03-31 23:37:49 - r - INFO: - log_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749/logs -2023-03-31 23:37:49 - r - INFO: - traj_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749/traj -2023-03-31 23:37:49 - r - INFO: - tb_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-233749/tb_logs -2023-03-31 23:37:49 - r - INFO: - ================================================================================ -2023-03-31 23:37:49 - r - INFO: - n_states: 4, n_actions: 2 -2023-03-31 23:37:50 - r - INFO: - Start training! -2023-03-31 23:37:50 - r - INFO: - Env: CartPole-v1, Algorithm: PER_DQN, Device: cuda -2023-03-31 23:37:51 - r - INFO: - Episode: 1/200, Reward: 16.000, Step: 16 -2023-03-31 23:37:51 - r - INFO: - Episode: 2/200, Reward: 15.000, Step: 15 -2023-03-31 23:37:51 - r - INFO: - Episode: 3/200, Reward: 25.000, Step: 25 -2023-03-31 23:37:51 - r - INFO: - Episode: 4/200, Reward: 16.000, Step: 16 -2023-03-31 23:37:51 - r - INFO: - Episode: 5/200, Reward: 20.000, Step: 20 -2023-03-31 23:37:51 - r - INFO: - Current episode 5 has the best eval reward: 9.000 -2023-03-31 23:37:51 - r - INFO: - Episode: 6/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:51 - r - INFO: - Episode: 7/200, Reward: 24.000, Step: 24 -2023-03-31 23:37:51 - r - INFO: - Episode: 8/200, Reward: 20.000, Step: 20 -2023-03-31 23:37:51 - r - INFO: - Episode: 9/200, Reward: 20.000, Step: 20 -2023-03-31 23:37:51 - r - INFO: - Episode: 10/200, Reward: 25.000, Step: 25 -2023-03-31 23:37:51 - r - INFO: - Current episode 10 has the best eval reward: 9.100 -2023-03-31 23:37:51 - r - INFO: - Episode: 11/200, Reward: 9.000, Step: 9 -2023-03-31 23:37:51 - r - INFO: - Episode: 12/200, Reward: 23.000, Step: 23 -2023-03-31 23:37:51 - r - INFO: - Episode: 13/200, Reward: 14.000, Step: 14 -2023-03-31 23:37:51 - r - INFO: - Episode: 14/200, Reward: 12.000, Step: 12 -2023-03-31 23:37:51 - r - INFO: - Episode: 15/200, Reward: 11.000, Step: 11 -2023-03-31 23:37:51 - r - INFO: - Episode: 16/200, Reward: 17.000, Step: 17 -2023-03-31 23:37:51 - r - INFO: - Episode: 17/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:51 - r - INFO: - Episode: 18/200, Reward: 17.000, Step: 17 -2023-03-31 23:37:51 - r - INFO: - Episode: 19/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:51 - r - INFO: - Episode: 20/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:52 - r - INFO: - Episode: 21/200, Reward: 22.000, Step: 22 -2023-03-31 23:37:52 - r - INFO: - Episode: 22/200, Reward: 18.000, Step: 18 -2023-03-31 23:37:52 - r - INFO: - Episode: 23/200, Reward: 13.000, Step: 13 -2023-03-31 23:37:52 - r - INFO: - Episode: 24/200, Reward: 13.000, Step: 13 -2023-03-31 23:37:52 - r - INFO: - Episode: 25/200, Reward: 9.000, Step: 9 -2023-03-31 23:37:52 - r - INFO: - Current episode 25 has the best eval reward: 9.600 -2023-03-31 23:37:52 - r - INFO: - Episode: 26/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:52 - r - INFO: - Episode: 27/200, Reward: 13.000, Step: 13 -2023-03-31 23:37:52 - r - INFO: - Episode: 28/200, Reward: 11.000, Step: 11 -2023-03-31 23:37:52 - r - INFO: - Episode: 29/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:52 - r - INFO: - Episode: 30/200, Reward: 12.000, Step: 12 -2023-03-31 23:37:52 - r - INFO: - Episode: 31/200, Reward: 14.000, Step: 14 -2023-03-31 23:37:52 - r - INFO: - Episode: 32/200, Reward: 11.000, Step: 11 -2023-03-31 23:37:52 - r - INFO: - Episode: 33/200, Reward: 18.000, Step: 18 -2023-03-31 23:37:52 - r - INFO: - Episode: 34/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:52 - r - INFO: - Episode: 35/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:52 - r - INFO: - Episode: 36/200, Reward: 8.000, Step: 8 -2023-03-31 23:37:52 - r - INFO: - Episode: 37/200, Reward: 12.000, Step: 12 -2023-03-31 23:37:52 - r - INFO: - Episode: 38/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:52 - r - INFO: - Episode: 39/200, Reward: 11.000, Step: 11 -2023-03-31 23:37:52 - r - INFO: - Episode: 40/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:53 - r - INFO: - Episode: 41/200, Reward: 9.000, Step: 9 -2023-03-31 23:37:53 - r - INFO: - Episode: 42/200, Reward: 12.000, Step: 12 -2023-03-31 23:37:53 - r - INFO: - Episode: 43/200, Reward: 9.000, Step: 9 -2023-03-31 23:37:53 - r - INFO: - Episode: 44/200, Reward: 13.000, Step: 13 -2023-03-31 23:37:53 - r - INFO: - Episode: 45/200, Reward: 13.000, Step: 13 -2023-03-31 23:37:53 - r - INFO: - Episode: 46/200, Reward: 12.000, Step: 12 -2023-03-31 23:37:53 - r - INFO: - Episode: 47/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:53 - r - INFO: - Episode: 48/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:53 - r - INFO: - Episode: 49/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:53 - r - INFO: - Episode: 50/200, Reward: 13.000, Step: 13 -2023-03-31 23:37:53 - r - INFO: - Episode: 51/200, Reward: 10.000, Step: 10 -2023-03-31 23:37:53 - r - INFO: - Episode: 52/200, Reward: 15.000, Step: 15 -2023-03-31 23:37:53 - r - INFO: - Episode: 53/200, Reward: 18.000, Step: 18 -2023-03-31 23:37:53 - r - INFO: - Episode: 54/200, Reward: 18.000, Step: 18 -2023-03-31 23:37:53 - r - INFO: - Episode: 55/200, Reward: 16.000, Step: 16 -2023-03-31 23:37:53 - r - INFO: - Current episode 55 has the best eval reward: 28.000 -2023-03-31 23:37:53 - r - INFO: - Episode: 56/200, Reward: 47.000, Step: 47 -2023-03-31 23:37:54 - r - INFO: - Episode: 57/200, Reward: 87.000, Step: 87 -2023-03-31 23:37:54 - r - INFO: - Episode: 58/200, Reward: 20.000, Step: 20 -2023-03-31 23:37:54 - r - INFO: - Episode: 59/200, Reward: 47.000, Step: 47 -2023-03-31 23:37:54 - r - INFO: - Episode: 60/200, Reward: 17.000, Step: 17 -2023-03-31 23:37:54 - r - INFO: - Episode: 61/200, Reward: 37.000, Step: 37 -2023-03-31 23:37:54 - r - INFO: - Episode: 62/200, Reward: 43.000, Step: 43 -2023-03-31 23:37:54 - r - INFO: - Episode: 63/200, Reward: 33.000, Step: 33 -2023-03-31 23:37:55 - r - INFO: - Episode: 64/200, Reward: 18.000, Step: 18 -2023-03-31 23:37:55 - r - INFO: - Episode: 65/200, Reward: 29.000, Step: 29 -2023-03-31 23:37:55 - r - INFO: - Current episode 65 has the best eval reward: 30.700 -2023-03-31 23:37:55 - r - INFO: - Episode: 66/200, Reward: 30.000, Step: 30 -2023-03-31 23:37:55 - r - INFO: - Episode: 67/200, Reward: 23.000, Step: 23 -2023-03-31 23:37:55 - r - INFO: - Episode: 68/200, Reward: 26.000, Step: 26 -2023-03-31 23:37:55 - r - INFO: - Episode: 69/200, Reward: 18.000, Step: 18 -2023-03-31 23:37:55 - r - INFO: - Episode: 70/200, Reward: 20.000, Step: 20 -2023-03-31 23:37:55 - r - INFO: - Episode: 71/200, Reward: 26.000, Step: 26 -2023-03-31 23:37:55 - r - INFO: - Episode: 72/200, Reward: 16.000, Step: 16 -2023-03-31 23:37:55 - r - INFO: - Episode: 73/200, Reward: 23.000, Step: 23 -2023-03-31 23:37:56 - r - INFO: - Episode: 74/200, Reward: 30.000, Step: 30 -2023-03-31 23:37:56 - r - INFO: - Episode: 75/200, Reward: 23.000, Step: 23 -2023-03-31 23:37:56 - r - INFO: - Episode: 76/200, Reward: 26.000, Step: 26 -2023-03-31 23:37:56 - r - INFO: - Episode: 77/200, Reward: 34.000, Step: 34 -2023-03-31 23:37:56 - r - INFO: - Episode: 78/200, Reward: 29.000, Step: 29 -2023-03-31 23:37:56 - r - INFO: - Episode: 79/200, Reward: 32.000, Step: 32 -2023-03-31 23:37:56 - r - INFO: - Episode: 80/200, Reward: 23.000, Step: 23 -2023-03-31 23:37:57 - r - INFO: - Episode: 81/200, Reward: 32.000, Step: 32 -2023-03-31 23:37:57 - r - INFO: - Episode: 82/200, Reward: 72.000, Step: 72 -2023-03-31 23:37:57 - r - INFO: - Episode: 83/200, Reward: 105.000, Step: 105 -2023-03-31 23:37:58 - r - INFO: - Episode: 84/200, Reward: 63.000, Step: 63 -2023-03-31 23:37:58 - r - INFO: - Episode: 85/200, Reward: 119.000, Step: 119 -2023-03-31 23:37:59 - r - INFO: - Current episode 85 has the best eval reward: 86.500 -2023-03-31 23:37:59 - r - INFO: - Episode: 86/200, Reward: 52.000, Step: 52 -2023-03-31 23:37:59 - r - INFO: - Episode: 87/200, Reward: 155.000, Step: 155 -2023-03-31 23:38:00 - r - INFO: - Episode: 88/200, Reward: 79.000, Step: 79 -2023-03-31 23:38:00 - r - INFO: - Episode: 89/200, Reward: 44.000, Step: 44 -2023-03-31 23:38:00 - r - INFO: - Episode: 90/200, Reward: 140.000, Step: 140 -2023-03-31 23:38:01 - r - INFO: - Episode: 91/200, Reward: 86.000, Step: 86 -2023-03-31 23:38:01 - r - INFO: - Episode: 92/200, Reward: 183.000, Step: 183 -2023-03-31 23:38:02 - r - INFO: - Episode: 93/200, Reward: 112.000, Step: 112 -2023-03-31 23:38:03 - r - INFO: - Episode: 94/200, Reward: 190.000, Step: 190 -2023-03-31 23:38:03 - r - INFO: - Episode: 95/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:04 - r - INFO: - Current episode 95 has the best eval reward: 164.200 -2023-03-31 23:38:05 - r - INFO: - Episode: 96/200, Reward: 157.000, Step: 157 -2023-03-31 23:38:05 - r - INFO: - Episode: 97/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:06 - r - INFO: - Episode: 98/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:07 - r - INFO: - Episode: 99/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:08 - r - INFO: - Episode: 100/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:08 - r - INFO: - Current episode 100 has the best eval reward: 200.000 -2023-03-31 23:38:09 - r - INFO: - Episode: 101/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:10 - r - INFO: - Episode: 102/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:11 - r - INFO: - Episode: 103/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:12 - r - INFO: - Episode: 104/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:12 - r - INFO: - Episode: 105/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:13 - r - INFO: - Current episode 105 has the best eval reward: 200.000 -2023-03-31 23:38:14 - r - INFO: - Episode: 106/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:15 - r - INFO: - Episode: 107/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:16 - r - INFO: - Episode: 108/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:17 - r - INFO: - Episode: 109/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:17 - r - INFO: - Episode: 110/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:19 - r - INFO: - Episode: 111/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:20 - r - INFO: - Episode: 112/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:21 - r - INFO: - Episode: 113/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:22 - r - INFO: - Episode: 114/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:23 - r - INFO: - Episode: 115/200, Reward: 190.000, Step: 190 -2023-03-31 23:38:24 - r - INFO: - Episode: 116/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:25 - r - INFO: - Episode: 117/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:26 - r - INFO: - Episode: 118/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:27 - r - INFO: - Episode: 119/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:28 - r - INFO: - Episode: 120/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:28 - r - INFO: - Current episode 120 has the best eval reward: 200.000 -2023-03-31 23:38:29 - r - INFO: - Episode: 121/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:30 - r - INFO: - Episode: 122/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:31 - r - INFO: - Episode: 123/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:32 - r - INFO: - Episode: 124/200, Reward: 198.000, Step: 198 -2023-03-31 23:38:33 - r - INFO: - Episode: 125/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:35 - r - INFO: - Episode: 126/200, Reward: 188.000, Step: 188 -2023-03-31 23:38:36 - r - INFO: - Episode: 127/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:37 - r - INFO: - Episode: 128/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:38 - r - INFO: - Episode: 129/200, Reward: 175.000, Step: 175 -2023-03-31 23:38:39 - r - INFO: - Episode: 130/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:41 - r - INFO: - Episode: 131/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:42 - r - INFO: - Episode: 132/200, Reward: 172.000, Step: 172 -2023-03-31 23:38:43 - r - INFO: - Episode: 133/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:44 - r - INFO: - Episode: 134/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:45 - r - INFO: - Episode: 135/200, Reward: 179.000, Step: 179 -2023-03-31 23:38:46 - r - INFO: - Episode: 136/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:47 - r - INFO: - Episode: 137/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:49 - r - INFO: - Episode: 138/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:49 - r - INFO: - Episode: 139/200, Reward: 161.000, Step: 161 -2023-03-31 23:38:51 - r - INFO: - Episode: 140/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:52 - r - INFO: - Episode: 141/200, Reward: 150.000, Step: 150 -2023-03-31 23:38:53 - r - INFO: - Episode: 142/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:54 - r - INFO: - Episode: 143/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:55 - r - INFO: - Episode: 144/200, Reward: 170.000, Step: 170 -2023-03-31 23:38:56 - r - INFO: - Episode: 145/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:58 - r - INFO: - Episode: 146/200, Reward: 200.000, Step: 200 -2023-03-31 23:38:59 - r - INFO: - Episode: 147/200, Reward: 160.000, Step: 160 -2023-03-31 23:39:00 - r - INFO: - Episode: 148/200, Reward: 160.000, Step: 160 -2023-03-31 23:39:01 - r - INFO: - Episode: 149/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:02 - r - INFO: - Episode: 150/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:04 - r - INFO: - Episode: 151/200, Reward: 177.000, Step: 177 -2023-03-31 23:39:05 - r - INFO: - Episode: 152/200, Reward: 193.000, Step: 193 -2023-03-31 23:39:06 - r - INFO: - Episode: 153/200, Reward: 182.000, Step: 182 -2023-03-31 23:39:08 - r - INFO: - Episode: 154/200, Reward: 176.000, Step: 176 -2023-03-31 23:39:09 - r - INFO: - Episode: 155/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:11 - r - INFO: - Episode: 156/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:12 - r - INFO: - Episode: 157/200, Reward: 171.000, Step: 171 -2023-03-31 23:39:13 - r - INFO: - Episode: 158/200, Reward: 192.000, Step: 192 -2023-03-31 23:39:14 - r - INFO: - Episode: 159/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:15 - r - INFO: - Episode: 160/200, Reward: 179.000, Step: 179 -2023-03-31 23:39:17 - r - INFO: - Episode: 161/200, Reward: 177.000, Step: 177 -2023-03-31 23:39:18 - r - INFO: - Episode: 162/200, Reward: 199.000, Step: 199 -2023-03-31 23:39:19 - r - INFO: - Episode: 163/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:21 - r - INFO: - Episode: 164/200, Reward: 186.000, Step: 186 -2023-03-31 23:39:22 - r - INFO: - Episode: 165/200, Reward: 178.000, Step: 178 -2023-03-31 23:39:23 - r - INFO: - Episode: 166/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:25 - r - INFO: - Episode: 167/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:26 - r - INFO: - Episode: 168/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:27 - r - INFO: - Episode: 169/200, Reward: 179.000, Step: 179 -2023-03-31 23:39:29 - r - INFO: - Episode: 170/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:31 - r - INFO: - Episode: 171/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:32 - r - INFO: - Episode: 172/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:34 - r - INFO: - Episode: 173/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:35 - r - INFO: - Episode: 174/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:36 - r - INFO: - Episode: 175/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:37 - r - INFO: - Current episode 175 has the best eval reward: 200.000 -2023-03-31 23:39:38 - r - INFO: - Episode: 176/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:40 - r - INFO: - Episode: 177/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:41 - r - INFO: - Episode: 178/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:43 - r - INFO: - Episode: 179/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:44 - r - INFO: - Episode: 180/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:45 - r - INFO: - Current episode 180 has the best eval reward: 200.000 -2023-03-31 23:39:46 - r - INFO: - Episode: 181/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:47 - r - INFO: - Episode: 182/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:49 - r - INFO: - Episode: 183/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:50 - r - INFO: - Episode: 184/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:52 - r - INFO: - Episode: 185/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:52 - r - INFO: - Current episode 185 has the best eval reward: 200.000 -2023-03-31 23:39:54 - r - INFO: - Episode: 186/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:55 - r - INFO: - Episode: 187/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:57 - r - INFO: - Episode: 188/200, Reward: 200.000, Step: 200 -2023-03-31 23:39:58 - r - INFO: - Episode: 189/200, Reward: 200.000, Step: 200 -2023-03-31 23:40:00 - r - INFO: - Episode: 190/200, Reward: 200.000, Step: 200 -2023-03-31 23:40:00 - r - INFO: - Current episode 190 has the best eval reward: 200.000 -2023-03-31 23:40:02 - r - INFO: - Episode: 191/200, Reward: 200.000, Step: 200 -2023-03-31 23:40:03 - r - INFO: - Episode: 192/200, Reward: 200.000, Step: 200 -2023-03-31 23:40:05 - r - INFO: - Episode: 193/200, Reward: 200.000, Step: 200 -2023-03-31 23:40:06 - r - INFO: - Episode: 194/200, Reward: 200.000, Step: 200 -2023-03-31 23:40:08 - r - INFO: - Episode: 195/200, Reward: 200.000, Step: 200 -2023-03-31 23:40:09 - r - INFO: - Current episode 195 has the best eval reward: 200.000 -2023-03-31 23:40:10 - r - INFO: - Episode: 196/200, Reward: 200.000, Step: 200 -2023-03-31 23:40:13 - r - INFO: - Episode: 197/200, Reward: 200.000, Step: 200 -2023-03-31 23:40:17 - r - INFO: - Episode: 198/200, Reward: 200.000, Step: 200 -2023-03-31 23:40:24 - r - INFO: - Episode: 199/200, Reward: 200.000, Step: 200 -2023-03-31 23:40:29 - r - INFO: - Episode: 200/200, Reward: 200.000, Step: 200 -2023-03-31 23:40:32 - r - INFO: - Current episode 200 has the best eval reward: 200.000 -2023-03-31 23:40:32 - r - INFO: - Finish training! diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/models/checkpoint.pt b/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/models/checkpoint.pt deleted file mode 100644 index 2cc0fd9a643ca074843f2f5fb68af45d3fb79a03..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c4ba22dbfbe3211e48c45027f9c4efb9981cdf6ddbd972b57201fb68ca90d2fd -size 272471 diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/learning_curve.png b/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/learning_curve.png deleted file mode 100644 index a135ef2b38b5e2b08e174562b1fc70a5b1f7600b..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/learning_curve.png and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/res.csv b/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/res.csv deleted file mode 100644 index 5fb086b610314270fadc3d50b77c9d9493b2abd2..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/results/res.csv +++ /dev/null @@ -1,201 +0,0 @@ -episodes,rewards,steps -0,16.0,16 -1,15.0,15 -2,25.0,25 -3,16.0,16 -4,20.0,20 -5,10.0,10 -6,24.0,24 -7,20.0,20 -8,20.0,20 -9,25.0,25 -10,9.0,9 -11,23.0,23 -12,14.0,14 -13,12.0,12 -14,11.0,11 -15,17.0,17 -16,10.0,10 -17,17.0,17 -18,10.0,10 -19,10.0,10 -20,22.0,22 -21,18.0,18 -22,13.0,13 -23,13.0,13 -24,9.0,9 -25,10.0,10 -26,13.0,13 -27,11.0,11 -28,10.0,10 -29,12.0,12 -30,14.0,14 -31,11.0,11 -32,18.0,18 -33,10.0,10 -34,10.0,10 -35,8.0,8 -36,12.0,12 -37,10.0,10 -38,11.0,11 -39,10.0,10 -40,9.0,9 -41,12.0,12 -42,9.0,9 -43,13.0,13 -44,13.0,13 -45,12.0,12 -46,10.0,10 -47,10.0,10 -48,10.0,10 -49,13.0,13 -50,10.0,10 -51,15.0,15 -52,18.0,18 -53,18.0,18 -54,16.0,16 -55,47.0,47 -56,87.0,87 -57,20.0,20 -58,47.0,47 -59,17.0,17 -60,37.0,37 -61,43.0,43 -62,33.0,33 -63,18.0,18 -64,29.0,29 -65,30.0,30 -66,23.0,23 -67,26.0,26 -68,18.0,18 -69,20.0,20 -70,26.0,26 -71,16.0,16 -72,23.0,23 -73,30.0,30 -74,23.0,23 -75,26.0,26 -76,34.0,34 -77,29.0,29 -78,32.0,32 -79,23.0,23 -80,32.0,32 -81,72.0,72 -82,105.0,105 -83,63.0,63 -84,119.0,119 -85,52.0,52 -86,155.0,155 -87,79.0,79 -88,44.0,44 -89,140.0,140 -90,86.0,86 -91,183.0,183 -92,112.0,112 -93,190.0,190 -94,200.0,200 -95,157.0,157 -96,200.0,200 -97,200.0,200 -98,200.0,200 -99,200.0,200 -100,200.0,200 -101,200.0,200 -102,200.0,200 -103,200.0,200 -104,200.0,200 -105,200.0,200 -106,200.0,200 -107,200.0,200 -108,200.0,200 -109,200.0,200 -110,200.0,200 -111,200.0,200 -112,200.0,200 -113,200.0,200 -114,190.0,190 -115,200.0,200 -116,200.0,200 -117,200.0,200 -118,200.0,200 -119,200.0,200 -120,200.0,200 -121,200.0,200 -122,200.0,200 -123,198.0,198 -124,200.0,200 -125,188.0,188 -126,200.0,200 -127,200.0,200 -128,175.0,175 -129,200.0,200 -130,200.0,200 -131,172.0,172 -132,200.0,200 -133,200.0,200 -134,179.0,179 -135,200.0,200 -136,200.0,200 -137,200.0,200 -138,161.0,161 -139,200.0,200 -140,150.0,150 -141,200.0,200 -142,200.0,200 -143,170.0,170 -144,200.0,200 -145,200.0,200 -146,160.0,160 -147,160.0,160 -148,200.0,200 -149,200.0,200 -150,177.0,177 -151,193.0,193 -152,182.0,182 -153,176.0,176 -154,200.0,200 -155,200.0,200 -156,171.0,171 -157,192.0,192 -158,200.0,200 -159,179.0,179 -160,177.0,177 -161,199.0,199 -162,200.0,200 -163,186.0,186 -164,178.0,178 -165,200.0,200 -166,200.0,200 -167,200.0,200 -168,179.0,179 -169,200.0,200 -170,200.0,200 -171,200.0,200 -172,200.0,200 -173,200.0,200 -174,200.0,200 -175,200.0,200 -176,200.0,200 -177,200.0,200 -178,200.0,200 -179,200.0,200 -180,200.0,200 -181,200.0,200 -182,200.0,200 -183,200.0,200 -184,200.0,200 -185,200.0,200 -186,200.0,200 -187,200.0,200 -188,200.0,200 -189,200.0,200 -190,200.0,200 -191,200.0,200 -192,200.0,200 -193,200.0,200 -194,200.0,200 -195,200.0,200 -196,200.0,200 -197,200.0,200 -198,200.0,200 -199,200.0,200 diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/tb_logs/events.out.tfevents.1680277069.DESKTOP-H34HQIQ.305216.0 b/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/tb_logs/events.out.tfevents.1680277069.DESKTOP-H34HQIQ.305216.0 deleted file mode 100644 index 87c8dfa061d1e18c0ff30c2e3765c1547c4018bd..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PERQue_DQN_20230331-233749/tb_logs/events.out.tfevents.1680277069.DESKTOP-H34HQIQ.305216.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1ead3d7b1b3efd92eecfb7f314b1922f372c92614db7819dbfa6e06770b12d37 -size 40 diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/config.yaml b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/config.yaml deleted file mode 100644 index f9959e23bb897e5ce0aa6bc94d1c20dc032aed88..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/config.yaml +++ /dev/null @@ -1,48 +0,0 @@ -general_cfg: - algo_name: PER_DQN - device: cuda - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: false - load_path: Train_CartPole-v1_PER_DQN - max_steps: 200 - mode: train - new_step_api: true - render: false - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 200 - wrapper: null -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - hidden_dim: 256 - lr: 0.0001 - per_alpha: 0.6 - per_beta: 0.4 - per_beta_annealing: 0.001 - per_epsilon: 0.01 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/logs/log.txt b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/logs/log.txt deleted file mode 100644 index ebe5dc335588441dce4e01cb0981373f17a82a12..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/logs/log.txt +++ /dev/null @@ -1,267 +0,0 @@ -2023-03-31 22:58:15 - r - INFO: - Hyperparameters: -2023-03-31 22:58:15 - r - INFO: - ================================================================================ -2023-03-31 22:58:15 - r - INFO: - Name Value Type -2023-03-31 22:58:15 - r - INFO: - env_name CartPole-v1 -2023-03-31 22:58:15 - r - INFO: - new_step_api 1 -2023-03-31 22:58:15 - r - INFO: - wrapper None -2023-03-31 22:58:15 - r - INFO: - render 0 -2023-03-31 22:58:15 - r - INFO: - algo_name PER_DQN -2023-03-31 22:58:15 - r - INFO: - mode train -2023-03-31 22:58:15 - r - INFO: - seed 1 -2023-03-31 22:58:15 - r - INFO: - device cuda -2023-03-31 22:58:15 - r - INFO: - train_eps 200 -2023-03-31 22:58:15 - r - INFO: - test_eps 10 -2023-03-31 22:58:15 - r - INFO: - eval_eps 10 -2023-03-31 22:58:15 - r - INFO: - eval_per_episode 5 -2023-03-31 22:58:15 - r - INFO: - max_steps 200 -2023-03-31 22:58:15 - r - INFO: - load_checkpoint 0 -2023-03-31 22:58:15 - r - INFO: - load_path Train_CartPole-v1_PER_DQN -2023-03-31 22:58:15 - r - INFO: - show_fig 0 -2023-03-31 22:58:15 - r - INFO: - save_fig 1 -2023-03-31 22:58:15 - r - INFO: - epsilon_start 0.95 -2023-03-31 22:58:15 - r - INFO: - epsilon_end 0.01 -2023-03-31 22:58:15 - r - INFO: - epsilon_decay 500 -2023-03-31 22:58:15 - r - INFO: - hidden_dim 256 -2023-03-31 22:58:15 - r - INFO: - gamma 0.99 -2023-03-31 22:58:15 - r - INFO: - lr 0.0001 -2023-03-31 22:58:15 - r - INFO: - buffer_size 100000 -2023-03-31 22:58:15 - r - INFO: - per_alpha 0.6 -2023-03-31 22:58:15 - r - INFO: - per_beta 0.4 -2023-03-31 22:58:15 - r - INFO: - per_beta_annealing 0.001 -2023-03-31 22:58:15 - r - INFO: - per_epsilon 0.01 -2023-03-31 22:58:15 - r - INFO: - batch_size 64 -2023-03-31 22:58:15 - r - INFO: - target_update 4 -2023-03-31 22:58:15 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-03-31 22:58:15 - r - INFO: - task_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815 -2023-03-31 22:58:15 - r - INFO: - res_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815/results -2023-03-31 22:58:15 - r - INFO: - log_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815/logs -2023-03-31 22:58:15 - r - INFO: - traj_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815/traj -2023-03-31 22:58:15 - r - INFO: - tb_dir C:\Users\24438\Desktop\joyrl-offline/tasks/Train_CartPole-v1_PER_DQN_20230331-225815/tb_logs -2023-03-31 22:58:15 - r - INFO: - ================================================================================ -2023-03-31 22:58:15 - r - INFO: - n_states: 4, n_actions: 2 -2023-03-31 22:58:16 - r - INFO: - Start training! -2023-03-31 22:58:16 - r - INFO: - Env: CartPole-v1, Algorithm: PER_DQN, Device: cuda -2023-03-31 22:58:17 - r - INFO: - Episode: 1/200, Reward: 15.000, Step: 15 -2023-03-31 22:58:17 - r - INFO: - Episode: 2/200, Reward: 29.000, Step: 29 -2023-03-31 22:58:17 - r - INFO: - Episode: 3/200, Reward: 13.000, Step: 13 -2023-03-31 22:58:17 - r - INFO: - Episode: 4/200, Reward: 14.000, Step: 14 -2023-03-31 22:58:17 - r - INFO: - Episode: 5/200, Reward: 11.000, Step: 11 -2023-03-31 22:58:17 - r - INFO: - Current episode 5 has the best eval reward: 9.600 -2023-03-31 22:58:17 - r - INFO: - Episode: 6/200, Reward: 39.000, Step: 39 -2023-03-31 22:58:18 - r - INFO: - Episode: 7/200, Reward: 35.000, Step: 35 -2023-03-31 22:58:18 - r - INFO: - Episode: 8/200, Reward: 16.000, Step: 16 -2023-03-31 22:58:18 - r - INFO: - Episode: 9/200, Reward: 13.000, Step: 13 -2023-03-31 22:58:18 - r - INFO: - Episode: 10/200, Reward: 12.000, Step: 12 -2023-03-31 22:58:18 - r - INFO: - Episode: 11/200, Reward: 11.000, Step: 11 -2023-03-31 22:58:18 - r - INFO: - Episode: 12/200, Reward: 34.000, Step: 34 -2023-03-31 22:58:18 - r - INFO: - Episode: 13/200, Reward: 15.000, Step: 15 -2023-03-31 22:58:18 - r - INFO: - Episode: 14/200, Reward: 23.000, Step: 23 -2023-03-31 22:58:18 - r - INFO: - Episode: 15/200, Reward: 9.000, Step: 9 -2023-03-31 22:58:19 - r - INFO: - Episode: 16/200, Reward: 19.000, Step: 19 -2023-03-31 22:58:19 - r - INFO: - Episode: 17/200, Reward: 9.000, Step: 9 -2023-03-31 22:58:19 - r - INFO: - Episode: 18/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:19 - r - INFO: - Episode: 19/200, Reward: 16.000, Step: 16 -2023-03-31 22:58:19 - r - INFO: - Episode: 20/200, Reward: 19.000, Step: 19 -2023-03-31 22:58:19 - r - INFO: - Current episode 20 has the best eval reward: 9.700 -2023-03-31 22:58:19 - r - INFO: - Episode: 21/200, Reward: 11.000, Step: 11 -2023-03-31 22:58:19 - r - INFO: - Episode: 22/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:19 - r - INFO: - Episode: 23/200, Reward: 14.000, Step: 14 -2023-03-31 22:58:19 - r - INFO: - Episode: 24/200, Reward: 12.000, Step: 12 -2023-03-31 22:58:19 - r - INFO: - Episode: 25/200, Reward: 16.000, Step: 16 -2023-03-31 22:58:19 - r - INFO: - Episode: 26/200, Reward: 11.000, Step: 11 -2023-03-31 22:58:19 - r - INFO: - Episode: 27/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:19 - r - INFO: - Episode: 28/200, Reward: 16.000, Step: 16 -2023-03-31 22:58:19 - r - INFO: - Episode: 29/200, Reward: 12.000, Step: 12 -2023-03-31 22:58:20 - r - INFO: - Episode: 30/200, Reward: 16.000, Step: 16 -2023-03-31 22:58:20 - r - INFO: - Episode: 31/200, Reward: 11.000, Step: 11 -2023-03-31 22:58:20 - r - INFO: - Episode: 32/200, Reward: 8.000, Step: 8 -2023-03-31 22:58:20 - r - INFO: - Episode: 33/200, Reward: 8.000, Step: 8 -2023-03-31 22:58:20 - r - INFO: - Episode: 34/200, Reward: 12.000, Step: 12 -2023-03-31 22:58:20 - r - INFO: - Episode: 35/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:20 - r - INFO: - Episode: 36/200, Reward: 9.000, Step: 9 -2023-03-31 22:58:20 - r - INFO: - Episode: 37/200, Reward: 11.000, Step: 11 -2023-03-31 22:58:20 - r - INFO: - Episode: 38/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:20 - r - INFO: - Episode: 39/200, Reward: 11.000, Step: 11 -2023-03-31 22:58:20 - r - INFO: - Episode: 40/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:20 - r - INFO: - Episode: 41/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:20 - r - INFO: - Episode: 42/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:20 - r - INFO: - Episode: 43/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:20 - r - INFO: - Episode: 44/200, Reward: 9.000, Step: 9 -2023-03-31 22:58:20 - r - INFO: - Episode: 45/200, Reward: 11.000, Step: 11 -2023-03-31 22:58:21 - r - INFO: - Current episode 45 has the best eval reward: 10.600 -2023-03-31 22:58:21 - r - INFO: - Episode: 46/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:21 - r - INFO: - Episode: 47/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:21 - r - INFO: - Episode: 48/200, Reward: 11.000, Step: 11 -2023-03-31 22:58:21 - r - INFO: - Episode: 49/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:21 - r - INFO: - Episode: 50/200, Reward: 13.000, Step: 13 -2023-03-31 22:58:21 - r - INFO: - Episode: 51/200, Reward: 18.000, Step: 18 -2023-03-31 22:58:21 - r - INFO: - Episode: 52/200, Reward: 12.000, Step: 12 -2023-03-31 22:58:21 - r - INFO: - Episode: 53/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:21 - r - INFO: - Episode: 54/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:21 - r - INFO: - Episode: 55/200, Reward: 11.000, Step: 11 -2023-03-31 22:58:21 - r - INFO: - Episode: 56/200, Reward: 8.000, Step: 8 -2023-03-31 22:58:21 - r - INFO: - Episode: 57/200, Reward: 16.000, Step: 16 -2023-03-31 22:58:21 - r - INFO: - Episode: 58/200, Reward: 11.000, Step: 11 -2023-03-31 22:58:21 - r - INFO: - Episode: 59/200, Reward: 9.000, Step: 9 -2023-03-31 22:58:22 - r - INFO: - Episode: 60/200, Reward: 9.000, Step: 9 -2023-03-31 22:58:22 - r - INFO: - Episode: 61/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:22 - r - INFO: - Episode: 62/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:22 - r - INFO: - Episode: 63/200, Reward: 9.000, Step: 9 -2023-03-31 22:58:22 - r - INFO: - Episode: 64/200, Reward: 8.000, Step: 8 -2023-03-31 22:58:22 - r - INFO: - Episode: 65/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:22 - r - INFO: - Episode: 66/200, Reward: 9.000, Step: 9 -2023-03-31 22:58:22 - r - INFO: - Episode: 67/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:22 - r - INFO: - Episode: 68/200, Reward: 12.000, Step: 12 -2023-03-31 22:58:22 - r - INFO: - Episode: 69/200, Reward: 12.000, Step: 12 -2023-03-31 22:58:22 - r - INFO: - Episode: 70/200, Reward: 12.000, Step: 12 -2023-03-31 22:58:22 - r - INFO: - Current episode 70 has the best eval reward: 12.500 -2023-03-31 22:58:22 - r - INFO: - Episode: 71/200, Reward: 10.000, Step: 10 -2023-03-31 22:58:22 - r - INFO: - Episode: 72/200, Reward: 13.000, Step: 13 -2023-03-31 22:58:22 - r - INFO: - Episode: 73/200, Reward: 20.000, Step: 20 -2023-03-31 22:58:23 - r - INFO: - Episode: 74/200, Reward: 12.000, Step: 12 -2023-03-31 22:58:23 - r - INFO: - Episode: 75/200, Reward: 13.000, Step: 13 -2023-03-31 22:58:23 - r - INFO: - Current episode 75 has the best eval reward: 13.000 -2023-03-31 22:58:23 - r - INFO: - Episode: 76/200, Reward: 15.000, Step: 15 -2023-03-31 22:58:23 - r - INFO: - Episode: 77/200, Reward: 13.000, Step: 13 -2023-03-31 22:58:23 - r - INFO: - Episode: 78/200, Reward: 19.000, Step: 19 -2023-03-31 22:58:23 - r - INFO: - Episode: 79/200, Reward: 14.000, Step: 14 -2023-03-31 22:58:23 - r - INFO: - Episode: 80/200, Reward: 12.000, Step: 12 -2023-03-31 22:58:23 - r - INFO: - Current episode 80 has the best eval reward: 15.400 -2023-03-31 22:58:23 - r - INFO: - Episode: 81/200, Reward: 13.000, Step: 13 -2023-03-31 22:58:24 - r - INFO: - Episode: 82/200, Reward: 14.000, Step: 14 -2023-03-31 22:58:24 - r - INFO: - Episode: 83/200, Reward: 13.000, Step: 13 -2023-03-31 22:58:24 - r - INFO: - Episode: 84/200, Reward: 13.000, Step: 13 -2023-03-31 22:58:24 - r - INFO: - Episode: 85/200, Reward: 14.000, Step: 14 -2023-03-31 22:58:24 - r - INFO: - Current episode 85 has the best eval reward: 16.000 -2023-03-31 22:58:24 - r - INFO: - Episode: 86/200, Reward: 18.000, Step: 18 -2023-03-31 22:58:24 - r - INFO: - Episode: 87/200, Reward: 23.000, Step: 23 -2023-03-31 22:58:24 - r - INFO: - Episode: 88/200, Reward: 13.000, Step: 13 -2023-03-31 22:58:24 - r - INFO: - Episode: 89/200, Reward: 13.000, Step: 13 -2023-03-31 22:58:25 - r - INFO: - Episode: 90/200, Reward: 21.000, Step: 21 -2023-03-31 22:58:25 - r - INFO: - Current episode 90 has the best eval reward: 18.800 -2023-03-31 22:58:25 - r - INFO: - Episode: 91/200, Reward: 17.000, Step: 17 -2023-03-31 22:58:25 - r - INFO: - Episode: 92/200, Reward: 23.000, Step: 23 -2023-03-31 22:58:25 - r - INFO: - Episode: 93/200, Reward: 16.000, Step: 16 -2023-03-31 22:58:25 - r - INFO: - Episode: 94/200, Reward: 22.000, Step: 22 -2023-03-31 22:58:25 - r - INFO: - Episode: 95/200, Reward: 23.000, Step: 23 -2023-03-31 22:58:26 - r - INFO: - Current episode 95 has the best eval reward: 22.000 -2023-03-31 22:58:26 - r - INFO: - Episode: 96/200, Reward: 14.000, Step: 14 -2023-03-31 22:58:26 - r - INFO: - Episode: 97/200, Reward: 20.000, Step: 20 -2023-03-31 22:58:26 - r - INFO: - Episode: 98/200, Reward: 24.000, Step: 24 -2023-03-31 22:58:26 - r - INFO: - Episode: 99/200, Reward: 21.000, Step: 21 -2023-03-31 22:58:26 - r - INFO: - Episode: 100/200, Reward: 22.000, Step: 22 -2023-03-31 22:58:27 - r - INFO: - Episode: 101/200, Reward: 21.000, Step: 21 -2023-03-31 22:58:27 - r - INFO: - Episode: 102/200, Reward: 19.000, Step: 19 -2023-03-31 22:58:27 - r - INFO: - Episode: 103/200, Reward: 18.000, Step: 18 -2023-03-31 22:58:27 - r - INFO: - Episode: 104/200, Reward: 18.000, Step: 18 -2023-03-31 22:58:27 - r - INFO: - Episode: 105/200, Reward: 23.000, Step: 23 -2023-03-31 22:58:27 - r - INFO: - Episode: 106/200, Reward: 16.000, Step: 16 -2023-03-31 22:58:27 - r - INFO: - Episode: 107/200, Reward: 19.000, Step: 19 -2023-03-31 22:58:28 - r - INFO: - Episode: 108/200, Reward: 18.000, Step: 18 -2023-03-31 22:58:28 - r - INFO: - Episode: 109/200, Reward: 21.000, Step: 21 -2023-03-31 22:58:28 - r - INFO: - Episode: 110/200, Reward: 24.000, Step: 24 -2023-03-31 22:58:28 - r - INFO: - Current episode 110 has the best eval reward: 23.300 -2023-03-31 22:58:28 - r - INFO: - Episode: 111/200, Reward: 24.000, Step: 24 -2023-03-31 22:58:28 - r - INFO: - Episode: 112/200, Reward: 27.000, Step: 27 -2023-03-31 22:58:29 - r - INFO: - Episode: 113/200, Reward: 35.000, Step: 35 -2023-03-31 22:58:29 - r - INFO: - Episode: 114/200, Reward: 23.000, Step: 23 -2023-03-31 22:58:29 - r - INFO: - Episode: 115/200, Reward: 29.000, Step: 29 -2023-03-31 22:58:29 - r - INFO: - Current episode 115 has the best eval reward: 24.100 -2023-03-31 22:58:29 - r - INFO: - Episode: 116/200, Reward: 25.000, Step: 25 -2023-03-31 22:58:29 - r - INFO: - Episode: 117/200, Reward: 20.000, Step: 20 -2023-03-31 22:58:30 - r - INFO: - Episode: 118/200, Reward: 23.000, Step: 23 -2023-03-31 22:58:30 - r - INFO: - Episode: 119/200, Reward: 21.000, Step: 21 -2023-03-31 22:58:30 - r - INFO: - Episode: 120/200, Reward: 23.000, Step: 23 -2023-03-31 22:58:30 - r - INFO: - Current episode 120 has the best eval reward: 24.500 -2023-03-31 22:58:30 - r - INFO: - Episode: 121/200, Reward: 17.000, Step: 17 -2023-03-31 22:58:30 - r - INFO: - Episode: 122/200, Reward: 19.000, Step: 19 -2023-03-31 22:58:30 - r - INFO: - Episode: 123/200, Reward: 19.000, Step: 19 -2023-03-31 22:58:31 - r - INFO: - Episode: 124/200, Reward: 21.000, Step: 21 -2023-03-31 22:58:31 - r - INFO: - Episode: 125/200, Reward: 24.000, Step: 24 -2023-03-31 22:58:31 - r - INFO: - Current episode 125 has the best eval reward: 25.600 -2023-03-31 22:58:31 - r - INFO: - Episode: 126/200, Reward: 23.000, Step: 23 -2023-03-31 22:58:31 - r - INFO: - Episode: 127/200, Reward: 22.000, Step: 22 -2023-03-31 22:58:31 - r - INFO: - Episode: 128/200, Reward: 23.000, Step: 23 -2023-03-31 22:58:31 - r - INFO: - Episode: 129/200, Reward: 22.000, Step: 22 -2023-03-31 22:58:32 - r - INFO: - Episode: 130/200, Reward: 28.000, Step: 28 -2023-03-31 22:58:32 - r - INFO: - Current episode 130 has the best eval reward: 29.800 -2023-03-31 22:58:32 - r - INFO: - Episode: 131/200, Reward: 32.000, Step: 32 -2023-03-31 22:58:32 - r - INFO: - Episode: 132/200, Reward: 35.000, Step: 35 -2023-03-31 22:58:32 - r - INFO: - Episode: 133/200, Reward: 27.000, Step: 27 -2023-03-31 22:58:33 - r - INFO: - Episode: 134/200, Reward: 24.000, Step: 24 -2023-03-31 22:58:33 - r - INFO: - Episode: 135/200, Reward: 37.000, Step: 37 -2023-03-31 22:58:33 - r - INFO: - Current episode 135 has the best eval reward: 35.700 -2023-03-31 22:58:33 - r - INFO: - Episode: 136/200, Reward: 33.000, Step: 33 -2023-03-31 22:58:34 - r - INFO: - Episode: 137/200, Reward: 39.000, Step: 39 -2023-03-31 22:58:34 - r - INFO: - Episode: 138/200, Reward: 24.000, Step: 24 -2023-03-31 22:58:34 - r - INFO: - Episode: 139/200, Reward: 24.000, Step: 24 -2023-03-31 22:58:34 - r - INFO: - Episode: 140/200, Reward: 40.000, Step: 40 -2023-03-31 22:58:35 - r - INFO: - Current episode 140 has the best eval reward: 40.200 -2023-03-31 22:58:35 - r - INFO: - Episode: 141/200, Reward: 31.000, Step: 31 -2023-03-31 22:58:35 - r - INFO: - Episode: 142/200, Reward: 30.000, Step: 30 -2023-03-31 22:58:35 - r - INFO: - Episode: 143/200, Reward: 25.000, Step: 25 -2023-03-31 22:58:35 - r - INFO: - Episode: 144/200, Reward: 23.000, Step: 23 -2023-03-31 22:58:35 - r - INFO: - Episode: 145/200, Reward: 29.000, Step: 29 -2023-03-31 22:58:36 - r - INFO: - Current episode 145 has the best eval reward: 58.500 -2023-03-31 22:58:36 - r - INFO: - Episode: 146/200, Reward: 51.000, Step: 51 -2023-03-31 22:58:37 - r - INFO: - Episode: 147/200, Reward: 73.000, Step: 73 -2023-03-31 22:58:37 - r - INFO: - Episode: 148/200, Reward: 38.000, Step: 38 -2023-03-31 22:58:37 - r - INFO: - Episode: 149/200, Reward: 37.000, Step: 37 -2023-03-31 22:58:37 - r - INFO: - Episode: 150/200, Reward: 32.000, Step: 32 -2023-03-31 22:58:38 - r - INFO: - Episode: 151/200, Reward: 43.000, Step: 43 -2023-03-31 22:58:38 - r - INFO: - Episode: 152/200, Reward: 29.000, Step: 29 -2023-03-31 22:58:38 - r - INFO: - Episode: 153/200, Reward: 33.000, Step: 33 -2023-03-31 22:58:38 - r - INFO: - Episode: 154/200, Reward: 31.000, Step: 31 -2023-03-31 22:58:39 - r - INFO: - Episode: 155/200, Reward: 41.000, Step: 41 -2023-03-31 22:58:39 - r - INFO: - Episode: 156/200, Reward: 79.000, Step: 79 -2023-03-31 22:58:40 - r - INFO: - Episode: 157/200, Reward: 47.000, Step: 47 -2023-03-31 22:58:40 - r - INFO: - Episode: 158/200, Reward: 32.000, Step: 32 -2023-03-31 22:58:40 - r - INFO: - Episode: 159/200, Reward: 36.000, Step: 36 -2023-03-31 22:58:41 - r - INFO: - Episode: 160/200, Reward: 76.000, Step: 76 -2023-03-31 22:58:41 - r - INFO: - Current episode 160 has the best eval reward: 75.000 -2023-03-31 22:58:41 - r - INFO: - Episode: 161/200, Reward: 73.000, Step: 73 -2023-03-31 22:58:42 - r - INFO: - Episode: 162/200, Reward: 59.000, Step: 59 -2023-03-31 22:58:42 - r - INFO: - Episode: 163/200, Reward: 102.000, Step: 102 -2023-03-31 22:58:43 - r - INFO: - Episode: 164/200, Reward: 87.000, Step: 87 -2023-03-31 22:58:44 - r - INFO: - Episode: 165/200, Reward: 94.000, Step: 94 -2023-03-31 22:58:44 - r - INFO: - Current episode 165 has the best eval reward: 143.300 -2023-03-31 22:58:45 - r - INFO: - Episode: 166/200, Reward: 116.000, Step: 116 -2023-03-31 22:58:46 - r - INFO: - Episode: 167/200, Reward: 135.000, Step: 135 -2023-03-31 22:58:47 - r - INFO: - Episode: 168/200, Reward: 140.000, Step: 140 -2023-03-31 22:58:48 - r - INFO: - Episode: 169/200, Reward: 167.000, Step: 167 -2023-03-31 22:58:49 - r - INFO: - Episode: 170/200, Reward: 128.000, Step: 128 -2023-03-31 22:58:50 - r - INFO: - Current episode 170 has the best eval reward: 157.400 -2023-03-31 22:58:51 - r - INFO: - Episode: 171/200, Reward: 200.000, Step: 200 -2023-03-31 22:58:52 - r - INFO: - Episode: 172/200, Reward: 135.000, Step: 135 -2023-03-31 22:58:53 - r - INFO: - Episode: 173/200, Reward: 163.000, Step: 163 -2023-03-31 22:58:54 - r - INFO: - Episode: 174/200, Reward: 180.000, Step: 180 -2023-03-31 22:58:56 - r - INFO: - Episode: 175/200, Reward: 185.000, Step: 185 -2023-03-31 22:58:56 - r - INFO: - Current episode 175 has the best eval reward: 165.700 -2023-03-31 22:58:57 - r - INFO: - Episode: 176/200, Reward: 200.000, Step: 200 -2023-03-31 22:58:59 - r - INFO: - Episode: 177/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:00 - r - INFO: - Episode: 178/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:01 - r - INFO: - Episode: 179/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:03 - r - INFO: - Episode: 180/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:04 - r - INFO: - Current episode 180 has the best eval reward: 200.000 -2023-03-31 22:59:05 - r - INFO: - Episode: 181/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:06 - r - INFO: - Episode: 182/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:08 - r - INFO: - Episode: 183/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:09 - r - INFO: - Episode: 184/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:10 - r - INFO: - Episode: 185/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:12 - r - INFO: - Episode: 186/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:18 - r - INFO: - Episode: 187/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:22 - r - INFO: - Episode: 188/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:24 - r - INFO: - Episode: 189/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:26 - r - INFO: - Episode: 190/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:28 - r - INFO: - Episode: 191/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:29 - r - INFO: - Episode: 192/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:30 - r - INFO: - Episode: 193/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:32 - r - INFO: - Episode: 194/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:33 - r - INFO: - Episode: 195/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:35 - r - INFO: - Episode: 196/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:37 - r - INFO: - Episode: 197/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:38 - r - INFO: - Episode: 198/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:39 - r - INFO: - Episode: 199/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:40 - r - INFO: - Episode: 200/200, Reward: 200.000, Step: 200 -2023-03-31 22:59:41 - r - INFO: - Current episode 200 has the best eval reward: 200.000 -2023-03-31 22:59:41 - r - INFO: - Finish training! diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/models/checkpoint.pt b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/models/checkpoint.pt deleted file mode 100644 index 663b12f22263098d243896df93ba28f600d9a5fe..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c438616b97ca890557a9e9b1cd42decfc5decc64e5aee660d89158290e92683d -size 272471 diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/learning_curve.png b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/learning_curve.png deleted file mode 100644 index 451d068a4636d034ba0343c28a90c5da7ba0b748..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/learning_curve.png and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/res.csv b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/res.csv deleted file mode 100644 index fbca00787d5bb2ba30327255354cba17eeb53d1b..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/results/res.csv +++ /dev/null @@ -1,201 +0,0 @@ -episodes,rewards,steps -0,15.0,15 -1,29.0,29 -2,13.0,13 -3,14.0,14 -4,11.0,11 -5,39.0,39 -6,35.0,35 -7,16.0,16 -8,13.0,13 -9,12.0,12 -10,11.0,11 -11,34.0,34 -12,15.0,15 -13,23.0,23 -14,9.0,9 -15,19.0,19 -16,9.0,9 -17,10.0,10 -18,16.0,16 -19,19.0,19 -20,11.0,11 -21,10.0,10 -22,14.0,14 -23,12.0,12 -24,16.0,16 -25,11.0,11 -26,10.0,10 -27,16.0,16 -28,12.0,12 -29,16.0,16 -30,11.0,11 -31,8.0,8 -32,8.0,8 -33,12.0,12 -34,10.0,10 -35,9.0,9 -36,11.0,11 -37,10.0,10 -38,11.0,11 -39,10.0,10 -40,10.0,10 -41,10.0,10 -42,10.0,10 -43,9.0,9 -44,11.0,11 -45,10.0,10 -46,10.0,10 -47,11.0,11 -48,10.0,10 -49,13.0,13 -50,18.0,18 -51,12.0,12 -52,10.0,10 -53,10.0,10 -54,11.0,11 -55,8.0,8 -56,16.0,16 -57,11.0,11 -58,9.0,9 -59,9.0,9 -60,10.0,10 -61,10.0,10 -62,9.0,9 -63,8.0,8 -64,10.0,10 -65,9.0,9 -66,10.0,10 -67,12.0,12 -68,12.0,12 -69,12.0,12 -70,10.0,10 -71,13.0,13 -72,20.0,20 -73,12.0,12 -74,13.0,13 -75,15.0,15 -76,13.0,13 -77,19.0,19 -78,14.0,14 -79,12.0,12 -80,13.0,13 -81,14.0,14 -82,13.0,13 -83,13.0,13 -84,14.0,14 -85,18.0,18 -86,23.0,23 -87,13.0,13 -88,13.0,13 -89,21.0,21 -90,17.0,17 -91,23.0,23 -92,16.0,16 -93,22.0,22 -94,23.0,23 -95,14.0,14 -96,20.0,20 -97,24.0,24 -98,21.0,21 -99,22.0,22 -100,21.0,21 -101,19.0,19 -102,18.0,18 -103,18.0,18 -104,23.0,23 -105,16.0,16 -106,19.0,19 -107,18.0,18 -108,21.0,21 -109,24.0,24 -110,24.0,24 -111,27.0,27 -112,35.0,35 -113,23.0,23 -114,29.0,29 -115,25.0,25 -116,20.0,20 -117,23.0,23 -118,21.0,21 -119,23.0,23 -120,17.0,17 -121,19.0,19 -122,19.0,19 -123,21.0,21 -124,24.0,24 -125,23.0,23 -126,22.0,22 -127,23.0,23 -128,22.0,22 -129,28.0,28 -130,32.0,32 -131,35.0,35 -132,27.0,27 -133,24.0,24 -134,37.0,37 -135,33.0,33 -136,39.0,39 -137,24.0,24 -138,24.0,24 -139,40.0,40 -140,31.0,31 -141,30.0,30 -142,25.0,25 -143,23.0,23 -144,29.0,29 -145,51.0,51 -146,73.0,73 -147,38.0,38 -148,37.0,37 -149,32.0,32 -150,43.0,43 -151,29.0,29 -152,33.0,33 -153,31.0,31 -154,41.0,41 -155,79.0,79 -156,47.0,47 -157,32.0,32 -158,36.0,36 -159,76.0,76 -160,73.0,73 -161,59.0,59 -162,102.0,102 -163,87.0,87 -164,94.0,94 -165,116.0,116 -166,135.0,135 -167,140.0,140 -168,167.0,167 -169,128.0,128 -170,200.0,200 -171,135.0,135 -172,163.0,163 -173,180.0,180 -174,185.0,185 -175,200.0,200 -176,200.0,200 -177,200.0,200 -178,200.0,200 -179,200.0,200 -180,200.0,200 -181,200.0,200 -182,200.0,200 -183,200.0,200 -184,200.0,200 -185,200.0,200 -186,200.0,200 -187,200.0,200 -188,200.0,200 -189,200.0,200 -190,200.0,200 -191,200.0,200 -192,200.0,200 -193,200.0,200 -194,200.0,200 -195,200.0,200 -196,200.0,200 -197,200.0,200 -198,200.0,200 -199,200.0,200 diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/tb_logs/events.out.tfevents.1680274695.DESKTOP-H34HQIQ.317208.0 b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/tb_logs/events.out.tfevents.1680274695.DESKTOP-H34HQIQ.317208.0 deleted file mode 100644 index a33114a37f26100b5d347c05fc48bcd6650e4f4f..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_20230331-225815/tb_logs/events.out.tfevents.1680274695.DESKTOP-H34HQIQ.317208.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e4daaaaabe093b8f9d6baf9504a0c5b9e14d2ea89477d20323c5eacbf5942b64 -size 40 diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/config.yaml b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/config.yaml deleted file mode 100644 index ecd5ec863df5c9deb6872d3d93f996a4b2c4471a..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/config.yaml +++ /dev/null @@ -1,55 +0,0 @@ -general_cfg: - algo_name: PER_DQN - device: cpu - env_name: gym - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: false - load_path: Train_CartPole-v1_PER_DQN - max_steps: 200 - mode: train - mp_backend: mp - n_workers: 2 - new_step_api: true - render: false - render_mode: human - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 200 - wrapper: null -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - hidden_dim: 256 - lr: 0.0001 - per_alpha: 0.6 - per_beta: 0.4 - per_beta_annealing: 0.001 - per_epsilon: 0.01 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear -env_cfg: - id: CartPole-v1 - new_step_api: true - render_mode: null diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/logs/log.txt b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/logs/log.txt deleted file mode 100644 index a23796be2fdbbf755f36361d92924558c4d83299..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/logs/log.txt +++ /dev/null @@ -1,48 +0,0 @@ -2023-04-15 21:50:02 - r - INFO: - Hyperparameters: -2023-04-15 21:50:02 - r - INFO: - ================================================================================ -2023-04-15 21:50:02 - r - INFO: - Name Value Type -2023-04-15 21:50:02 - r - INFO: - env_name gym -2023-04-15 21:50:02 - r - INFO: - new_step_api 1 -2023-04-15 21:50:02 - r - INFO: - wrapper None -2023-04-15 21:50:02 - r - INFO: - render 0 -2023-04-15 21:50:02 - r - INFO: - render_mode None -2023-04-15 21:50:02 - r - INFO: - algo_name PER_DQN -2023-04-15 21:50:02 - r - INFO: - mode train -2023-04-15 21:50:02 - r - INFO: - mp_backend mp -2023-04-15 21:50:02 - r - INFO: - seed 1 -2023-04-15 21:50:02 - r - INFO: - device cpu -2023-04-15 21:50:02 - r - INFO: - train_eps 200 -2023-04-15 21:50:02 - r - INFO: - test_eps 10 -2023-04-15 21:50:02 - r - INFO: - eval_eps 10 -2023-04-15 21:50:02 - r - INFO: - eval_per_episode 5 -2023-04-15 21:50:02 - r - INFO: - max_steps 200 -2023-04-15 21:50:02 - r - INFO: - load_checkpoint 0 -2023-04-15 21:50:02 - r - INFO: - load_path Train_CartPole-v1_PER_DQN -2023-04-15 21:50:02 - r - INFO: - show_fig 0 -2023-04-15 21:50:02 - r - INFO: - save_fig 1 -2023-04-15 21:50:02 - r - INFO: - n_workers 2 -2023-04-15 21:50:02 - r - INFO: - epsilon_start 0.95 -2023-04-15 21:50:02 - r - INFO: - epsilon_end 0.01 -2023-04-15 21:50:02 - r - INFO: - epsilon_decay 500 -2023-04-15 21:50:02 - r - INFO: - hidden_dim 256 -2023-04-15 21:50:02 - r - INFO: - gamma 0.99 -2023-04-15 21:50:02 - r - INFO: - lr 0.0001 -2023-04-15 21:50:02 - r - INFO: - buffer_size 100000 -2023-04-15 21:50:02 - r - INFO: - per_alpha 0.6 -2023-04-15 21:50:02 - r - INFO: - per_beta 0.4 -2023-04-15 21:50:02 - r - INFO: - per_beta_annealing 0.001 -2023-04-15 21:50:02 - r - INFO: - per_epsilon 0.01 -2023-04-15 21:50:02 - r - INFO: - batch_size 64 -2023-04-15 21:50:02 - r - INFO: - target_update 4 -2023-04-15 21:50:02 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-04-15 21:50:02 - r - INFO: - id CartPole-v1 -2023-04-15 21:50:02 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002 -2023-04-15 21:50:02 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002/results -2023-04-15 21:50:02 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002/logs -2023-04-15 21:50:02 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002/traj -2023-04-15 21:50:02 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215002/videos -2023-04-15 21:50:02 - r - INFO: - ================================================================================ -2023-04-15 21:50:02 - r - INFO: - n_states: 4, n_actions: 2 -2023-04-15 21:50:02 - r - INFO: - Start training! -2023-04-15 21:50:02 - r - INFO: - Env: gym, Algorithm: PER_DQN, Device: cpu -2023-04-15 21:51:00 - r - INFO: - Finish training! diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/models/checkpoint.pt b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/models/checkpoint.pt deleted file mode 100644 index 1f68746694a6ae30f06b2351e23e70ef9deafc34..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d4251c7f141686d5391c5c933b493b27a184102ccf1596bead1dccaa6cc0bd9a -size 272407 diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/learning_curve.png b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/learning_curve.png deleted file mode 100644 index aca4e3fc511d1ddaf613b80d9d6972bbe7ae11bf..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/learning_curve.png and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/res.csv b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/res.csv deleted file mode 100644 index a99e05cb22519b9c31c2af389db33fa7c6a7c771..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_mp__20230415-215002/results/res.csv +++ /dev/null @@ -1,202 +0,0 @@ -episodes,rewards -0,18.0 -1,19.0 -2,17.0 -3,24.0 -4,18.0 -5,17.0 -6,13.0 -7,16.0 -8,21.0 -9,30.0 -10,17.0 -11,18.0 -12,11.0 -13,13.0 -14,16.0 -15,14.0 -16,28.0 -17,12.0 -18,14.0 -19,19.0 -20,11.0 -21,10.0 -22,31.0 -23,23.0 -24,22.0 -25,11.0 -26,16.0 -27,12.0 -28,12.0 -29,16.0 -30,12.0 -31,16.0 -32,14.0 -33,21.0 -34,12.0 -35,9.0 -36,9.0 -37,26.0 -38,11.0 -39,22.0 -40,17.0 -41,21.0 -42,16.0 -43,27.0 -44,13.0 -45,18.0 -46,19.0 -47,11.0 -48,11.0 -49,16.0 -50,10.0 -51,9.0 -52,9.0 -53,16.0 -54,9.0 -55,12.0 -56,11.0 -57,11.0 -58,10.0 -59,12.0 -60,10.0 -61,14.0 -62,11.0 -63,12.0 -64,12.0 -65,18.0 -66,12.0 -67,16.0 -68,14.0 -69,23.0 -70,20.0 -71,23.0 -72,17.0 -73,18.0 -74,22.0 -75,22.0 -76,49.0 -77,24.0 -78,60.0 -79,35.0 -80,51.0 -81,78.0 -82,49.0 -83,75.0 -84,100.0 -85,78.0 -86,61.0 -87,65.0 -88,86.0 -89,105.0 -90,54.0 -91,60.0 -92,37.0 -93,149.0 -94,44.0 -95,104.0 -96,200.0 -97,112.0 -98,163.0 -99,167.0 -100,113.0 -101,152.0 -102,200.0 -103,200.0 -104,200.0 -105,200.0 -106,200.0 -107,200.0 -108,200.0 -109,200.0 -110,200.0 -111,200.0 -112,200.0 -113,200.0 -114,200.0 -115,200.0 -116,200.0 -117,200.0 -118,200.0 -119,200.0 -120,200.0 -121,200.0 -122,200.0 -123,200.0 -124,200.0 -125,200.0 -126,200.0 -127,200.0 -128,200.0 -129,200.0 -130,191.0 -131,200.0 -132,189.0 -133,200.0 -134,200.0 -135,200.0 -136,185.0 -137,200.0 -138,197.0 -139,200.0 -140,188.0 -141,200.0 -142,199.0 -143,200.0 -144,200.0 -145,200.0 -146,200.0 -147,200.0 -148,200.0 -149,200.0 -150,200.0 -151,200.0 -152,200.0 -153,200.0 -154,200.0 -155,200.0 -156,200.0 -157,200.0 -158,200.0 -159,200.0 -160,200.0 -161,200.0 -162,200.0 -163,200.0 -164,200.0 -165,200.0 -166,200.0 -167,200.0 -168,200.0 -169,200.0 -170,200.0 -171,200.0 -172,200.0 -173,200.0 -174,200.0 -175,200.0 -176,200.0 -177,200.0 -178,200.0 -179,200.0 -180,200.0 -181,200.0 -182,200.0 -183,200.0 -184,200.0 -185,200.0 -186,200.0 -187,200.0 -188,200.0 -189,200.0 -190,200.0 -191,200.0 -192,200.0 -193,200.0 -194,200.0 -195,200.0 -196,200.0 -197,200.0 -198,200.0 -199,200.0 -200,200.0 diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/config.yaml b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/config.yaml deleted file mode 100644 index 4c952bbbc81e0dbca9a835dedbcbe370b4b2c56c..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/config.yaml +++ /dev/null @@ -1,55 +0,0 @@ -general_cfg: - algo_name: PER_DQN - device: cpu - env_name: gym - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: false - load_path: Train_CartPole-v1_PER_DQN - max_steps: 200 - mode: train - mp_backend: ray - n_workers: 2 - new_step_api: true - render: false - render_mode: human - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 250 - wrapper: null -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - hidden_dim: 256 - lr: 0.0001 - per_alpha: 0.6 - per_beta: 0.4 - per_beta_annealing: 0.001 - per_epsilon: 0.01 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear -env_cfg: - id: CartPole-v1 - new_step_api: true - render_mode: null diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/logs/log.txt b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/logs/log.txt deleted file mode 100644 index 5ceecb4d4d23771092ae836d2623fb13d16df89c..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/logs/log.txt +++ /dev/null @@ -1,48 +0,0 @@ -2023-04-15 21:57:38 - r - INFO: - Hyperparameters: -2023-04-15 21:57:38 - r - INFO: - ================================================================================ -2023-04-15 21:57:38 - r - INFO: - Name Value Type -2023-04-15 21:57:38 - r - INFO: - env_name gym -2023-04-15 21:57:38 - r - INFO: - new_step_api 1 -2023-04-15 21:57:38 - r - INFO: - wrapper None -2023-04-15 21:57:38 - r - INFO: - render 0 -2023-04-15 21:57:38 - r - INFO: - render_mode None -2023-04-15 21:57:38 - r - INFO: - algo_name PER_DQN -2023-04-15 21:57:38 - r - INFO: - mode train -2023-04-15 21:57:38 - r - INFO: - mp_backend ray -2023-04-15 21:57:38 - r - INFO: - seed 1 -2023-04-15 21:57:38 - r - INFO: - device cpu -2023-04-15 21:57:38 - r - INFO: - train_eps 250 -2023-04-15 21:57:38 - r - INFO: - test_eps 10 -2023-04-15 21:57:38 - r - INFO: - eval_eps 10 -2023-04-15 21:57:38 - r - INFO: - eval_per_episode 5 -2023-04-15 21:57:38 - r - INFO: - max_steps 200 -2023-04-15 21:57:38 - r - INFO: - load_checkpoint 0 -2023-04-15 21:57:38 - r - INFO: - load_path Train_CartPole-v1_PER_DQN -2023-04-15 21:57:38 - r - INFO: - show_fig 0 -2023-04-15 21:57:38 - r - INFO: - save_fig 1 -2023-04-15 21:57:38 - r - INFO: - n_workers 2 -2023-04-15 21:57:38 - r - INFO: - epsilon_start 0.95 -2023-04-15 21:57:38 - r - INFO: - epsilon_end 0.01 -2023-04-15 21:57:38 - r - INFO: - epsilon_decay 500 -2023-04-15 21:57:38 - r - INFO: - hidden_dim 256 -2023-04-15 21:57:38 - r - INFO: - gamma 0.99 -2023-04-15 21:57:38 - r - INFO: - lr 0.0001 -2023-04-15 21:57:38 - r - INFO: - buffer_size 100000 -2023-04-15 21:57:38 - r - INFO: - per_alpha 0.6 -2023-04-15 21:57:38 - r - INFO: - per_beta 0.4 -2023-04-15 21:57:38 - r - INFO: - per_beta_annealing 0.001 -2023-04-15 21:57:38 - r - INFO: - per_epsilon 0.01 -2023-04-15 21:57:38 - r - INFO: - batch_size 64 -2023-04-15 21:57:38 - r - INFO: - target_update 4 -2023-04-15 21:57:38 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-04-15 21:57:38 - r - INFO: - id CartPole-v1 -2023-04-15 21:57:38 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738 -2023-04-15 21:57:38 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738/results -2023-04-15 21:57:38 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738/logs -2023-04-15 21:57:38 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738/traj -2023-04-15 21:57:38 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_gym_PER_DQN_20230415-215738/videos -2023-04-15 21:57:38 - r - INFO: - ================================================================================ -2023-04-15 21:57:40 - r - INFO: - n_states: 4, n_actions: 2 -2023-04-15 21:57:40 - r - INFO: - Start training! -2023-04-15 21:57:40 - r - INFO: - Env: gym, Algorithm: PER_DQN, Device: cpu -2023-04-15 22:00:44 - r - INFO: - Finish training! diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/models/checkpoint.pt b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/models/checkpoint.pt deleted file mode 100644 index 75946dbb92769075d2500d272c9ae471ca3675eb..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0efe3ec576afef2311748067e61af0fe6c939f7a2c2a1500001987a5d0092ce3 -size 272407 diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/learning_curve.png b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/learning_curve.png deleted file mode 100644 index c88da98b353de91b7405ae49eddcf81756f6b0df..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/learning_curve.png and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/res.csv b/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/res.csv deleted file mode 100644 index 1fb6ea0f87d54588fe26438bf1a48f163836131a..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_CartPole-v1_PER_DQN_ray_20230415-215738/results/res.csv +++ /dev/null @@ -1,251 +0,0 @@ -episodes,rewards -0,18.0 -1,18.0 -2,39.0 -3,28.0 -4,15.0 -5,14.0 -6,39.0 -7,52.0 -8,36.0 -9,28.0 -10,13.0 -11,16.0 -12,20.0 -13,19.0 -14,31.0 -15,11.0 -16,10.0 -17,22.0 -18,23.0 -19,16.0 -20,11.0 -21,12.0 -22,12.0 -23,12.0 -24,16.0 -25,14.0 -26,16.0 -27,12.0 -28,21.0 -29,25.0 -30,9.0 -31,10.0 -32,9.0 -33,41.0 -34,22.0 -35,19.0 -36,13.0 -37,12.0 -38,16.0 -39,13.0 -40,13.0 -41,9.0 -42,11.0 -43,13.0 -44,11.0 -45,11.0 -46,11.0 -47,11.0 -48,10.0 -49,11.0 -50,10.0 -51,14.0 -52,12.0 -53,9.0 -54,10.0 -55,9.0 -56,10.0 -57,10.0 -58,12.0 -59,9.0 -60,10.0 -61,9.0 -62,11.0 -63,13.0 -64,10.0 -65,12.0 -66,15.0 -67,9.0 -68,11.0 -69,10.0 -70,10.0 -71,9.0 -72,10.0 -73,9.0 -74,11.0 -75,9.0 -76,10.0 -77,9.0 -78,9.0 -79,11.0 -80,11.0 -81,10.0 -82,12.0 -83,29.0 -84,14.0 -85,11.0 -86,14.0 -87,10.0 -88,10.0 -89,15.0 -90,18.0 -91,16.0 -92,15.0 -93,17.0 -94,12.0 -95,70.0 -96,27.0 -97,23.0 -98,115.0 -99,77.0 -100,34.0 -101,25.0 -102,18.0 -103,24.0 -104,19.0 -105,29.0 -106,33.0 -107,77.0 -108,44.0 -109,35.0 -110,51.0 -111,31.0 -112,53.0 -113,28.0 -114,33.0 -115,47.0 -116,69.0 -117,30.0 -118,30.0 -119,59.0 -120,41.0 -121,33.0 -122,82.0 -123,58.0 -124,31.0 -125,40.0 -126,38.0 -127,57.0 -128,34.0 -129,47.0 -130,36.0 -131,32.0 -132,38.0 -133,37.0 -134,57.0 -135,33.0 -136,52.0 -137,72.0 -138,55.0 -139,88.0 -140,50.0 -141,35.0 -142,49.0 -143,35.0 -144,54.0 -145,39.0 -146,34.0 -147,47.0 -148,34.0 -149,61.0 -150,39.0 -151,54.0 -152,69.0 -153,72.0 -154,65.0 -155,51.0 -156,101.0 -157,40.0 -158,49.0 -159,65.0 -160,43.0 -161,47.0 -162,154.0 -163,88.0 -164,99.0 -165,72.0 -166,152.0 -167,53.0 -168,74.0 -169,87.0 -170,62.0 -171,104.0 -172,80.0 -173,113.0 -174,75.0 -175,200.0 -176,69.0 -177,200.0 -178,200.0 -179,200.0 -180,130.0 -181,200.0 -182,150.0 -183,191.0 -184,200.0 -185,200.0 -186,200.0 -187,196.0 -188,175.0 -189,200.0 -190,200.0 -191,200.0 -192,200.0 -193,200.0 -194,200.0 -195,200.0 -196,200.0 -197,200.0 -198,200.0 -199,200.0 -200,197.0 -201,200.0 -202,200.0 -203,200.0 -204,200.0 -205,200.0 -206,200.0 -207,200.0 -208,200.0 -209,200.0 -210,200.0 -211,200.0 -212,200.0 -213,200.0 -214,200.0 -215,200.0 -216,200.0 -217,200.0 -218,200.0 -219,200.0 -220,200.0 -221,200.0 -222,200.0 -223,200.0 -224,200.0 -225,200.0 -226,200.0 -227,200.0 -228,200.0 -229,200.0 -230,200.0 -231,200.0 -232,200.0 -233,200.0 -234,200.0 -235,200.0 -236,200.0 -237,200.0 -238,200.0 -239,200.0 -240,200.0 -241,200.0 -242,200.0 -243,200.0 -244,200.0 -245,200.0 -246,200.0 -247,200.0 -248,200.0 -249,200.0 diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/config.yaml b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/config.yaml deleted file mode 100644 index e3f0e0accd86f07ed207f4f0648230c7befd8f0d..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/config.yaml +++ /dev/null @@ -1,45 +0,0 @@ -general_cfg: - algo_name: DQN - collect_traj: true - device: cpu - env_name: gym - load_checkpoint: false - load_model_step: best - load_path: Train_single_CartPole-v1_DQN_20230515-211721 - max_episode: 100 - max_step: 200 - mode: train - model_save_fre: 500 - mp_backend: ray - n_workers: 2 - online_eval: true - online_eval_episode: 10 - save_fig: true - seed: 1 - show_fig: false -algo_cfg: - batch_size: 64 - buffer_size: 100000 - buffer_type: REPLAY_QUE - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.95 - lr: 0.0001 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - layer_type: linear -env_cfg: - id: CartPole-v1 - ignore_params: - - wrapper - - ignore_params - render_mode: null - wrapper: null diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/logs/log.txt b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/logs/log.txt deleted file mode 100644 index d6ec72747bff623ab2fda248b2b2deaceb58ba26..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/logs/log.txt +++ /dev/null @@ -1,166 +0,0 @@ -2023-05-15 22:19:16 - SimpleLog - INFO: - General Configs: -2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================ -2023-05-15 22:19:16 - SimpleLog - INFO: - Name Value Type -2023-05-15 22:19:16 - SimpleLog - INFO: - env_name gym -2023-05-15 22:19:16 - SimpleLog - INFO: - algo_name DQN -2023-05-15 22:19:16 - SimpleLog - INFO: - mode train -2023-05-15 22:19:16 - SimpleLog - INFO: - collect_traj 1 -2023-05-15 22:19:16 - SimpleLog - INFO: - mp_backend ray -2023-05-15 22:19:16 - SimpleLog - INFO: - n_workers 2 -2023-05-15 22:19:16 - SimpleLog - INFO: - seed 1 -2023-05-15 22:19:16 - SimpleLog - INFO: - device cpu -2023-05-15 22:19:16 - SimpleLog - INFO: - max_episode 100 -2023-05-15 22:19:16 - SimpleLog - INFO: - max_step 200 -2023-05-15 22:19:16 - SimpleLog - INFO: - online_eval 1 -2023-05-15 22:19:16 - SimpleLog - INFO: - online_eval_episode 10 -2023-05-15 22:19:16 - SimpleLog - INFO: - load_checkpoint 0 -2023-05-15 22:19:16 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 -2023-05-15 22:19:16 - SimpleLog - INFO: - show_fig 0 -2023-05-15 22:19:16 - SimpleLog - INFO: - save_fig 1 -2023-05-15 22:19:16 - SimpleLog - INFO: - load_model_step best -2023-05-15 22:19:16 - SimpleLog - INFO: - model_save_fre 500 -2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================ -2023-05-15 22:19:16 - SimpleLog - INFO: - Algo Configs: -2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================ -2023-05-15 22:19:16 - SimpleLog - INFO: - Name Value Type -2023-05-15 22:19:16 - SimpleLog - INFO: - epsilon_start 0.95 -2023-05-15 22:19:16 - SimpleLog - INFO: - epsilon_end 0.01 -2023-05-15 22:19:16 - SimpleLog - INFO: - epsilon_decay 500 -2023-05-15 22:19:16 - SimpleLog - INFO: - gamma 0.95 -2023-05-15 22:19:16 - SimpleLog - INFO: - lr 0.0001 -2023-05-15 22:19:16 - SimpleLog - INFO: - buffer_size 100000 -2023-05-15 22:19:16 - SimpleLog - INFO: - batch_size 64 -2023-05-15 22:19:16 - SimpleLog - INFO: - target_update 4 -2023-05-15 22:19:16 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] -2023-05-15 22:19:16 - SimpleLog - INFO: - buffer_type REPLAY_QUE -2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================ -2023-05-15 22:19:16 - SimpleLog - INFO: - Env Configs: -2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================ -2023-05-15 22:19:16 - SimpleLog - INFO: - Name Value Type -2023-05-15 22:19:16 - SimpleLog - INFO: - id CartPole-v1 -2023-05-15 22:19:16 - SimpleLog - INFO: - render_mode None -2023-05-15 22:19:16 - SimpleLog - INFO: - wrapper None -2023-05-15 22:19:16 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] -2023-05-15 22:19:16 - SimpleLog - INFO: - ================================================================================ -2023-05-15 22:19:21 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2) -2023-05-15 22:19:24 - RayLog - INFO: - Worker 0 finished episode 0 with reward 12.0 in 12 steps -2023-05-15 22:19:24 - RayLog - INFO: - Worker 1 finished episode 0 with reward 22.0 in 22 steps -2023-05-15 22:19:24 - RayLog - INFO: - Worker 0 finished episode 1 with reward 21.0 in 21 steps -2023-05-15 22:19:25 - RayLog - INFO: - Worker 0 finished episode 3 with reward 18.0 in 18 steps -2023-05-15 22:19:25 - RayLog - INFO: - Worker 1 finished episode 2 with reward 32.0 in 32 steps -2023-05-15 22:19:25 - RayLog - INFO: - Worker 1 finished episode 5 with reward 13.0 in 13 steps -2023-05-15 22:19:25 - RayLog - INFO: - Worker 0 finished episode 4 with reward 23.0 in 23 steps -2023-05-15 22:19:25 - RayLog - INFO: - Worker 1 finished episode 6 with reward 9.0 in 9 steps -2023-05-15 22:19:25 - RayLog - INFO: - Worker 0 finished episode 7 with reward 12.0 in 12 steps -2023-05-15 22:19:25 - RayLog - INFO: - Worker 1 finished episode 8 with reward 11.0 in 11 steps -2023-05-15 22:19:26 - RayLog - INFO: - Worker 1 finished episode 10 with reward 17.0 in 17 steps -2023-05-15 22:19:26 - RayLog - INFO: - Worker 0 finished episode 9 with reward 19.0 in 19 steps -2023-05-15 22:19:26 - RayLog - INFO: - Worker 1 finished episode 11 with reward 9.0 in 9 steps -2023-05-15 22:19:26 - RayLog - INFO: - Worker 1 finished episode 13 with reward 14.0 in 14 steps -2023-05-15 22:19:26 - RayLog - INFO: - Worker 0 finished episode 12 with reward 25.0 in 25 steps -2023-05-15 22:19:26 - RayLog - INFO: - Worker 1 finished episode 14 with reward 13.0 in 13 steps -2023-05-15 22:19:27 - RayLog - INFO: - Worker 0 finished episode 15 with reward 12.0 in 12 steps -2023-05-15 22:19:27 - RayLog - INFO: - Worker 1 finished episode 16 with reward 13.0 in 13 steps -2023-05-15 22:19:27 - RayLog - INFO: - Worker 0 finished episode 17 with reward 17.0 in 17 steps -2023-05-15 22:19:27 - RayLog - INFO: - Worker 1 finished episode 18 with reward 9.0 in 9 steps -2023-05-15 22:19:27 - RayLog - INFO: - Worker 0 finished episode 19 with reward 13.0 in 13 steps -2023-05-15 22:19:27 - RayLog - INFO: - Worker 1 finished episode 20 with reward 11.0 in 11 steps -2023-05-15 22:19:27 - RayLog - INFO: - Worker 0 finished episode 21 with reward 11.0 in 11 steps -2023-05-15 22:19:27 - RayLog - INFO: - Worker 1 finished episode 22 with reward 11.0 in 11 steps -2023-05-15 22:19:28 - RayLog - INFO: - Worker 1 finished episode 24 with reward 11.0 in 11 steps -2023-05-15 22:19:28 - RayLog - INFO: - Worker 1 finished episode 25 with reward 9.0 in 9 steps -2023-05-15 22:19:28 - RayLog - INFO: - Worker 0 finished episode 23 with reward 23.0 in 23 steps -2023-05-15 22:19:29 - RayLog - INFO: - Worker 1 finished episode 26 with reward 15.0 in 15 steps -2023-05-15 22:19:29 - RayLog - INFO: - Worker 1 finished episode 28 with reward 16.0 in 16 steps -2023-05-15 22:19:29 - RayLog - INFO: - Worker 0 finished episode 27 with reward 38.0 in 38 steps -2023-05-15 22:19:29 - RayLog - INFO: - Worker 1 finished episode 29 with reward 11.0 in 11 steps -2023-05-15 22:19:29 - RayLog - INFO: - Worker 0 finished episode 30 with reward 13.0 in 13 steps -2023-05-15 22:19:29 - RayLog - INFO: - Worker 1 finished episode 31 with reward 12.0 in 12 steps -2023-05-15 22:19:30 - RayLog - INFO: - Worker 1 finished episode 33 with reward 12.0 in 12 steps -2023-05-15 22:19:30 - RayLog - INFO: - Worker 0 finished episode 32 with reward 14.0 in 14 steps -2023-05-15 22:19:30 - RayLog - INFO: - Worker 0 finished episode 35 with reward 9.0 in 9 steps -2023-05-15 22:19:30 - RayLog - INFO: - Worker 1 finished episode 34 with reward 11.0 in 11 steps -2023-05-15 22:19:32 - RayLog - INFO: - update_step: 500, online_eval_reward: 9.000 -2023-05-15 22:19:32 - RayLog - INFO: - current update step obtain a better online_eval_reward: 9.000, save the best model! -2023-05-15 22:19:32 - RayLog - INFO: - Worker 0 finished episode 36 with reward 11.0 in 11 steps -2023-05-15 22:19:32 - RayLog - INFO: - Worker 1 finished episode 37 with reward 14.0 in 14 steps -2023-05-15 22:19:32 - RayLog - INFO: - Worker 0 finished episode 38 with reward 12.0 in 12 steps -2023-05-15 22:19:32 - RayLog - INFO: - Worker 1 finished episode 39 with reward 13.0 in 13 steps -2023-05-15 22:19:32 - RayLog - INFO: - Worker 0 finished episode 40 with reward 11.0 in 11 steps -2023-05-15 22:19:32 - RayLog - INFO: - Worker 1 finished episode 41 with reward 10.0 in 10 steps -2023-05-15 22:19:33 - RayLog - INFO: - Worker 0 finished episode 42 with reward 11.0 in 11 steps -2023-05-15 22:19:33 - RayLog - INFO: - Worker 1 finished episode 43 with reward 10.0 in 10 steps -2023-05-15 22:19:33 - RayLog - INFO: - Worker 0 finished episode 44 with reward 10.0 in 10 steps -2023-05-15 22:19:33 - RayLog - INFO: - Worker 1 finished episode 45 with reward 21.0 in 21 steps -2023-05-15 22:19:34 - RayLog - INFO: - Worker 0 finished episode 46 with reward 36.0 in 36 steps -2023-05-15 22:19:34 - RayLog - INFO: - Worker 1 finished episode 47 with reward 30.0 in 30 steps -2023-05-15 22:19:34 - RayLog - INFO: - Worker 1 finished episode 49 with reward 19.0 in 19 steps -2023-05-15 22:19:34 - RayLog - INFO: - Worker 0 finished episode 48 with reward 28.0 in 28 steps -2023-05-15 22:19:35 - RayLog - INFO: - Worker 1 finished episode 50 with reward 17.0 in 17 steps -2023-05-15 22:19:35 - RayLog - INFO: - Worker 0 finished episode 51 with reward 28.0 in 28 steps -2023-05-15 22:19:35 - RayLog - INFO: - Worker 1 finished episode 52 with reward 23.0 in 23 steps -2023-05-15 22:19:36 - RayLog - INFO: - Worker 0 finished episode 53 with reward 46.0 in 46 steps -2023-05-15 22:19:36 - RayLog - INFO: - Worker 1 finished episode 54 with reward 37.0 in 37 steps -2023-05-15 22:19:36 - RayLog - INFO: - Worker 1 finished episode 56 with reward 27.0 in 27 steps -2023-05-15 22:19:37 - RayLog - INFO: - Worker 0 finished episode 55 with reward 56.0 in 56 steps -2023-05-15 22:19:37 - RayLog - INFO: - update_step: 1000, online_eval_reward: 94.000 -2023-05-15 22:19:37 - RayLog - INFO: - current update step obtain a better online_eval_reward: 94.000, save the best model! -2023-05-15 22:19:37 - RayLog - INFO: - Worker 1 finished episode 57 with reward 35.0 in 35 steps -2023-05-15 22:19:38 - RayLog - INFO: - Worker 1 finished episode 59 with reward 29.0 in 29 steps -2023-05-15 22:19:38 - RayLog - INFO: - Worker 0 finished episode 58 with reward 65.0 in 65 steps -2023-05-15 22:19:39 - RayLog - INFO: - Worker 1 finished episode 60 with reward 37.0 in 37 steps -2023-05-15 22:19:39 - RayLog - INFO: - Worker 1 finished episode 62 with reward 34.0 in 34 steps -2023-05-15 22:19:40 - RayLog - INFO: - Worker 0 finished episode 61 with reward 70.0 in 70 steps -2023-05-15 22:19:40 - RayLog - INFO: - Worker 1 finished episode 63 with reward 39.0 in 39 steps -2023-05-15 22:19:41 - RayLog - INFO: - Worker 1 finished episode 65 with reward 35.0 in 35 steps -2023-05-15 22:19:41 - RayLog - INFO: - Worker 0 finished episode 64 with reward 55.0 in 55 steps -2023-05-15 22:19:42 - RayLog - INFO: - Worker 1 finished episode 66 with reward 37.0 in 37 steps -2023-05-15 22:19:42 - RayLog - INFO: - Worker 0 finished episode 67 with reward 53.0 in 53 steps -2023-05-15 22:19:42 - RayLog - INFO: - Worker 1 finished episode 68 with reward 32.0 in 32 steps -2023-05-15 22:19:42 - RayLog - INFO: - update_step: 1500, online_eval_reward: 57.000 -2023-05-15 22:19:43 - RayLog - INFO: - Worker 1 finished episode 70 with reward 51.0 in 51 steps -2023-05-15 22:19:44 - RayLog - INFO: - Worker 0 finished episode 69 with reward 67.0 in 67 steps -2023-05-15 22:19:44 - RayLog - INFO: - Worker 1 finished episode 71 with reward 40.0 in 40 steps -2023-05-15 22:19:45 - RayLog - INFO: - Worker 0 finished episode 72 with reward 68.0 in 68 steps -2023-05-15 22:19:46 - RayLog - INFO: - Worker 1 finished episode 73 with reward 79.0 in 79 steps -2023-05-15 22:19:48 - RayLog - INFO: - update_step: 2000, online_eval_reward: 138.000 -2023-05-15 22:19:48 - RayLog - INFO: - current update step obtain a better online_eval_reward: 138.000, save the best model! -2023-05-15 22:19:48 - RayLog - INFO: - Worker 0 finished episode 74 with reward 124.0 in 124 steps -2023-05-15 22:19:49 - RayLog - INFO: - Worker 1 finished episode 75 with reward 133.0 in 133 steps -2023-05-15 22:19:52 - RayLog - INFO: - Worker 0 finished episode 76 with reward 200.0 in 200 steps -2023-05-15 22:19:53 - RayLog - INFO: - update_step: 2500, online_eval_reward: 200.000 -2023-05-15 22:19:53 - RayLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model! -2023-05-15 22:19:53 - RayLog - INFO: - Worker 1 finished episode 77 with reward 200.0 in 200 steps -2023-05-15 22:19:56 - RayLog - INFO: - Worker 0 finished episode 78 with reward 187.0 in 187 steps -2023-05-15 22:19:57 - RayLog - INFO: - Worker 1 finished episode 79 with reward 200.0 in 200 steps -2023-05-15 22:19:58 - RayLog - INFO: - update_step: 3000, online_eval_reward: 200.000 -2023-05-15 22:20:00 - RayLog - INFO: - Worker 0 finished episode 80 with reward 200.0 in 200 steps -2023-05-15 22:20:02 - RayLog - INFO: - Worker 1 finished episode 81 with reward 200.0 in 200 steps -2023-05-15 22:20:04 - RayLog - INFO: - update_step: 3500, online_eval_reward: 165.000 -2023-05-15 22:20:04 - RayLog - INFO: - Worker 0 finished episode 82 with reward 200.0 in 200 steps -2023-05-15 22:20:06 - RayLog - INFO: - Worker 1 finished episode 83 with reward 200.0 in 200 steps -2023-05-15 22:20:08 - RayLog - INFO: - Worker 0 finished episode 84 with reward 200.0 in 200 steps -2023-05-15 22:20:09 - RayLog - INFO: - update_step: 4000, online_eval_reward: 200.000 -2023-05-15 22:20:10 - RayLog - INFO: - Worker 1 finished episode 85 with reward 200.0 in 200 steps -2023-05-15 22:20:12 - RayLog - INFO: - Worker 0 finished episode 86 with reward 200.0 in 200 steps -2023-05-15 22:20:14 - RayLog - INFO: - update_step: 4500, online_eval_reward: 200.000 -2023-05-15 22:20:14 - RayLog - INFO: - Worker 1 finished episode 87 with reward 200.0 in 200 steps -2023-05-15 22:20:16 - RayLog - INFO: - Worker 0 finished episode 88 with reward 200.0 in 200 steps -2023-05-15 22:20:18 - RayLog - INFO: - Worker 1 finished episode 89 with reward 200.0 in 200 steps -2023-05-15 22:20:19 - RayLog - INFO: - update_step: 5000, online_eval_reward: 200.000 -2023-05-15 22:20:20 - RayLog - INFO: - Worker 0 finished episode 90 with reward 200.0 in 200 steps -2023-05-15 22:20:22 - RayLog - INFO: - Worker 1 finished episode 91 with reward 200.0 in 200 steps -2023-05-15 22:20:24 - RayLog - INFO: - update_step: 5500, online_eval_reward: 200.000 -2023-05-15 22:20:24 - RayLog - INFO: - Worker 0 finished episode 92 with reward 200.0 in 200 steps -2023-05-15 22:20:26 - RayLog - INFO: - Worker 1 finished episode 93 with reward 200.0 in 200 steps -2023-05-15 22:20:28 - RayLog - INFO: - Worker 0 finished episode 94 with reward 200.0 in 200 steps -2023-05-15 22:20:29 - RayLog - INFO: - update_step: 6000, online_eval_reward: 200.000 -2023-05-15 22:20:30 - RayLog - INFO: - Worker 1 finished episode 95 with reward 200.0 in 200 steps -2023-05-15 22:20:32 - RayLog - INFO: - Worker 0 finished episode 96 with reward 200.0 in 200 steps -2023-05-15 22:20:34 - RayLog - INFO: - update_step: 6500, online_eval_reward: 200.000 -2023-05-15 22:20:34 - RayLog - INFO: - Worker 1 finished episode 97 with reward 200.0 in 200 steps -2023-05-15 22:20:37 - RayLog - INFO: - Worker 0 finished episode 98 with reward 200.0 in 200 steps -2023-05-15 22:20:38 - RayLog - INFO: - Worker 1 finished episode 99 with reward 200.0 in 200 steps -2023-05-15 22:20:40 - RayLog - INFO: - update_step: 7000, online_eval_reward: 200.000 -2023-05-15 22:20:40 - RayLog - INFO: - Worker 0 finished episode 100 with reward 200.0 in 200 steps -2023-05-15 22:20:43 - SimpleLog - INFO: - Finish training! total time consumed: 87.42s diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1000 deleted file mode 100644 index 420affc154150295755e8a0feba420d5c3f9603b..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1500 deleted file mode 100644 index 8d041c47770ee04a35b3a0bf9f508b66801c7b43..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/1500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2000 deleted file mode 100644 index af99a2189e2272e946b2332258ce73e388df402c..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2500 deleted file mode 100644 index 9024777871ee6ddcb14c5c57466334992233c951..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/2500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/3000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/3000 deleted file mode 100644 index 2454cb0052296fba8a288a983f035f72697addfd..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/3000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/3500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/3500 deleted file mode 100644 index e6329af77934477af2a093cd2783749d1b077df7..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/3500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/4000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/4000 deleted file mode 100644 index 95485f0b5953672165cc10eaf20fd71a5fb92532..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/4000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/4500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/4500 deleted file mode 100644 index 695e655b688ede6cd96ccc8be2ebdca4bd5e0d7c..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/4500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/500 deleted file mode 100644 index 945009444d139d5a0050490de5d4280944c1fca4..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/5000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/5000 deleted file mode 100644 index 697f3d75f0fbef8c9a2254fb7f0f5220edb9fe2a..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/5000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/5500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/5500 deleted file mode 100644 index 462784933d6e0cee7746781d63464f891686efab..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/5500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/6000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/6000 deleted file mode 100644 index e034de4781176a29137e6dccded308a84e6f260f..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/6000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/6500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/6500 deleted file mode 100644 index 37130f9fc68c7097b6c0af66848d867eabc774ce..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/6500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/7000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/7000 deleted file mode 100644 index 43fd6d22782cb9197fe8f966fbfa603a73bf883b..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/7000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/best b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/best deleted file mode 100644 index 9024777871ee6ddcb14c5c57466334992233c951..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/models/best and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/tb_logs/interact/events.out.tfevents.1684160356.DESKTOP-H34HQIQ.63896.0 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/tb_logs/interact/events.out.tfevents.1684160356.DESKTOP-H34HQIQ.63896.0 deleted file mode 100644 index c1ce53e64d33577d62d915ff555249470b556213..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/tb_logs/interact/events.out.tfevents.1684160356.DESKTOP-H34HQIQ.63896.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:696016c2758c0ccea9cb50504c6968dadacdaae4c399a656535de3d8cbd0dc05 -size 40 diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/tb_logs/interact/events.out.tfevents.1684160363.DESKTOP-H34HQIQ.19952.0 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/tb_logs/interact/events.out.tfevents.1684160363.DESKTOP-H34HQIQ.19952.0 deleted file mode 100644 index 2a88fbf7d25c597103e74228db51d47a06bdb4a9..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/tb_logs/interact/events.out.tfevents.1684160363.DESKTOP-H34HQIQ.19952.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c39d72aaeb990179b01ad4a2bbaba1d46da5d2f48bdc6e892248d0641c70f310 -size 10028 diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/tb_logs/model/events.out.tfevents.1684160356.DESKTOP-H34HQIQ.63896.1 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/tb_logs/model/events.out.tfevents.1684160356.DESKTOP-H34HQIQ.63896.1 deleted file mode 100644 index 1b0d971ed2287aa916ae13e39e26c5ae1bab6211..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/tb_logs/model/events.out.tfevents.1684160356.DESKTOP-H34HQIQ.63896.1 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fe9bc5429757d5aaaa9a50bfc3728575b26888176dd0cab2a5566229a9165e50 -size 40 diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/tb_logs/model/events.out.tfevents.1684160363.DESKTOP-H34HQIQ.19952.1 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/tb_logs/model/events.out.tfevents.1684160363.DESKTOP-H34HQIQ.19952.1 deleted file mode 100644 index 444406ca55374a8f3a2d824cafe3f61577e7a65a..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DQN_20230515-221916/tb_logs/model/events.out.tfevents.1684160363.DESKTOP-H34HQIQ.19952.1 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d0a82a1d116fc74439b132d0e405286bf1aa52a0883adcfe3b91391fabc89e5a -size 344824 diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/config.yaml b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/config.yaml deleted file mode 100644 index 9e3e73ae6492da75e8f7939447e8829ae04954ff..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/config.yaml +++ /dev/null @@ -1,44 +0,0 @@ -general_cfg: - algo_name: DuelingDQN - collect_traj: false - device: cpu - env_name: gym - load_checkpoint: false - load_model_step: best - load_path: Train_single_CartPole-v1_DQN_20230515-211721 - max_episode: 100 - max_step: 200 - mode: train - model_save_fre: 500 - mp_backend: ray - n_workers: 2 - online_eval: true - online_eval_episode: 10 - seed: 1 -algo_cfg: - batch_size: 64 - buffer_size: 100000 - buffer_type: REPLAY_QUE - dueling: true - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.95 - lr: 0.0001 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - layer_type: linear -env_cfg: - id: CartPole-v1 - ignore_params: - - wrapper - - ignore_params - render_mode: null - wrapper: null diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/logs/log.txt b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/logs/log.txt deleted file mode 100644 index bcffce3fb5a1368423f613fdad8aeff5cb7734d6..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/logs/log.txt +++ /dev/null @@ -1,169 +0,0 @@ -2023-05-17 22:41:29 - SimpleLog - INFO: - General Configs: -2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================ -2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type -2023-05-17 22:41:29 - SimpleLog - INFO: - env_name gym -2023-05-17 22:41:29 - SimpleLog - INFO: - algo_name DuelingDQN -2023-05-17 22:41:29 - SimpleLog - INFO: - mode train -2023-05-17 22:41:29 - SimpleLog - INFO: - device cpu -2023-05-17 22:41:29 - SimpleLog - INFO: - seed 1 -2023-05-17 22:41:29 - SimpleLog - INFO: - max_episode 100 -2023-05-17 22:41:29 - SimpleLog - INFO: - max_step 200 -2023-05-17 22:41:29 - SimpleLog - INFO: - collect_traj 0 -2023-05-17 22:41:29 - SimpleLog - INFO: - mp_backend ray -2023-05-17 22:41:29 - SimpleLog - INFO: - n_workers 2 -2023-05-17 22:41:29 - SimpleLog - INFO: - online_eval 1 -2023-05-17 22:41:29 - SimpleLog - INFO: - online_eval_episode 10 -2023-05-17 22:41:29 - SimpleLog - INFO: - model_save_fre 500 -2023-05-17 22:41:29 - SimpleLog - INFO: - load_checkpoint 0 -2023-05-17 22:41:29 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 -2023-05-17 22:41:29 - SimpleLog - INFO: - load_model_step best -2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================ -2023-05-17 22:41:29 - SimpleLog - INFO: - Algo Configs: -2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================ -2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type -2023-05-17 22:41:29 - SimpleLog - INFO: - dueling 1 -2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_start 0.95 -2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_end 0.01 -2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_decay 500 -2023-05-17 22:41:29 - SimpleLog - INFO: - gamma 0.95 -2023-05-17 22:41:29 - SimpleLog - INFO: - lr 0.0001 -2023-05-17 22:41:29 - SimpleLog - INFO: - buffer_size 100000 -2023-05-17 22:41:29 - SimpleLog - INFO: - batch_size 64 -2023-05-17 22:41:29 - SimpleLog - INFO: - target_update 4 -2023-05-17 22:41:29 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] -2023-05-17 22:41:29 - SimpleLog - INFO: - buffer_type REPLAY_QUE -2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================ -2023-05-17 22:41:29 - SimpleLog - INFO: - Env Configs: -2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================ -2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type -2023-05-17 22:41:29 - SimpleLog - INFO: - id CartPole-v1 -2023-05-17 22:41:29 - SimpleLog - INFO: - render_mode None -2023-05-17 22:41:29 - SimpleLog - INFO: - wrapper None -2023-05-17 22:41:29 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] -2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================ -2023-05-17 22:41:35 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2) -2023-05-17 22:41:38 - RayLog - INFO: - Worker 1 finished episode 0 with reward 22.0 in 22 steps -2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 0 with reward 23.0 in 23 steps -2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 2 with reward 10.0 in 10 steps -2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 3 with reward 9.0 in 9 steps -2023-05-17 22:41:38 - RayLog - INFO: - Worker 1 finished episode 2 with reward 29.0 in 29 steps -2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 4 with reward 11.0 in 11 steps -2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 6 with reward 15.0 in 15 steps -2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 5 with reward 18.0 in 18 steps -2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 7 with reward 9.0 in 9 steps -2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 8 with reward 11.0 in 11 steps -2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 10 with reward 13.0 in 13 steps -2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 9 with reward 25.0 in 25 steps -2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 12 with reward 12.0 in 12 steps -2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 13 with reward 10.0 in 10 steps -2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 11 with reward 33.0 in 33 steps -2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 14 with reward 9.0 in 9 steps -2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 15 with reward 10.0 in 10 steps -2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 16 with reward 13.0 in 13 steps -2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 17 with reward 16.0 in 16 steps -2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 18 with reward 9.0 in 9 steps -2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 20 with reward 11.0 in 11 steps -2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 19 with reward 16.0 in 16 steps -2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 21 with reward 18.0 in 18 steps -2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 22 with reward 18.0 in 18 steps -2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 23 with reward 11.0 in 11 steps -2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 24 with reward 9.0 in 9 steps -2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 26 with reward 9.0 in 9 steps -2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 25 with reward 10.0 in 10 steps -2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 28 with reward 11.0 in 11 steps -2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 27 with reward 12.0 in 12 steps -2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 29 with reward 15.0 in 15 steps -2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 30 with reward 19.0 in 19 steps -2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 31 with reward 10.0 in 10 steps -2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 32 with reward 13.0 in 13 steps -2023-05-17 22:41:44 - RayLog - INFO: - update_step: 500, online_eval_reward: 200.000 -2023-05-17 22:41:44 - RayLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model! -2023-05-17 22:41:45 - RayLog - INFO: - Worker 0 finished episode 33 with reward 97.0 in 97 steps -2023-05-17 22:41:45 - RayLog - INFO: - Worker 1 finished episode 34 with reward 96.0 in 96 steps -2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 36 with reward 24.0 in 24 steps -2023-05-17 22:41:46 - RayLog - INFO: - Worker 0 finished episode 35 with reward 34.0 in 34 steps -2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 37 with reward 17.0 in 17 steps -2023-05-17 22:41:46 - RayLog - INFO: - Worker 0 finished episode 38 with reward 23.0 in 23 steps -2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 39 with reward 16.0 in 16 steps -2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 41 with reward 17.0 in 17 steps -2023-05-17 22:41:47 - RayLog - INFO: - Worker 0 finished episode 40 with reward 24.0 in 24 steps -2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 42 with reward 21.0 in 21 steps -2023-05-17 22:41:47 - RayLog - INFO: - Worker 0 finished episode 43 with reward 29.0 in 29 steps -2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 44 with reward 22.0 in 22 steps -2023-05-17 22:41:49 - RayLog - INFO: - update_step: 1000, online_eval_reward: 100.000 -2023-05-17 22:41:49 - RayLog - INFO: - Worker 0 finished episode 45 with reward 84.0 in 84 steps -2023-05-17 22:41:49 - RayLog - INFO: - Worker 1 finished episode 46 with reward 75.0 in 75 steps -2023-05-17 22:41:50 - RayLog - INFO: - Worker 1 finished episode 48 with reward 52.0 in 52 steps -2023-05-17 22:41:50 - RayLog - INFO: - Worker 0 finished episode 47 with reward 66.0 in 66 steps -2023-05-17 22:41:51 - RayLog - INFO: - Worker 1 finished episode 49 with reward 63.0 in 63 steps -2023-05-17 22:41:52 - RayLog - INFO: - Worker 0 finished episode 50 with reward 94.0 in 94 steps -2023-05-17 22:41:53 - RayLog - INFO: - Worker 1 finished episode 51 with reward 75.0 in 75 steps -2023-05-17 22:41:54 - RayLog - INFO: - update_step: 1500, online_eval_reward: 120.000 -2023-05-17 22:41:54 - RayLog - INFO: - Worker 0 finished episode 52 with reward 102.0 in 102 steps -2023-05-17 22:41:55 - RayLog - INFO: - Worker 1 finished episode 53 with reward 93.0 in 93 steps -2023-05-17 22:41:57 - RayLog - INFO: - Worker 1 finished episode 55 with reward 126.0 in 126 steps -2023-05-17 22:41:58 - RayLog - INFO: - Worker 0 finished episode 54 with reward 200.0 in 200 steps -2023-05-17 22:41:59 - RayLog - INFO: - update_step: 2000, online_eval_reward: 200.000 -2023-05-17 22:42:01 - RayLog - INFO: - Worker 1 finished episode 56 with reward 200.0 in 200 steps -2023-05-17 22:42:02 - RayLog - INFO: - Worker 0 finished episode 57 with reward 200.0 in 200 steps -2023-05-17 22:42:04 - RayLog - INFO: - update_step: 2500, online_eval_reward: 167.000 -2023-05-17 22:42:05 - RayLog - INFO: - Worker 1 finished episode 58 with reward 200.0 in 200 steps -2023-05-17 22:42:06 - RayLog - INFO: - Worker 0 finished episode 59 with reward 168.0 in 168 steps -2023-05-17 22:42:09 - RayLog - INFO: - Worker 0 finished episode 61 with reward 164.0 in 164 steps -2023-05-17 22:42:09 - RayLog - INFO: - update_step: 3000, online_eval_reward: 145.000 -2023-05-17 22:42:09 - RayLog - INFO: - Worker 1 finished episode 60 with reward 189.0 in 189 steps -2023-05-17 22:42:12 - RayLog - INFO: - Worker 0 finished episode 62 with reward 152.0 in 152 steps -2023-05-17 22:42:12 - RayLog - INFO: - Worker 1 finished episode 63 with reward 162.0 in 162 steps -2023-05-17 22:42:14 - RayLog - INFO: - update_step: 3500, online_eval_reward: 151.000 -2023-05-17 22:42:15 - RayLog - INFO: - Worker 0 finished episode 64 with reward 143.0 in 143 steps -2023-05-17 22:42:16 - RayLog - INFO: - Worker 1 finished episode 65 with reward 163.0 in 163 steps -2023-05-17 22:42:19 - RayLog - INFO: - Worker 0 finished episode 66 with reward 187.0 in 187 steps -2023-05-17 22:42:19 - RayLog - INFO: - update_step: 4000, online_eval_reward: 189.000 -2023-05-17 22:42:20 - RayLog - INFO: - Worker 1 finished episode 67 with reward 200.0 in 200 steps -2023-05-17 22:42:22 - RayLog - INFO: - Worker 0 finished episode 68 with reward 173.0 in 173 steps -2023-05-17 22:42:23 - RayLog - INFO: - Worker 1 finished episode 69 with reward 170.0 in 170 steps -2023-05-17 22:42:24 - RayLog - INFO: - update_step: 4500, online_eval_reward: 178.000 -2023-05-17 22:42:26 - RayLog - INFO: - Worker 0 finished episode 70 with reward 200.0 in 200 steps -2023-05-17 22:42:27 - RayLog - INFO: - Worker 1 finished episode 71 with reward 200.0 in 200 steps -2023-05-17 22:42:30 - RayLog - INFO: - update_step: 5000, online_eval_reward: 197.000 -2023-05-17 22:42:30 - RayLog - INFO: - Worker 0 finished episode 72 with reward 200.0 in 200 steps -2023-05-17 22:42:31 - RayLog - INFO: - Worker 1 finished episode 73 with reward 200.0 in 200 steps -2023-05-17 22:42:35 - RayLog - INFO: - Worker 0 finished episode 74 with reward 197.0 in 197 steps -2023-05-17 22:42:35 - RayLog - INFO: - update_step: 5500, online_eval_reward: 200.000 -2023-05-17 22:42:36 - RayLog - INFO: - Worker 1 finished episode 75 with reward 200.0 in 200 steps -2023-05-17 22:42:39 - RayLog - INFO: - Worker 0 finished episode 76 with reward 200.0 in 200 steps -2023-05-17 22:42:40 - RayLog - INFO: - Worker 1 finished episode 77 with reward 200.0 in 200 steps -2023-05-17 22:42:40 - RayLog - INFO: - update_step: 6000, online_eval_reward: 200.000 -2023-05-17 22:42:43 - RayLog - INFO: - Worker 0 finished episode 78 with reward 200.0 in 200 steps -2023-05-17 22:42:44 - RayLog - INFO: - Worker 1 finished episode 79 with reward 200.0 in 200 steps -2023-05-17 22:42:45 - RayLog - INFO: - update_step: 6500, online_eval_reward: 200.000 -2023-05-17 22:42:47 - RayLog - INFO: - Worker 0 finished episode 80 with reward 200.0 in 200 steps -2023-05-17 22:42:48 - RayLog - INFO: - Worker 1 finished episode 81 with reward 200.0 in 200 steps -2023-05-17 22:42:51 - RayLog - INFO: - update_step: 7000, online_eval_reward: 200.000 -2023-05-17 22:42:52 - RayLog - INFO: - Worker 0 finished episode 82 with reward 200.0 in 200 steps -2023-05-17 22:42:53 - RayLog - INFO: - Worker 1 finished episode 83 with reward 200.0 in 200 steps -2023-05-17 22:42:56 - RayLog - INFO: - Worker 0 finished episode 84 with reward 200.0 in 200 steps -2023-05-17 22:42:56 - RayLog - INFO: - update_step: 7500, online_eval_reward: 200.000 -2023-05-17 22:42:57 - RayLog - INFO: - Worker 1 finished episode 85 with reward 200.0 in 200 steps -2023-05-17 22:43:00 - RayLog - INFO: - Worker 0 finished episode 86 with reward 200.0 in 200 steps -2023-05-17 22:43:01 - RayLog - INFO: - Worker 1 finished episode 87 with reward 200.0 in 200 steps -2023-05-17 22:43:02 - RayLog - INFO: - update_step: 8000, online_eval_reward: 200.000 -2023-05-17 22:43:05 - RayLog - INFO: - Worker 0 finished episode 88 with reward 200.0 in 200 steps -2023-05-17 22:43:06 - RayLog - INFO: - Worker 1 finished episode 89 with reward 200.0 in 200 steps -2023-05-17 22:43:07 - RayLog - INFO: - update_step: 8500, online_eval_reward: 200.000 -2023-05-17 22:43:09 - RayLog - INFO: - Worker 0 finished episode 90 with reward 200.0 in 200 steps -2023-05-17 22:43:10 - RayLog - INFO: - Worker 1 finished episode 91 with reward 200.0 in 200 steps -2023-05-17 22:43:12 - RayLog - INFO: - update_step: 9000, online_eval_reward: 200.000 -2023-05-17 22:43:13 - RayLog - INFO: - Worker 0 finished episode 92 with reward 200.0 in 200 steps -2023-05-17 22:43:14 - RayLog - INFO: - Worker 1 finished episode 93 with reward 200.0 in 200 steps -2023-05-17 22:43:18 - RayLog - INFO: - Worker 0 finished episode 94 with reward 200.0 in 200 steps -2023-05-17 22:43:18 - RayLog - INFO: - update_step: 9500, online_eval_reward: 200.000 -2023-05-17 22:43:19 - RayLog - INFO: - Worker 1 finished episode 95 with reward 200.0 in 200 steps -2023-05-17 22:43:22 - RayLog - INFO: - Worker 0 finished episode 96 with reward 200.0 in 200 steps -2023-05-17 22:43:23 - RayLog - INFO: - Worker 1 finished episode 97 with reward 200.0 in 200 steps -2023-05-17 22:43:23 - RayLog - INFO: - update_step: 10000, online_eval_reward: 200.000 -2023-05-17 22:43:26 - RayLog - INFO: - Worker 0 finished episode 98 with reward 200.0 in 200 steps -2023-05-17 22:43:27 - RayLog - INFO: - Worker 1 finished episode 99 with reward 200.0 in 200 steps -2023-05-17 22:43:29 - RayLog - INFO: - update_step: 10500, online_eval_reward: 200.000 -2023-05-17 22:43:30 - RayLog - INFO: - Worker 0 finished episode 100 with reward 200.0 in 200 steps -2023-05-17 22:43:32 - SimpleLog - INFO: - Finish training! total time consumed: 122.69s diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1000 deleted file mode 100644 index 8a1a7fc585401da088a6bdea8d34199f552b87a1..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10000 deleted file mode 100644 index 0c970675fae8a72051a34f62d7ea41e45fbcc903..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10500 deleted file mode 100644 index 7c553e306350fa2c312319c174433ff548d18bfb..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1500 deleted file mode 100644 index 7b9ea91528f7d2f901b891ffe407ce1eecede849..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2000 deleted file mode 100644 index 0d5ea43949d6581e17c12d1edcfd4957a49a1d0b..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2500 deleted file mode 100644 index 6c78b231f60c254d57f3a736465317bc5fdcf2ec..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3000 deleted file mode 100644 index 16ff119e33c1c01249bc7f329ff7ad3517f3bb70..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3500 deleted file mode 100644 index 38e1eb98fd7a547b6e2ade9c92ae68b0001120f2..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4000 deleted file mode 100644 index 111cc0b9915b87b31a4b9c03d2a1ff0dd6aee9f2..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4500 deleted file mode 100644 index 861dfd6f85b9a98638b96711a5e8867af1f1dbc4..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/500 deleted file mode 100644 index 70db3c549b999a0e295c54cb1d08c54d477c9d65..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5000 deleted file mode 100644 index 2abca6df474c2346e089135b23bb062dfd3d6e6c..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5500 deleted file mode 100644 index 2bb550dcabf521425701b68b0e9452557bd190e9..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6000 deleted file mode 100644 index 50f1b3486d023d8822bd37b24b96da2f1d93332a..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6500 deleted file mode 100644 index 2ac84962a51dbd241e0ac6c8450777d0c7106b89..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7000 deleted file mode 100644 index 85cdc9fd144d298ed35b0377bad48fd29d0d80d1..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7500 deleted file mode 100644 index 4a03da0ae0deb5c577859b7536628633dbabe3bc..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8000 deleted file mode 100644 index 21e89314f50b1d5876d860cbfbd252edc0e82fbb..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8500 deleted file mode 100644 index 671d71639d2274a31f2b26df4377702bdf681556..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9000 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9000 deleted file mode 100644 index 5a056e40d83af0709d0c8882f464021d8160024c..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9500 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9500 deleted file mode 100644 index a4eda1f4cd16280d209cc4aa65e56c42da80854f..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/best b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/best deleted file mode 100644 index 70db3c549b999a0e295c54cb1d08c54d477c9d65..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/best and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.0 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.0 deleted file mode 100644 index e777e27cd322c1959613e4d59dfbe5d943f9b957..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1c9b4a566642bacd5610c3e7b42d10f1feb9704e2a4cb2c004a7d85f75a0aba9 -size 40 diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.0 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.0 deleted file mode 100644 index 75dc2ac024a92cba2c4a47ecd6997bd519a9abef..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9867609ac2d205c8c66fe7bc380a67b26f152a046fb5e97d523f5b2bf1c147fd -size 10028 diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/model/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.1 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/model/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.1 deleted file mode 100644 index cf1543c241819ce51b7b0e93a227aa18686a85a1..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/model/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.1 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f4847f9588dc9c347debfc783cad98f92a9b0e1e5db600b4662da61e2dab30e6 -size 40 diff --git a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/model/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.1 b/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/model/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.1 deleted file mode 100644 index 6f91799c6abe4b9a427da88bc9e7a8f5675a606c..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/model/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.1 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0f2e6e0d51dc6d1689b2eddc7d32991cac8108b99ee68d0750a53d25d5890403 -size 516618 diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/config.yaml b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/config.yaml deleted file mode 100644 index 7eb44394853316880f393d83c98ec29c05a84f8a..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/config.yaml +++ /dev/null @@ -1,45 +0,0 @@ -general_cfg: - algo_name: DQN - collect_traj: false - device: cpu - env_name: gym - load_checkpoint: false - load_model_step: best - load_path: Train_single_CartPole-v1_DQN_20230515-211721 - max_episode: 150 - max_step: 200 - mode: train - model_save_fre: 500 - mp_backend: ray - n_learners: 2 - n_workers: 10 - online_eval: true - online_eval_episode: 10 - seed: 1 - share_buffer: true -algo_cfg: - batch_size: 64 - buffer_size: 100000 - buffer_type: REPLAY_QUE - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.95 - lr: 0.0001 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - layer_type: linear -env_cfg: - id: CartPole-v1 - ignore_params: - - wrapper - - ignore_params - render_mode: null - wrapper: null diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/logs/log.txt b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/logs/log.txt deleted file mode 100644 index 669b0fb0fe259e1726326833207853c11bc1481a..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/logs/log.txt +++ /dev/null @@ -1,225 +0,0 @@ -2023-05-18 00:06:56 - SimpleLog - INFO: - General Configs: -2023-05-18 00:06:56 - SimpleLog - INFO: - ================================================================================ -2023-05-18 00:06:56 - SimpleLog - INFO: - Name Value Type -2023-05-18 00:06:56 - SimpleLog - INFO: - env_name gym -2023-05-18 00:06:56 - SimpleLog - INFO: - algo_name DQN -2023-05-18 00:06:56 - SimpleLog - INFO: - mode train -2023-05-18 00:06:56 - SimpleLog - INFO: - device cpu -2023-05-18 00:06:56 - SimpleLog - INFO: - seed 1 -2023-05-18 00:06:56 - SimpleLog - INFO: - max_episode 150 -2023-05-18 00:06:56 - SimpleLog - INFO: - max_step 200 -2023-05-18 00:06:56 - SimpleLog - INFO: - collect_traj 0 -2023-05-18 00:06:56 - SimpleLog - INFO: - mp_backend ray -2023-05-18 00:06:56 - SimpleLog - INFO: - n_workers 10 -2023-05-18 00:06:56 - SimpleLog - INFO: - n_learners 2 -2023-05-18 00:06:56 - SimpleLog - INFO: - share_buffer 1 -2023-05-18 00:06:56 - SimpleLog - INFO: - online_eval 1 -2023-05-18 00:06:56 - SimpleLog - INFO: - online_eval_episode 10 -2023-05-18 00:06:56 - SimpleLog - INFO: - model_save_fre 500 -2023-05-18 00:06:56 - SimpleLog - INFO: - load_checkpoint 0 -2023-05-18 00:06:56 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 -2023-05-18 00:06:56 - SimpleLog - INFO: - load_model_step best -2023-05-18 00:06:56 - SimpleLog - INFO: - ================================================================================ -2023-05-18 00:06:56 - SimpleLog - INFO: - Algo Configs: -2023-05-18 00:06:56 - SimpleLog - INFO: - ================================================================================ -2023-05-18 00:06:56 - SimpleLog - INFO: - Name Value Type -2023-05-18 00:06:56 - SimpleLog - INFO: - epsilon_start 0.95 -2023-05-18 00:06:56 - SimpleLog - INFO: - epsilon_end 0.01 -2023-05-18 00:06:56 - SimpleLog - INFO: - epsilon_decay 500 -2023-05-18 00:06:56 - SimpleLog - INFO: - gamma 0.95 -2023-05-18 00:06:56 - SimpleLog - INFO: - lr 0.0001 -2023-05-18 00:06:56 - SimpleLog - INFO: - buffer_size 100000 -2023-05-18 00:06:56 - SimpleLog - INFO: - batch_size 64 -2023-05-18 00:06:56 - SimpleLog - INFO: - target_update 4 -2023-05-18 00:06:56 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] -2023-05-18 00:06:56 - SimpleLog - INFO: - buffer_type REPLAY_QUE -2023-05-18 00:06:56 - SimpleLog - INFO: - ================================================================================ -2023-05-18 00:06:56 - SimpleLog - INFO: - Env Configs: -2023-05-18 00:06:56 - SimpleLog - INFO: - ================================================================================ -2023-05-18 00:06:56 - SimpleLog - INFO: - Name Value Type -2023-05-18 00:06:56 - SimpleLog - INFO: - id CartPole-v1 -2023-05-18 00:06:56 - SimpleLog - INFO: - render_mode None -2023-05-18 00:06:56 - SimpleLog - INFO: - wrapper None -2023-05-18 00:06:56 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] -2023-05-18 00:06:56 - SimpleLog - INFO: - ================================================================================ -2023-05-18 00:07:01 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2) -2023-05-18 00:07:08 - RayLog - INFO: - Worker 1 finished episode 0 with reward 9.0 in 9 steps -2023-05-18 00:07:09 - RayLog - INFO: - Worker 8 finished episode 0 with reward 10.0 in 10 steps -2023-05-18 00:07:09 - RayLog - INFO: - Worker 2 finished episode 0 with reward 16.0 in 16 steps -2023-05-18 00:07:09 - RayLog - INFO: - Worker 0 finished episode 0 with reward 14.0 in 14 steps -2023-05-18 00:07:10 - RayLog - INFO: - Worker 3 finished episode 0 with reward 18.0 in 18 steps -2023-05-18 00:07:10 - RayLog - INFO: - Worker 6 finished episode 0 with reward 14.0 in 14 steps -2023-05-18 00:07:10 - RayLog - INFO: - Worker 9 finished episode 0 with reward 20.0 in 20 steps -2023-05-18 00:07:10 - RayLog - INFO: - Worker 5 finished episode 0 with reward 27.0 in 27 steps -2023-05-18 00:07:10 - RayLog - INFO: - Worker 2 finished episode 3 with reward 12.0 in 12 steps -2023-05-18 00:07:11 - RayLog - INFO: - Worker 8 finished episode 2 with reward 15.0 in 15 steps -2023-05-18 00:07:11 - RayLog - INFO: - Worker 3 finished episode 5 with reward 13.0 in 13 steps -2023-05-18 00:07:11 - RayLog - INFO: - Worker 1 finished episode 1 with reward 21.0 in 21 steps -2023-05-18 00:07:11 - RayLog - INFO: - Worker 7 finished episode 0 with reward 32.0 in 32 steps -2023-05-18 00:07:12 - RayLog - INFO: - Worker 0 finished episode 4 with reward 16.0 in 16 steps -2023-05-18 00:07:12 - RayLog - INFO: - Worker 6 finished episode 6 with reward 18.0 in 18 steps -2023-05-18 00:07:13 - RayLog - INFO: - Worker 4 finished episode 0 with reward 38.0 in 38 steps -2023-05-18 00:07:13 - RayLog - INFO: - Worker 2 finished episode 9 with reward 15.0 in 15 steps -2023-05-18 00:07:13 - RayLog - INFO: - Worker 5 finished episode 8 with reward 16.0 in 16 steps -2023-05-18 00:07:13 - RayLog - INFO: - Worker 8 finished episode 10 with reward 14.0 in 14 steps -2023-05-18 00:07:14 - RayLog - INFO: - Worker 0 finished episode 14 with reward 10.0 in 10 steps -2023-05-18 00:07:14 - RayLog - INFO: - Worker 7 finished episode 13 with reward 12.0 in 12 steps -2023-05-18 00:07:14 - RayLog - INFO: - Worker 3 finished episode 11 with reward 14.0 in 14 steps -2023-05-18 00:07:14 - RayLog - INFO: - Worker 6 finished episode 15 with reward 9.0 in 9 steps -2023-05-18 00:07:14 - RayLog - INFO: - Worker 1 finished episode 12 with reward 20.0 in 20 steps -2023-05-18 00:07:14 - RayLog - INFO: - Worker 2 finished episode 18 with reward 9.0 in 9 steps -2023-05-18 00:07:14 - RayLog - INFO: - Worker 9 finished episode 7 with reward 32.0 in 32 steps -2023-05-18 00:07:14 - RayLog - INFO: - Worker 4 finished episode 16 with reward 14.0 in 14 steps -2023-05-18 00:07:15 - RayLog - INFO: - Worker 3 finished episode 22 with reward 10.0 in 10 steps -2023-05-18 00:07:15 - RayLog - INFO: - Worker 8 finished episode 19 with reward 12.0 in 12 steps -2023-05-18 00:07:15 - RayLog - INFO: - Worker 6 finished episode 23 with reward 10.0 in 10 steps -2023-05-18 00:07:15 - RayLog - INFO: - Worker 7 finished episode 21 with reward 13.0 in 13 steps -2023-05-18 00:07:15 - RayLog - INFO: - Worker 5 finished episode 19 with reward 15.0 in 15 steps -2023-05-18 00:07:16 - RayLog - INFO: - learner id: 0, update_step: 500, online_eval_reward: 10.000 -2023-05-18 00:07:16 - RayLog - INFO: - learner 0 for current update step obtain a better online_eval_reward: 10.000, save the best model! -2023-05-18 00:07:16 - RayLog - INFO: - Worker 1 finished episode 24 with reward 11.0 in 11 steps -2023-05-18 00:07:16 - RayLog - INFO: - Worker 0 finished episode 20 with reward 18.0 in 18 steps -2023-05-18 00:07:16 - RayLog - INFO: - Worker 4 finished episode 27 with reward 10.0 in 10 steps -2023-05-18 00:07:17 - RayLog - INFO: - Worker 3 finished episode 29 with reward 9.0 in 9 steps -2023-05-18 00:07:18 - RayLog - INFO: - Worker 6 finished episode 30 with reward 12.0 in 12 steps -2023-05-18 00:07:18 - RayLog - INFO: - Worker 7 finished episode 32 with reward 11.0 in 11 steps -2023-05-18 00:07:18 - RayLog - INFO: - Worker 2 finished episode 25 with reward 11.0 in 11 steps -2023-05-18 00:07:18 - RayLog - INFO: - Worker 5 finished episode 32 with reward 12.0 in 12 steps -2023-05-18 00:07:18 - RayLog - INFO: - Worker 8 finished episode 29 with reward 17.0 in 17 steps -2023-05-18 00:07:18 - RayLog - INFO: - Worker 0 finished episode 34 with reward 11.0 in 11 steps -2023-05-18 00:07:18 - RayLog - INFO: - Worker 1 finished episode 33 with reward 12.0 in 12 steps -2023-05-18 00:07:18 - RayLog - INFO: - Worker 3 finished episode 36 with reward 9.0 in 9 steps -2023-05-18 00:07:18 - RayLog - INFO: - Worker 4 finished episode 35 with reward 12.0 in 12 steps -2023-05-18 00:07:18 - RayLog - INFO: - Worker 9 finished episode 26 with reward 26.0 in 26 steps -2023-05-18 00:07:19 - RayLog - INFO: - Worker 6 finished episode 37 with reward 12.0 in 12 steps -2023-05-18 00:07:19 - RayLog - INFO: - Worker 7 finished episode 39 with reward 13.0 in 13 steps -2023-05-18 00:07:20 - RayLog - INFO: - Worker 5 finished episode 40 with reward 12.0 in 12 steps -2023-05-18 00:07:20 - RayLog - INFO: - Worker 2 finished episode 39 with reward 15.0 in 15 steps -2023-05-18 00:07:20 - RayLog - INFO: - Worker 3 finished episode 44 with reward 10.0 in 10 steps -2023-05-18 00:07:20 - RayLog - INFO: - Worker 1 finished episode 44 with reward 11.0 in 11 steps -2023-05-18 00:07:21 - RayLog - INFO: - Worker 4 finished episode 45 with reward 12.0 in 12 steps -2023-05-18 00:07:21 - RayLog - INFO: - Worker 8 finished episode 41 with reward 16.0 in 16 steps -2023-05-18 00:07:21 - RayLog - INFO: - Worker 0 finished episode 42 with reward 15.0 in 15 steps -2023-05-18 00:07:21 - RayLog - INFO: - Worker 9 finished episode 46 with reward 16.0 in 16 steps -2023-05-18 00:07:22 - RayLog - INFO: - Worker 6 finished episode 47 with reward 15.0 in 15 steps -2023-05-18 00:07:24 - RayLog - INFO: - Worker 9 finished episode 56 with reward 11.0 in 11 steps -2023-05-18 00:07:24 - RayLog - INFO: - Worker 0 finished episode 55 with reward 18.0 in 18 steps -2023-05-18 00:07:24 - RayLog - INFO: - Worker 6 finished episode 57 with reward 11.0 in 11 steps -2023-05-18 00:07:24 - RayLog - INFO: - Worker 1 finished episode 52 with reward 23.0 in 23 steps -2023-05-18 00:07:24 - RayLog - INFO: - learner id: 1, update_step: 1000, online_eval_reward: 9.000 -2023-05-18 00:07:25 - RayLog - INFO: - Worker 4 finished episode 53 with reward 23.0 in 23 steps -2023-05-18 00:07:25 - RayLog - INFO: - Worker 5 finished episode 49 with reward 26.0 in 26 steps -2023-05-18 00:07:25 - RayLog - INFO: - Worker 8 finished episode 54 with reward 23.0 in 23 steps -2023-05-18 00:07:25 - RayLog - INFO: - Worker 2 finished episode 51 with reward 27.0 in 27 steps -2023-05-18 00:07:25 - RayLog - INFO: - Worker 7 finished episode 48 with reward 31.0 in 31 steps -2023-05-18 00:07:25 - RayLog - INFO: - Worker 3 finished episode 51 with reward 29.0 in 29 steps -2023-05-18 00:07:26 - RayLog - INFO: - Worker 9 finished episode 58 with reward 10.0 in 10 steps -2023-05-18 00:07:26 - RayLog - INFO: - Worker 0 finished episode 60 with reward 9.0 in 9 steps -2023-05-18 00:07:26 - RayLog - INFO: - Worker 6 finished episode 60 with reward 11.0 in 11 steps -2023-05-18 00:07:26 - RayLog - INFO: - Worker 5 finished episode 63 with reward 9.0 in 9 steps -2023-05-18 00:07:26 - RayLog - INFO: - Worker 1 finished episode 61 with reward 11.0 in 11 steps -2023-05-18 00:07:27 - RayLog - INFO: - Worker 4 finished episode 63 with reward 10.0 in 10 steps -2023-05-18 00:07:27 - RayLog - INFO: - Worker 8 finished episode 65 with reward 10.0 in 10 steps -2023-05-18 00:07:27 - RayLog - INFO: - Worker 3 finished episode 67 with reward 10.0 in 10 steps -2023-05-18 00:07:27 - RayLog - INFO: - Worker 2 finished episode 65 with reward 14.0 in 14 steps -2023-05-18 00:07:27 - RayLog - INFO: - Worker 0 finished episode 69 with reward 9.0 in 9 steps -2023-05-18 00:07:27 - RayLog - INFO: - Worker 7 finished episode 67 with reward 9.0 in 9 steps -2023-05-18 00:07:27 - RayLog - INFO: - Worker 9 finished episode 69 with reward 12.0 in 12 steps -2023-05-18 00:07:27 - RayLog - INFO: - Worker 5 finished episode 71 with reward 9.0 in 9 steps -2023-05-18 00:07:27 - RayLog - INFO: - Worker 6 finished episode 70 with reward 10.0 in 10 steps -2023-05-18 00:07:27 - RayLog - INFO: - Worker 1 finished episode 72 with reward 10.0 in 10 steps -2023-05-18 00:07:27 - RayLog - INFO: - Worker 4 finished episode 74 with reward 10.0 in 10 steps -2023-05-18 00:07:27 - RayLog - INFO: - Worker 8 finished episode 74 with reward 10.0 in 10 steps -2023-05-18 00:07:28 - RayLog - INFO: - Worker 3 finished episode 75 with reward 10.0 in 10 steps -2023-05-18 00:07:28 - RayLog - INFO: - Worker 7 finished episode 78 with reward 9.0 in 9 steps -2023-05-18 00:07:28 - RayLog - INFO: - Worker 2 finished episode 76 with reward 10.0 in 10 steps -2023-05-18 00:07:28 - RayLog - INFO: - Worker 5 finished episode 80 with reward 9.0 in 9 steps -2023-05-18 00:07:28 - RayLog - INFO: - Worker 0 finished episode 77 with reward 11.0 in 11 steps -2023-05-18 00:07:28 - RayLog - INFO: - Worker 9 finished episode 80 with reward 10.0 in 10 steps -2023-05-18 00:07:28 - RayLog - INFO: - Worker 6 finished episode 81 with reward 10.0 in 10 steps -2023-05-18 00:07:28 - RayLog - INFO: - Worker 4 finished episode 83 with reward 9.0 in 9 steps -2023-05-18 00:07:29 - RayLog - INFO: - Worker 8 finished episode 84 with reward 10.0 in 10 steps -2023-05-18 00:07:29 - RayLog - INFO: - Worker 1 finished episode 82 with reward 13.0 in 13 steps -2023-05-18 00:07:29 - RayLog - INFO: - Worker 3 finished episode 85 with reward 10.0 in 10 steps -2023-05-18 00:07:29 - RayLog - INFO: - Worker 7 finished episode 86 with reward 9.0 in 9 steps -2023-05-18 00:07:29 - RayLog - INFO: - Worker 2 finished episode 87 with reward 10.0 in 10 steps -2023-05-18 00:07:29 - RayLog - INFO: - Worker 5 finished episode 88 with reward 9.0 in 9 steps -2023-05-18 00:07:29 - RayLog - INFO: - Worker 0 finished episode 90 with reward 9.0 in 9 steps -2023-05-18 00:07:30 - RayLog - INFO: - Worker 4 finished episode 92 with reward 9.0 in 9 steps -2023-05-18 00:07:30 - RayLog - INFO: - Worker 6 finished episode 91 with reward 10.0 in 10 steps -2023-05-18 00:07:32 - RayLog - INFO: - Worker 3 finished episode 95 with reward 19.0 in 19 steps -2023-05-18 00:07:32 - RayLog - INFO: - learner id: 0, update_step: 1500, online_eval_reward: 35.000 -2023-05-18 00:07:32 - RayLog - INFO: - learner 0 for current update step obtain a better online_eval_reward: 35.000, save the best model! -2023-05-18 00:07:32 - RayLog - INFO: - Worker 2 finished episode 98 with reward 22.0 in 22 steps -2023-05-18 00:07:33 - RayLog - INFO: - Worker 6 finished episode 101 with reward 21.0 in 21 steps -2023-05-18 00:07:33 - RayLog - INFO: - Worker 4 finished episode 100 with reward 23.0 in 23 steps -2023-05-18 00:07:33 - RayLog - INFO: - Worker 0 finished episode 99 with reward 28.0 in 28 steps -2023-05-18 00:07:33 - RayLog - INFO: - Worker 9 finished episode 90 with reward 40.0 in 40 steps -2023-05-18 00:07:33 - RayLog - INFO: - Worker 7 finished episode 96 with reward 30.0 in 30 steps -2023-05-18 00:07:33 - RayLog - INFO: - Worker 5 finished episode 98 with reward 32.0 in 32 steps -2023-05-18 00:07:33 - RayLog - INFO: - Worker 3 finished episode 102 with reward 20.0 in 20 steps -2023-05-18 00:07:33 - RayLog - INFO: - Worker 1 finished episode 94 with reward 37.0 in 37 steps -2023-05-18 00:07:34 - RayLog - INFO: - Worker 8 finished episode 93 with reward 43.0 in 43 steps -2023-05-18 00:07:38 - RayLog - INFO: - learner id: 1, update_step: 2000, online_eval_reward: 49.000 -2023-05-18 00:07:38 - RayLog - INFO: - learner 1 for current update step obtain a better online_eval_reward: 49.000, save the best model! -2023-05-18 00:07:42 - RayLog - INFO: - Worker 6 finished episode 104 with reward 72.0 in 72 steps -2023-05-18 00:07:42 - RayLog - INFO: - Worker 3 finished episode 110 with reward 64.0 in 64 steps -2023-05-18 00:07:43 - RayLog - INFO: - Worker 1 finished episode 111 with reward 65.0 in 65 steps -2023-05-18 00:07:43 - RayLog - INFO: - Worker 4 finished episode 105 with reward 72.0 in 72 steps -2023-05-18 00:07:43 - RayLog - INFO: - Worker 8 finished episode 112 with reward 60.0 in 60 steps -2023-05-18 00:07:43 - RayLog - INFO: - Worker 2 finished episode 103 with reward 78.0 in 78 steps -2023-05-18 00:07:43 - RayLog - INFO: - Worker 7 finished episode 108 with reward 69.0 in 69 steps -2023-05-18 00:07:43 - RayLog - INFO: - Worker 9 finished episode 107 with reward 70.0 in 70 steps -2023-05-18 00:07:44 - RayLog - INFO: - Worker 0 finished episode 106 with reward 69.0 in 69 steps -2023-05-18 00:07:44 - RayLog - INFO: - Worker 5 finished episode 109 with reward 74.0 in 74 steps -2023-05-18 00:07:45 - RayLog - INFO: - learner id: 0, update_step: 2500, online_eval_reward: 35.000 -2023-05-18 00:07:49 - RayLog - INFO: - Worker 5 finished episode 122 with reward 49.0 in 49 steps -2023-05-18 00:07:51 - RayLog - INFO: - Worker 0 finished episode 122 with reward 58.0 in 58 steps -2023-05-18 00:07:51 - RayLog - INFO: - learner id: 0, update_step: 3000, online_eval_reward: 200.000 -2023-05-18 00:07:51 - RayLog - INFO: - learner 0 for current update step obtain a better online_eval_reward: 200.000, save the best model! -2023-05-18 00:07:52 - RayLog - INFO: - Worker 7 finished episode 119 with reward 64.0 in 64 steps -2023-05-18 00:07:53 - RayLog - INFO: - Worker 4 finished episode 116 with reward 78.0 in 78 steps -2023-05-18 00:07:53 - RayLog - INFO: - Worker 3 finished episode 114 with reward 86.0 in 86 steps -2023-05-18 00:07:53 - RayLog - INFO: - Worker 8 finished episode 117 with reward 85.0 in 85 steps -2023-05-18 00:07:54 - RayLog - INFO: - Worker 6 finished episode 113 with reward 94.0 in 94 steps -2023-05-18 00:07:54 - RayLog - INFO: - Worker 1 finished episode 115 with reward 97.0 in 97 steps -2023-05-18 00:07:55 - RayLog - INFO: - Worker 2 finished episode 118 with reward 95.0 in 95 steps -2023-05-18 00:07:56 - RayLog - INFO: - Worker 0 finished episode 124 with reward 45.0 in 45 steps -2023-05-18 00:07:57 - RayLog - INFO: - learner id: 1, update_step: 3500, online_eval_reward: 56.000 -2023-05-18 00:07:58 - RayLog - INFO: - Worker 7 finished episode 125 with reward 55.0 in 55 steps -2023-05-18 00:07:59 - RayLog - INFO: - Worker 5 finished episode 123 with reward 77.0 in 77 steps -2023-05-18 00:08:00 - RayLog - INFO: - Worker 9 finished episode 120 with reward 138.0 in 138 steps -2023-05-18 00:08:04 - RayLog - INFO: - learner id: 1, update_step: 4000, online_eval_reward: 200.000 -2023-05-18 00:08:05 - RayLog - INFO: - Worker 4 finished episode 126 with reward 87.0 in 87 steps -2023-05-18 00:08:05 - RayLog - INFO: - Worker 0 finished episode 132 with reward 66.0 in 66 steps -2023-05-18 00:08:08 - RayLog - INFO: - Worker 8 finished episode 128 with reward 107.0 in 107 steps -2023-05-18 00:08:10 - RayLog - INFO: - Worker 5 finished episode 134 with reward 82.0 in 82 steps -2023-05-18 00:08:11 - RayLog - INFO: - learner id: 1, update_step: 4500, online_eval_reward: 153.000 -2023-05-18 00:08:11 - RayLog - INFO: - Worker 7 finished episode 133 with reward 93.0 in 93 steps -2023-05-18 00:08:18 - RayLog - INFO: - learner id: 0, update_step: 5000, online_eval_reward: 200.000 -2023-05-18 00:08:18 - RayLog - INFO: - Worker 3 finished episode 127 with reward 194.0 in 194 steps -2023-05-18 00:08:20 - RayLog - INFO: - Worker 2 finished episode 131 with reward 171.0 in 171 steps -2023-05-18 00:08:21 - RayLog - INFO: - Worker 1 finished episode 130 with reward 200.0 in 200 steps -2023-05-18 00:08:21 - RayLog - INFO: - Worker 6 finished episode 130 with reward 197.0 in 197 steps -2023-05-18 00:08:24 - RayLog - INFO: - learner id: 0, update_step: 5500, online_eval_reward: 200.000 -2023-05-18 00:08:26 - RayLog - INFO: - Worker 9 finished episode 135 with reward 200.0 in 200 steps -2023-05-18 00:08:30 - RayLog - INFO: - learner id: 0, update_step: 6000, online_eval_reward: 162.000 -2023-05-18 00:08:32 - RayLog - INFO: - Worker 0 finished episode 137 with reward 200.0 in 200 steps -2023-05-18 00:08:33 - RayLog - INFO: - Worker 4 finished episode 136 with reward 200.0 in 200 steps -2023-05-18 00:08:34 - RayLog - INFO: - Worker 5 finished episode 139 with reward 200.0 in 200 steps -2023-05-18 00:08:35 - RayLog - INFO: - Worker 8 finished episode 138 with reward 200.0 in 200 steps -2023-05-18 00:08:36 - RayLog - INFO: - Worker 7 finished episode 140 with reward 200.0 in 200 steps -2023-05-18 00:08:37 - RayLog - INFO: - learner id: 1, update_step: 6500, online_eval_reward: 200.000 -2023-05-18 00:08:43 - RayLog - INFO: - Worker 3 finished episode 141 with reward 200.0 in 200 steps -2023-05-18 00:08:44 - RayLog - INFO: - learner id: 0, update_step: 7000, online_eval_reward: 200.000 -2023-05-18 00:08:44 - RayLog - INFO: - Worker 2 finished episode 142 with reward 200.0 in 200 steps -2023-05-18 00:08:46 - RayLog - INFO: - Worker 1 finished episode 143 with reward 200.0 in 200 steps -2023-05-18 00:08:46 - RayLog - INFO: - Worker 6 finished episode 144 with reward 200.0 in 200 steps -2023-05-18 00:08:50 - RayLog - INFO: - Worker 9 finished episode 145 with reward 200.0 in 200 steps -2023-05-18 00:08:51 - RayLog - INFO: - learner id: 0, update_step: 7500, online_eval_reward: 200.000 -2023-05-18 00:08:53 - RayLog - INFO: - Worker 4 finished episode 147 with reward 200.0 in 200 steps -2023-05-18 00:08:54 - RayLog - INFO: - Worker 0 finished episode 146 with reward 200.0 in 200 steps -2023-05-18 00:08:55 - RayLog - INFO: - Worker 5 finished episode 148 with reward 200.0 in 200 steps -2023-05-18 00:08:56 - RayLog - INFO: - Worker 7 finished episode 150 with reward 200.0 in 200 steps -2023-05-18 00:08:56 - RayLog - INFO: - Worker 8 finished episode 149 with reward 200.0 in 200 steps -2023-05-18 00:08:59 - SimpleLog - INFO: - Finish training! total time consumed: 122.73s diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/1000 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/1000 deleted file mode 100644 index 0ddfb43fdf42cbdcfedba008e0473ea8c932d8f0..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/1000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/1500 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/1500 deleted file mode 100644 index a19fd6a0847f170b75e8fd579c43db6a027fed96..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/1500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/2000 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/2000 deleted file mode 100644 index 26239a57791f6b09ae6acbf410bfea33ab67982c..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/2000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/2500 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/2500 deleted file mode 100644 index 5403c613eddbee8ff81b9102937308f46eb933a1..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/2500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/3000 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/3000 deleted file mode 100644 index b835806ffb0b6668614f3c8533162a283e11b902..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/3000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/3500 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/3500 deleted file mode 100644 index 96f592df68d9dfeaaa5ac498c6c2f38a8ea56403..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/3500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/4000 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/4000 deleted file mode 100644 index 22758be89e378e953b6bbaa2cf34f18a45e3252e..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/4000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/4500 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/4500 deleted file mode 100644 index a46617df4e78d79404574a29f8422b4089a14483..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/4500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/500 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/500 deleted file mode 100644 index a989c1861b2c6f647060310c7d15420306971645..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/5000 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/5000 deleted file mode 100644 index f1cfb97f4ff0c3b2a5828aac5e8ad7f988448977..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/5000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/5500 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/5500 deleted file mode 100644 index ab3a1b2c72df910f752c7ef0edd26aa49ede31c5..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/5500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/6000 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/6000 deleted file mode 100644 index aae7dcda00157110573bd5423d1b948734e2fcf9..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/6000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/6500 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/6500 deleted file mode 100644 index 1f5fad16755b1b5e718afc2c4f6915fa051ee036..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/6500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/7000 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/7000 deleted file mode 100644 index b7ed3703bf2eafc752e0e78202fb65bd8000a8cc..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/7000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/7500 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/7500 deleted file mode 100644 index 8a593ed8b38d3f5777f69404be75080f0a69e06a..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/7500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/best b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/best deleted file mode 100644 index b835806ffb0b6668614f3c8533162a283e11b902..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/models/best and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/tb_logs/interact/events.out.tfevents.1684339616.DESKTOP-H34HQIQ.85940.0 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/tb_logs/interact/events.out.tfevents.1684339616.DESKTOP-H34HQIQ.85940.0 deleted file mode 100644 index c4dd9fd89b4a7ebbd9606d931e86def1498a76ad..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/tb_logs/interact/events.out.tfevents.1684339616.DESKTOP-H34HQIQ.85940.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0809faa3d06f301997fe2086bfc194e0722cf3d1d3aba069429b7945ec2a62f7 -size 40 diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/tb_logs/interact/events.out.tfevents.1684339625.DESKTOP-H34HQIQ.86440.0 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/tb_logs/interact/events.out.tfevents.1684339625.DESKTOP-H34HQIQ.86440.0 deleted file mode 100644 index b5bcce709eac4e64afab96ec57c9569e1219118f..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/tb_logs/interact/events.out.tfevents.1684339625.DESKTOP-H34HQIQ.86440.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:898c4511bf96015d660bf6792fc3b4a86216201b4dde4b0706fd7b1f03c3f9fd -size 15726 diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/tb_logs/model/events.out.tfevents.1684339616.DESKTOP-H34HQIQ.85940.1 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/tb_logs/model/events.out.tfevents.1684339616.DESKTOP-H34HQIQ.85940.1 deleted file mode 100644 index 9f797fc0a23fed6c922bd28f38d3c1dbabc6051e..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/tb_logs/model/events.out.tfevents.1684339616.DESKTOP-H34HQIQ.85940.1 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fb70b39d8d73fd2532acd6826f916c1fd0cd444129a5f906714f87c1688f9f9c -size 40 diff --git a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/tb_logs/model/events.out.tfevents.1684339625.DESKTOP-H34HQIQ.86440.1 b/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/tb_logs/model/events.out.tfevents.1684339625.DESKTOP-H34HQIQ.86440.1 deleted file mode 100644 index 19c32b633b92835732ba4c99915c41fb1a7ac153..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_ray_multi_learner_CartPole-v1_DQN_20230518-000656/tb_logs/model/events.out.tfevents.1684339625.DESKTOP-H34HQIQ.86440.1 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dfaa682d25e17dd438c8dc904403312777485965c83e55a4d92fbd56288b83d4 -size 380741 diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/config.yaml b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/config.yaml deleted file mode 100644 index 75110d577c14ac7a1b2fd656506e4e380280b325..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/config.yaml +++ /dev/null @@ -1,45 +0,0 @@ -general_cfg: - algo_name: DQN - collect_traj: false - device: cpu - env_name: gym - load_checkpoint: false - load_path: Train_CartPole-v1_DQN_20221026-054757 - max_episode: 100 - max_step: 200 - mode: train - model_save_fre: 500 - mp_backend: single - n_workers: 1 - online_eval: true - online_eval_episode: 10 - render_mode: null - save_fig: true - seed: 1 - show_fig: false -algo_cfg: - batch_size: 64 - buffer_size: 100000 - buffer_type: REPLAY_QUE - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.95 - lr: 0.0001 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - layer_type: linear -env_cfg: - id: CartPole-v1 - ignore_params: - - wrapper - - ignore_params - render_mode: null - wrapper: null diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/logs/log.txt b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/logs/log.txt deleted file mode 100644 index c7d2c227f466b49b663bebf2274cd8434afaf2f6..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/logs/log.txt +++ /dev/null @@ -1,168 +0,0 @@ -2023-05-15 21:17:21 - SimpleLog - INFO: - General Configs: -2023-05-15 21:17:21 - SimpleLog - INFO: - ================================================================================ -2023-05-15 21:17:21 - SimpleLog - INFO: - Name Value Type -2023-05-15 21:17:21 - SimpleLog - INFO: - env_name gym -2023-05-15 21:17:21 - SimpleLog - INFO: - render_mode None -2023-05-15 21:17:21 - SimpleLog - INFO: - algo_name DQN -2023-05-15 21:17:21 - SimpleLog - INFO: - mode train -2023-05-15 21:17:21 - SimpleLog - INFO: - collect_traj 0 -2023-05-15 21:17:21 - SimpleLog - INFO: - mp_backend single -2023-05-15 21:17:21 - SimpleLog - INFO: - n_workers 1 -2023-05-15 21:17:21 - SimpleLog - INFO: - seed 1 -2023-05-15 21:17:21 - SimpleLog - INFO: - device cpu -2023-05-15 21:17:21 - SimpleLog - INFO: - max_episode 100 -2023-05-15 21:17:21 - SimpleLog - INFO: - max_step 200 -2023-05-15 21:17:21 - SimpleLog - INFO: - online_eval 1 -2023-05-15 21:17:21 - SimpleLog - INFO: - online_eval_episode 10 -2023-05-15 21:17:21 - SimpleLog - INFO: - load_checkpoint 0 -2023-05-15 21:17:21 - SimpleLog - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 -2023-05-15 21:17:21 - SimpleLog - INFO: - show_fig 0 -2023-05-15 21:17:21 - SimpleLog - INFO: - save_fig 1 -2023-05-15 21:17:21 - SimpleLog - INFO: - model_save_fre 500 -2023-05-15 21:17:21 - SimpleLog - INFO: - ================================================================================ -2023-05-15 21:17:21 - SimpleLog - INFO: - Algo Configs: -2023-05-15 21:17:21 - SimpleLog - INFO: - ================================================================================ -2023-05-15 21:17:21 - SimpleLog - INFO: - Name Value Type -2023-05-15 21:17:21 - SimpleLog - INFO: - epsilon_start 0.95 -2023-05-15 21:17:21 - SimpleLog - INFO: - epsilon_end 0.01 -2023-05-15 21:17:21 - SimpleLog - INFO: - epsilon_decay 500 -2023-05-15 21:17:21 - SimpleLog - INFO: - gamma 0.95 -2023-05-15 21:17:21 - SimpleLog - INFO: - lr 0.0001 -2023-05-15 21:17:21 - SimpleLog - INFO: - buffer_size 100000 -2023-05-15 21:17:21 - SimpleLog - INFO: - batch_size 64 -2023-05-15 21:17:21 - SimpleLog - INFO: - target_update 4 -2023-05-15 21:17:21 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] -2023-05-15 21:17:21 - SimpleLog - INFO: - buffer_type REPLAY_QUE -2023-05-15 21:17:21 - SimpleLog - INFO: - ================================================================================ -2023-05-15 21:17:21 - SimpleLog - INFO: - Env Configs: -2023-05-15 21:17:21 - SimpleLog - INFO: - ================================================================================ -2023-05-15 21:17:21 - SimpleLog - INFO: - Name Value Type -2023-05-15 21:17:21 - SimpleLog - INFO: - id CartPole-v1 -2023-05-15 21:17:21 - SimpleLog - INFO: - render_mode None -2023-05-15 21:17:21 - SimpleLog - INFO: - wrapper None -2023-05-15 21:17:21 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] -2023-05-15 21:17:21 - SimpleLog - INFO: - ================================================================================ -2023-05-15 21:17:21 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2) -2023-05-15 21:17:21 - SimpleLog - INFO: - Start training! -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 0, ep_reward: 25.0, ep_step: 25 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 1, ep_reward: 10.0, ep_step: 10 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 2, ep_reward: 13.0, ep_step: 13 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 3, ep_reward: 11.0, ep_step: 11 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 4, ep_reward: 12.0, ep_step: 12 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 5, ep_reward: 20.0, ep_step: 20 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 6, ep_reward: 12.0, ep_step: 12 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 7, ep_reward: 12.0, ep_step: 12 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 8, ep_reward: 11.0, ep_step: 11 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 9, ep_reward: 14.0, ep_step: 14 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 10, ep_reward: 22.0, ep_step: 22 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 11, ep_reward: 14.0, ep_step: 14 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 12, ep_reward: 18.0, ep_step: 18 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 13, ep_reward: 10.0, ep_step: 10 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 14, ep_reward: 12.0, ep_step: 12 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 15, ep_reward: 13.0, ep_step: 13 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 16, ep_reward: 13.0, ep_step: 13 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 17, ep_reward: 19.0, ep_step: 19 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 18, ep_reward: 10.0, ep_step: 10 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 19, ep_reward: 11.0, ep_step: 11 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 20, ep_reward: 9.0, ep_step: 9 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 21, ep_reward: 12.0, ep_step: 12 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 22, ep_reward: 10.0, ep_step: 10 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 23, ep_reward: 13.0, ep_step: 13 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 24, ep_reward: 14.0, ep_step: 14 -2023-05-15 21:17:21 - SimpleLog - INFO: - episode: 25, ep_reward: 20.0, ep_step: 20 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 26, ep_reward: 37.0, ep_step: 37 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 27, ep_reward: 15.0, ep_step: 15 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 28, ep_reward: 10.0, ep_step: 10 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 29, ep_reward: 11.0, ep_step: 11 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 30, ep_reward: 11.0, ep_step: 11 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 31, ep_reward: 11.0, ep_step: 11 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 32, ep_reward: 11.0, ep_step: 11 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 33, ep_reward: 12.0, ep_step: 12 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 34, ep_reward: 9.0, ep_step: 9 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 35, ep_reward: 9.0, ep_step: 9 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 36, ep_reward: 12.0, ep_step: 12 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 37, ep_reward: 9.0, ep_step: 9 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 38, ep_reward: 14.0, ep_step: 14 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 39, ep_reward: 13.0, ep_step: 13 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 40, ep_reward: 10.0, ep_step: 10 -2023-05-15 21:17:22 - SimpleLog - INFO: - update_step: 500, online_eval_reward: 17.900 -2023-05-15 21:17:22 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 17.900, save the best model! -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 41, ep_reward: 12.0, ep_step: 12 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 42, ep_reward: 19.0, ep_step: 19 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 43, ep_reward: 16.0, ep_step: 16 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 44, ep_reward: 14.0, ep_step: 14 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 45, ep_reward: 18.0, ep_step: 18 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 46, ep_reward: 24.0, ep_step: 24 -2023-05-15 21:17:22 - SimpleLog - INFO: - episode: 47, ep_reward: 27.0, ep_step: 27 -2023-05-15 21:17:23 - SimpleLog - INFO: - episode: 48, ep_reward: 27.0, ep_step: 27 -2023-05-15 21:17:23 - SimpleLog - INFO: - episode: 49, ep_reward: 19.0, ep_step: 19 -2023-05-15 21:17:23 - SimpleLog - INFO: - episode: 50, ep_reward: 20.0, ep_step: 20 -2023-05-15 21:17:23 - SimpleLog - INFO: - episode: 51, ep_reward: 37.0, ep_step: 37 -2023-05-15 21:17:23 - SimpleLog - INFO: - episode: 52, ep_reward: 26.0, ep_step: 26 -2023-05-15 21:17:23 - SimpleLog - INFO: - episode: 53, ep_reward: 31.0, ep_step: 31 -2023-05-15 21:17:23 - SimpleLog - INFO: - episode: 54, ep_reward: 23.0, ep_step: 23 -2023-05-15 21:17:23 - SimpleLog - INFO: - episode: 55, ep_reward: 27.0, ep_step: 27 -2023-05-15 21:17:23 - SimpleLog - INFO: - episode: 56, ep_reward: 35.0, ep_step: 35 -2023-05-15 21:17:24 - SimpleLog - INFO: - episode: 57, ep_reward: 33.0, ep_step: 33 -2023-05-15 21:17:24 - SimpleLog - INFO: - episode: 58, ep_reward: 24.0, ep_step: 24 -2023-05-15 21:17:24 - SimpleLog - INFO: - episode: 59, ep_reward: 33.0, ep_step: 33 -2023-05-15 21:17:24 - SimpleLog - INFO: - episode: 60, ep_reward: 36.0, ep_step: 36 -2023-05-15 21:17:24 - SimpleLog - INFO: - update_step: 1000, online_eval_reward: 29.100 -2023-05-15 21:17:24 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 29.100, save the best model! -2023-05-15 21:17:24 - SimpleLog - INFO: - episode: 61, ep_reward: 33.0, ep_step: 33 -2023-05-15 21:17:24 - SimpleLog - INFO: - episode: 62, ep_reward: 34.0, ep_step: 34 -2023-05-15 21:17:24 - SimpleLog - INFO: - episode: 63, ep_reward: 51.0, ep_step: 51 -2023-05-15 21:17:24 - SimpleLog - INFO: - episode: 64, ep_reward: 41.0, ep_step: 41 -2023-05-15 21:17:25 - SimpleLog - INFO: - episode: 65, ep_reward: 54.0, ep_step: 54 -2023-05-15 21:17:25 - SimpleLog - INFO: - episode: 66, ep_reward: 54.0, ep_step: 54 -2023-05-15 21:17:25 - SimpleLog - INFO: - episode: 67, ep_reward: 74.0, ep_step: 74 -2023-05-15 21:17:25 - SimpleLog - INFO: - episode: 68, ep_reward: 44.0, ep_step: 44 -2023-05-15 21:17:25 - SimpleLog - INFO: - episode: 69, ep_reward: 38.0, ep_step: 38 -2023-05-15 21:17:25 - SimpleLog - INFO: - episode: 70, ep_reward: 49.0, ep_step: 49 -2023-05-15 21:17:26 - SimpleLog - INFO: - update_step: 1500, online_eval_reward: 57.300 -2023-05-15 21:17:26 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 57.300, save the best model! -2023-05-15 21:17:26 - SimpleLog - INFO: - episode: 71, ep_reward: 64.0, ep_step: 64 -2023-05-15 21:17:26 - SimpleLog - INFO: - episode: 72, ep_reward: 89.0, ep_step: 89 -2023-05-15 21:17:26 - SimpleLog - INFO: - episode: 73, ep_reward: 90.0, ep_step: 90 -2023-05-15 21:17:26 - SimpleLog - INFO: - episode: 74, ep_reward: 66.0, ep_step: 66 -2023-05-15 21:17:27 - SimpleLog - INFO: - episode: 75, ep_reward: 60.0, ep_step: 60 -2023-05-15 21:17:27 - SimpleLog - INFO: - episode: 76, ep_reward: 66.0, ep_step: 66 -2023-05-15 21:17:27 - SimpleLog - INFO: - episode: 77, ep_reward: 67.0, ep_step: 67 -2023-05-15 21:17:27 - SimpleLog - INFO: - update_step: 2000, online_eval_reward: 91.800 -2023-05-15 21:17:27 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 91.800, save the best model! -2023-05-15 21:17:27 - SimpleLog - INFO: - episode: 78, ep_reward: 141.0, ep_step: 141 -2023-05-15 21:17:28 - SimpleLog - INFO: - episode: 79, ep_reward: 138.0, ep_step: 138 -2023-05-15 21:17:28 - SimpleLog - INFO: - episode: 80, ep_reward: 141.0, ep_step: 141 -2023-05-15 21:17:29 - SimpleLog - INFO: - update_step: 2500, online_eval_reward: 178.900 -2023-05-15 21:17:29 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 178.900, save the best model! -2023-05-15 21:17:29 - SimpleLog - INFO: - episode: 81, ep_reward: 192.0, ep_step: 192 -2023-05-15 21:17:30 - SimpleLog - INFO: - episode: 82, ep_reward: 163.0, ep_step: 163 -2023-05-15 21:17:31 - SimpleLog - INFO: - episode: 83, ep_reward: 183.0, ep_step: 183 -2023-05-15 21:17:31 - SimpleLog - INFO: - update_step: 3000, online_eval_reward: 165.900 -2023-05-15 21:17:31 - SimpleLog - INFO: - episode: 84, ep_reward: 199.0, ep_step: 199 -2023-05-15 21:17:32 - SimpleLog - INFO: - episode: 85, ep_reward: 175.0, ep_step: 175 -2023-05-15 21:17:32 - SimpleLog - INFO: - episode: 86, ep_reward: 191.0, ep_step: 191 -2023-05-15 21:17:33 - SimpleLog - INFO: - update_step: 3500, online_eval_reward: 186.300 -2023-05-15 21:17:33 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 186.300, save the best model! -2023-05-15 21:17:33 - SimpleLog - INFO: - episode: 87, ep_reward: 180.0, ep_step: 180 -2023-05-15 21:17:34 - SimpleLog - INFO: - episode: 88, ep_reward: 185.0, ep_step: 185 -2023-05-15 21:17:35 - SimpleLog - INFO: - update_step: 4000, online_eval_reward: 187.300 -2023-05-15 21:17:35 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 187.300, save the best model! -2023-05-15 21:17:35 - SimpleLog - INFO: - episode: 89, ep_reward: 194.0, ep_step: 194 -2023-05-15 21:17:35 - SimpleLog - INFO: - episode: 90, ep_reward: 181.0, ep_step: 181 -2023-05-15 21:17:36 - SimpleLog - INFO: - episode: 91, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:17:36 - SimpleLog - INFO: - update_step: 4500, online_eval_reward: 197.500 -2023-05-15 21:17:36 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 197.500, save the best model! -2023-05-15 21:17:37 - SimpleLog - INFO: - episode: 92, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:17:37 - SimpleLog - INFO: - episode: 93, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:17:38 - SimpleLog - INFO: - update_step: 5000, online_eval_reward: 196.200 -2023-05-15 21:17:38 - SimpleLog - INFO: - episode: 94, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:17:39 - SimpleLog - INFO: - episode: 95, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:17:39 - SimpleLog - INFO: - episode: 96, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:17:40 - SimpleLog - INFO: - update_step: 5500, online_eval_reward: 199.500 -2023-05-15 21:17:40 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 199.500, save the best model! -2023-05-15 21:17:40 - SimpleLog - INFO: - episode: 97, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:17:41 - SimpleLog - INFO: - episode: 98, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:17:42 - SimpleLog - INFO: - update_step: 6000, online_eval_reward: 196.200 -2023-05-15 21:17:42 - SimpleLog - INFO: - episode: 99, ep_reward: 200.0, ep_step: 200 -2023-05-15 21:17:42 - SimpleLog - INFO: - Finish training! total time consumed: 21.14s diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/1000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/1000 deleted file mode 100644 index dbf3fa9fda9fb47fdcf28b3d2e1b8cb1d6b50996..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/1000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/1500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/1500 deleted file mode 100644 index 5ee7115ec7afb544054aee808779e08ed2699b9f..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/1500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/2000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/2000 deleted file mode 100644 index 258d4873833124a5fcd1163fe9756e977a852d57..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/2000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/2500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/2500 deleted file mode 100644 index 30eb4a77f2882a1141aaf0566d96ca4890ad5061..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/2500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/3000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/3000 deleted file mode 100644 index 1b7adc295072abcbc5d58100bde674b9ee166d3f..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/3000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/3500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/3500 deleted file mode 100644 index a4f70eb9c16b6482fd4bdf5b88ba9efedea01a91..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/3500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/4000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/4000 deleted file mode 100644 index e5802f1d33b2e5bfcd042c183d6baeb9684fd010..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/4000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/4500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/4500 deleted file mode 100644 index 16e4ba299793db1f8533e35289e7f2532557c16b..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/4500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/500 deleted file mode 100644 index eca897a0bbd68625355e7b428f650d27effb00c3..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/5000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/5000 deleted file mode 100644 index dd0c84d8cd4c59bbc8ea7c2a238da855afedbad1..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/5000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/5500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/5500 deleted file mode 100644 index 554bffc8fafc5efcfab15798da33e11c9fbeebb7..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/5500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/6000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/6000 deleted file mode 100644 index 0f432672e133a8be11116a3d574e92f37a205004..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/6000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/best b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/best deleted file mode 100644 index 554bffc8fafc5efcfab15798da33e11c9fbeebb7..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/models/best and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/tb_logs/interact/events.out.tfevents.1684156641.DESKTOP-H34HQIQ.65936.0 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/tb_logs/interact/events.out.tfevents.1684156641.DESKTOP-H34HQIQ.65936.0 deleted file mode 100644 index 7d0a6c3605bfd40626cc871d584af84282c89bc1..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/tb_logs/interact/events.out.tfevents.1684156641.DESKTOP-H34HQIQ.65936.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:88c8b584bf2023d80b8da777a9aa1d6799df06ab44841a1350e3779314f0dff5 -size 10436 diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/tb_logs/model/events.out.tfevents.1684156641.DESKTOP-H34HQIQ.65936.1 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/tb_logs/model/events.out.tfevents.1684156641.DESKTOP-H34HQIQ.65936.1 deleted file mode 100644 index 77772ec0a8df42b45d1c7a26ce2a784e43b1bd63..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DQN_20230515-211721/tb_logs/model/events.out.tfevents.1684156641.DESKTOP-H34HQIQ.65936.1 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:34aa745aea2a3b867783177a4a48344c1e8a6218893f768de6a7cc79eb74d6d8 -size 295334 diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/logs/log.txt b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/logs/log.txt deleted file mode 100644 index 96bfc997b328664ed7d2f2611d7429971bd3596f..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/logs/log.txt +++ /dev/null @@ -1,174 +0,0 @@ -2023-05-17 22:38:43 - SimpleLog - INFO: - General Configs: -2023-05-17 22:38:43 - SimpleLog - INFO: - ================================================================================ -2023-05-17 22:38:43 - SimpleLog - INFO: - Name Value Type -2023-05-17 22:38:43 - SimpleLog - INFO: - env_name gym -2023-05-17 22:38:43 - SimpleLog - INFO: - algo_name DuelingDQN -2023-05-17 22:38:43 - SimpleLog - INFO: - mode train -2023-05-17 22:38:43 - SimpleLog - INFO: - device cpu -2023-05-17 22:38:43 - SimpleLog - INFO: - seed 1 -2023-05-17 22:38:43 - SimpleLog - INFO: - max_episode 100 -2023-05-17 22:38:43 - SimpleLog - INFO: - max_step 200 -2023-05-17 22:38:43 - SimpleLog - INFO: - collect_traj 0 -2023-05-17 22:38:43 - SimpleLog - INFO: - mp_backend single -2023-05-17 22:38:43 - SimpleLog - INFO: - n_workers 2 -2023-05-17 22:38:43 - SimpleLog - INFO: - online_eval 1 -2023-05-17 22:38:43 - SimpleLog - INFO: - online_eval_episode 10 -2023-05-17 22:38:43 - SimpleLog - INFO: - model_save_fre 500 -2023-05-17 22:38:43 - SimpleLog - INFO: - load_checkpoint 0 -2023-05-17 22:38:43 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 -2023-05-17 22:38:43 - SimpleLog - INFO: - load_model_step best -2023-05-17 22:38:43 - SimpleLog - INFO: - ================================================================================ -2023-05-17 22:38:43 - SimpleLog - INFO: - Algo Configs: -2023-05-17 22:38:43 - SimpleLog - INFO: - ================================================================================ -2023-05-17 22:38:43 - SimpleLog - INFO: - Name Value Type -2023-05-17 22:38:43 - SimpleLog - INFO: - dueling 1 -2023-05-17 22:38:43 - SimpleLog - INFO: - epsilon_start 0.95 -2023-05-17 22:38:43 - SimpleLog - INFO: - epsilon_end 0.01 -2023-05-17 22:38:43 - SimpleLog - INFO: - epsilon_decay 500 -2023-05-17 22:38:43 - SimpleLog - INFO: - gamma 0.95 -2023-05-17 22:38:43 - SimpleLog - INFO: - lr 0.0001 -2023-05-17 22:38:43 - SimpleLog - INFO: - buffer_size 100000 -2023-05-17 22:38:43 - SimpleLog - INFO: - batch_size 64 -2023-05-17 22:38:43 - SimpleLog - INFO: - target_update 4 -2023-05-17 22:38:43 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] -2023-05-17 22:38:43 - SimpleLog - INFO: - buffer_type REPLAY_QUE -2023-05-17 22:38:43 - SimpleLog - INFO: - ================================================================================ -2023-05-17 22:38:43 - SimpleLog - INFO: - Env Configs: -2023-05-17 22:38:43 - SimpleLog - INFO: - ================================================================================ -2023-05-17 22:38:43 - SimpleLog - INFO: - Name Value Type -2023-05-17 22:38:43 - SimpleLog - INFO: - id CartPole-v1 -2023-05-17 22:38:43 - SimpleLog - INFO: - render_mode None -2023-05-17 22:38:43 - SimpleLog - INFO: - wrapper None -2023-05-17 22:38:43 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] -2023-05-17 22:38:43 - SimpleLog - INFO: - ================================================================================ -2023-05-17 22:38:43 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2) -2023-05-17 22:38:43 - SimpleLog - INFO: - Start training! -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 0, ep_reward: 35.0, ep_step: 35 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 1, ep_reward: 14.0, ep_step: 14 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 2, ep_reward: 20.0, ep_step: 20 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 3, ep_reward: 33.0, ep_step: 33 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 4, ep_reward: 21.0, ep_step: 21 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 5, ep_reward: 19.0, ep_step: 19 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 6, ep_reward: 15.0, ep_step: 15 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 7, ep_reward: 14.0, ep_step: 14 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 8, ep_reward: 9.0, ep_step: 9 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 9, ep_reward: 11.0, ep_step: 11 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 10, ep_reward: 17.0, ep_step: 17 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 11, ep_reward: 21.0, ep_step: 21 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 12, ep_reward: 15.0, ep_step: 15 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 13, ep_reward: 19.0, ep_step: 19 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 14, ep_reward: 9.0, ep_step: 9 -2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 15, ep_reward: 10.0, ep_step: 10 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 16, ep_reward: 11.0, ep_step: 11 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 17, ep_reward: 17.0, ep_step: 17 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 18, ep_reward: 13.0, ep_step: 13 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 19, ep_reward: 16.0, ep_step: 16 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 20, ep_reward: 18.0, ep_step: 18 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 21, ep_reward: 14.0, ep_step: 14 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 22, ep_reward: 16.0, ep_step: 16 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 23, ep_reward: 14.0, ep_step: 14 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 24, ep_reward: 10.0, ep_step: 10 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 25, ep_reward: 14.0, ep_step: 14 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 26, ep_reward: 18.0, ep_step: 18 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 27, ep_reward: 44.0, ep_step: 44 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 28, ep_reward: 12.0, ep_step: 12 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 29, ep_reward: 12.0, ep_step: 12 -2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 30, ep_reward: 14.0, ep_step: 14 -2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 31, ep_reward: 10.0, ep_step: 10 -2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 32, ep_reward: 10.0, ep_step: 10 -2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 33, ep_reward: 15.0, ep_step: 15 -2023-05-17 22:38:45 - SimpleLog - INFO: - update_step: 500, online_eval_reward: 10.000 -2023-05-17 22:38:45 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 10.000, save the best model! -2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 34, ep_reward: 39.0, ep_step: 39 -2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 35, ep_reward: 70.0, ep_step: 70 -2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 36, ep_reward: 92.0, ep_step: 92 -2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 37, ep_reward: 46.0, ep_step: 46 -2023-05-17 22:38:46 - SimpleLog - INFO: - episode: 38, ep_reward: 139.0, ep_step: 139 -2023-05-17 22:38:46 - SimpleLog - INFO: - episode: 39, ep_reward: 58.0, ep_step: 58 -2023-05-17 22:38:46 - SimpleLog - INFO: - episode: 40, ep_reward: 45.0, ep_step: 45 -2023-05-17 22:38:46 - SimpleLog - INFO: - update_step: 1000, online_eval_reward: 57.000 -2023-05-17 22:38:46 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 57.000, save the best model! -2023-05-17 22:38:47 - SimpleLog - INFO: - episode: 41, ep_reward: 70.0, ep_step: 70 -2023-05-17 22:38:47 - SimpleLog - INFO: - episode: 42, ep_reward: 131.0, ep_step: 131 -2023-05-17 22:38:47 - SimpleLog - INFO: - episode: 43, ep_reward: 67.0, ep_step: 67 -2023-05-17 22:38:48 - SimpleLog - INFO: - episode: 44, ep_reward: 111.0, ep_step: 111 -2023-05-17 22:38:48 - SimpleLog - INFO: - update_step: 1500, online_eval_reward: 158.000 -2023-05-17 22:38:48 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 158.000, save the best model! -2023-05-17 22:38:49 - SimpleLog - INFO: - episode: 45, ep_reward: 153.0, ep_step: 153 -2023-05-17 22:38:49 - SimpleLog - INFO: - episode: 46, ep_reward: 188.0, ep_step: 188 -2023-05-17 22:38:50 - SimpleLog - INFO: - episode: 47, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:38:50 - SimpleLog - INFO: - update_step: 2000, online_eval_reward: 200.000 -2023-05-17 22:38:50 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model! -2023-05-17 22:38:51 - SimpleLog - INFO: - episode: 48, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:38:52 - SimpleLog - INFO: - episode: 49, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:38:53 - SimpleLog - INFO: - update_step: 2500, online_eval_reward: 200.000 -2023-05-17 22:38:53 - SimpleLog - INFO: - episode: 50, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:38:53 - SimpleLog - INFO: - episode: 51, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:38:54 - SimpleLog - INFO: - episode: 52, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:38:55 - SimpleLog - INFO: - update_step: 3000, online_eval_reward: 200.000 -2023-05-17 22:38:55 - SimpleLog - INFO: - episode: 53, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:38:56 - SimpleLog - INFO: - episode: 54, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:38:57 - SimpleLog - INFO: - update_step: 3500, online_eval_reward: 200.000 -2023-05-17 22:38:57 - SimpleLog - INFO: - episode: 55, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:38:57 - SimpleLog - INFO: - episode: 56, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:38:58 - SimpleLog - INFO: - episode: 57, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:38:59 - SimpleLog - INFO: - update_step: 4000, online_eval_reward: 200.000 -2023-05-17 22:38:59 - SimpleLog - INFO: - episode: 58, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:00 - SimpleLog - INFO: - episode: 59, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:01 - SimpleLog - INFO: - update_step: 4500, online_eval_reward: 200.000 -2023-05-17 22:39:01 - SimpleLog - INFO: - episode: 60, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:02 - SimpleLog - INFO: - episode: 61, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:02 - SimpleLog - INFO: - episode: 62, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:03 - SimpleLog - INFO: - update_step: 5000, online_eval_reward: 200.000 -2023-05-17 22:39:03 - SimpleLog - INFO: - episode: 63, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:04 - SimpleLog - INFO: - episode: 64, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:05 - SimpleLog - INFO: - update_step: 5500, online_eval_reward: 200.000 -2023-05-17 22:39:05 - SimpleLog - INFO: - episode: 65, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:06 - SimpleLog - INFO: - episode: 66, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:07 - SimpleLog - INFO: - episode: 67, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:07 - SimpleLog - INFO: - update_step: 6000, online_eval_reward: 200.000 -2023-05-17 22:39:08 - SimpleLog - INFO: - episode: 68, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:08 - SimpleLog - INFO: - episode: 69, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:09 - SimpleLog - INFO: - update_step: 6500, online_eval_reward: 200.000 -2023-05-17 22:39:09 - SimpleLog - INFO: - episode: 70, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:10 - SimpleLog - INFO: - episode: 71, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:11 - SimpleLog - INFO: - episode: 72, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:12 - SimpleLog - INFO: - update_step: 7000, online_eval_reward: 200.000 -2023-05-17 22:39:12 - SimpleLog - INFO: - episode: 73, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:13 - SimpleLog - INFO: - episode: 74, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:13 - SimpleLog - INFO: - update_step: 7500, online_eval_reward: 200.000 -2023-05-17 22:39:13 - SimpleLog - INFO: - episode: 75, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:15 - SimpleLog - INFO: - episode: 76, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:15 - SimpleLog - INFO: - episode: 77, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:16 - SimpleLog - INFO: - update_step: 8000, online_eval_reward: 200.000 -2023-05-17 22:39:16 - SimpleLog - INFO: - episode: 78, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:17 - SimpleLog - INFO: - episode: 79, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:18 - SimpleLog - INFO: - update_step: 8500, online_eval_reward: 200.000 -2023-05-17 22:39:18 - SimpleLog - INFO: - episode: 80, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:19 - SimpleLog - INFO: - episode: 81, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:19 - SimpleLog - INFO: - episode: 82, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:20 - SimpleLog - INFO: - update_step: 9000, online_eval_reward: 200.000 -2023-05-17 22:39:20 - SimpleLog - INFO: - episode: 83, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:21 - SimpleLog - INFO: - episode: 84, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:22 - SimpleLog - INFO: - update_step: 9500, online_eval_reward: 200.000 -2023-05-17 22:39:22 - SimpleLog - INFO: - episode: 85, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:23 - SimpleLog - INFO: - episode: 86, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:23 - SimpleLog - INFO: - episode: 87, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:24 - SimpleLog - INFO: - update_step: 10000, online_eval_reward: 200.000 -2023-05-17 22:39:25 - SimpleLog - INFO: - episode: 88, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:25 - SimpleLog - INFO: - episode: 89, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:26 - SimpleLog - INFO: - update_step: 10500, online_eval_reward: 200.000 -2023-05-17 22:39:26 - SimpleLog - INFO: - episode: 90, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:27 - SimpleLog - INFO: - episode: 91, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:28 - SimpleLog - INFO: - episode: 92, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:28 - SimpleLog - INFO: - update_step: 11000, online_eval_reward: 200.000 -2023-05-17 22:39:29 - SimpleLog - INFO: - episode: 93, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:30 - SimpleLog - INFO: - episode: 94, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:31 - SimpleLog - INFO: - update_step: 11500, online_eval_reward: 200.000 -2023-05-17 22:39:31 - SimpleLog - INFO: - episode: 95, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:32 - SimpleLog - INFO: - episode: 96, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:32 - SimpleLog - INFO: - episode: 97, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:33 - SimpleLog - INFO: - update_step: 12000, online_eval_reward: 200.000 -2023-05-17 22:39:34 - SimpleLog - INFO: - episode: 98, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:34 - SimpleLog - INFO: - episode: 99, ep_reward: 200.0, ep_step: 200 -2023-05-17 22:39:34 - SimpleLog - INFO: - Finish training! total time consumed: 51.81s diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/1000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/1000 deleted file mode 100644 index 7e7c2e4f6bb11f41ea2a29249e34ad03696aafde..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/1000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/10000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/10000 deleted file mode 100644 index 0bed7eb6ce5f628bc782c24a4950b9b2a99f7dd8..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/10000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/10500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/10500 deleted file mode 100644 index a48e789c0a921609b744c9ad8af4f89a86331f5e..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/10500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/11000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/11000 deleted file mode 100644 index 093809c058fe2d94035d14fc3ed0af8fb4fc0f2e..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/11000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/11500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/11500 deleted file mode 100644 index 85a1ea724fb21ac95dd0e3e63488c4566186cee9..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/11500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/12000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/12000 deleted file mode 100644 index 761cb30eca6b89559f93b2bb82cf2055a1f3aefb..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/12000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/1500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/1500 deleted file mode 100644 index d153981ff07400b1259fde874b00b1a2645704c3..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/1500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/2000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/2000 deleted file mode 100644 index 6281022c0a450df782a362b51827a0520ad4e6a5..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/2000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/2500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/2500 deleted file mode 100644 index f9f478c7c75e17273d194e35f33f3d26742e682b..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/2500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/3000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/3000 deleted file mode 100644 index 3fd92f6ced8d31878ec3e64d6cc8c33c33e66ac5..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/3000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/3500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/3500 deleted file mode 100644 index ef8d2641fd40c7549a24e6d9e074d62a06241cb5..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/3500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/4000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/4000 deleted file mode 100644 index 740a1e693a9a27bac202492d2485bfd4043bad0c..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/4000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/4500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/4500 deleted file mode 100644 index e1bd86ef93ae1fca062e5d6ed3dee37b2558947e..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/4500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/500 deleted file mode 100644 index 89c9b14402f1fae05fe12ba0814d5dc8506b868f..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/5000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/5000 deleted file mode 100644 index e0b6af9f53c5950da60d0a286d1da9e74ca278c0..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/5000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/5500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/5500 deleted file mode 100644 index 696cab4d60acf09813d6f3338f7c08a12f758684..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/5500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/6000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/6000 deleted file mode 100644 index 75d3cdb8a2b3b531144789e39bee75e2b6af91e0..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/6000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/6500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/6500 deleted file mode 100644 index 2502198dbe83f446e89f3988ab789cb54d872f2e..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/6500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/7000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/7000 deleted file mode 100644 index 922f8b938b70d301aecaf19072f416c93eec60ec..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/7000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/7500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/7500 deleted file mode 100644 index a812949325f0506a7e575b45d6ef6fc8b9c2d100..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/7500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/8000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/8000 deleted file mode 100644 index 77f3df7c1e27836ed286113dd9a648733ecdb768..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/8000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/8500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/8500 deleted file mode 100644 index 3e70a1fcba267c642d86d5ab66d13be3f25faa62..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/8500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/9000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/9000 deleted file mode 100644 index f11d60bf987f0b15fdc7ce8be6ba7392913dc75f..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/9000 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/9500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/9500 deleted file mode 100644 index 474ebafb615b4e3f8b98711b4fce0d61b33667a5..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/9500 and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/best b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/best deleted file mode 100644 index 6281022c0a450df782a362b51827a0520ad4e6a5..0000000000000000000000000000000000000000 Binary files a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/best and /dev/null differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/tb_logs/interact/events.out.tfevents.1684334323.DESKTOP-H34HQIQ.30484.0 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/tb_logs/interact/events.out.tfevents.1684334323.DESKTOP-H34HQIQ.30484.0 deleted file mode 100644 index 2e8899b7b2f3f7a88a8e3cc14397214c8229a6d7..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/tb_logs/interact/events.out.tfevents.1684334323.DESKTOP-H34HQIQ.30484.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fa1caa6e526f308946db961e2fc4735b6e26eb12584b222b5250d20217905f0f -size 10436 diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/tb_logs/model/events.out.tfevents.1684334323.DESKTOP-H34HQIQ.30484.1 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/tb_logs/model/events.out.tfevents.1684334323.DESKTOP-H34HQIQ.30484.1 deleted file mode 100644 index 2c6dd676c852f6e1168ccf74bf4f9473d9408900..0000000000000000000000000000000000000000 --- a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/tb_logs/model/events.out.tfevents.1684334323.DESKTOP-H34HQIQ.30484.1 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:467410bc502ff48858b3003f77303d779925a2f505c21b5901f849383ad693fd -size 602907 diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/config.yaml b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/config.yaml similarity index 73% rename from ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/config.yaml rename to ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/config.yaml index a203609467be220d76360ff3216a07ca4838dc0b..0238350472dc72d1fc024391c74ecc2cd59d983e 100644 --- a/ClassControl/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/config.yaml +++ b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/config.yaml @@ -1,7 +1,7 @@ general_cfg: - algo_name: DuelingDQN + algo_name: PER_DQN collect_traj: false - device: cpu + device: cuda env_name: gym load_checkpoint: false load_model_step: best @@ -11,28 +11,33 @@ general_cfg: mode: train model_save_fre: 500 mp_backend: single + n_learners: 1 n_workers: 2 online_eval: true online_eval_episode: 10 seed: 1 + share_buffer: true algo_cfg: batch_size: 64 buffer_size: 100000 - buffer_type: REPLAY_QUE - dueling: true - epsilon_decay: 500 + buffer_type: PER_QUE + epsilon_decay: 1000 epsilon_end: 0.01 epsilon_start: 0.95 - gamma: 0.95 + gamma: 0.99 lr: 0.0001 + per_alpha: 0.6 + per_beta: 0.4 + per_beta_annealing: 0.001 + per_epsilon: 0.01 target_update: 4 value_layers: - activation: relu - layer_dim: + layer_size: - 256 layer_type: linear - activation: relu - layer_dim: + layer_size: - 256 layer_type: linear env_cfg: diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/logs/log.txt b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/logs/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..001c2ab13052ac0217ba323e4cc4a5341c49c68a --- /dev/null +++ b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/logs/log.txt @@ -0,0 +1,172 @@ +2023-05-18 23:22:15 - SimpleLog - INFO: - General Configs: +2023-05-18 23:22:15 - SimpleLog - INFO: - ================================================================================ +2023-05-18 23:22:15 - SimpleLog - INFO: - Name Value Type +2023-05-18 23:22:15 - SimpleLog - INFO: - env_name gym +2023-05-18 23:22:15 - SimpleLog - INFO: - algo_name PER_DQN +2023-05-18 23:22:15 - SimpleLog - INFO: - mode train +2023-05-18 23:22:15 - SimpleLog - INFO: - device cuda +2023-05-18 23:22:15 - SimpleLog - INFO: - seed 1 +2023-05-18 23:22:15 - SimpleLog - INFO: - max_episode 100 +2023-05-18 23:22:15 - SimpleLog - INFO: - max_step 200 +2023-05-18 23:22:15 - SimpleLog - INFO: - collect_traj 0 +2023-05-18 23:22:15 - SimpleLog - INFO: - mp_backend single +2023-05-18 23:22:15 - SimpleLog - INFO: - n_workers 2 +2023-05-18 23:22:15 - SimpleLog - INFO: - n_learners 1 +2023-05-18 23:22:15 - SimpleLog - INFO: - share_buffer 1 +2023-05-18 23:22:15 - SimpleLog - INFO: - online_eval 1 +2023-05-18 23:22:15 - SimpleLog - INFO: - online_eval_episode 10 +2023-05-18 23:22:15 - SimpleLog - INFO: - model_save_fre 500 +2023-05-18 23:22:15 - SimpleLog - INFO: - load_checkpoint 0 +2023-05-18 23:22:15 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 +2023-05-18 23:22:15 - SimpleLog - INFO: - load_model_step best +2023-05-18 23:22:15 - SimpleLog - INFO: - ================================================================================ +2023-05-18 23:22:15 - SimpleLog - INFO: - Algo Configs: +2023-05-18 23:22:15 - SimpleLog - INFO: - ================================================================================ +2023-05-18 23:22:15 - SimpleLog - INFO: - Name Value Type +2023-05-18 23:22:15 - SimpleLog - INFO: - epsilon_start 0.95 +2023-05-18 23:22:15 - SimpleLog - INFO: - epsilon_end 0.01 +2023-05-18 23:22:15 - SimpleLog - INFO: - epsilon_decay 1000 +2023-05-18 23:22:15 - SimpleLog - INFO: - gamma 0.99 +2023-05-18 23:22:15 - SimpleLog - INFO: - lr 0.0001 +2023-05-18 23:22:15 - SimpleLog - INFO: - buffer_type PER_QUE +2023-05-18 23:22:15 - SimpleLog - INFO: - buffer_size 100000 +2023-05-18 23:22:15 - SimpleLog - INFO: - per_alpha 0.6 +2023-05-18 23:22:15 - SimpleLog - INFO: - per_beta 0.4 +2023-05-18 23:22:15 - SimpleLog - INFO: - per_beta_annealing 0.001 +2023-05-18 23:22:15 - SimpleLog - INFO: - per_epsilon 0.01 +2023-05-18 23:22:15 - SimpleLog - INFO: - batch_size 64 +2023-05-18 23:22:15 - SimpleLog - INFO: - target_update 4 +2023-05-18 23:22:15 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}] +2023-05-18 23:22:15 - SimpleLog - INFO: - ================================================================================ +2023-05-18 23:22:15 - SimpleLog - INFO: - Env Configs: +2023-05-18 23:22:15 - SimpleLog - INFO: - ================================================================================ +2023-05-18 23:22:15 - SimpleLog - INFO: - Name Value Type +2023-05-18 23:22:15 - SimpleLog - INFO: - id CartPole-v1 +2023-05-18 23:22:15 - SimpleLog - INFO: - render_mode None +2023-05-18 23:22:15 - SimpleLog - INFO: - wrapper None +2023-05-18 23:22:15 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] +2023-05-18 23:22:15 - SimpleLog - INFO: - ================================================================================ +2023-05-18 23:22:15 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2) +2023-05-18 23:22:16 - SimpleLog - INFO: - Start training! +2023-05-18 23:22:17 - SimpleLog - INFO: - episode: 0, ep_reward: 34.0, ep_step: 34 +2023-05-18 23:22:17 - SimpleLog - INFO: - episode: 1, ep_reward: 14.0, ep_step: 14 +2023-05-18 23:22:17 - SimpleLog - INFO: - episode: 2, ep_reward: 15.0, ep_step: 15 +2023-05-18 23:22:17 - SimpleLog - INFO: - episode: 3, ep_reward: 17.0, ep_step: 17 +2023-05-18 23:22:17 - SimpleLog - INFO: - episode: 4, ep_reward: 12.0, ep_step: 12 +2023-05-18 23:22:17 - SimpleLog - INFO: - episode: 5, ep_reward: 39.0, ep_step: 39 +2023-05-18 23:22:18 - SimpleLog - INFO: - episode: 6, ep_reward: 28.0, ep_step: 28 +2023-05-18 23:22:18 - SimpleLog - INFO: - episode: 7, ep_reward: 33.0, ep_step: 33 +2023-05-18 23:22:18 - SimpleLog - INFO: - episode: 8, ep_reward: 15.0, ep_step: 15 +2023-05-18 23:22:18 - SimpleLog - INFO: - episode: 9, ep_reward: 15.0, ep_step: 15 +2023-05-18 23:22:18 - SimpleLog - INFO: - episode: 10, ep_reward: 20.0, ep_step: 20 +2023-05-18 23:22:18 - SimpleLog - INFO: - episode: 11, ep_reward: 20.0, ep_step: 20 +2023-05-18 23:22:18 - SimpleLog - INFO: - episode: 12, ep_reward: 13.0, ep_step: 13 +2023-05-18 23:22:18 - SimpleLog - INFO: - episode: 13, ep_reward: 19.0, ep_step: 19 +2023-05-18 23:22:18 - SimpleLog - INFO: - episode: 14, ep_reward: 30.0, ep_step: 30 +2023-05-18 23:22:18 - SimpleLog - INFO: - episode: 15, ep_reward: 15.0, ep_step: 15 +2023-05-18 23:22:19 - SimpleLog - INFO: - episode: 16, ep_reward: 20.0, ep_step: 20 +2023-05-18 23:22:19 - SimpleLog - INFO: - episode: 17, ep_reward: 14.0, ep_step: 14 +2023-05-18 23:22:19 - SimpleLog - INFO: - episode: 18, ep_reward: 11.0, ep_step: 11 +2023-05-18 23:22:19 - SimpleLog - INFO: - episode: 19, ep_reward: 21.0, ep_step: 21 +2023-05-18 23:22:19 - SimpleLog - INFO: - episode: 20, ep_reward: 15.0, ep_step: 15 +2023-05-18 23:22:19 - SimpleLog - INFO: - episode: 21, ep_reward: 18.0, ep_step: 18 +2023-05-18 23:22:19 - SimpleLog - INFO: - episode: 22, ep_reward: 12.0, ep_step: 12 +2023-05-18 23:22:19 - SimpleLog - INFO: - episode: 23, ep_reward: 24.0, ep_step: 24 +2023-05-18 23:22:19 - SimpleLog - INFO: - episode: 24, ep_reward: 23.0, ep_step: 23 +2023-05-18 23:22:19 - SimpleLog - INFO: - episode: 25, ep_reward: 25.0, ep_step: 25 +2023-05-18 23:22:20 - SimpleLog - INFO: - episode: 26, ep_reward: 17.0, ep_step: 17 +2023-05-18 23:22:20 - SimpleLog - INFO: - episode: 27, ep_reward: 12.0, ep_step: 12 +2023-05-18 23:22:20 - SimpleLog - INFO: - update_step: 500, online_eval_reward: 9.000 +2023-05-18 23:22:20 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 9.000, save the best model! +2023-05-18 23:22:20 - SimpleLog - INFO: - episode: 28, ep_reward: 17.0, ep_step: 17 +2023-05-18 23:22:20 - SimpleLog - INFO: - episode: 29, ep_reward: 12.0, ep_step: 12 +2023-05-18 23:22:20 - SimpleLog - INFO: - episode: 30, ep_reward: 12.0, ep_step: 12 +2023-05-18 23:22:20 - SimpleLog - INFO: - episode: 31, ep_reward: 16.0, ep_step: 16 +2023-05-18 23:22:20 - SimpleLog - INFO: - episode: 32, ep_reward: 11.0, ep_step: 11 +2023-05-18 23:22:20 - SimpleLog - INFO: - episode: 33, ep_reward: 15.0, ep_step: 15 +2023-05-18 23:22:20 - SimpleLog - INFO: - episode: 34, ep_reward: 12.0, ep_step: 12 +2023-05-18 23:22:20 - SimpleLog - INFO: - episode: 35, ep_reward: 18.0, ep_step: 18 +2023-05-18 23:22:20 - SimpleLog - INFO: - episode: 36, ep_reward: 9.0, ep_step: 9 +2023-05-18 23:22:21 - SimpleLog - INFO: - episode: 37, ep_reward: 50.0, ep_step: 50 +2023-05-18 23:22:21 - SimpleLog - INFO: - episode: 38, ep_reward: 12.0, ep_step: 12 +2023-05-18 23:22:21 - SimpleLog - INFO: - episode: 39, ep_reward: 14.0, ep_step: 14 +2023-05-18 23:22:21 - SimpleLog - INFO: - episode: 40, ep_reward: 15.0, ep_step: 15 +2023-05-18 23:22:21 - SimpleLog - INFO: - episode: 41, ep_reward: 10.0, ep_step: 10 +2023-05-18 23:22:21 - SimpleLog - INFO: - episode: 42, ep_reward: 19.0, ep_step: 19 +2023-05-18 23:22:21 - SimpleLog - INFO: - episode: 43, ep_reward: 13.0, ep_step: 13 +2023-05-18 23:22:21 - SimpleLog - INFO: - episode: 44, ep_reward: 16.0, ep_step: 16 +2023-05-18 23:22:21 - SimpleLog - INFO: - episode: 45, ep_reward: 20.0, ep_step: 20 +2023-05-18 23:22:21 - SimpleLog - INFO: - episode: 46, ep_reward: 16.0, ep_step: 16 +2023-05-18 23:22:21 - SimpleLog - INFO: - episode: 47, ep_reward: 32.0, ep_step: 32 +2023-05-18 23:22:21 - SimpleLog - INFO: - episode: 48, ep_reward: 25.0, ep_step: 25 +2023-05-18 23:22:22 - SimpleLog - INFO: - episode: 49, ep_reward: 73.0, ep_step: 73 +2023-05-18 23:22:22 - SimpleLog - INFO: - episode: 50, ep_reward: 28.0, ep_step: 28 +2023-05-18 23:22:22 - SimpleLog - INFO: - update_step: 1000, online_eval_reward: 75.000 +2023-05-18 23:22:22 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 75.000, save the best model! +2023-05-18 23:22:23 - SimpleLog - INFO: - episode: 51, ep_reward: 88.0, ep_step: 88 +2023-05-18 23:22:23 - SimpleLog - INFO: - episode: 52, ep_reward: 58.0, ep_step: 58 +2023-05-18 23:22:23 - SimpleLog - INFO: - episode: 53, ep_reward: 53.0, ep_step: 53 +2023-05-18 23:22:24 - SimpleLog - INFO: - episode: 54, ep_reward: 77.0, ep_step: 77 +2023-05-18 23:22:24 - SimpleLog - INFO: - episode: 55, ep_reward: 48.0, ep_step: 48 +2023-05-18 23:22:25 - SimpleLog - INFO: - episode: 56, ep_reward: 150.0, ep_step: 150 +2023-05-18 23:22:25 - SimpleLog - INFO: - episode: 57, ep_reward: 45.0, ep_step: 45 +2023-05-18 23:22:25 - SimpleLog - INFO: - update_step: 1500, online_eval_reward: 68.000 +2023-05-18 23:22:25 - SimpleLog - INFO: - episode: 58, ep_reward: 53.0, ep_step: 53 +2023-05-18 23:22:26 - SimpleLog - INFO: - episode: 59, ep_reward: 75.0, ep_step: 75 +2023-05-18 23:22:26 - SimpleLog - INFO: - episode: 60, ep_reward: 49.0, ep_step: 49 +2023-05-18 23:22:27 - SimpleLog - INFO: - episode: 61, ep_reward: 127.0, ep_step: 127 +2023-05-18 23:22:27 - SimpleLog - INFO: - episode: 62, ep_reward: 107.0, ep_step: 107 +2023-05-18 23:22:28 - SimpleLog - INFO: - episode: 63, ep_reward: 72.0, ep_step: 72 +2023-05-18 23:22:28 - SimpleLog - INFO: - update_step: 2000, online_eval_reward: 59.000 +2023-05-18 23:22:28 - SimpleLog - INFO: - episode: 64, ep_reward: 70.0, ep_step: 70 +2023-05-18 23:22:29 - SimpleLog - INFO: - episode: 65, ep_reward: 54.0, ep_step: 54 +2023-05-18 23:22:29 - SimpleLog - INFO: - episode: 66, ep_reward: 49.0, ep_step: 49 +2023-05-18 23:22:29 - SimpleLog - INFO: - episode: 67, ep_reward: 56.0, ep_step: 56 +2023-05-18 23:22:29 - SimpleLog - INFO: - episode: 68, ep_reward: 69.0, ep_step: 69 +2023-05-18 23:22:30 - SimpleLog - INFO: - episode: 69, ep_reward: 70.0, ep_step: 70 +2023-05-18 23:22:30 - SimpleLog - INFO: - episode: 70, ep_reward: 65.0, ep_step: 65 +2023-05-18 23:22:30 - SimpleLog - INFO: - episode: 71, ep_reward: 57.0, ep_step: 57 +2023-05-18 23:22:31 - SimpleLog - INFO: - episode: 72, ep_reward: 50.0, ep_step: 50 +2023-05-18 23:22:31 - SimpleLog - INFO: - update_step: 2500, online_eval_reward: 124.000 +2023-05-18 23:22:31 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 124.000, save the best model! +2023-05-18 23:22:32 - SimpleLog - INFO: - episode: 73, ep_reward: 82.0, ep_step: 82 +2023-05-18 23:22:32 - SimpleLog - INFO: - episode: 74, ep_reward: 74.0, ep_step: 74 +2023-05-18 23:22:33 - SimpleLog - INFO: - episode: 75, ep_reward: 93.0, ep_step: 93 +2023-05-18 23:22:33 - SimpleLog - INFO: - episode: 76, ep_reward: 80.0, ep_step: 80 +2023-05-18 23:22:33 - SimpleLog - INFO: - episode: 77, ep_reward: 56.0, ep_step: 56 +2023-05-18 23:22:34 - SimpleLog - INFO: - episode: 78, ep_reward: 87.0, ep_step: 87 +2023-05-18 23:22:34 - SimpleLog - INFO: - update_step: 3000, online_eval_reward: 68.000 +2023-05-18 23:22:35 - SimpleLog - INFO: - episode: 79, ep_reward: 67.0, ep_step: 67 +2023-05-18 23:22:35 - SimpleLog - INFO: - episode: 80, ep_reward: 80.0, ep_step: 80 +2023-05-18 23:22:35 - SimpleLog - INFO: - episode: 81, ep_reward: 65.0, ep_step: 65 +2023-05-18 23:22:36 - SimpleLog - INFO: - episode: 82, ep_reward: 79.0, ep_step: 79 +2023-05-18 23:22:36 - SimpleLog - INFO: - episode: 83, ep_reward: 66.0, ep_step: 66 +2023-05-18 23:22:37 - SimpleLog - INFO: - episode: 84, ep_reward: 90.0, ep_step: 90 +2023-05-18 23:22:38 - SimpleLog - INFO: - update_step: 3500, online_eval_reward: 146.000 +2023-05-18 23:22:38 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 146.000, save the best model! +2023-05-18 23:22:38 - SimpleLog - INFO: - episode: 85, ep_reward: 134.0, ep_step: 134 +2023-05-18 23:22:39 - SimpleLog - INFO: - episode: 86, ep_reward: 156.0, ep_step: 156 +2023-05-18 23:22:40 - SimpleLog - INFO: - episode: 87, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:22:41 - SimpleLog - INFO: - update_step: 4000, online_eval_reward: 185.000 +2023-05-18 23:22:41 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 185.000, save the best model! +2023-05-18 23:22:42 - SimpleLog - INFO: - episode: 88, ep_reward: 196.0, ep_step: 196 +2023-05-18 23:22:43 - SimpleLog - INFO: - episode: 89, ep_reward: 190.0, ep_step: 190 +2023-05-18 23:22:44 - SimpleLog - INFO: - episode: 90, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:22:45 - SimpleLog - INFO: - update_step: 4500, online_eval_reward: 200.000 +2023-05-18 23:22:45 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model! +2023-05-18 23:22:46 - SimpleLog - INFO: - episode: 91, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:22:47 - SimpleLog - INFO: - episode: 92, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:22:48 - SimpleLog - INFO: - update_step: 5000, online_eval_reward: 200.000 +2023-05-18 23:22:48 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model! +2023-05-18 23:22:49 - SimpleLog - INFO: - episode: 93, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:22:50 - SimpleLog - INFO: - episode: 94, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:22:51 - SimpleLog - INFO: - episode: 95, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:22:52 - SimpleLog - INFO: - update_step: 5500, online_eval_reward: 200.000 +2023-05-18 23:22:52 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model! +2023-05-18 23:22:53 - SimpleLog - INFO: - episode: 96, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:22:54 - SimpleLog - INFO: - episode: 97, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:22:55 - SimpleLog - INFO: - update_step: 6000, online_eval_reward: 200.000 +2023-05-18 23:22:55 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model! +2023-05-18 23:22:56 - SimpleLog - INFO: - episode: 98, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:22:57 - SimpleLog - INFO: - episode: 99, ep_reward: 200.0, ep_step: 200 +2023-05-18 23:22:57 - SimpleLog - INFO: - Finish training! total time consumed: 41.45s diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/1000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/1000 new file mode 100644 index 0000000000000000000000000000000000000000..c1db9ebef56cc5d2f8055e5722478d40fa69890f Binary files /dev/null and b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/1000 differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/1500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/1500 new file mode 100644 index 0000000000000000000000000000000000000000..9941dff020972f1a1bdcfac93e88234cde43eb5f Binary files /dev/null and b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/1500 differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/2000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/2000 new file mode 100644 index 0000000000000000000000000000000000000000..87cc9b8f09124e2fca7193686b179f1cf530234b Binary files /dev/null and b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/2000 differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/2500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/2500 new file mode 100644 index 0000000000000000000000000000000000000000..589ca5c9fb4eebf7a9d254d094559fc75ef63197 Binary files /dev/null and b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/2500 differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/3000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/3000 new file mode 100644 index 0000000000000000000000000000000000000000..f52371ec5b51262de1ea48fda565f3e31c045de4 Binary files /dev/null and b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/3000 differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/3500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/3500 new file mode 100644 index 0000000000000000000000000000000000000000..9978125679846186252a83389b3c9bc2605974df Binary files /dev/null and b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/3500 differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/4000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/4000 new file mode 100644 index 0000000000000000000000000000000000000000..f1038f07174a1ebf0da103acf384bb57c6d66fe6 Binary files /dev/null and b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/4000 differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/4500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/4500 new file mode 100644 index 0000000000000000000000000000000000000000..cfb5c626d1433a6d48acec2644c86a569916b393 Binary files /dev/null and b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/4500 differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/500 new file mode 100644 index 0000000000000000000000000000000000000000..61ef24a1cda78a588ce88438a9f954db08be193f Binary files /dev/null and b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/500 differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/5000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/5000 new file mode 100644 index 0000000000000000000000000000000000000000..47509dac54014f347365018e091292daf7da9d8d Binary files /dev/null and b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/5000 differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/5500 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/5500 new file mode 100644 index 0000000000000000000000000000000000000000..ea8189c542b756bfc483e4d13dd8aea1faa508b3 Binary files /dev/null and b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/5500 differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/6000 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/6000 new file mode 100644 index 0000000000000000000000000000000000000000..077ebfa1b2f6b8dd18dc876243cae94f06f788a5 Binary files /dev/null and b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/6000 differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/best b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/best new file mode 100644 index 0000000000000000000000000000000000000000..077ebfa1b2f6b8dd18dc876243cae94f06f788a5 Binary files /dev/null and b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/models/best differ diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/tb_logs/interact/events.out.tfevents.1684423335.DESKTOP-H34HQIQ.88188.0 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/tb_logs/interact/events.out.tfevents.1684423335.DESKTOP-H34HQIQ.88188.0 new file mode 100644 index 0000000000000000000000000000000000000000..8e88df7e88278a8ca476bdf843c56d40efa93a18 --- /dev/null +++ b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/tb_logs/interact/events.out.tfevents.1684423335.DESKTOP-H34HQIQ.88188.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8113e0d3aca3f0a3a4bfbd3a08b691712bbadca363a276efb79d4a8bed0e0d4 +size 10436 diff --git a/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/tb_logs/model/events.out.tfevents.1684423335.DESKTOP-H34HQIQ.88188.1 b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/tb_logs/model/events.out.tfevents.1684423335.DESKTOP-H34HQIQ.88188.1 new file mode 100644 index 0000000000000000000000000000000000000000..8e14f4ab9f6c5f3a7e3db1d1839942e6dc8cbbe6 --- /dev/null +++ b/ClassControl/CartPole-v1/Train_single_CartPole-v1_PER_DQN_20230518-232215/tb_logs/model/events.out.tfevents.1684423335.DESKTOP-H34HQIQ.88188.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c848c1bd44a3cfcd06c305bab90b4bacceadd494bb86e4550338a1e719d706aa +size 308123