Roberto committed on
Commit
6a0df06
1 Parent(s): 440f65b

Initial commit

Browse files
README.md CHANGED
@@ -6,7 +6,7 @@ tags:
6
  - reinforcement-learning
7
  - stable-baselines3
8
  model-index:
9
- - name: DQN
10
  results:
11
  - task:
12
  type: reinforcement-learning
@@ -16,13 +16,13 @@ model-index:
16
  type: SpaceInvadersNoFrameskip-v4
17
  metrics:
18
  - type: mean_reward
19
- value: 374.00 +/- 214.89
20
  name: mean_reward
21
  verified: false
22
  ---
23
 
24
- # **DQN** Agent playing **SpaceInvadersNoFrameskip-v4**
25
- This is a trained model of a **DQN** agent playing **SpaceInvadersNoFrameskip-v4**
26
  using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3)
27
  and the [RL Zoo](https://github.com/DLR-RM/rl-baselines3-zoo).
28
 
@@ -38,39 +38,37 @@ SB3 Contrib: https://github.com/Stable-Baselines-Team/stable-baselines3-contrib
38
 
39
  ```
40
  # Download model and save it into the logs/ folder
41
- python -m rl_zoo3.load_from_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -orga Roberto -f logs/
42
- python enjoy.py --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
43
  ```
44
 
45
  If you installed the RL Zoo3 via pip (`pip install rl_zoo3`), from anywhere you can do:
46
  ```
47
- python -m rl_zoo3.load_from_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -orga Roberto -f logs/
48
- rl_zoo3 enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
49
  ```
50
 
51
  ## Training (with the RL Zoo)
52
  ```
53
- python train.py --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
54
  # Upload the model and generate video (when possible)
55
- python -m rl_zoo3.push_to_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/ -orga Roberto
56
  ```
57
 
58
  ## Hyperparameters
59
  ```python
60
- OrderedDict([('batch_size', 32),
61
- ('buffer_size', 100000),
 
62
  ('env_wrapper',
63
  ['stable_baselines3.common.atari_wrappers.AtariWrapper']),
64
- ('exploration_final_eps', 0.01),
65
- ('exploration_fraction', 0.1),
66
  ('frame_stack', 4),
67
- ('gradient_steps', 1),
68
- ('learning_rate', 0.0001),
69
- ('learning_starts', 100000),
70
- ('n_timesteps', 100000.0),
71
- ('optimize_memory_usage', False),
72
  ('policy', 'CnnPolicy'),
73
- ('target_update_interval', 1000),
74
- ('train_freq', 4),
75
  ('normalize', False)])
76
  ```
 
6
  - reinforcement-learning
7
  - stable-baselines3
8
  model-index:
9
+ - name: PPO
10
  results:
11
  - task:
12
  type: reinforcement-learning
 
16
  type: SpaceInvadersNoFrameskip-v4
17
  metrics:
18
  - type: mean_reward
19
+ value: 808.50 +/- 361.36
20
  name: mean_reward
21
  verified: false
22
  ---
23
 
24
+ # **PPO** Agent playing **SpaceInvadersNoFrameskip-v4**
25
+ This is a trained model of a **PPO** agent playing **SpaceInvadersNoFrameskip-v4**
26
  using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3)
27
  and the [RL Zoo](https://github.com/DLR-RM/rl-baselines3-zoo).
28
 
 
38
 
39
  ```
40
  # Download model and save it into the logs/ folder
41
+ python -m rl_zoo3.load_from_hub --algo ppo --env SpaceInvadersNoFrameskip-v4 -orga Roberto -f logs/
42
+ python enjoy.py --algo ppo --env SpaceInvadersNoFrameskip-v4 -f logs/
43
  ```
44
 
45
  If you installed the RL Zoo3 via pip (`pip install rl_zoo3`), from anywhere you can do:
46
  ```
47
+ python -m rl_zoo3.load_from_hub --algo ppo --env SpaceInvadersNoFrameskip-v4 -orga Roberto -f logs/
48
+ rl_zoo3 enjoy --algo ppo --env SpaceInvadersNoFrameskip-v4 -f logs/
49
  ```
50
 
51
  ## Training (with the RL Zoo)
52
  ```
53
+ python train.py --algo ppo --env SpaceInvadersNoFrameskip-v4 -f logs/
54
  # Upload the model and generate video (when possible)
55
+ python -m rl_zoo3.push_to_hub --algo ppo --env SpaceInvadersNoFrameskip-v4 -f logs/ -orga Roberto
56
  ```
57
 
58
  ## Hyperparameters
59
  ```python
60
+ OrderedDict([('batch_size', 256),
61
+ ('clip_range', 'lin_0.1'),
62
+ ('ent_coef', 0.01),
63
  ('env_wrapper',
64
  ['stable_baselines3.common.atari_wrappers.AtariWrapper']),
 
 
65
  ('frame_stack', 4),
66
+ ('learning_rate', 'lin_2.5e-4'),
67
+ ('n_envs', 8),
68
+ ('n_epochs', 4),
69
+ ('n_steps', 128),
70
+ ('n_timesteps', 10000000.0),
71
  ('policy', 'CnnPolicy'),
72
+ ('vf_coef', 0.5),
 
73
  ('normalize', False)])
74
  ```
args.yml CHANGED
@@ -1,6 +1,6 @@
1
  !!python/object/apply:collections.OrderedDict
2
  - - - algo
3
- - dqn
4
  - - conf_file
5
  - null
6
  - - device
@@ -54,7 +54,7 @@
54
  - - save_replay_buffer
55
  - false
56
  - - seed
57
- - 976994039
58
  - - storage
59
  - null
60
  - - study_name
 
1
  !!python/object/apply:collections.OrderedDict
2
  - - - algo
3
+ - ppo
4
  - - conf_file
5
  - null
6
  - - device
 
54
  - - save_replay_buffer
55
  - false
56
  - - seed
57
+ - 1410119825
58
  - - storage
59
  - null
60
  - - study_name
config.yml CHANGED
@@ -1,29 +1,25 @@
1
  !!python/object/apply:collections.OrderedDict
2
  - - - batch_size
3
- - 32
4
- - - buffer_size
5
- - 100000
 
 
6
  - - env_wrapper
7
  - - stable_baselines3.common.atari_wrappers.AtariWrapper
8
- - - exploration_final_eps
9
- - 0.01
10
- - - exploration_fraction
11
- - 0.1
12
  - - frame_stack
13
  - 4
14
- - - gradient_steps
15
- - 1
16
  - - learning_rate
17
- - 0.0001
18
- - - learning_starts
19
- - 100000
 
 
 
 
20
  - - n_timesteps
21
- - 100000.0
22
- - - optimize_memory_usage
23
- - false
24
  - - policy
25
  - CnnPolicy
26
- - - target_update_interval
27
- - 1000
28
- - - train_freq
29
- - 4
 
1
  !!python/object/apply:collections.OrderedDict
2
  - - - batch_size
3
+ - 256
4
+ - - clip_range
5
+ - lin_0.1
6
+ - - ent_coef
7
+ - 0.01
8
  - - env_wrapper
9
  - - stable_baselines3.common.atari_wrappers.AtariWrapper
 
 
 
 
10
  - - frame_stack
11
  - 4
 
 
12
  - - learning_rate
13
+ - lin_2.5e-4
14
+ - - n_envs
15
+ - 8
16
+ - - n_epochs
17
+ - 4
18
+ - - n_steps
19
+ - 128
20
  - - n_timesteps
21
+ - 10000000.0
 
 
22
  - - policy
23
  - CnnPolicy
24
+ - - vf_coef
25
+ - 0.5
 
 
ppo-SpaceInvadersNoFrameskip-v4.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b655fac000369f7b2d10caa6a6ee777aed467e6a90a635ae0d5224043d5638db
3
+ size 20442626
ppo-SpaceInvadersNoFrameskip-v4/_stable_baselines3_version ADDED
@@ -0,0 +1 @@
 
 
1
+ 1.6.2
ppo-SpaceInvadersNoFrameskip-v4/data ADDED
The diff for this file is too large to render. See raw diff
 
ppo-SpaceInvadersNoFrameskip-v4/policy.optimizer.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:223a39a5f8f477e61ba0ead2052ca913cb226730beacc7ea10af6408609e1cf5
3
+ size 13511481
ppo-SpaceInvadersNoFrameskip-v4/policy.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39250ac8c632db02b9996e3db6398cb748e703da9faea8c3b183aa97d1c40be5
3
+ size 6755009
ppo-SpaceInvadersNoFrameskip-v4/pytorch_variables.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d030ad8db708280fcae77d87e973102039acd23a11bdecc3db8eb6c0ac940ee1
3
+ size 431
ppo-SpaceInvadersNoFrameskip-v4/system_info.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ OS: Linux-5.15.0-56-generic-x86_64-with-glibc2.35 #62~20.04.1-Ubuntu SMP Tue Nov 22 21:24:20 UTC 2022
2
+ Python: 3.9.15
3
+ Stable-Baselines3: 1.6.2
4
+ PyTorch: 1.13.1+cu117
5
+ GPU Enabled: True
6
+ Numpy: 1.24.0
7
+ Gym: 0.21.0
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bdab282bd1eb89b9d6e06e4029b8262d97a8298038199be2dd79dd558b176c4
3
- size 234682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85bfc6ed9433811788324a541ce8b129a920cb965bae7796fe02c1a772a65659
3
+ size 203960
results.json CHANGED
@@ -1 +1 @@
1
- {"mean_reward": 374.0, "std_reward": 214.89299662855464, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2022-12-20T20:52:47.885895"}
 
1
+ {"mean_reward": 808.5, "std_reward": 361.35889362239305, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2022-12-21T09:29:07.019988"}
train_eval_metrics.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a25f47fcc3bae5627ec7a41f3b4886bbb8a8c40c5497a0805663ebddbe9d9dc6
3
- size 5763
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:271a39b9269e2be53529507d07f422263036bf2d60154c534c274b85d139e3f2
3
+ size 314162