RAWsi-18 committed on
Commit 9a3509d
1 Parent(s): 873f959

Initial commit

README.md CHANGED
@@ -6,7 +6,7 @@ tags:
 - reinforcement-learning
 - stable-baselines3
 model-index:
-- name: PPO
+- name: DQN
   results:
   - task:
       type: reinforcement-learning
@@ -16,13 +16,13 @@ model-index:
       type: SpaceInvadersNoFrameskip-v4
     metrics:
     - type: mean_reward
-      value: 144.50 +/- 99.26
+      value: 955.50 +/- 413.08
       name: mean_reward
       verified: false
 ---
 
-# **PPO** Agent playing **SpaceInvadersNoFrameskip-v4**
-This is a trained model of a **PPO** agent playing **SpaceInvadersNoFrameskip-v4**
+# **DQN** Agent playing **SpaceInvadersNoFrameskip-v4**
+This is a trained model of a **DQN** agent playing **SpaceInvadersNoFrameskip-v4**
 using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3)
 and the [RL Zoo](https://github.com/DLR-RM/rl-baselines3-zoo).
 
@@ -43,38 +43,42 @@ pip install rl_zoo3
 
 ```
 # Download model and save it into the logs/ folder
-python -m rl_zoo3.load_from_hub --algo ppo --env SpaceInvadersNoFrameskip-v4 -orga RAWsi-18 -f logs/
-python -m rl_zoo3.enjoy --algo ppo --env SpaceInvadersNoFrameskip-v4 -f logs/
+python -m rl_zoo3.load_from_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -orga RAWsi-18 -f logs/
+python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
 ```
 
 If you installed the RL Zoo3 via pip (`pip install rl_zoo3`), from anywhere you can do:
 ```
-python -m rl_zoo3.load_from_hub --algo ppo --env SpaceInvadersNoFrameskip-v4 -orga RAWsi-18 -f logs/
-python -m rl_zoo3.enjoy --algo ppo --env SpaceInvadersNoFrameskip-v4 -f logs/
+python -m rl_zoo3.load_from_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -orga RAWsi-18 -f logs/
+python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
 ```
 
 ## Training (with the RL Zoo)
 ```
-python -m rl_zoo3.train --algo ppo --env SpaceInvadersNoFrameskip-v4 -f logs/
+python -m rl_zoo3.train --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
 # Upload the model and generate video (when possible)
-python -m rl_zoo3.push_to_hub --algo ppo --env SpaceInvadersNoFrameskip-v4 -f logs/ -orga RAWsi-18
+python -m rl_zoo3.push_to_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/ -orga RAWsi-18
 ```
 
 ## Hyperparameters
 ```python
-OrderedDict([('batch_size', 256),
-             ('clip_range', 0.001),
-             ('ent_coef', 0.01),
+OrderedDict([('batch_size', 32),
+             ('buffer_size', 400000),
              ('env_wrapper',
               ['stable_baselines3.common.atari_wrappers.AtariWrapper']),
+             ('exploration_final_eps', 0.01),
+             ('exploration_fraction', 0.1),
              ('frame_stack', 4),
+             ('gamma', 0.99),
+             ('gradient_steps', 1),
              ('learning_rate', 0.0001),
-             ('n_envs', 8),
-             ('n_epochs', 4),
-             ('n_steps', 128),
-             ('n_timesteps', 100000),
+             ('learning_starts', 200000),
+             ('n_timesteps', 10000000.0),
+             ('optimize_memory_usage', True),
              ('policy', 'CnnPolicy'),
-             ('vf_coef', 0.5),
+             ('replay_buffer_kwargs', {'handle_timeout_termination': False}),
+             ('target_update_interval', 30000),
+             ('train_freq', 4),
              ('normalize', False)])
 ```
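Editor's note: for anyone who wants to run the downloaded checkpoint without going through `rl_zoo3.enjoy`, here is a minimal sketch using plain stable-baselines3. The checkpoint path follows the usual RL Zoo layout under `logs/` and is an assumption; adjust it to whatever `load_from_hub` actually created.

```python
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack

# Same preprocessing as training: AtariWrapper + 4-frame stacking
env = make_atari_env("SpaceInvadersNoFrameskip-v4", n_envs=1)
env = VecFrameStack(env, n_stack=4)

# buffer_size=1 avoids allocating the full 400k-transition replay
# buffer just for inference (the same trick rl_zoo3's enjoy script uses).
# Path is an assumption based on the standard RL Zoo layout.
model = DQN.load(
    "logs/dqn/SpaceInvadersNoFrameskip-v4_1/SpaceInvadersNoFrameskip-v4.zip",
    buffer_size=1,
)

obs = env.reset()
for _ in range(1_000):
    action, _ = model.predict(obs, deterministic=True)
    obs, rewards, dones, infos = env.step(action)
```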
 
args.yml CHANGED
@@ -1,10 +1,10 @@
 !!python/object/apply:collections.OrderedDict
 - - - algo
-    - ppo
+    - dqn
   - - conf_file
-    - ppo.yml
+    - null
   - - device
-    - auto
+    - mps
   - - env
     - SpaceInvadersNoFrameskip-v4
   - - env_kwargs
@@ -18,7 +18,7 @@
   - - hyperparams
     - null
   - - log_folder
-    - logs/
+    - ./logs/
   - - log_interval
     - -1
   - - max_total_trials
@@ -54,7 +54,7 @@
   - - save_replay_buffer
     - false
   - - seed
-    - 2285675408
+    - 2851482261
   - - storage
     - null
   - - study_name
@@ -77,5 +77,5 @@
     - null
   - - wandb_project_name
     - sb3
-  - - wandb_tags
-    - []
+  - - yaml_file
+    - null
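A note on args.yml: it is serialized with a `!!python/object/apply:collections.OrderedDict` tag, which `yaml.safe_load` refuses. A minimal sketch for inspecting it follows; the path assumes the standard RL Zoo layout and is illustrative only.

```python
import yaml

# The !!python/object/apply tag requires the unsafe loader, which can
# execute arbitrary constructors -- only use it on files you trust.
with open(
    "logs/dqn/SpaceInvadersNoFrameskip-v4_1/SpaceInvadersNoFrameskip-v4/args.yml"
) as f:
    args = yaml.load(f, Loader=yaml.UnsafeLoader)

print(args["algo"])  # dqn
print(args["seed"])  # 2851482261
```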
config.yml CHANGED
@@ -1,25 +1,33 @@
 !!python/object/apply:collections.OrderedDict
 - - - batch_size
-    - 256
-  - - clip_range
-    - 0.001
-  - - ent_coef
-    - 0.01
+    - 32
+  - - buffer_size
+    - 400000
   - - env_wrapper
     - - stable_baselines3.common.atari_wrappers.AtariWrapper
+  - - exploration_final_eps
+    - 0.01
+  - - exploration_fraction
+    - 0.1
   - - frame_stack
     - 4
+  - - gamma
+    - 0.99
+  - - gradient_steps
+    - 1
   - - learning_rate
     - 0.0001
-  - - n_envs
-    - 8
-  - - n_epochs
-    - 4
-  - - n_steps
-    - 128
+  - - learning_starts
+    - 200000
   - - n_timesteps
-    - 100000
+    - 10000000.0
+  - - optimize_memory_usage
+    - true
   - - policy
     - CnnPolicy
-  - - vf_coef
-    - 0.5
+  - - replay_buffer_kwargs
+    - handle_timeout_termination: false
+  - - target_update_interval
+    - 30000
+  - - train_freq
+    - 4
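Read as plain stable-baselines3 arguments, the new config corresponds roughly to the setup below. This is a sketch of what `rl_zoo3.train` builds from config.yml, not the exact training script; the single-environment assumption and variable names are mine.

```python
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack

# env_wrapper + frame_stack: AtariWrapper preprocessing, 4 stacked frames
env = make_atari_env("SpaceInvadersNoFrameskip-v4", n_envs=1)
env = VecFrameStack(env, n_stack=4)

model = DQN(
    "CnnPolicy",
    env,
    batch_size=32,
    buffer_size=400_000,
    exploration_final_eps=0.01,
    exploration_fraction=0.1,
    gamma=0.99,
    gradient_steps=1,
    learning_rate=1e-4,
    learning_starts=200_000,
    # optimize_memory_usage=True requires handle_timeout_termination=False
    optimize_memory_usage=True,
    replay_buffer_kwargs={"handle_timeout_termination": False},
    target_update_interval=30_000,
    train_freq=4,
)
model.learn(total_timesteps=10_000_000)  # n_timesteps
```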
dqn-SpaceInvadersNoFrameskip-v4.zip ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:450f283c244ff7b9427931d86c613214f3ecd394ffa306c6e1485c4781f66d24
+size 27220503
dqn-SpaceInvadersNoFrameskip-v4/_stable_baselines3_version ADDED
@@ -0,0 +1 @@
+2.4.0a4
dqn-SpaceInvadersNoFrameskip-v4/data ADDED
The diff for this file is too large to render. See raw diff
 
dqn-SpaceInvadersNoFrameskip-v4/policy.optimizer.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f32af3e79370e1fc8757d69ebe4a7bf7fdc344c8bf905009fb2d6225c8a842b
+size 13505852
dqn-SpaceInvadersNoFrameskip-v4/policy.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b539097fe1cce9cfac67848f897cad9244bb80684d65010f7c0ece893094560
+size 13505178
dqn-SpaceInvadersNoFrameskip-v4/pytorch_variables.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c35cea3b2e60fb5e7e162d3592df775cd400e575a31c72f359fb9e654ab00c5
+size 864
dqn-SpaceInvadersNoFrameskip-v4/system_info.txt ADDED
@@ -0,0 +1,9 @@
+- OS: Linux-6.1.85+-x86_64-with-glibc2.35 # 1 SMP PREEMPT_DYNAMIC Thu Jun 27 21:05:47 UTC 2024
+- Python: 3.10.12
+- Stable-Baselines3: 2.4.0a4
+- PyTorch: 2.3.0+cu121
+- GPU Enabled: False
+- Numpy: 1.25.2
+- Cloudpickle: 2.2.1
+- Gymnasium: 0.29.1
+- OpenAI Gym: 0.25.2
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:395e1fd2d8a8af0742bba9ec795d4c68a907e6f425dc14e12cfdf4006984a159
-size 254024
+oid sha256:bf23b688757adad01a08b1c70cef53088f9be2758949443d55231e5fa373700c
+size 207513
results.json CHANGED
@@ -1 +1 @@
-{"mean_reward": 144.5, "std_reward": 99.2585008953893, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2024-07-11T14:08:17.727661"}
+{"mean_reward": 955.5, "std_reward": 413.08261885487263, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2024-07-11T14:25:17.598243"}
train_eval_metrics.zip CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f35e6afd136de9b1edd90d97013042d5104ef9f5b0ec1dfc7dd40d4aff0a72f
-size 6961
+oid sha256:4533414821c4753535a135ee5bbf143f73a553d68852a70f5f822a7a1d78a8b4
+size 270682