MattStammers committed
Commit 4cc5070
Parent: 1ee8c76

Upload folder using huggingface_hub

.summary/0/events.out.tfevents.1696703324.rhmmedcatt-proliant-ml350-gen10 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8c4041c2ffeee80e57f38c4cec57d7949c5ba1e3b443233cf75b2c50ec9c1435
+ size 16965
.summary/1/events.out.tfevents.1696703324.rhmmedcatt-proliant-ml350-gen10 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6c89b986bfd9e922d86e525a374e070c6428329945df52b7810c5a86bb238a70
+ size 11777
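
The ADDED files above (and the checkpoints below) are stored via Git LFS, so the repository itself holds only a three-line pointer stub: the spec version, the object's SHA-256 id, and its size in bytes. A minimal sketch of reading those fields back, assuming the repo was cloned without pulling LFS content so the stub is still on disk:

```python
# Minimal sketch: parse a Git LFS pointer stub into its three fields.
# If the LFS content was already pulled, the file on disk is the real
# binary instead and this parse will return nonsense -- that is expected.
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields  # keys: "version", "oid", "size"

ptr = parse_lfs_pointer(
    ".summary/0/events.out.tfevents.1696703324.rhmmedcatt-proliant-ml350-gen10"
)
print(ptr["oid"], int(ptr["size"]))  # sha256:8c4041c2... 16965
```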
README.md CHANGED
@@ -15,7 +15,7 @@ model-index:
   type: atari_alien
   metrics:
   - type: mean_reward
- value: 1007.00 +/- 73.36
+ value: 240.00 +/- 54.22
   name: mean_reward
   verified: false
   ---
@@ -40,22 +40,23 @@ This model as with all the others in the benchmarks was trained asynchronously u
 
   The aim is to reach state of the art (SOTA) performance on each atari environment. I will flag the models with SOTA when they reach at or near these levels.
 
- The hyperparameters used in the model are the ones I have pushed to my fork of sample-factory: https://github.com/MattStammers/sample-factory
+ The hyperparameters used in the model are the ones I have pushed to my fork of sample-factory: https://github.com/MattStammers/sample-factory. https://huggingface.co/edbeeching has kindly shared his tuned hyperparameters,
+ so I saved time by reusing many of them to maximise performance. However, he trained for 2 billion steps; I have started at 100 million to see how performance goes at that level:
  ```
  hyperparameters = {
   "device": "gpu",
-  "seed": null,
+  "seed": 1234,
   "num_policies": 2,
-  "async_rl": false,
+  "async_rl": true,
   "serial_mode": false,
-  "batched_sampling": false,
+  "batched_sampling": true,
   "num_batches_to_accumulate": 2,
   "worker_num_splits": 1,
-  "policy_workers_per_policy": 2,
+  "policy_workers_per_policy": 1,
   "max_policy_lag": 1000,
   "num_workers": 16,
   "num_envs_per_worker": 2,
-  "batch_size": 256,
+  "batch_size": 1024,
   "num_batches_per_epoch": 8,
   "num_epochs": 4,
   "rollout": 128,
@@ -66,7 +67,7 @@ hyperparameters = {
   "reward_clip": 1000.0,
   "value_bootstrap": false,
   "normalize_returns": true,
-  "exploration_loss_coeff": 0.01,
+  "exploration_loss_coeff": 0.0004677351413,
   "value_loss_coeff": 0.5,
   "kl_loss_coeff": 0.0,
   "exploration_loss": "entropy",
@@ -80,8 +81,8 @@ hyperparameters = {
   "adam_eps": 1e-05,
   "adam_beta1": 0.9,
   "adam_beta2": 0.999,
-  "max_grad_norm": 0.5,
-  "learning_rate": 0.00025,
+  "max_grad_norm": 0.0,
+  "learning_rate": 0.0003033891184,
   "lr_schedule": "linear_decay",
   "lr_schedule_kl_threshold": 0.008,
   "lr_adaptive_min": 1e-06,
@@ -89,7 +90,9 @@ hyperparameters = {
   "obs_subtract_mean": 0.0,
   "obs_scale": 255.0,
   "normalize_input": true,
-  "normalize_input_keys": null,
+  "normalize_input_keys": [
+  "obs"
+  ],
   "decorrelate_experience_max_seconds": 0,
   "decorrelate_envs_on_one_worker": true,
   "actor_worker_gpus": [],
@@ -101,14 +104,14 @@ hyperparameters = {
   "flush_summaries_interval": 30,
   "stats_avg": 100,
   "summaries_use_frameskip": true,
-  "heartbeat_interval": 20,
-  "heartbeat_reporting_interval": 180,
+  "heartbeat_interval": 10,
+  "heartbeat_reporting_interval": 60,
   "train_for_env_steps": 100000000,
   "train_for_seconds": 10000000000,
   "save_every_sec": 120,
   "keep_checkpoints": 2,
   "load_checkpoint_kind": "latest",
-  "save_milestones_sec": -1,
+  "save_milestones_sec": 1200,
   "save_best_every_sec": 5,
   "save_best_metric": "reward",
   "save_best_after": 100000,
@@ -137,7 +140,7 @@ hyperparameters = {
   "env_gpu_actions": false,
   "env_gpu_observations": true,
   "env_frameskip": 4,
-  "env_framestack": 4
+  "env_framestack": 4,
   }
 
  ```
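
Since the commit message says the folder was uploaded with huggingface_hub, the whole repo can be fetched back the same way. A hedged sketch: the repo id below is a placeholder (it is not given in this diff), and `sf_examples.atari.enjoy_atari` is assumed to be sample-factory 2.x's Atari evaluation entry point:

```python
# Hedged sketch: pull this model repo locally with huggingface_hub,
# the same library used for the upload in this commit.
from huggingface_hub import snapshot_download

# Placeholder repo id -- substitute the actual Hub id of this model.
local_dir = snapshot_download(repo_id="<user>/atari_alien_APPO")

# sample-factory's evaluation CLI should then be able to replay it, e.g.:
#   python -m sf_examples.atari.enjoy_atari --algo=APPO --env=atari_alien \
#       --experiment=atari_alien_APPO --train_dir=<local_dir>
print(local_dir)
```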
checkpoint_p0/best_000000128_131072_reward_6.930.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0ac7d83347d29e588042f5ec92127d30422115ffbf8a8ad5bd22ee7fe8a1f871
+ size 20795763
checkpoint_p0/checkpoint_000000160_163840.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:36c71762acac17d24f351e29ca5d927bb415dc020ddb113bb991c81cb1e43d68
+ size 20796099
checkpoint_p1/best_000000128_131072_reward_6.290.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a5b0d005f468da50852478485e2d89c0911b6c41189e6b65ae82d6b23377e11
+ size 20795763
checkpoint_p1/checkpoint_000000160_163840.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c2c6abc927ef4a4ce3f408f9e8f37a1d7f0a4144240f74f2fdba94c43a635c4e
+ size 20796099
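
Each policy (num_policies is 2) contributes a best-reward checkpoint and a latest checkpoint, all ~20 MB PyTorch files. A hedged sketch for a quick local inspection, assuming the LFS content has been pulled so the .pth files are real binaries:

```python
# Hedged sketch: peek inside one sample-factory checkpoint.
# Requires `git lfs pull` first; a pointer stub will not deserialize.
import torch

ckpt = torch.load(
    "checkpoint_p0/best_000000128_131072_reward_6.930.pth",
    map_location="cpu",
)
# Checkpoints are typically dicts (model weights, optimizer state,
# step counters); print the top-level keys to see what was saved.
print(sorted(ckpt.keys()) if isinstance(ckpt, dict) else type(ckpt))
```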
config.json CHANGED
@@ -8,16 +8,16 @@
   "device": "gpu",
   "seed": 1234,
   "num_policies": 2,
-  "async_rl": false,
+  "async_rl": true,
   "serial_mode": false,
-  "batched_sampling": false,
+  "batched_sampling": true,
   "num_batches_to_accumulate": 2,
   "worker_num_splits": 1,
   "policy_workers_per_policy": 1,
   "max_policy_lag": 1000,
   "num_workers": 16,
   "num_envs_per_worker": 2,
-  "batch_size": 256,
+  "batch_size": 1024,
   "num_batches_per_epoch": 8,
   "num_epochs": 4,
   "rollout": 128,
@@ -28,7 +28,7 @@
   "reward_clip": 1000.0,
   "value_bootstrap": false,
   "normalize_returns": true,
-  "exploration_loss_coeff": 0.01,
+  "exploration_loss_coeff": 0.0004677351413,
   "value_loss_coeff": 0.5,
   "kl_loss_coeff": 0.0,
   "exploration_loss": "entropy",
@@ -42,8 +42,8 @@
   "adam_eps": 1e-05,
   "adam_beta1": 0.9,
   "adam_beta2": 0.999,
-  "max_grad_norm": 0.5,
-  "learning_rate": 0.00025,
+  "max_grad_norm": 0.0,
+  "learning_rate": 0.0003033891184,
   "lr_schedule": "linear_decay",
   "lr_schedule_kl_threshold": 0.008,
   "lr_adaptive_min": 1e-06,
@@ -51,7 +51,9 @@
   "obs_subtract_mean": 0.0,
   "obs_scale": 255.0,
   "normalize_input": true,
-  "normalize_input_keys": null,
+  "normalize_input_keys": [
+  "obs"
+  ],
   "decorrelate_experience_max_seconds": 0,
   "decorrelate_envs_on_one_worker": true,
   "actor_worker_gpus": [],
@@ -63,14 +65,14 @@
   "flush_summaries_interval": 30,
   "stats_avg": 100,
   "summaries_use_frameskip": true,
-  "heartbeat_interval": 20,
-  "heartbeat_reporting_interval": 180,
+  "heartbeat_interval": 10,
+  "heartbeat_reporting_interval": 60,
   "train_for_env_steps": 100000000,
   "train_for_seconds": 10000000000,
   "save_every_sec": 120,
   "keep_checkpoints": 2,
   "load_checkpoint_kind": "latest",
-  "save_milestones_sec": -1,
+  "save_milestones_sec": 1200,
   "save_best_every_sec": 5,
   "save_best_metric": "reward",
   "save_best_after": 100000,
@@ -122,7 +124,7 @@
   "pbt_target_objective": "true_objective",
   "pbt_perturb_min": 1.1,
   "pbt_perturb_max": 1.5,
-  "command_line": "--algo=APPO --env=atari_alien --experiment=atari_alien_APPO --num_policies=2 --restart_behavior=restart --train_dir=./train_atari --train_for_env_steps=100000000 --seed=1234 --num_workers=16 --num_envs_per_worker=2 --num_batches_per_epoch=8 --with_wandb=true --wandb_user=matt-stammers --wandb_project=atari_APPO --wandb_group=atari_alien --wandb_job_type=SF --wandb_tags=atari",
+  "command_line": "--algo=APPO --env=atari_alien --experiment=atari_alien_APPO --num_policies=2 --restart_behavior=restart --train_dir=./train_atari --train_for_env_steps=100000000 --seed=1234 --num_workers=16 --num_envs_per_worker=2 --num_batches_per_epoch=8 --async_rl=true --batched_sampling=true --batch_size=1024 --max_grad_norm=0 --learning_rate=0.0003033891184 --heartbeat_interval=10 --heartbeat_reporting_interval=60 --save_milestones_sec=1200 --num_epochs=4 --exploration_loss_coeff=0.0004677351413 --with_wandb=true --wandb_user=matt-stammers --wandb_project=atari_APPO --wandb_group=atari_alien --wandb_job_type=SF --wandb_tags=atari",
   "cli_args": {
   "algo": "APPO",
   "env": "atari_alien",
@@ -131,10 +133,20 @@
   "restart_behavior": "restart",
   "seed": 1234,
   "num_policies": 2,
+  "async_rl": true,
+  "batched_sampling": true,
   "num_workers": 16,
   "num_envs_per_worker": 2,
+  "batch_size": 1024,
   "num_batches_per_epoch": 8,
+  "num_epochs": 4,
+  "exploration_loss_coeff": 0.0004677351413,
+  "max_grad_norm": 0.0,
+  "learning_rate": 0.0003033891184,
+  "heartbeat_interval": 10,
+  "heartbeat_reporting_interval": 60,
   "train_for_env_steps": 100000000,
+  "save_milestones_sec": 1200,
   "with_wandb": true,
   "wandb_user": "matt-stammers",
   "wandb_project": "atari_APPO",
@@ -146,5 +158,5 @@
   },
   "git_hash": "5fff97c2f535da5987d358cdbe6927cccd43621e",
   "git_repo_name": "not a git repository",
-  "wandb_unique_id": "atari_alien_APPO_20231007_171858_185155"
+  "wandb_unique_id": "atari_alien_APPO_20231007_192842_088132"
   }
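
The changed values can be sanity-checked directly from config.json: 16 workers * 2 envs each * a 128-step rollout gives 4096 transitions per sampling pass, and each learner iteration consumes batch_size 1024 * 8 batches per epoch * 4 epochs = 32768 samples. A minimal sketch, assuming the keys sit at the top level of the file as the diff shows:

```python
# Hedged sketch: derive per-iteration sample counts from config.json.
import json

with open("config.json") as f:
    cfg = json.load(f)

# 16 workers * 2 envs/worker * 128-step rollout = 4096 transitions
transitions_per_pass = (
    cfg["num_workers"] * cfg["num_envs_per_worker"] * cfg["rollout"]
)
# 1024 batch size * 8 batches/epoch * 4 epochs = 32768 samples
samples_per_iteration = (
    cfg["batch_size"] * cfg["num_batches_per_epoch"] * cfg["num_epochs"]
)
print(transitions_per_pass, samples_per_iteration)  # 4096 32768
```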
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
   version https://git-lfs.github.com/spec/v1
-  oid sha256:4803d6281edfc5aedefef94d87dadad9713cd5209c976ae1ba2f81937ce4209d
-  size 2057580
+  oid sha256:90a4e8f39d42032eb384f1984cbf359b91df904281209911175b568634c681b9
+  size 1394763
sf_log.txt CHANGED
The diff for this file is too large to render. See raw diff