enaitzb commited on
Commit
4874003
1 Parent(s): 5e8725c

First push

Browse files
README.md CHANGED
@@ -1,14 +1,14 @@
1
  ---
2
  library_name: ml-agents
3
  tags:
4
- - Goalie
5
  - deep-reinforcement-learning
6
  - reinforcement-learning
7
- - ML-Agents-Goalie
8
  ---
9
 
10
- # **poca** Agent playing **Goalie**
11
- This is a trained model of a **poca** agent playing **Goalie**
12
  using the [Unity ML-Agents Library](https://github.com/Unity-Technologies/ml-agents).
13
 
14
  ## Usage (with ML-Agents)
 
1
  ---
2
  library_name: ml-agents
3
  tags:
4
+ - SoccerTwos
5
  - deep-reinforcement-learning
6
  - reinforcement-learning
7
+ - ML-Agents-SoccerTwos
8
  ---
9
 
10
+ # **poca** Agent playing **SoccerTwos**
11
+ This is a trained model of a **poca** agent playing **SoccerTwos**
12
  using the [Unity ML-Agents Library](https://github.com/Unity-Technologies/ml-agents).
13
 
14
  ## Usage (with ML-Agents)
SoccerTwos.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:275a0485aca002cd3c8515f9b79701644da1dbe14f450c70b6957227bbb42bc7
3
+ size 1766874
SoccerTwos/SoccerTwos-10226.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:275a0485aca002cd3c8515f9b79701644da1dbe14f450c70b6957227bbb42bc7
3
+ size 1766874
SoccerTwos/SoccerTwos-10226.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:009fbab2d9a53a6ef264f5e2d17c20c0b3acf7cdee5325cfc1fbba3ebab35e4f
3
+ size 9479442
SoccerTwos/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a06df9de48ce32dab3fb02eca880b901924b24aa7e3d2521f391886040579c0
3
+ size 9479202
SoccerTwos/events.out.tfevents.1703231756.wkm0572l.11238.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d85cfd3b8131f4551f237a5491837ff6a05060c2438e5f77a25d88095bb1e24
3
+ size 11296
config.json CHANGED
@@ -1 +1 @@
1
- {"default_settings": null, "behaviors": {"Goalie": {"trainer_type": "poca", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "constant", "beta_schedule": "constant", "epsilon_schedule": "constant"}, "checkpoint_interval": 500000, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 5, "even_checkpoints": false, "max_steps": 30000000, "time_horizon": 1000, "summary_freq": 10000, "threaded": false, "self_play": {"save_steps": 50000, "team_change": 200000, "swap_steps": 1000, "window": 10, "play_against_latest_model_ratio": 0.5, "initial_elo": 1200.0}, "behavioral_cloning": null}, "Striker": {"trainer_type": "poca", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "constant", "beta_schedule": "constant", "epsilon_schedule": "constant"}, "checkpoint_interval": 500000, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 5, "even_checkpoints": false, "max_steps": 30000000, "time_horizon": 1000, "summary_freq": 10000, "threaded": false, "self_play": {"save_steps": 50000, "team_change": 200000, "swap_steps": 4000, "window": 10, "play_against_latest_model_ratio": 0.5, "initial_elo": 1200.0}, "behavioral_cloning": null}}, "env_settings": {"env_path": null, "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "Proba", "initialize_from": null, "load_model": false, "resume": false, "force": false, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}
 
1
+ {"default_settings": null, "behaviors": {"SoccerTwos": {"trainer_type": "poca", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "constant", "beta_schedule": "constant", "epsilon_schedule": "constant"}, "checkpoint_interval": 500000, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 5, "even_checkpoints": false, "max_steps": 10000, "time_horizon": 1000, "summary_freq": 10000, "threaded": false, "self_play": {"save_steps": 50000, "team_change": 200000, "swap_steps": 2000, "window": 10, "play_against_latest_model_ratio": 0.5, "initial_elo": 1200.0}, "behavioral_cloning": null}}, "env_settings": {"env_path": null, "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "Soccer", "initialize_from": null, "load_model": false, "resume": false, "force": false, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}
configuration.yaml CHANGED
@@ -1,6 +1,6 @@
1
  default_settings: null
2
  behaviors:
3
- Goalie:
4
  trainer_type: poca
5
  hyperparameters:
6
  batch_size: 2048
@@ -37,63 +37,14 @@ behaviors:
37
  init_path: null
38
  keep_checkpoints: 5
39
  even_checkpoints: false
40
- max_steps: 30000000
41
  time_horizon: 1000
42
  summary_freq: 10000
43
  threaded: false
44
  self_play:
45
  save_steps: 50000
46
  team_change: 200000
47
- swap_steps: 1000
48
- window: 10
49
- play_against_latest_model_ratio: 0.5
50
- initial_elo: 1200.0
51
- behavioral_cloning: null
52
- Striker:
53
- trainer_type: poca
54
- hyperparameters:
55
- batch_size: 2048
56
- buffer_size: 20480
57
- learning_rate: 0.0003
58
- beta: 0.005
59
- epsilon: 0.2
60
- lambd: 0.95
61
- num_epoch: 3
62
- learning_rate_schedule: constant
63
- beta_schedule: constant
64
- epsilon_schedule: constant
65
- checkpoint_interval: 500000
66
- network_settings:
67
- normalize: false
68
- hidden_units: 512
69
- num_layers: 2
70
- vis_encode_type: simple
71
- memory: null
72
- goal_conditioning_type: hyper
73
- deterministic: false
74
- reward_signals:
75
- extrinsic:
76
- gamma: 0.99
77
- strength: 1.0
78
- network_settings:
79
- normalize: false
80
- hidden_units: 128
81
- num_layers: 2
82
- vis_encode_type: simple
83
- memory: null
84
- goal_conditioning_type: hyper
85
- deterministic: false
86
- init_path: null
87
- keep_checkpoints: 5
88
- even_checkpoints: false
89
- max_steps: 30000000
90
- time_horizon: 1000
91
- summary_freq: 10000
92
- threaded: false
93
- self_play:
94
- save_steps: 50000
95
- team_change: 200000
96
- swap_steps: 4000
97
  window: 10
98
  play_against_latest_model_ratio: 0.5
99
  initial_elo: 1200.0
@@ -119,7 +70,7 @@ engine_settings:
119
  no_graphics: false
120
  environment_parameters: null
121
  checkpoint_settings:
122
- run_id: Proba
123
  initialize_from: null
124
  load_model: false
125
  resume: false
 
1
  default_settings: null
2
  behaviors:
3
+ SoccerTwos:
4
  trainer_type: poca
5
  hyperparameters:
6
  batch_size: 2048
 
37
  init_path: null
38
  keep_checkpoints: 5
39
  even_checkpoints: false
40
+ max_steps: 10000
41
  time_horizon: 1000
42
  summary_freq: 10000
43
  threaded: false
44
  self_play:
45
  save_steps: 50000
46
  team_change: 200000
47
+ swap_steps: 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  window: 10
49
  play_against_latest_model_ratio: 0.5
50
  initial_elo: 1200.0
 
70
  no_graphics: false
71
  environment_parameters: null
72
  checkpoint_settings:
73
+ run_id: Soccer
74
  initialize_from: null
75
  load_model: false
76
  resume: false
run_logs/timers.json CHANGED
@@ -1,454 +1,190 @@
1
  {
2
  "name": "root",
3
  "gauges": {
4
- "Goalie.Policy.Entropy.mean": {
5
- "value": 2.6350436210632324,
6
- "min": 2.6350436210632324,
7
- "max": 3.295673131942749,
8
- "count": 180
9
- },
10
- "Goalie.Policy.Entropy.sum": {
11
- "value": 25570.462890625,
12
- "min": 21048.62109375,
13
- "max": 376798.96875,
14
- "count": 180
15
- },
16
- "Goalie.Environment.EpisodeLength.mean": {
17
- "value": 56.851190476190474,
18
- "min": 46.45891003460208,
19
- "max": 999.0,
20
- "count": 180
21
- },
22
- "Goalie.Environment.EpisodeLength.sum": {
23
- "value": 9551.0,
24
- "min": 9013.0,
25
- "max": 110862.0,
26
- "count": 180
27
- },
28
- "Goalie.Self-play.ELO.mean": {
29
- "value": 780.0279731783617,
30
- "min": 780.0279731783617,
31
- "max": 1197.0217993396393,
32
- "count": 178
33
- },
34
- "Goalie.Self-play.ELO.sum": {
35
- "value": 131044.69949396476,
36
- "min": 1118.4588168213602,
37
- "max": 170427.22083595095,
38
- "count": 178
39
- },
40
- "Goalie.Step.mean": {
41
- "value": 1799995.0,
42
- "min": 9571.0,
43
- "max": 1799995.0,
44
- "count": 180
45
- },
46
- "Goalie.Step.sum": {
47
- "value": 1799995.0,
48
- "min": 9571.0,
49
- "max": 1799995.0,
50
- "count": 180
51
- },
52
- "Goalie.Policy.ExtrinsicBaselineEstimate.mean": {
53
- "value": -0.6391041278839111,
54
- "min": -0.6458126306533813,
55
- "max": 0.07203566282987595,
56
- "count": 180
57
- },
58
- "Goalie.Policy.ExtrinsicBaselineEstimate.sum": {
59
- "value": -107.36949157714844,
60
- "min": -133.0373992919922,
61
- "max": 1.2246062755584717,
62
- "count": 180
63
- },
64
- "Goalie.Policy.ExtrinsicValueEstimate.mean": {
65
- "value": -0.6391041278839111,
66
- "min": -0.6458126306533813,
67
- "max": 0.07203566282987595,
68
- "count": 180
69
- },
70
- "Goalie.Policy.ExtrinsicValueEstimate.sum": {
71
- "value": -107.36949157714844,
72
- "min": -133.0373992919922,
73
- "max": 1.2246062755584717,
74
- "count": 180
75
- },
76
- "Goalie.Environment.CumulativeReward.mean": {
77
- "value": 0.059623811895670814,
78
- "min": 0.045828039333589865,
79
- "max": 0.9996801018714905,
80
- "count": 180
81
- },
82
- "Goalie.Environment.CumulativeReward.sum": {
83
- "value": 10.016800398472697,
84
- "min": 9.021400986239314,
85
- "max": 10.917801141738892,
86
- "count": 180
87
- },
88
- "Goalie.Policy.ExtrinsicReward.mean": {
89
- "value": -0.9403761930408931,
90
- "min": -0.954171972018536,
91
- "max": 0.9996801018714905,
92
- "count": 180
93
- },
94
- "Goalie.Policy.ExtrinsicReward.sum": {
95
- "value": -157.98320043087006,
96
- "min": -204.1928020119667,
97
- "max": 9.996801018714905,
98
- "count": 180
99
- },
100
- "Goalie.Environment.GroupCumulativeReward.mean": {
101
- "value": -1.0,
102
- "min": -1.0,
103
- "max": 0.0,
104
- "count": 180
105
- },
106
- "Goalie.Environment.GroupCumulativeReward.sum": {
107
- "value": -168.0,
108
- "min": -214.0,
109
- "max": 0.0,
110
- "count": 180
111
- },
112
- "Goalie.IsTraining.mean": {
113
- "value": 1.0,
114
- "min": 1.0,
115
- "max": 1.0,
116
- "count": 180
117
- },
118
- "Goalie.IsTraining.sum": {
119
- "value": 1.0,
120
- "min": 1.0,
121
- "max": 1.0,
122
- "count": 180
123
- },
124
- "Goalie.Losses.PolicyLoss.mean": {
125
- "value": 0.016466737778197665,
126
- "min": 0.012115506251575425,
127
- "max": 0.02352036564067627,
128
- "count": 86
129
- },
130
- "Goalie.Losses.PolicyLoss.sum": {
131
- "value": 0.016466737778197665,
132
- "min": 0.012115506251575425,
133
- "max": 0.02352036564067627,
134
- "count": 86
135
- },
136
- "Goalie.Losses.ValueLoss.mean": {
137
- "value": 0.022342384917040665,
138
- "min": 0.0004670941584966689,
139
- "max": 0.033746559917926786,
140
- "count": 86
141
- },
142
- "Goalie.Losses.ValueLoss.sum": {
143
- "value": 0.022342384917040665,
144
- "min": 0.0004670941584966689,
145
- "max": 0.033746559917926786,
146
- "count": 86
147
- },
148
- "Goalie.Losses.BaselineLoss.mean": {
149
- "value": 0.02234272056569656,
150
- "min": 0.0004670941584966689,
151
- "max": 0.033895690863331156,
152
- "count": 86
153
- },
154
- "Goalie.Losses.BaselineLoss.sum": {
155
- "value": 0.02234272056569656,
156
- "min": 0.0004670941584966689,
157
- "max": 0.033895690863331156,
158
- "count": 86
159
- },
160
- "Goalie.Policy.LearningRate.mean": {
161
- "value": 0.0003,
162
- "min": 0.0003,
163
- "max": 0.0003,
164
- "count": 86
165
- },
166
- "Goalie.Policy.LearningRate.sum": {
167
- "value": 0.0003,
168
- "min": 0.0003,
169
- "max": 0.0003,
170
- "count": 86
171
- },
172
- "Goalie.Policy.Epsilon.mean": {
173
- "value": 0.20000000000000007,
174
- "min": 0.20000000000000007,
175
- "max": 0.20000000000000007,
176
- "count": 86
177
- },
178
- "Goalie.Policy.Epsilon.sum": {
179
- "value": 0.20000000000000007,
180
- "min": 0.20000000000000007,
181
- "max": 0.20000000000000007,
182
- "count": 86
183
- },
184
- "Goalie.Policy.Beta.mean": {
185
- "value": 0.005000000000000001,
186
- "min": 0.005000000000000001,
187
- "max": 0.005000000000000001,
188
- "count": 86
189
- },
190
- "Goalie.Policy.Beta.sum": {
191
- "value": 0.005000000000000001,
192
- "min": 0.005000000000000001,
193
- "max": 0.005000000000000001,
194
- "count": 86
195
- },
196
- "Striker.Policy.Entropy.mean": {
197
- "value": 2.3203654289245605,
198
- "min": 2.2051618099212646,
199
- "max": 3.295681953430176,
200
- "count": 180
201
- },
202
- "Striker.Policy.Entropy.sum": {
203
- "value": 23760.54296875,
204
- "min": 15216.9345703125,
205
- "max": 1400902.125,
206
- "count": 180
207
- },
208
- "Striker.Environment.EpisodeLength.mean": {
209
- "value": 50.415841584158414,
210
- "min": 35.613138686131386,
211
- "max": 999.0,
212
- "count": 180
213
- },
214
- "Striker.Environment.EpisodeLength.sum": {
215
- "value": 10184.0,
216
- "min": 8382.0,
217
- "max": 412654.0,
218
- "count": 180
219
- },
220
- "Striker.Step.mean": {
221
- "value": 1799842.0,
222
- "min": 9512.0,
223
- "max": 1799842.0,
224
- "count": 180
225
- },
226
- "Striker.Step.sum": {
227
- "value": 1799842.0,
228
- "min": 9512.0,
229
- "max": 1799842.0,
230
- "count": 180
231
- },
232
- "Striker.Policy.ExtrinsicBaselineEstimate.mean": {
233
- "value": 0.6013904213905334,
234
- "min": -0.16535364091396332,
235
- "max": 0.6533077955245972,
236
- "count": 180
237
- },
238
- "Striker.Policy.ExtrinsicBaselineEstimate.sum": {
239
- "value": 120.87947082519531,
240
- "min": -4.340723991394043,
241
- "max": 179.00633239746094,
242
- "count": 180
243
- },
244
- "Striker.Policy.ExtrinsicValueEstimate.mean": {
245
- "value": 0.601508378982544,
246
- "min": -0.16497276723384857,
247
- "max": 0.6537038087844849,
248
- "count": 180
249
- },
250
- "Striker.Policy.ExtrinsicValueEstimate.sum": {
251
- "value": 120.90318298339844,
252
- "min": -4.237868785858154,
253
- "max": 179.11483764648438,
254
- "count": 180
255
- },
256
- "Striker.Environment.CumulativeReward.mean": {
257
- "value": -0.04979403170094413,
258
- "min": -0.9997200965881348,
259
- "max": -0.035969343590448156,
260
- "count": 180
261
- },
262
- "Striker.Environment.CumulativeReward.sum": {
263
- "value": -10.00860037188977,
264
- "min": -10.743601087480783,
265
- "max": -9.09960101544857,
266
- "count": 180
267
- },
268
- "Striker.Policy.ExtrinsicReward.mean": {
269
- "value": 0.8502029683162917,
270
- "min": -1.9994401931762695,
271
- "max": 0.891693424351894,
272
- "count": 180
273
- },
274
- "Striker.Policy.ExtrinsicReward.sum": {
275
- "value": 170.89079663157463,
276
- "min": -20.052401542663574,
277
- "max": 244.32399827241898,
278
- "count": 180
279
- },
280
- "Striker.Environment.GroupCumulativeReward.mean": {
281
- "value": 0.949791041772757,
282
  "min": 0.0,
283
- "max": 0.9636321185279066,
284
- "count": 180
285
  },
286
- "Striker.Environment.GroupCumulativeReward.sum": {
287
- "value": 190.90799939632416,
288
  "min": 0.0,
289
- "max": 264.0352004766464,
290
- "count": 180
291
- },
292
- "Striker.Self-play.ELO.mean": {
293
- "value": 1754.8676722052517,
294
- "min": 1220.241356428936,
295
- "max": 1754.8676722052517,
296
- "count": 174
297
- },
298
- "Striker.Self-play.ELO.sum": {
299
- "value": 354483.26978546084,
300
- "min": 2454.501043758067,
301
- "max": 477024.1997551883,
302
- "count": 174
303
- },
304
- "Striker.IsTraining.mean": {
 
 
 
 
 
 
 
 
 
 
 
 
305
  "value": 1.0,
306
  "min": 1.0,
307
  "max": 1.0,
308
- "count": 180
309
  },
310
- "Striker.IsTraining.sum": {
311
  "value": 1.0,
312
  "min": 1.0,
313
  "max": 1.0,
314
- "count": 180
315
- },
316
- "Striker.Losses.PolicyLoss.mean": {
317
- "value": 0.016374270856613294,
318
- "min": 0.011526005501703669,
319
- "max": 0.02352414818791052,
320
- "count": 85
321
- },
322
- "Striker.Losses.PolicyLoss.sum": {
323
- "value": 0.016374270856613294,
324
- "min": 0.011526005501703669,
325
- "max": 0.02352414818791052,
326
- "count": 85
327
- },
328
- "Striker.Losses.ValueLoss.mean": {
329
- "value": 0.023624402284622193,
330
- "min": 0.0006618854221111784,
331
- "max": 0.0355209739257892,
332
- "count": 85
333
- },
334
- "Striker.Losses.ValueLoss.sum": {
335
- "value": 0.023624402284622193,
336
- "min": 0.0006618854221111784,
337
- "max": 0.0355209739257892,
338
- "count": 85
339
- },
340
- "Striker.Losses.BaselineLoss.mean": {
341
- "value": 0.02373644212881724,
342
- "min": 0.0006652884049496304,
343
- "max": 0.03594138734042644,
344
- "count": 85
345
- },
346
- "Striker.Losses.BaselineLoss.sum": {
347
- "value": 0.02373644212881724,
348
- "min": 0.0006652884049496304,
349
- "max": 0.03594138734042644,
350
- "count": 85
351
- },
352
- "Striker.Policy.LearningRate.mean": {
353
- "value": 0.0003,
354
- "min": 0.0003,
355
- "max": 0.0003,
356
- "count": 85
357
- },
358
- "Striker.Policy.LearningRate.sum": {
359
- "value": 0.0003,
360
- "min": 0.0003,
361
- "max": 0.0003,
362
- "count": 85
363
- },
364
- "Striker.Policy.Epsilon.mean": {
365
- "value": 0.20000000000000007,
366
- "min": 0.20000000000000007,
367
- "max": 0.20000000000000007,
368
- "count": 85
369
- },
370
- "Striker.Policy.Epsilon.sum": {
371
- "value": 0.20000000000000007,
372
- "min": 0.20000000000000007,
373
- "max": 0.20000000000000007,
374
- "count": 85
375
- },
376
- "Striker.Policy.Beta.mean": {
377
- "value": 0.005000000000000001,
378
- "min": 0.005000000000000001,
379
- "max": 0.005000000000000001,
380
- "count": 85
381
- },
382
- "Striker.Policy.Beta.sum": {
383
- "value": 0.005000000000000001,
384
- "min": 0.005000000000000001,
385
- "max": 0.005000000000000001,
386
- "count": 85
387
  }
388
  },
389
  "metadata": {
390
  "timer_format_version": "0.1.0",
391
- "start_time_seconds": "1702974733",
392
  "python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]",
393
- "command_line_arguments": "/home/operario/anaconda3/envs/mlagents/bin/mlagents-learn ml-agents/config/poca/StrikersVsGoalie.yaml --run-id=Proba",
394
  "mlagents_version": "1.0.0",
395
  "mlagents_envs_version": "1.0.0",
396
  "communication_protocol_version": "1.5.0",
397
  "pytorch_version": "2.1.1+cu121",
398
  "numpy_version": "1.21.2",
399
- "end_time_seconds": "1702980564"
400
  },
401
- "total": 5831.547219161999,
402
  "count": 1,
403
- "self": 0.004055126997627667,
404
  "children": {
405
  "run_training.setup": {
406
- "total": 0.010500086000320152,
407
  "count": 1,
408
- "self": 0.010500086000320152
409
  },
410
  "TrainerController.start_learning": {
411
- "total": 5831.532663949001,
412
  "count": 1,
413
- "self": 5.20653796030183,
414
  "children": {
415
  "TrainerController._reset_env": {
416
- "total": 5.0229465030006395,
417
- "count": 19,
418
- "self": 5.0229465030006395
419
  },
420
  "TrainerController.advance": {
421
- "total": 5821.1293753806995,
422
- "count": 360326,
423
- "self": 6.000143237119119,
424
  "children": {
425
  "env_step": {
426
- "total": 3709.103530842298,
427
- "count": 360326,
428
- "self": 3106.020965303721,
429
  "children": {
430
  "SubprocessEnvManager._take_step": {
431
- "total": 599.6483402995009,
432
- "count": 360326,
433
- "self": 29.682954025429353,
434
  "children": {
435
  "TorchPolicy.evaluate": {
436
- "total": 569.9653862740715,
437
- "count": 685680,
438
- "self": 569.9653862740715
439
  }
440
  }
441
  },
442
  "workers": {
443
- "total": 3.434225239076113,
444
- "count": 360325,
445
  "self": 0.0,
446
  "children": {
447
  "worker_root": {
448
- "total": 5702.365667279509,
449
- "count": 360325,
450
  "is_parallel": true,
451
- "self": 3209.9978119108673,
452
  "children": {
453
  "run_training.setup": {
454
  "total": 0.0,
@@ -457,48 +193,48 @@
457
  "self": 0.0,
458
  "children": {
459
  "steps_from_proto": {
460
- "total": 0.0012914009994346998,
461
  "count": 2,
462
  "is_parallel": true,
463
- "self": 0.0003063059994019568,
464
  "children": {
465
  "_process_rank_one_or_two_observation": {
466
- "total": 0.000985095000032743,
467
- "count": 6,
468
  "is_parallel": true,
469
- "self": 0.000985095000032743
470
  }
471
  }
472
  },
473
  "UnityEnvironment.step": {
474
- "total": 0.020311455999944883,
475
  "count": 1,
476
  "is_parallel": true,
477
- "self": 0.0004727990008177585,
478
  "children": {
479
  "UnityEnvironment._generate_step_input": {
480
- "total": 0.00023028099985822337,
481
  "count": 1,
482
  "is_parallel": true,
483
- "self": 0.00023028099985822337
484
  },
485
  "communicator.exchange": {
486
- "total": 0.01830136799981119,
487
  "count": 1,
488
  "is_parallel": true,
489
- "self": 0.01830136799981119
490
  },
491
  "steps_from_proto": {
492
- "total": 0.00130700799945771,
493
  "count": 2,
494
  "is_parallel": true,
495
- "self": 0.0002533939996283152,
496
  "children": {
497
  "_process_rank_one_or_two_observation": {
498
- "total": 0.0010536139998293947,
499
- "count": 6,
500
  "is_parallel": true,
501
- "self": 0.0010536139998293947
502
  }
503
  }
504
  }
@@ -507,52 +243,38 @@
507
  }
508
  },
509
  "UnityEnvironment.step": {
510
- "total": 2492.3496092696414,
511
- "count": 360324,
512
  "is_parallel": true,
513
- "self": 116.63121924221014,
514
  "children": {
515
  "UnityEnvironment._generate_step_input": {
516
- "total": 65.49518589350373,
517
- "count": 360324,
518
  "is_parallel": true,
519
- "self": 65.49518589350373
520
  },
521
  "communicator.exchange": {
522
- "total": 1983.5648633142227,
523
- "count": 360324,
524
  "is_parallel": true,
525
- "self": 1983.5648633142227
526
  },
527
  "steps_from_proto": {
528
- "total": 326.6583408197048,
529
- "count": 720648,
530
  "is_parallel": true,
531
- "self": 63.17436062367142,
532
  "children": {
533
  "_process_rank_one_or_two_observation": {
534
- "total": 263.4839801960334,
535
- "count": 2161944,
536
  "is_parallel": true,
537
- "self": 263.4839801960334
538
  }
539
  }
540
  }
541
  }
542
- },
543
- "steps_from_proto": {
544
- "total": 0.018246098999952665,
545
- "count": 36,
546
- "is_parallel": true,
547
- "self": 0.003693893993840902,
548
- "children": {
549
- "_process_rank_one_or_two_observation": {
550
- "total": 0.014552205006111762,
551
- "count": 108,
552
- "is_parallel": true,
553
- "self": 0.014552205006111762
554
- }
555
- }
556
  }
557
  }
558
  }
@@ -561,47 +283,33 @@
561
  }
562
  },
563
  "trainer_advance": {
564
- "total": 2106.0257013012824,
565
- "count": 720650,
566
- "self": 37.160836643673065,
567
  "children": {
568
  "process_trajectory": {
569
- "total": 342.6532845196207,
570
- "count": 720650,
571
- "self": 341.9924087116178,
572
- "children": {
573
- "RLTrainer._checkpoint": {
574
- "total": 0.6608758080028565,
575
- "count": 6,
576
- "self": 0.6608758080028565
577
- }
578
- }
579
- },
580
- "_update_policy": {
581
- "total": 1726.2115801379887,
582
- "count": 173,
583
- "self": 296.8522167799774,
584
- "children": {
585
- "TorchPOCAOptimizer.update": {
586
- "total": 1429.3593633580113,
587
- "count": 5190,
588
- "self": 1429.3593633580113
589
- }
590
- }
591
  }
592
  }
593
  }
594
  }
595
  },
 
 
 
 
 
596
  "TrainerController._save_models": {
597
- "total": 0.1738041049993626,
598
  "count": 1,
599
- "self": 0.00232954700186383,
600
  "children": {
601
  "RLTrainer._checkpoint": {
602
- "total": 0.17147455799749878,
603
- "count": 2,
604
- "self": 0.17147455799749878
605
  }
606
  }
607
  }
 
1
  {
2
  "name": "root",
3
  "gauges": {
4
+ "SoccerTwos.Policy.Entropy.mean": {
5
+ "value": 3.295698404312134,
6
+ "min": 3.295698404312134,
7
+ "max": 3.295698404312134,
8
+ "count": 1
9
+ },
10
+ "SoccerTwos.Policy.Entropy.sum": {
11
+ "value": 105462.3515625,
12
+ "min": 105462.3515625,
13
+ "max": 105462.3515625,
14
+ "count": 1
15
+ },
16
+ "SoccerTwos.Environment.EpisodeLength.mean": {
17
+ "value": 510.3,
18
+ "min": 510.3,
19
+ "max": 510.3,
20
+ "count": 1
21
+ },
22
+ "SoccerTwos.Environment.EpisodeLength.sum": {
23
+ "value": 20412.0,
24
+ "min": 20412.0,
25
+ "max": 20412.0,
26
+ "count": 1
27
+ },
28
+ "SoccerTwos.Self-play.ELO.mean": {
29
+ "value": 1198.6983882804839,
30
+ "min": 1198.6983882804839,
31
+ "max": 1198.6983882804839,
32
+ "count": 1
33
+ },
34
+ "SoccerTwos.Self-play.ELO.sum": {
35
+ "value": 16781.777435926775,
36
+ "min": 16781.777435926775,
37
+ "max": 16781.777435926775,
38
+ "count": 1
39
+ },
40
+ "SoccerTwos.Step.mean": {
41
+ "value": 9226.0,
42
+ "min": 9226.0,
43
+ "max": 9226.0,
44
+ "count": 1
45
+ },
46
+ "SoccerTwos.Step.sum": {
47
+ "value": 9226.0,
48
+ "min": 9226.0,
49
+ "max": 9226.0,
50
+ "count": 1
51
+ },
52
+ "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
53
+ "value": 0.06891250610351562,
54
+ "min": 0.06891250610351562,
55
+ "max": 0.06891250610351562,
56
+ "count": 1
57
+ },
58
+ "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
59
+ "value": 1.3093376159667969,
60
+ "min": 1.3093376159667969,
61
+ "max": 1.3093376159667969,
62
+ "count": 1
63
+ },
64
+ "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
65
+ "value": 0.06894762814044952,
66
+ "min": 0.06894762814044952,
67
+ "max": 0.06894762814044952,
68
+ "count": 1
69
+ },
70
+ "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
71
+ "value": 1.3100049495697021,
72
+ "min": 1.3100049495697021,
73
+ "max": 1.3100049495697021,
74
+ "count": 1
75
+ },
76
+ "SoccerTwos.Environment.CumulativeReward.mean": {
77
+ "value": 0.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  "min": 0.0,
79
+ "max": 0.0,
80
+ "count": 1
81
  },
82
+ "SoccerTwos.Environment.CumulativeReward.sum": {
83
+ "value": 0.0,
84
  "min": 0.0,
85
+ "max": 0.0,
86
+ "count": 1
87
+ },
88
+ "SoccerTwos.Policy.ExtrinsicReward.mean": {
89
+ "value": -0.35837894991824504,
90
+ "min": -0.35837894991824504,
91
+ "max": -0.35837894991824504,
92
+ "count": 1
93
+ },
94
+ "SoccerTwos.Policy.ExtrinsicReward.sum": {
95
+ "value": -6.809200048446655,
96
+ "min": -6.809200048446655,
97
+ "max": -6.809200048446655,
98
+ "count": 1
99
+ },
100
+ "SoccerTwos.Environment.GroupCumulativeReward.mean": {
101
+ "value": -0.35837894991824504,
102
+ "min": -0.35837894991824504,
103
+ "max": -0.35837894991824504,
104
+ "count": 1
105
+ },
106
+ "SoccerTwos.Environment.GroupCumulativeReward.sum": {
107
+ "value": -6.809200048446655,
108
+ "min": -6.809200048446655,
109
+ "max": -6.809200048446655,
110
+ "count": 1
111
+ },
112
+ "SoccerTwos.IsTraining.mean": {
113
  "value": 1.0,
114
  "min": 1.0,
115
  "max": 1.0,
116
+ "count": 1
117
  },
118
+ "SoccerTwos.IsTraining.sum": {
119
  "value": 1.0,
120
  "min": 1.0,
121
  "max": 1.0,
122
+ "count": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  }
124
  },
125
  "metadata": {
126
  "timer_format_version": "0.1.0",
127
+ "start_time_seconds": "1703231750",
128
  "python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]",
129
+ "command_line_arguments": "/home/operario/anaconda3/envs/mlagents/bin/mlagents-learn SoccerTwos.yaml --run-id=Soccer",
130
  "mlagents_version": "1.0.0",
131
  "mlagents_envs_version": "1.0.0",
132
  "communication_protocol_version": "1.5.0",
133
  "pytorch_version": "2.1.1+cu121",
134
  "numpy_version": "1.21.2",
135
+ "end_time_seconds": "1703231769"
136
  },
137
+ "total": 18.828908071000114,
138
  "count": 1,
139
+ "self": 0.0038752350001232116,
140
  "children": {
141
  "run_training.setup": {
142
+ "total": 0.011371966999831784,
143
  "count": 1,
144
+ "self": 0.011371966999831784
145
  },
146
  "TrainerController.start_learning": {
147
+ "total": 18.81366086900016,
148
  "count": 1,
149
+ "self": 0.012658913007271622,
150
  "children": {
151
  "TrainerController._reset_env": {
152
+ "total": 6.519004453999969,
153
+ "count": 1,
154
+ "self": 6.519004453999969
155
  },
156
  "TrainerController.advance": {
157
+ "total": 12.224295807993258,
158
+ "count": 1005,
159
+ "self": 0.012754022996887215,
160
  "children": {
161
  "env_step": {
162
+ "total": 11.209075801001745,
163
+ "count": 1005,
164
+ "self": 9.642230745010238,
165
  "children": {
166
  "SubprocessEnvManager._take_step": {
167
+ "total": 1.558794789998501,
168
+ "count": 1005,
169
+ "self": 0.07714688002579351,
170
  "children": {
171
  "TorchPolicy.evaluate": {
172
+ "total": 1.4816479099727076,
173
+ "count": 2000,
174
+ "self": 1.4816479099727076
175
  }
176
  }
177
  },
178
  "workers": {
179
+ "total": 0.008050265993006178,
180
+ "count": 1005,
181
  "self": 0.0,
182
  "children": {
183
  "worker_root": {
184
+ "total": 18.191109271007917,
185
+ "count": 1005,
186
  "is_parallel": true,
187
+ "self": 9.978554264029299,
188
  "children": {
189
  "run_training.setup": {
190
  "total": 0.0,
 
193
  "self": 0.0,
194
  "children": {
195
  "steps_from_proto": {
196
+ "total": 0.0016817149999042158,
197
  "count": 2,
198
  "is_parallel": true,
199
+ "self": 0.0005443020008897292,
200
  "children": {
201
  "_process_rank_one_or_two_observation": {
202
+ "total": 0.0011374129990144866,
203
+ "count": 8,
204
  "is_parallel": true,
205
+ "self": 0.0011374129990144866
206
  }
207
  }
208
  },
209
  "UnityEnvironment.step": {
210
+ "total": 0.0247552379996705,
211
  "count": 1,
212
  "is_parallel": true,
213
+ "self": 0.0005118170001878752,
214
  "children": {
215
  "UnityEnvironment._generate_step_input": {
216
+ "total": 0.0006443479996960377,
217
  "count": 1,
218
  "is_parallel": true,
219
+ "self": 0.0006443479996960377
220
  },
221
  "communicator.exchange": {
222
+ "total": 0.02191044499977579,
223
  "count": 1,
224
  "is_parallel": true,
225
+ "self": 0.02191044499977579
226
  },
227
  "steps_from_proto": {
228
+ "total": 0.0016886280000107945,
229
  "count": 2,
230
  "is_parallel": true,
231
+ "self": 0.0003622190006353776,
232
  "children": {
233
  "_process_rank_one_or_two_observation": {
234
+ "total": 0.001326408999375417,
235
+ "count": 8,
236
  "is_parallel": true,
237
+ "self": 0.001326408999375417
238
  }
239
  }
240
  }
 
243
  }
244
  },
245
  "UnityEnvironment.step": {
246
+ "total": 8.212555006978619,
247
+ "count": 1004,
248
  "is_parallel": true,
249
+ "self": 0.3458426219631292,
250
  "children": {
251
  "UnityEnvironment._generate_step_input": {
252
+ "total": 0.20143538798947702,
253
+ "count": 1004,
254
  "is_parallel": true,
255
+ "self": 0.20143538798947702
256
  },
257
  "communicator.exchange": {
258
+ "total": 6.67237666901201,
259
+ "count": 1004,
260
  "is_parallel": true,
261
+ "self": 6.67237666901201
262
  },
263
  "steps_from_proto": {
264
+ "total": 0.9929003280140023,
265
+ "count": 2008,
266
  "is_parallel": true,
267
+ "self": 0.18314762201407575,
268
  "children": {
269
  "_process_rank_one_or_two_observation": {
270
+ "total": 0.8097527059999265,
271
+ "count": 8032,
272
  "is_parallel": true,
273
+ "self": 0.8097527059999265
274
  }
275
  }
276
  }
277
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  }
279
  }
280
  }
 
283
  }
284
  },
285
  "trainer_advance": {
286
+ "total": 1.0024659839946253,
287
+ "count": 1005,
288
+ "self": 0.05726587000845029,
289
  "children": {
290
  "process_trajectory": {
291
+ "total": 0.945200113986175,
292
+ "count": 1005,
293
+ "self": 0.945200113986175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  }
295
  }
296
  }
297
  }
298
  },
299
+ "trainer_threads": {
300
+ "total": 6.089994712965563e-07,
301
+ "count": 1,
302
+ "self": 6.089994712965563e-07
303
+ },
304
  "TrainerController._save_models": {
305
+ "total": 0.05770108500018978,
306
  "count": 1,
307
+ "self": 0.0008857650000209105,
308
  "children": {
309
  "RLTrainer._checkpoint": {
310
+ "total": 0.05681532000016887,
311
+ "count": 1,
312
+ "self": 0.05681532000016887
313
  }
314
  }
315
  }
run_logs/training_status.json CHANGED
@@ -1,101 +1,24 @@
1
  {
2
- "Goalie": {
 
3
  "checkpoints": [
4
  {
5
- "steps": 499419,
6
- "file_path": "results/Proba/Goalie/Goalie-499419.onnx",
7
- "reward": 0.7520077661252939,
8
- "creation_time": 1702976047.3382895,
9
  "auxillary_file_paths": [
10
- "results/Proba/Goalie/Goalie-499419.pt"
11
- ]
12
- },
13
- {
14
- "steps": 999974,
15
- "file_path": "results/Proba/Goalie/Goalie-999974.onnx",
16
- "reward": 0.3689773957824932,
17
- "creation_time": 1702977402.499321,
18
- "auxillary_file_paths": [
19
- "results/Proba/Goalie/Goalie-999974.pt"
20
- ]
21
- },
22
- {
23
- "steps": 1499980,
24
- "file_path": "results/Proba/Goalie/Goalie-1499980.onnx",
25
- "reward": 0.042800000247855984,
26
- "creation_time": 1702979254.7808285,
27
- "auxillary_file_paths": [
28
- "results/Proba/Goalie/Goalie-1499980.pt"
29
- ]
30
- },
31
- {
32
- "steps": 1807818,
33
- "file_path": "results/Proba/Goalie/Goalie-1807818.onnx",
34
- "reward": null,
35
- "creation_time": 1702980564.4739382,
36
- "auxillary_file_paths": [
37
- "results/Proba/Goalie/Goalie-1807818.pt"
38
- ]
39
- }
40
- ],
41
- "elo": 771.3258240415493,
42
- "final_checkpoint": {
43
- "steps": 1807818,
44
- "file_path": "results/Proba/Goalie.onnx",
45
- "reward": null,
46
- "creation_time": 1702980564.4739382,
47
- "auxillary_file_paths": [
48
- "results/Proba/Goalie/Goalie-1807818.pt"
49
- ]
50
- }
51
- },
52
- "Striker": {
53
- "checkpoints": [
54
- {
55
- "steps": 499317,
56
- "file_path": "results/Proba/Striker/Striker-499317.onnx",
57
- "reward": -0.5727692818125854,
58
- "creation_time": 1702976347.5755029,
59
- "auxillary_file_paths": [
60
- "results/Proba/Striker/Striker-499317.pt"
61
- ]
62
- },
63
- {
64
- "steps": 999985,
65
- "file_path": "results/Proba/Striker/Striker-999985.onnx",
66
- "reward": -0.07280247349200057,
67
- "creation_time": 1702977703.5105634,
68
- "auxillary_file_paths": [
69
- "results/Proba/Striker/Striker-999985.pt"
70
- ]
71
- },
72
- {
73
- "steps": 1499979,
74
- "file_path": "results/Proba/Striker/Striker-1499979.onnx",
75
- "reward": -0.05407068019914896,
76
- "creation_time": 1702979607.5258744,
77
- "auxillary_file_paths": [
78
- "results/Proba/Striker/Striker-1499979.pt"
79
- ]
80
- },
81
- {
82
- "steps": 1802718,
83
- "file_path": "results/Proba/Striker/Striker-1802718.onnx",
84
- "reward": -0.03104210407228062,
85
- "creation_time": 1702980564.5476835,
86
- "auxillary_file_paths": [
87
- "results/Proba/Striker/Striker-1802718.pt"
88
  ]
89
  }
90
  ],
91
- "elo": 1755.7615527828686,
92
  "final_checkpoint": {
93
- "steps": 1802718,
94
- "file_path": "results/Proba/Striker.onnx",
95
- "reward": -0.03104210407228062,
96
- "creation_time": 1702980564.5476835,
97
  "auxillary_file_paths": [
98
- "results/Proba/Striker/Striker-1802718.pt"
99
  ]
100
  }
101
  },
 
1
  {
2
+ "SoccerTwos": {
3
+ "elo": 1197.0439391453008,
4
  "checkpoints": [
5
  {
6
+ "steps": 10226,
7
+ "file_path": "results/Soccer/SoccerTwos/SoccerTwos-10226.onnx",
8
+ "reward": 0.0,
9
+ "creation_time": 1703231769.47281,
10
  "auxillary_file_paths": [
11
+ "results/Soccer/SoccerTwos/SoccerTwos-10226.pt"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  ]
13
  }
14
  ],
 
15
  "final_checkpoint": {
16
+ "steps": 10226,
17
+ "file_path": "results/Soccer/SoccerTwos.onnx",
18
+ "reward": 0.0,
19
+ "creation_time": 1703231769.47281,
20
  "auxillary_file_paths": [
21
+ "results/Soccer/SoccerTwos/SoccerTwos-10226.pt"
22
  ]
23
  }
24
  },