thomaspalomares committed on
Commit
56325d9
1 Parent(s): 4209da8

Second Push

Browse files
SoccerTwos.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c86cfa26704c35c1c5707ef826b4ac0a299203878b09bf5386313019de8c091b
3
  size 1768747
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc71bccf40e2b87b4462c87f12c57b275d9424814637cf91ea03803f61269c54
3
  size 1768747
SoccerTwos/SoccerTwos-345772.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc71bccf40e2b87b4462c87f12c57b275d9424814637cf91ea03803f61269c54
3
+ size 1768747
SoccerTwos/SoccerTwos-345772.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bc04456f9ddbc3235f45fd3c5a39542a9cd8fc8a067262f8d8b20c38307e9cd
3
+ size 28430234
SoccerTwos/checkpoint.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ed3f966c7f5c208a4dff048ac240ab39641d70aad9c54e09a0d741bdb0afaeb
3
  size 28429366
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c59038d4a3e5c47ef63bc898dd23db42b9c95e5a3fd1f2a48f87c7b8b7199b8
3
  size 28429366
SoccerTwos/events.out.tfevents.1721744065.PaloLenovo.12958.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3497bf9dbb4960737712adff118184b96e12796b6470a5af765ebbb8cdfc35e
3
+ size 231769
config.json CHANGED
@@ -1 +1 @@
1
- {"default_settings": null, "behaviors": {"SoccerTwos": {"trainer_type": "poca", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "constant", "beta_schedule": "constant", "epsilon_schedule": "constant"}, "checkpoint_interval": 500000, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 5, "even_checkpoints": false, "max_steps": 50000000, "time_horizon": 1000, "summary_freq": 10000, "threaded": false, "self_play": {"save_steps": 50000, "team_change": 200000, "swap_steps": 2000, "window": 10, "play_against_latest_model_ratio": 0.5, "initial_elo": 1200.0}, "behavioral_cloning": null}}, "env_settings": {"env_path": "./training-envs-executables/SoccerTwos/SoccerTwos.app", "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": true, "no_graphics_monitor": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "SoccerTwos", "initialize_from": null, "load_model": false, "resume": false, "force": true, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}
 
1
+ {"default_settings": null, "behaviors": {"SoccerTwos": {"trainer_type": "poca", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "constant", "beta_schedule": "constant", "epsilon_schedule": "constant"}, "checkpoint_interval": 500000, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 5, "even_checkpoints": false, "max_steps": 50000000, "time_horizon": 1000, "summary_freq": 10000, "threaded": false, "self_play": {"save_steps": 50000, "team_change": 200000, "swap_steps": 2000, "window": 10, "play_against_latest_model_ratio": 0.5, "initial_elo": 1200.0}, "behavioral_cloning": null}}, "env_settings": {"env_path": "./training-envs-executables/SoccerTwos/SoccerTwos.app", "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": true, "no_graphics_monitor": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "SoccerTwos", "initialize_from": null, "load_model": false, "resume": true, "force": false, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}
configuration.yaml CHANGED
@@ -74,8 +74,8 @@ checkpoint_settings:
74
  run_id: SoccerTwos
75
  initialize_from: null
76
  load_model: false
77
- resume: false
78
- force: true
79
  train_model: false
80
  inference: false
81
  results_dir: results
 
74
  run_id: SoccerTwos
75
  initialize_from: null
76
  load_model: false
77
+ resume: true
78
+ force: false
79
  train_model: false
80
  inference: false
81
  results_dir: results
run_logs/Player-0.log CHANGED
@@ -12,7 +12,7 @@ NullGfxDevice:
12
  Vendor: Unity Technologies
13
  FMOD initialized on nosound output
14
  Begin MonoManager ReloadAssembly
15
- - Completed reload, in 0.066 seconds
16
  ERROR: Shader Sprites/Default shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
17
  ERROR: Shader Sprites/Mask shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
18
  ERROR: Shader Legacy Shaders/VertexLit shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
@@ -37,7 +37,7 @@ ERROR: Shader Standard shader is not supported on this GPU (none of subshaders/f
37
  WARNING: Shader Unsupported: 'Standard' - All subshaders removed
38
  WARNING: Shader Did you use #pragma only_renderers and omit this platform?
39
  WARNING: Shader If subshaders removal was intentional, you may have forgotten turning Fallback off?
40
- UnloadTime: 0.938788 ms
41
  Registered Communicator in Agent.
42
  ERROR: Shader UI/Default shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
43
  requesting resize 84 x 84
@@ -54,12 +54,124 @@ TOO LONG
54
  TOO LONG
55
  TOO LONG
56
  TOO LONG
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  Setting up 8 worker threads for Enlighten.
58
  Memory Statistics:
59
  [ALLOC_TEMP_TLS] TLS Allocator
60
  StackAllocators :
61
  [ALLOC_TEMP_MAIN]
62
- Peak usage frame count: [8.0 KB-16.0 KB]: 897 frames, [2.0 MB-4.0 MB]: 1 frames
63
  Initial Block Size 4.0 MB
64
  Current Block Size 4.0 MB
65
  Peak Allocated Bytes 2.1 MB
@@ -251,13 +363,13 @@ Memory Statistics:
251
  Used Block count 1
252
  Peak Allocated bytes 1.0 MB
253
  [ALLOC_DEFAULT_MAIN]
254
- Peak usage frame count: [4.0 MB-8.0 MB]: 898 frames
255
  Requested Block Size 16.0 MB
256
  Peak Block count 1
257
  Peak Allocated memory 4.9 MB
258
  Peak Large allocation bytes 0 B
259
  [ALLOC_DEFAULT_THREAD]
260
- Peak usage frame count: [16.0 MB-32.0 MB]: 898 frames
261
  Requested Block Size 16.0 MB
262
  Peak Block count 1
263
  Peak Allocated memory 17.3 MB
@@ -289,13 +401,13 @@ Memory Statistics:
289
  Used Block count 1
290
  Peak Allocated bytes 1.0 MB
291
  [ALLOC_GFX_MAIN]
292
- Peak usage frame count: [32.0 KB-64.0 KB]: 897 frames, [64.0 KB-128.0 KB]: 1 frames
293
  Requested Block Size 16.0 MB
294
  Peak Block count 1
295
  Peak Allocated memory 65.6 KB
296
  Peak Large allocation bytes 0 B
297
  [ALLOC_GFX_THREAD]
298
- Peak usage frame count: [128.0 KB-256.0 KB]: 898 frames
299
  Requested Block Size 16.0 MB
300
  Peak Block count 1
301
  Peak Allocated memory 173.5 KB
@@ -307,13 +419,13 @@ Memory Statistics:
307
  Used Block count 1
308
  Peak Allocated bytes 1.0 MB
309
  [ALLOC_CACHEOBJECTS_MAIN]
310
- Peak usage frame count: [0.5 MB-1.0 MB]: 898 frames
311
  Requested Block Size 4.0 MB
312
  Peak Block count 1
313
  Peak Allocated memory 0.6 MB
314
  Peak Large allocation bytes 0 B
315
  [ALLOC_CACHEOBJECTS_THREAD]
316
- Peak usage frame count: [0.5 MB-1.0 MB]: 897 frames, [4.0 MB-8.0 MB]: 1 frames
317
  Requested Block Size 4.0 MB
318
  Peak Block count 2
319
  Peak Allocated memory 4.5 MB
@@ -325,13 +437,13 @@ Memory Statistics:
325
  Used Block count 1
326
  Peak Allocated bytes 1.0 MB
327
  [ALLOC_TYPETREE_MAIN]
328
- Peak usage frame count: [0-1.0 KB]: 898 frames
329
  Requested Block Size 2.0 MB
330
  Peak Block count 1
331
  Peak Allocated memory 1.0 KB
332
  Peak Large allocation bytes 0 B
333
  [ALLOC_TYPETREE_THREAD]
334
- Peak usage frame count: [1.0 KB-2.0 KB]: 897 frames, [2.0 KB-4.0 KB]: 1 frames
335
  Requested Block Size 2.0 MB
336
  Peak Block count 1
337
  Peak Allocated memory 2.2 KB
 
12
  Vendor: Unity Technologies
13
  FMOD initialized on nosound output
14
  Begin MonoManager ReloadAssembly
15
+ - Completed reload, in 0.083 seconds
16
  ERROR: Shader Sprites/Default shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
17
  ERROR: Shader Sprites/Mask shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
18
  ERROR: Shader Legacy Shaders/VertexLit shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
 
37
  WARNING: Shader Unsupported: 'Standard' - All subshaders removed
38
  WARNING: Shader Did you use #pragma only_renderers and omit this platform?
39
  WARNING: Shader If subshaders removal was intentional, you may have forgotten turning Fallback off?
40
+ UnloadTime: 0.771510 ms
41
  Registered Communicator in Agent.
42
  ERROR: Shader UI/Default shader is not supported on this GPU (none of subshaders/fallbacks are suitable)
43
  requesting resize 84 x 84
 
54
  TOO LONG
55
  TOO LONG
56
  TOO LONG
57
+ TOO LONG
58
+ TOO LONG
59
+ TOO LONG
60
+ TOO LONG
61
+ TOO LONG
62
+ TOO LONG
63
+ TOO LONG
64
+ TOO LONG
65
+ TOO LONG
66
+ TOO LONG
67
+ TOO LONG
68
+ TOO LONG
69
+ TOO LONG
70
+ TOO LONG
71
+ TOO LONG
72
+ TOO LONG
73
+ TOO LONG
74
+ TOO LONG
75
+ TOO LONG
76
+ TOO LONG
77
+ TOO LONG
78
+ TOO LONG
79
+ TOO LONG
80
+ TOO LONG
81
+ TOO LONG
82
+ TOO LONG
83
+ TOO LONG
84
+ TOO LONG
85
+ TOO LONG
86
+ TOO LONG
87
+ TOO LONG
88
+ TOO LONG
89
+ TOO LONG
90
+ TOO LONG
91
+ TOO LONG
92
+ TOO LONG
93
+ TOO LONG
94
+ TOO LONG
95
+ TOO LONG
96
+ TOO LONG
97
+ TOO LONG
98
+ TOO LONG
99
+ TOO LONG
100
+ TOO LONG
101
+ TOO LONG
102
+ TOO LONG
103
+ TOO LONG
104
+ TOO LONG
105
+ TOO LONG
106
+ TOO LONG
107
+ TOO LONG
108
+ TOO LONG
109
+ TOO LONG
110
+ TOO LONG
111
+ TOO LONG
112
+ TOO LONG
113
+ TOO LONG
114
+ TOO LONG
115
+ TOO LONG
116
+ TOO LONG
117
+ TOO LONG
118
+ TOO LONG
119
+ TOO LONG
120
+ TOO LONG
121
+ TOO LONG
122
+ TOO LONG
123
+ TOO LONG
124
+ TOO LONG
125
+ TOO LONG
126
+ TOO LONG
127
+ TOO LONG
128
+ TOO LONG
129
+ TOO LONG
130
+ TOO LONG
131
+ TOO LONG
132
+ TOO LONG
133
+ TOO LONG
134
+ TOO LONG
135
+ TOO LONG
136
+ TOO LONG
137
+ TOO LONG
138
+ TOO LONG
139
+ TOO LONG
140
+ TOO LONG
141
+ TOO LONG
142
+ TOO LONG
143
+ TOO LONG
144
+ TOO LONG
145
+ TOO LONG
146
+ TOO LONG
147
+ TOO LONG
148
+ TOO LONG
149
+ TOO LONG
150
+ TOO LONG
151
+ TOO LONG
152
+ TOO LONG
153
+ TOO LONG
154
+ TOO LONG
155
+ TOO LONG
156
+ TOO LONG
157
+ TOO LONG
158
+ TOO LONG
159
+ TOO LONG
160
+ TOO LONG
161
+ TOO LONG
162
+ TOO LONG
163
+ TOO LONG
164
+ TOO LONG
165
+ TOO LONG
166
+ TOO LONG
167
+ TOO LONG
168
+ TOO LONG
169
  Setting up 8 worker threads for Enlighten.
170
  Memory Statistics:
171
  [ALLOC_TEMP_TLS] TLS Allocator
172
  StackAllocators :
173
  [ALLOC_TEMP_MAIN]
174
+ Peak usage frame count: [8.0 KB-16.0 KB]: 6040 frames, [2.0 MB-4.0 MB]: 1 frames
175
  Initial Block Size 4.0 MB
176
  Current Block Size 4.0 MB
177
  Peak Allocated Bytes 2.1 MB
 
363
  Used Block count 1
364
  Peak Allocated bytes 1.0 MB
365
  [ALLOC_DEFAULT_MAIN]
366
+ Peak usage frame count: [4.0 MB-8.0 MB]: 6041 frames
367
  Requested Block Size 16.0 MB
368
  Peak Block count 1
369
  Peak Allocated memory 4.9 MB
370
  Peak Large allocation bytes 0 B
371
  [ALLOC_DEFAULT_THREAD]
372
+ Peak usage frame count: [16.0 MB-32.0 MB]: 6041 frames
373
  Requested Block Size 16.0 MB
374
  Peak Block count 1
375
  Peak Allocated memory 17.3 MB
 
401
  Used Block count 1
402
  Peak Allocated bytes 1.0 MB
403
  [ALLOC_GFX_MAIN]
404
+ Peak usage frame count: [32.0 KB-64.0 KB]: 6040 frames, [64.0 KB-128.0 KB]: 1 frames
405
  Requested Block Size 16.0 MB
406
  Peak Block count 1
407
  Peak Allocated memory 65.6 KB
408
  Peak Large allocation bytes 0 B
409
  [ALLOC_GFX_THREAD]
410
+ Peak usage frame count: [128.0 KB-256.0 KB]: 6041 frames
411
  Requested Block Size 16.0 MB
412
  Peak Block count 1
413
  Peak Allocated memory 173.5 KB
 
419
  Used Block count 1
420
  Peak Allocated bytes 1.0 MB
421
  [ALLOC_CACHEOBJECTS_MAIN]
422
+ Peak usage frame count: [0.5 MB-1.0 MB]: 6041 frames
423
  Requested Block Size 4.0 MB
424
  Peak Block count 1
425
  Peak Allocated memory 0.6 MB
426
  Peak Large allocation bytes 0 B
427
  [ALLOC_CACHEOBJECTS_THREAD]
428
+ Peak usage frame count: [0.5 MB-1.0 MB]: 6040 frames, [4.0 MB-8.0 MB]: 1 frames
429
  Requested Block Size 4.0 MB
430
  Peak Block count 2
431
  Peak Allocated memory 4.5 MB
 
437
  Used Block count 1
438
  Peak Allocated bytes 1.0 MB
439
  [ALLOC_TYPETREE_MAIN]
440
+ Peak usage frame count: [0-1.0 KB]: 6041 frames
441
  Requested Block Size 2.0 MB
442
  Peak Block count 1
443
  Peak Allocated memory 1.0 KB
444
  Peak Large allocation bytes 0 B
445
  [ALLOC_TYPETREE_THREAD]
446
+ Peak usage frame count: [1.0 KB-2.0 KB]: 6040 frames, [2.0 KB-4.0 KB]: 1 frames
447
  Requested Block Size 2.0 MB
448
  Peak Block count 1
449
  Peak Allocated memory 2.2 KB
run_logs/timers.json CHANGED
@@ -2,261 +2,261 @@
2
  "name": "root",
3
  "gauges": {
4
  "SoccerTwos.Policy.Entropy.mean": {
5
- "value": 3.2841238975524902,
6
- "min": 3.2841238975524902,
7
- "max": 3.2957353591918945,
8
- "count": 4
9
  },
10
  "SoccerTwos.Policy.Entropy.sum": {
11
- "value": 84704.125,
12
- "min": 39097.6171875,
13
- "max": 105463.53125,
14
- "count": 4
15
  },
16
  "SoccerTwos.Environment.EpisodeLength.mean": {
17
- "value": 715.0,
18
- "min": 594.3,
19
- "max": 774.5714285714286,
20
- "count": 4
21
  },
22
  "SoccerTwos.Environment.EpisodeLength.sum": {
23
- "value": 20020.0,
24
- "min": 17404.0,
25
- "max": 23772.0,
26
- "count": 4
27
  },
28
  "SoccerTwos.Self-play.ELO.mean": {
29
- "value": 1202.393121297402,
30
- "min": 1200.6170397970752,
31
- "max": 1202.8440589090824,
32
- "count": 4
33
  },
34
  "SoccerTwos.Self-play.ELO.sum": {
35
- "value": 12023.931212974021,
36
- "min": 4811.3762356363295,
37
- "max": 14407.404477564902,
38
- "count": 4
39
  },
40
  "SoccerTwos.Step.mean": {
41
- "value": 39588.0,
42
- "min": 9906.0,
43
- "max": 39588.0,
44
- "count": 4
45
  },
46
  "SoccerTwos.Step.sum": {
47
- "value": 39588.0,
48
- "min": 9906.0,
49
- "max": 39588.0,
50
- "count": 4
51
  },
52
  "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
53
- "value": 0.084320068359375,
54
- "min": 0.0808270052075386,
55
- "max": 0.09010187536478043,
56
- "count": 4
57
  },
58
  "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
59
- "value": 1.18048095703125,
60
- "min": 1.0507510900497437,
61
- "max": 1.6218231916427612,
62
- "count": 4
63
  },
64
  "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
65
- "value": 0.08006962388753891,
66
- "min": 0.07616598159074783,
67
- "max": 0.09002643078565598,
68
- "count": 4
69
  },
70
  "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
71
- "value": 1.1209747791290283,
72
- "min": 0.9901577234268188,
73
- "max": 1.6204432249069214,
74
- "count": 4
75
  },
76
  "SoccerTwos.Environment.CumulativeReward.mean": {
77
  "value": 0.0,
78
  "min": 0.0,
79
  "max": 0.0,
80
- "count": 4
81
  },
82
  "SoccerTwos.Environment.CumulativeReward.sum": {
83
  "value": 0.0,
84
  "min": 0.0,
85
  "max": 0.0,
86
- "count": 4
87
  },
88
  "SoccerTwos.Policy.ExtrinsicReward.mean": {
89
- "value": -0.11011428918157305,
90
- "min": -0.11011428918157305,
91
- "max": 0.2417538441144503,
92
- "count": 4
93
  },
94
  "SoccerTwos.Policy.ExtrinsicReward.sum": {
95
- "value": -1.5416000485420227,
96
- "min": -1.5416000485420227,
97
- "max": 3.142799973487854,
98
- "count": 4
99
  },
100
  "SoccerTwos.Environment.GroupCumulativeReward.mean": {
101
- "value": -0.11011428918157305,
102
- "min": -0.11011428918157305,
103
- "max": 0.2417538441144503,
104
- "count": 4
105
  },
106
  "SoccerTwos.Environment.GroupCumulativeReward.sum": {
107
- "value": -1.5416000485420227,
108
- "min": -1.5416000485420227,
109
- "max": 3.142799973487854,
110
- "count": 4
111
  },
112
  "SoccerTwos.IsTraining.mean": {
113
  "value": 1.0,
114
  "min": 1.0,
115
  "max": 1.0,
116
- "count": 4
117
  },
118
  "SoccerTwos.IsTraining.sum": {
119
  "value": 1.0,
120
  "min": 1.0,
121
  "max": 1.0,
122
- "count": 4
123
  },
124
  "SoccerTwos.Losses.PolicyLoss.mean": {
125
- "value": 0.014625760088286673,
126
- "min": 0.014625760088286673,
127
- "max": 0.014625760088286673,
128
- "count": 1
129
  },
130
  "SoccerTwos.Losses.PolicyLoss.sum": {
131
- "value": 0.014625760088286673,
132
- "min": 0.014625760088286673,
133
- "max": 0.014625760088286673,
134
- "count": 1
135
  },
136
  "SoccerTwos.Losses.ValueLoss.mean": {
137
- "value": 0.0066176157755156355,
138
- "min": 0.0066176157755156355,
139
- "max": 0.0066176157755156355,
140
- "count": 1
141
  },
142
  "SoccerTwos.Losses.ValueLoss.sum": {
143
- "value": 0.0066176157755156355,
144
- "min": 0.0066176157755156355,
145
- "max": 0.0066176157755156355,
146
- "count": 1
147
  },
148
  "SoccerTwos.Losses.BaselineLoss.mean": {
149
- "value": 0.006111458611364166,
150
- "min": 0.006111458611364166,
151
- "max": 0.006111458611364166,
152
- "count": 1
153
  },
154
  "SoccerTwos.Losses.BaselineLoss.sum": {
155
- "value": 0.006111458611364166,
156
- "min": 0.006111458611364166,
157
- "max": 0.006111458611364166,
158
- "count": 1
159
  },
160
  "SoccerTwos.Policy.LearningRate.mean": {
161
  "value": 0.0003,
162
  "min": 0.0003,
163
  "max": 0.0003,
164
- "count": 1
165
  },
166
  "SoccerTwos.Policy.LearningRate.sum": {
167
  "value": 0.0003,
168
  "min": 0.0003,
169
  "max": 0.0003,
170
- "count": 1
171
  },
172
  "SoccerTwos.Policy.Epsilon.mean": {
173
  "value": 0.20000000000000007,
174
- "min": 0.20000000000000007,
175
  "max": 0.20000000000000007,
176
- "count": 1
177
  },
178
  "SoccerTwos.Policy.Epsilon.sum": {
179
  "value": 0.20000000000000007,
180
- "min": 0.20000000000000007,
181
  "max": 0.20000000000000007,
182
- "count": 1
183
  },
184
  "SoccerTwos.Policy.Beta.mean": {
185
  "value": 0.005000000000000001,
186
- "min": 0.005000000000000001,
187
  "max": 0.005000000000000001,
188
- "count": 1
189
  },
190
  "SoccerTwos.Policy.Beta.sum": {
191
  "value": 0.005000000000000001,
192
- "min": 0.005000000000000001,
193
  "max": 0.005000000000000001,
194
- "count": 1
195
  }
196
  },
197
  "metadata": {
198
  "timer_format_version": "0.1.0",
199
- "start_time_seconds": "1721743360",
200
  "python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
201
- "command_line_arguments": "/home/thopalomares/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics --force",
202
  "mlagents_version": "1.1.0.dev0",
203
  "mlagents_envs_version": "1.1.0.dev0",
204
  "communication_protocol_version": "1.5.0",
205
  "pytorch_version": "2.3.1+cu121",
206
  "numpy_version": "1.23.5",
207
- "end_time_seconds": "1721743614"
208
  },
209
- "total": 253.17795013400018,
210
  "count": 1,
211
- "self": 0.01201935200015214,
212
  "children": {
213
  "run_training.setup": {
214
- "total": 0.015367697000101543,
215
  "count": 1,
216
- "self": 0.015367697000101543
217
  },
218
  "TrainerController.start_learning": {
219
- "total": 253.15056308499993,
220
  "count": 1,
221
- "self": 0.11234658499756733,
222
  "children": {
223
  "TrainerController._reset_env": {
224
- "total": 1.5845080609999513,
225
- "count": 1,
226
- "self": 1.5845080609999513
227
  },
228
  "TrainerController.advance": {
229
- "total": 251.10908360900248,
230
- "count": 3011,
231
- "self": 0.12692364399936196,
232
  "children": {
233
  "env_step": {
234
- "total": 168.04880293299698,
235
- "count": 3011,
236
- "self": 150.43118493600537,
237
  "children": {
238
  "SubprocessEnvManager._take_step": {
239
- "total": 17.54256649900094,
240
- "count": 3011,
241
- "self": 0.7796117740113004,
242
  "children": {
243
  "TorchPolicy.evaluate": {
244
- "total": 16.76295472498964,
245
- "count": 5984,
246
- "self": 16.76295472498964
247
  }
248
  }
249
  },
250
  "workers": {
251
- "total": 0.07505149799067112,
252
- "count": 3011,
253
  "self": 0.0,
254
  "children": {
255
  "worker_root": {
256
- "total": 225.25777519000007,
257
- "count": 3011,
258
  "is_parallel": true,
259
- "self": 89.81766361299628,
260
  "children": {
261
  "run_training.setup": {
262
  "total": 0.0,
@@ -265,48 +265,48 @@
265
  "self": 0.0,
266
  "children": {
267
  "steps_from_proto": {
268
- "total": 0.003097265999940646,
269
  "count": 2,
270
  "is_parallel": true,
271
- "self": 0.0008827609999570996,
272
  "children": {
273
  "_process_rank_one_or_two_observation": {
274
- "total": 0.0022145049999835464,
275
  "count": 8,
276
  "is_parallel": true,
277
- "self": 0.0022145049999835464
278
  }
279
  }
280
  },
281
  "UnityEnvironment.step": {
282
- "total": 0.052691164999941975,
283
  "count": 1,
284
  "is_parallel": true,
285
- "self": 0.0008246209999924758,
286
  "children": {
287
  "UnityEnvironment._generate_step_input": {
288
- "total": 0.0005216829999881156,
289
  "count": 1,
290
  "is_parallel": true,
291
- "self": 0.0005216829999881156
292
  },
293
  "communicator.exchange": {
294
- "total": 0.048692567999978564,
295
  "count": 1,
296
  "is_parallel": true,
297
- "self": 0.048692567999978564
298
  },
299
  "steps_from_proto": {
300
- "total": 0.0026522929999828193,
301
  "count": 2,
302
  "is_parallel": true,
303
- "self": 0.0005639009998503752,
304
  "children": {
305
  "_process_rank_one_or_two_observation": {
306
- "total": 0.002088392000132444,
307
  "count": 8,
308
  "is_parallel": true,
309
- "self": 0.002088392000132444
310
  }
311
  }
312
  }
@@ -315,38 +315,52 @@
315
  }
316
  },
317
  "UnityEnvironment.step": {
318
- "total": 135.4401115770038,
319
- "count": 3010,
320
  "is_parallel": true,
321
- "self": 2.988635597010898,
322
  "children": {
323
  "UnityEnvironment._generate_step_input": {
324
- "total": 3.023054466004851,
325
- "count": 3010,
326
  "is_parallel": true,
327
- "self": 3.023054466004851
328
  },
329
  "communicator.exchange": {
330
- "total": 120.96275903098967,
331
- "count": 3010,
332
  "is_parallel": true,
333
- "self": 120.96275903098967
334
  },
335
  "steps_from_proto": {
336
- "total": 8.465662482998368,
337
- "count": 6020,
338
  "is_parallel": true,
339
- "self": 1.821456657001363,
340
  "children": {
341
  "_process_rank_one_or_two_observation": {
342
- "total": 6.644205825997005,
343
- "count": 24080,
344
  "is_parallel": true,
345
- "self": 6.644205825997005
346
  }
347
  }
348
  }
349
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
  }
351
  }
352
  }
@@ -355,24 +369,24 @@
355
  }
356
  },
357
  "trainer_advance": {
358
- "total": 82.93335703200614,
359
- "count": 3011,
360
- "self": 0.9061286740131891,
361
  "children": {
362
  "process_trajectory": {
363
- "total": 11.145806544992865,
364
- "count": 3011,
365
- "self": 11.145806544992865
366
  },
367
  "_update_policy": {
368
- "total": 70.88142181300009,
369
- "count": 2,
370
- "self": 6.314715992998117,
371
  "children": {
372
  "TorchPOCAOptimizer.update": {
373
- "total": 64.56670582000197,
374
- "count": 49,
375
- "self": 64.56670582000197
376
  }
377
  }
378
  }
@@ -381,19 +395,19 @@
381
  }
382
  },
383
  "trainer_threads": {
384
- "total": 1.8739999632089166e-06,
385
  "count": 1,
386
- "self": 1.8739999632089166e-06
387
  },
388
  "TrainerController._save_models": {
389
- "total": 0.3446229559999665,
390
  "count": 1,
391
- "self": 0.009960272999933295,
392
  "children": {
393
  "RLTrainer._checkpoint": {
394
- "total": 0.3346626830000332,
395
  "count": 1,
396
- "self": 0.3346626830000332
397
  }
398
  }
399
  }
 
2
  "name": "root",
3
  "gauges": {
4
  "SoccerTwos.Policy.Entropy.mean": {
5
+ "value": 3.215240478515625,
6
+ "min": 3.2082087993621826,
7
+ "max": 3.2788138389587402,
8
+ "count": 30
9
  },
10
  "SoccerTwos.Policy.Entropy.sum": {
11
+ "value": 58440.2109375,
12
+ "min": 6609.21240234375,
13
+ "max": 112997.453125,
14
+ "count": 30
15
  },
16
  "SoccerTwos.Environment.EpisodeLength.mean": {
17
+ "value": 852.5,
18
+ "min": 372.2307692307692,
19
+ "max": 999.0,
20
+ "count": 30
21
  },
22
  "SoccerTwos.Environment.EpisodeLength.sum": {
23
+ "value": 20460.0,
24
+ "min": 6204.0,
25
+ "max": 31716.0,
26
+ "count": 30
27
  },
28
  "SoccerTwos.Self-play.ELO.mean": {
29
+ "value": 1212.351426911121,
30
+ "min": 1201.9235520535651,
31
+ "max": 1214.940296813811,
32
+ "count": 29
33
  },
34
  "SoccerTwos.Self-play.ELO.sum": {
35
+ "value": 4849.405707644484,
36
+ "min": 2403.8471041071302,
37
+ "max": 16987.397148141066,
38
+ "count": 29
39
  },
40
  "SoccerTwos.Step.mean": {
41
+ "value": 339854.0,
42
+ "min": 49378.0,
43
+ "max": 339854.0,
44
+ "count": 30
45
  },
46
  "SoccerTwos.Step.sum": {
47
+ "value": 339854.0,
48
+ "min": 49378.0,
49
+ "max": 339854.0,
50
+ "count": 30
51
  },
52
  "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
53
+ "value": 0.006578196305781603,
54
+ "min": 0.006578196305781603,
55
+ "max": 0.06802091002464294,
56
+ "count": 30
57
  },
58
  "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
59
+ "value": 0.07893835753202438,
60
+ "min": 0.07893835753202438,
61
+ "max": 1.088334560394287,
62
+ "count": 30
63
  },
64
  "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
65
+ "value": 0.004074022639542818,
66
+ "min": 0.004074022639542818,
67
+ "max": 0.06785643100738525,
68
+ "count": 30
69
  },
70
  "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
71
+ "value": 0.048888273537158966,
72
+ "min": 0.048888273537158966,
73
+ "max": 1.085702896118164,
74
+ "count": 30
75
  },
76
  "SoccerTwos.Environment.CumulativeReward.mean": {
77
  "value": 0.0,
78
  "min": 0.0,
79
  "max": 0.0,
80
+ "count": 30
81
  },
82
  "SoccerTwos.Environment.CumulativeReward.sum": {
83
  "value": 0.0,
84
  "min": 0.0,
85
  "max": 0.0,
86
+ "count": 30
87
  },
88
  "SoccerTwos.Policy.ExtrinsicReward.mean": {
89
+ "value": -0.3333333333333333,
90
+ "min": -0.3333333333333333,
91
+ "max": 0.2857894771977475,
92
+ "count": 30
93
  },
94
  "SoccerTwos.Policy.ExtrinsicReward.sum": {
95
+ "value": -4.0,
96
+ "min": -4.0,
97
+ "max": 5.430000066757202,
98
+ "count": 30
99
  },
100
  "SoccerTwos.Environment.GroupCumulativeReward.mean": {
101
+ "value": -0.3333333333333333,
102
+ "min": -0.3333333333333333,
103
+ "max": 0.2857894771977475,
104
+ "count": 30
105
  },
106
  "SoccerTwos.Environment.GroupCumulativeReward.sum": {
107
+ "value": -4.0,
108
+ "min": -4.0,
109
+ "max": 5.430000066757202,
110
+ "count": 30
111
  },
112
  "SoccerTwos.IsTraining.mean": {
113
  "value": 1.0,
114
  "min": 1.0,
115
  "max": 1.0,
116
+ "count": 30
117
  },
118
  "SoccerTwos.IsTraining.sum": {
119
  "value": 1.0,
120
  "min": 1.0,
121
  "max": 1.0,
122
+ "count": 30
123
  },
124
  "SoccerTwos.Losses.PolicyLoss.mean": {
125
+ "value": 0.016774885818207016,
126
+ "min": 0.013413160684285686,
127
+ "max": 0.020140104663247862,
128
+ "count": 13
129
  },
130
  "SoccerTwos.Losses.PolicyLoss.sum": {
131
+ "value": 0.016774885818207016,
132
+ "min": 0.013413160684285686,
133
+ "max": 0.020140104663247862,
134
+ "count": 13
135
  },
136
  "SoccerTwos.Losses.ValueLoss.mean": {
137
+ "value": 0.0025689200653384128,
138
+ "min": 0.0003981584195571486,
139
+ "max": 0.005156840841906766,
140
+ "count": 13
141
  },
142
  "SoccerTwos.Losses.ValueLoss.sum": {
143
+ "value": 0.0025689200653384128,
144
+ "min": 0.0003981584195571486,
145
+ "max": 0.005156840841906766,
146
+ "count": 13
147
  },
148
  "SoccerTwos.Losses.BaselineLoss.mean": {
149
+ "value": 0.0025834310023734965,
150
+ "min": 0.0003941108460518687,
151
+ "max": 0.005347981389301519,
152
+ "count": 13
153
  },
154
  "SoccerTwos.Losses.BaselineLoss.sum": {
155
+ "value": 0.0025834310023734965,
156
+ "min": 0.0003941108460518687,
157
+ "max": 0.005347981389301519,
158
+ "count": 13
159
  },
160
  "SoccerTwos.Policy.LearningRate.mean": {
161
  "value": 0.0003,
162
  "min": 0.0003,
163
  "max": 0.0003,
164
+ "count": 13
165
  },
166
  "SoccerTwos.Policy.LearningRate.sum": {
167
  "value": 0.0003,
168
  "min": 0.0003,
169
  "max": 0.0003,
170
+ "count": 13
171
  },
172
  "SoccerTwos.Policy.Epsilon.mean": {
173
  "value": 0.20000000000000007,
174
+ "min": 0.19999999999999996,
175
  "max": 0.20000000000000007,
176
+ "count": 13
177
  },
178
  "SoccerTwos.Policy.Epsilon.sum": {
179
  "value": 0.20000000000000007,
180
+ "min": 0.19999999999999996,
181
  "max": 0.20000000000000007,
182
+ "count": 13
183
  },
184
  "SoccerTwos.Policy.Beta.mean": {
185
  "value": 0.005000000000000001,
186
+ "min": 0.005,
187
  "max": 0.005000000000000001,
188
+ "count": 13
189
  },
190
  "SoccerTwos.Policy.Beta.sum": {
191
  "value": 0.005000000000000001,
192
+ "min": 0.005,
193
  "max": 0.005000000000000001,
194
+ "count": 13
195
  }
196
  },
197
  "metadata": {
198
  "timer_format_version": "0.1.0",
199
+ "start_time_seconds": "1721744065",
200
  "python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
201
+ "command_line_arguments": "/home/thopalomares/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics --resume",
202
  "mlagents_version": "1.1.0.dev0",
203
  "mlagents_envs_version": "1.1.0.dev0",
204
  "communication_protocol_version": "1.5.0",
205
  "pytorch_version": "2.3.1+cu121",
206
  "numpy_version": "1.23.5",
207
+ "end_time_seconds": "1721745921"
208
  },
209
+ "total": 1856.0187186140001,
210
  "count": 1,
211
+ "self": 0.3318577139998524,
212
  "children": {
213
  "run_training.setup": {
214
+ "total": 0.01629420300014317,
215
  "count": 1,
216
+ "self": 0.01629420300014317
217
  },
218
  "TrainerController.start_learning": {
219
+ "total": 1855.6705666970001,
220
  "count": 1,
221
+ "self": 0.770913486050631,
222
  "children": {
223
  "TrainerController._reset_env": {
224
+ "total": 1.815844243000356,
225
+ "count": 2,
226
+ "self": 1.815844243000356
227
  },
228
  "TrainerController.advance": {
229
+ "total": 1852.6214321879493,
230
+ "count": 20285,
231
+ "self": 0.9898066918790391,
232
  "children": {
233
  "env_step": {
234
+ "total": 1171.8839023150204,
235
+ "count": 20285,
236
+ "self": 1031.3447389960074,
237
  "children": {
238
  "SubprocessEnvManager._take_step": {
239
+ "total": 139.9729437979954,
240
+ "count": 20285,
241
+ "self": 6.600656839059866,
242
  "children": {
243
  "TorchPolicy.evaluate": {
244
+ "total": 133.37228695893555,
245
+ "count": 40268,
246
+ "self": 133.37228695893555
247
  }
248
  }
249
  },
250
  "workers": {
251
+ "total": 0.5662195210175014,
252
+ "count": 20284,
253
  "self": 0.0,
254
  "children": {
255
  "worker_root": {
256
+ "total": 1853.3329737060058,
257
+ "count": 20284,
258
  "is_parallel": true,
259
+ "self": 927.5884458780147,
260
  "children": {
261
  "run_training.setup": {
262
  "total": 0.0,
 
265
  "self": 0.0,
266
  "children": {
267
  "steps_from_proto": {
268
+ "total": 0.0047305030002462445,
269
  "count": 2,
270
  "is_parallel": true,
271
+ "self": 0.0011434060006649815,
272
  "children": {
273
  "_process_rank_one_or_two_observation": {
274
+ "total": 0.003587096999581263,
275
  "count": 8,
276
  "is_parallel": true,
277
+ "self": 0.003587096999581263
278
  }
279
  }
280
  },
281
  "UnityEnvironment.step": {
282
+ "total": 0.04282491300000402,
283
  "count": 1,
284
  "is_parallel": true,
285
+ "self": 0.0008339199998772528,
286
  "children": {
287
  "UnityEnvironment._generate_step_input": {
288
+ "total": 0.0005496940002558404,
289
  "count": 1,
290
  "is_parallel": true,
291
+ "self": 0.0005496940002558404
292
  },
293
  "communicator.exchange": {
294
+ "total": 0.03921005099982722,
295
  "count": 1,
296
  "is_parallel": true,
297
+ "self": 0.03921005099982722
298
  },
299
  "steps_from_proto": {
300
+ "total": 0.0022312480000437063,
301
  "count": 2,
302
  "is_parallel": true,
303
+ "self": 0.0004841500003749388,
304
  "children": {
305
  "_process_rank_one_or_two_observation": {
306
+ "total": 0.0017470979996687674,
307
  "count": 8,
308
  "is_parallel": true,
309
+ "self": 0.0017470979996687674
310
  }
311
  }
312
  }
 
315
  }
316
  },
317
  "UnityEnvironment.step": {
318
+ "total": 925.741009410991,
319
+ "count": 20283,
320
  "is_parallel": true,
321
+ "self": 21.56576160804525,
322
  "children": {
323
  "UnityEnvironment._generate_step_input": {
324
+ "total": 18.77643948494233,
325
+ "count": 20283,
326
  "is_parallel": true,
327
+ "self": 18.77643948494233
328
  },
329
  "communicator.exchange": {
330
+ "total": 822.8149252780249,
331
+ "count": 20283,
332
  "is_parallel": true,
333
+ "self": 822.8149252780249
334
  },
335
  "steps_from_proto": {
336
+ "total": 62.583883039978446,
337
+ "count": 40566,
338
  "is_parallel": true,
339
+ "self": 13.445873751940326,
340
  "children": {
341
  "_process_rank_one_or_two_observation": {
342
+ "total": 49.13800928803812,
343
+ "count": 162264,
344
  "is_parallel": true,
345
+ "self": 49.13800928803812
346
  }
347
  }
348
  }
349
  }
350
+ },
351
+ "steps_from_proto": {
352
+ "total": 0.0035184170001230086,
353
+ "count": 2,
354
+ "is_parallel": true,
355
+ "self": 0.0007879720010350866,
356
+ "children": {
357
+ "_process_rank_one_or_two_observation": {
358
+ "total": 0.002730444999087922,
359
+ "count": 8,
360
+ "is_parallel": true,
361
+ "self": 0.002730444999087922
362
+ }
363
+ }
364
  }
365
  }
366
  }
 
369
  }
370
  },
371
  "trainer_advance": {
372
+ "total": 679.7477231810499,
373
+ "count": 20284,
374
+ "self": 6.3452692170303635,
375
  "children": {
376
  "process_trajectory": {
377
+ "total": 88.60656461601957,
378
+ "count": 20284,
379
+ "self": 88.60656461601957
380
  },
381
  "_update_policy": {
382
+ "total": 584.795889348,
383
+ "count": 13,
384
+ "self": 53.833978601001036,
385
  "children": {
386
  "TorchPOCAOptimizer.update": {
387
+ "total": 530.9619107469989,
388
+ "count": 402,
389
+ "self": 530.9619107469989
390
  }
391
  }
392
  }
 
395
  }
396
  },
397
  "trainer_threads": {
398
+ "total": 1.9960002646257635e-06,
399
  "count": 1,
400
+ "self": 1.9960002646257635e-06
401
  },
402
  "TrainerController._save_models": {
403
+ "total": 0.4623747839996213,
404
  "count": 1,
405
+ "self": 0.023142872999414976,
406
  "children": {
407
  "RLTrainer._checkpoint": {
408
+ "total": 0.4392319110002063,
409
  "count": 1,
410
+ "self": 0.4392319110002063
411
  }
412
  }
413
  }
run_logs/training_status.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "SoccerTwos": {
3
- "elo": 1201.9235520535651,
4
  "checkpoints": [
5
  {
6
  "steps": 41504,
@@ -10,15 +10,24 @@
10
  "auxillary_file_paths": [
11
  "results/SoccerTwos/SoccerTwos/SoccerTwos-41504.pt"
12
  ]
 
 
 
 
 
 
 
 
 
13
  }
14
  ],
15
  "final_checkpoint": {
16
- "steps": 41504,
17
  "file_path": "results/SoccerTwos/SoccerTwos.onnx",
18
- "reward": null,
19
- "creation_time": 1721743614.041071,
20
  "auxillary_file_paths": [
21
- "results/SoccerTwos/SoccerTwos/SoccerTwos-41504.pt"
22
  ]
23
  }
24
  },
 
1
  {
2
  "SoccerTwos": {
3
+ "elo": 1213.1785008262468,
4
  "checkpoints": [
5
  {
6
  "steps": 41504,
 
10
  "auxillary_file_paths": [
11
  "results/SoccerTwos/SoccerTwos/SoccerTwos-41504.pt"
12
  ]
13
+ },
14
+ {
15
+ "steps": 345772,
16
+ "file_path": "results/SoccerTwos/SoccerTwos/SoccerTwos-345772.onnx",
17
+ "reward": 0.0,
18
+ "creation_time": 1721745920.767362,
19
+ "auxillary_file_paths": [
20
+ "results/SoccerTwos/SoccerTwos/SoccerTwos-345772.pt"
21
+ ]
22
  }
23
  ],
24
  "final_checkpoint": {
25
+ "steps": 345772,
26
  "file_path": "results/SoccerTwos/SoccerTwos.onnx",
27
+ "reward": 0.0,
28
+ "creation_time": 1721745920.767362,
29
  "auxillary_file_paths": [
30
+ "results/SoccerTwos/SoccerTwos/SoccerTwos-345772.pt"
31
  ]
32
  }
33
  },