Xoyo commited on
Commit
e871005
•
1 Parent(s): 73342b3

Second Push

Browse files
SoccerTwos.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e35749cd79129ab2c135f06df3e0b46c5ed340035cafc3e55cb2a981a6b76d7
3
  size 1764633
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8af17b563ed0cf71385d82b83cf260b7a0e55ba01712e78f9fe9c761990acdb7
3
  size 1764633
SoccerTwos/{SoccerTwos-48499680.onnx → SoccerTwos-4499728.onnx} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:297147efff5775ca301efb8a1ffacea3ab82adc6980ed192d30a508be3ba3f73
3
  size 1764633
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3adeb35f8097003449c178adf1e4dac67465539b9aa50c6beb2283ffb1858acc
3
  size 1764633
SoccerTwos/{SoccerTwos-48499680.pt → SoccerTwos-4499728.pt} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02ce25db65425d49e1e0a2b68e343b74ea0bbfb9ca3147f5c241979571893558
3
  size 28422481
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:226d21c707b97a45b64320de078e0ffa7ef044ff14b87b90d8aa2be3d573b496
3
  size 28422481
SoccerTwos/{events.out.tfevents.1685525223.fingerdl3.379268.0 → SoccerTwos-48499991.onnx} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6198b18a4f86be88518fed581dbef180c2731313f6ce1d857e9b32591d7d51f
3
- size 7282268
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:327778cdd24e1f446a0cf402f2ee62bd3abe74e1e3810d11547991246386f00d
3
+ size 1764633
SoccerTwos/SoccerTwos-48499991.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:172dfd8366e781aadf16cbbcf787963baa600b069f76bf881686c58a98bbf29b
3
+ size 28422481
SoccerTwos/SoccerTwos-48999788.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c7938ca4a742258f6d90739a299b3a375da184fd84f33f609d7b4a01637d1df
3
+ size 1764633
SoccerTwos/SoccerTwos-48999788.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1521ff5fc7718a054d7c7d70351db29c3f5cf74960d48f4c808bca7eba2a486
3
+ size 28422481
SoccerTwos/SoccerTwos-49499900.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34dbc7793db26fd7783c47c7c8b4c4924da30bd14ce743a60f8b0558757d3702
3
+ size 1764633
SoccerTwos/SoccerTwos-49499900.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf407d058052ed8551e9b0fabb45e8a349462641e1c9f1bd6cfea4fdbf117092
3
+ size 28422481
SoccerTwos/SoccerTwos-4999900.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d6691441cf4de0aa05d446012826ef6b1b7af0f8fc2ee5efd7b1b8fad782e3c
3
+ size 1764633
SoccerTwos/SoccerTwos-4999900.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f7172a0b81db2c3c3e13f43edf5d3acf731077f875be5446c3098343a6f73f7
3
+ size 28422481
SoccerTwos/SoccerTwos-49999973.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8af17b563ed0cf71385d82b83cf260b7a0e55ba01712e78f9fe9c761990acdb7
3
+ size 1764633
SoccerTwos/SoccerTwos-49999973.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:073a47e007cbef04092f9192b13af89b8c01edf62dded63160b2799e6336cb74
3
+ size 28422481
SoccerTwos/SoccerTwos-50000004.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8af17b563ed0cf71385d82b83cf260b7a0e55ba01712e78f9fe9c761990acdb7
3
+ size 1764633
SoccerTwos/SoccerTwos-50000004.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfba214ab078185e37c73effed55ba578d6b1351659755ae082e5ef920a94d27
3
+ size 28422481
SoccerTwos/SoccerTwos-50000890.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ebd6a82b31bd98d2125299eb402370bf98753ca9b5a36f629feda9cc3b44a4f
3
  size 28422481
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c29e4299f4253bb37792f627913167da7a0dbfa75cfaa80f019f84a284612d7
3
  size 28422481
SoccerTwos/SoccerTwos-5499116.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e75def63ef7e56161fdd6525b31b992ee4a290c100dc8e3b2ae6534b1c58c6a4
3
+ size 1764633
SoccerTwos/SoccerTwos-5499116.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6c905b10dc953766d1ad6774371e6c326d6e533ffb715b5752ba5809272e419
3
+ size 28422481
SoccerTwos/SoccerTwos-5999094.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e24321e62580d8f3f84a377e109bbfa5d81c4b5fd21c2d56a8058a0a8016a59
3
+ size 1764633
SoccerTwos/SoccerTwos-5999094.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e55ae522c3b1c3176bc5303eee0dbf47e719f079c1e19aa65fcc65269152c0db
3
+ size 28422481
SoccerTwos/SoccerTwos-6499554.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2019a90d3bea83053bf56f54bab9508a22b2ec25f66dd92b0bb58fde4d39a7f
3
+ size 1764633
SoccerTwos/SoccerTwos-6499554.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea4106f1c3e4d1fb0335392d50010bc3ee02641ee70eee67d288b52c06da73bc
3
+ size 28422481
SoccerTwos/checkpoint.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ebd6a82b31bd98d2125299eb402370bf98753ca9b5a36f629feda9cc3b44a4f
3
  size 28422481
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfba214ab078185e37c73effed55ba578d6b1351659755ae082e5ef920a94d27
3
  size 28422481
SoccerTwos/events.out.tfevents.1685605327.fingerdl3.411510.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cbbdb9aa636e180c948965726339b46278095e7b3a76a9d6804cd4c5225e441
3
+ size 50586931
config.json CHANGED
@@ -1 +1 @@
1
- {"default_settings": null, "behaviors": {"SoccerTwos": {"trainer_type": "poca", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "constant", "beta_schedule": "constant", "epsilon_schedule": "constant"}, "checkpoint_interval": 500000, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 5, "even_checkpoints": false, "max_steps": 50000000, "time_horizon": 1000, "summary_freq": 10000, "threaded": false, "self_play": {"save_steps": 50000, "team_change": 200000, "swap_steps": 2000, "window": 10, "play_against_latest_model_ratio": 0.5, "initial_elo": 1200.0}, "behavioral_cloning": null}}, "env_settings": {"env_path": "./training-envs-executables/SoccerTwos/SoccerTwos.x86_64", "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": true}, "environment_parameters": null, "checkpoint_settings": {"run_id": "SoccerTwos", "initialize_from": null, "load_model": false, "resume": false, "force": true, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}
 
1
+ {"default_settings": null, "behaviors": {"SoccerTwos": {"trainer_type": "poca", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "constant", "beta_schedule": "constant", "epsilon_schedule": "constant"}, "checkpoint_interval": 500000, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 5, "even_checkpoints": false, "max_steps": 50000000, "time_horizon": 1000, "summary_freq": 10000, "threaded": false, "self_play": {"save_steps": 10000, "team_change": 50000, "swap_steps": 2000, "window": 20, "play_against_latest_model_ratio": 0.5, "initial_elo": 1200.0}, "behavioral_cloning": null}}, "env_settings": {"env_path": "./training-envs-executables/SoccerTwos/SoccerTwos.x86_64", "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": true}, "environment_parameters": null, "checkpoint_settings": {"run_id": "SoccerTwos", "initialize_from": null, "load_model": false, "resume": false, "force": true, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}
configuration.yaml CHANGED
@@ -5,7 +5,7 @@ behaviors:
5
  hyperparameters:
6
  batch_size: 2048
7
  buffer_size: 20480
8
- learning_rate: 0.0003
9
  beta: 0.005
10
  epsilon: 0.2
11
  lambd: 0.95
@@ -42,10 +42,10 @@ behaviors:
42
  summary_freq: 10000
43
  threaded: false
44
  self_play:
45
- save_steps: 50000
46
- team_change: 200000
47
  swap_steps: 2000
48
- window: 10
49
  play_against_latest_model_ratio: 0.5
50
  initial_elo: 1200.0
51
  behavioral_cloning: null
 
5
  hyperparameters:
6
  batch_size: 2048
7
  buffer_size: 20480
8
+ learning_rate: 0.003
9
  beta: 0.005
10
  epsilon: 0.2
11
  lambd: 0.95
 
42
  summary_freq: 10000
43
  threaded: false
44
  self_play:
45
+ save_steps: 10000
46
+ team_change: 50000
47
  swap_steps: 2000
48
+ window: 20
49
  play_against_latest_model_ratio: 0.5
50
  initial_elo: 1200.0
51
  behavioral_cloning: null
run_logs/Player-0.log CHANGED
The diff for this file is too large to render. See raw diff
 
run_logs/timers.json CHANGED
@@ -2,75 +2,75 @@
2
  "name": "root",
3
  "gauges": {
4
  "SoccerTwos.Policy.Entropy.mean": {
5
- "value": 2.556478977203369,
6
- "min": 0.8628996014595032,
7
- "max": 3.295734405517578,
8
  "count": 5000
9
  },
10
  "SoccerTwos.Policy.Entropy.sum": {
11
- "value": 58819.46875,
12
- "min": 9834.373046875,
13
- "max": 155859.171875,
14
  "count": 5000
15
  },
16
  "SoccerTwos.Environment.EpisodeLength.mean": {
17
- "value": 320.0625,
18
- "min": 256.94444444444446,
19
  "max": 999.0,
20
  "count": 5000
21
  },
22
  "SoccerTwos.Environment.EpisodeLength.sum": {
23
- "value": 20484.0,
24
- "min": 15984.0,
25
- "max": 25532.0,
26
  "count": 5000
27
  },
28
  "SoccerTwos.Self-play.ELO.mean": {
29
- "value": 1246.1294126353869,
30
- "min": 1187.3139795928823,
31
- "max": 1246.4191325003276,
32
- "count": 444
33
  },
34
  "SoccerTwos.Self-play.ELO.sum": {
35
- "value": 37383.88237906161,
36
- "min": 2374.6279591857647,
37
- "max": 37383.88237906161,
38
- "count": 444
39
  },
40
  "SoccerTwos.Step.mean": {
41
- "value": 49999430.0,
42
- "min": 9784.0,
43
- "max": 49999430.0,
44
  "count": 5000
45
  },
46
  "SoccerTwos.Step.sum": {
47
- "value": 49999430.0,
48
- "min": 9784.0,
49
- "max": 49999430.0,
50
  "count": 5000
51
  },
52
  "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
53
- "value": 0.012056336738169193,
54
- "min": -0.10242440551519394,
55
- "max": 0.042053915560245514,
56
  "count": 5000
57
  },
58
  "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
59
- "value": 0.37374642491340637,
60
- "min": -1.536352276802063,
61
- "max": 1.0224354267120361,
62
  "count": 5000
63
  },
64
  "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
65
- "value": 0.012648879550397396,
66
- "min": -0.10245412588119507,
67
- "max": 0.04426921531558037,
68
  "count": 5000
69
  },
70
  "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
71
- "value": 0.3921152651309967,
72
- "min": -1.5367718935012817,
73
- "max": 1.1084165573120117,
74
  "count": 5000
75
  },
76
  "SoccerTwos.Environment.CumulativeReward.mean": {
@@ -86,27 +86,27 @@
86
  "count": 5000
87
  },
88
  "SoccerTwos.Policy.ExtrinsicReward.mean": {
89
- "value": -0.1501677425638322,
90
- "min": -0.7717600007851918,
91
- "max": 0.43691111273235744,
92
  "count": 5000
93
  },
94
  "SoccerTwos.Policy.ExtrinsicReward.sum": {
95
- "value": -4.655200019478798,
96
- "min": -16.417999982833862,
97
- "max": 8.89359986782074,
98
  "count": 5000
99
  },
100
  "SoccerTwos.Environment.GroupCumulativeReward.mean": {
101
- "value": -0.1501677425638322,
102
- "min": -0.7717600007851918,
103
- "max": 0.43691111273235744,
104
  "count": 5000
105
  },
106
  "SoccerTwos.Environment.GroupCumulativeReward.sum": {
107
- "value": -4.655200019478798,
108
- "min": -16.417999982833862,
109
- "max": 8.89359986782074,
110
  "count": 5000
111
  },
112
  "SoccerTwos.IsTraining.mean": {
@@ -122,81 +122,81 @@
122
  "count": 5000
123
  },
124
  "SoccerTwos.Losses.PolicyLoss.mean": {
125
- "value": 0.01427030615353336,
126
- "min": 0.009354956900763985,
127
- "max": 0.025422956169738123,
128
- "count": 2286
129
  },
130
  "SoccerTwos.Losses.PolicyLoss.sum": {
131
- "value": 0.01427030615353336,
132
- "min": 0.009354956900763985,
133
- "max": 0.025422956169738123,
134
- "count": 2286
135
  },
136
  "SoccerTwos.Losses.ValueLoss.mean": {
137
- "value": 0.009385776737083991,
138
- "min": 2.688042489695323e-12,
139
- "max": 0.02662869206542382,
140
- "count": 2286
141
  },
142
  "SoccerTwos.Losses.ValueLoss.sum": {
143
- "value": 0.009385776737083991,
144
- "min": 2.688042489695323e-12,
145
- "max": 0.02662869206542382,
146
- "count": 2286
147
  },
148
  "SoccerTwos.Losses.BaselineLoss.mean": {
149
- "value": 0.00949315377511084,
150
- "min": 4.768202434453361e-12,
151
- "max": 0.027282530008233153,
152
- "count": 2286
153
  },
154
  "SoccerTwos.Losses.BaselineLoss.sum": {
155
- "value": 0.00949315377511084,
156
- "min": 4.768202434453361e-12,
157
- "max": 0.027282530008233153,
158
- "count": 2286
159
  },
160
  "SoccerTwos.Policy.LearningRate.mean": {
161
- "value": 0.0003,
162
- "min": 0.0003,
163
- "max": 0.0003,
164
- "count": 2286
165
  },
166
  "SoccerTwos.Policy.LearningRate.sum": {
167
- "value": 0.0003,
168
- "min": 0.0003,
169
- "max": 0.0003,
170
- "count": 2286
171
  },
172
  "SoccerTwos.Policy.Epsilon.mean": {
173
  "value": 0.20000000000000007,
174
  "min": 0.20000000000000007,
175
  "max": 0.20000000000000007,
176
- "count": 2286
177
  },
178
  "SoccerTwos.Policy.Epsilon.sum": {
179
  "value": 0.20000000000000007,
180
  "min": 0.20000000000000007,
181
  "max": 0.20000000000000007,
182
- "count": 2286
183
  },
184
  "SoccerTwos.Policy.Beta.mean": {
185
  "value": 0.005000000000000001,
186
  "min": 0.005000000000000001,
187
  "max": 0.005000000000000001,
188
- "count": 2286
189
  },
190
  "SoccerTwos.Policy.Beta.sum": {
191
  "value": 0.005000000000000001,
192
  "min": 0.005000000000000001,
193
  "max": 0.005000000000000001,
194
- "count": 2286
195
  }
196
  },
197
  "metadata": {
198
  "timer_format_version": "0.1.0",
199
- "start_time_seconds": "1685525223",
200
  "python_version": "3.9.16 (main, Mar 8 2023, 14:00:05) \n[GCC 11.2.0]",
201
  "command_line_arguments": "/home/wyt/anaconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --force",
202
  "mlagents_version": "0.31.0.dev0",
@@ -204,59 +204,59 @@
204
  "communication_protocol_version": "1.5.0",
205
  "pytorch_version": "1.11.0+cu102",
206
  "numpy_version": "1.21.2",
207
- "end_time_seconds": "1685584266"
208
  },
209
- "total": 59042.89769419585,
210
  "count": 1,
211
- "self": 0.21814238373190165,
212
  "children": {
213
  "run_training.setup": {
214
- "total": 0.0063062182161957026,
215
  "count": 1,
216
- "self": 0.0063062182161957026
217
  },
218
  "TrainerController.start_learning": {
219
- "total": 59042.673245593905,
220
  "count": 1,
221
- "self": 54.184389336034656,
222
  "children": {
223
  "TrainerController._reset_env": {
224
- "total": 18.66194161004387,
225
- "count": 249,
226
- "self": 18.66194161004387
227
  },
228
  "TrainerController.advance": {
229
- "total": 58969.67677217396,
230
- "count": 3249605,
231
- "self": 52.1698747985065,
232
  "children": {
233
  "env_step": {
234
- "total": 44698.29575628159,
235
- "count": 3249605,
236
- "self": 28578.731281354558,
237
  "children": {
238
  "SubprocessEnvManager._take_step": {
239
- "total": 16084.75626065908,
240
- "count": 3249605,
241
- "self": 298.7151764656883,
242
  "children": {
243
  "TorchPolicy.evaluate": {
244
- "total": 15786.041084193392,
245
- "count": 6463122,
246
- "self": 15786.041084193392
247
  }
248
  }
249
  },
250
  "workers": {
251
- "total": 34.80821426794864,
252
- "count": 3249605,
253
  "self": 0.0,
254
  "children": {
255
  "worker_root": {
256
- "total": 58964.13316858723,
257
- "count": 3249605,
258
  "is_parallel": true,
259
- "self": 36394.802758344915,
260
  "children": {
261
  "run_training.setup": {
262
  "total": 0.0,
@@ -265,48 +265,48 @@
265
  "self": 0.0,
266
  "children": {
267
  "steps_from_proto": {
268
- "total": 0.0016105701215565205,
269
  "count": 2,
270
  "is_parallel": true,
271
- "self": 0.00037895189598202705,
272
  "children": {
273
  "_process_rank_one_or_two_observation": {
274
- "total": 0.0012316182255744934,
275
  "count": 8,
276
  "is_parallel": true,
277
- "self": 0.0012316182255744934
278
  }
279
  }
280
  },
281
  "UnityEnvironment.step": {
282
- "total": 0.016228169901296496,
283
  "count": 1,
284
  "is_parallel": true,
285
- "self": 0.00037772185169160366,
286
  "children": {
287
  "UnityEnvironment._generate_step_input": {
288
- "total": 0.00032189092598855495,
289
  "count": 1,
290
  "is_parallel": true,
291
- "self": 0.00032189092598855495
292
  },
293
  "communicator.exchange": {
294
- "total": 0.01430563093163073,
295
  "count": 1,
296
  "is_parallel": true,
297
- "self": 0.01430563093163073
298
  },
299
  "steps_from_proto": {
300
- "total": 0.0012229261919856071,
301
  "count": 2,
302
  "is_parallel": true,
303
- "self": 0.0002754807937890291,
304
  "children": {
305
  "_process_rank_one_or_two_observation": {
306
- "total": 0.000947445398196578,
307
  "count": 8,
308
  "is_parallel": true,
309
- "self": 0.000947445398196578
310
  }
311
  }
312
  }
@@ -315,50 +315,50 @@
315
  }
316
  },
317
  "UnityEnvironment.step": {
318
- "total": 22569.036006505834,
319
- "count": 3249604,
320
  "is_parallel": true,
321
- "self": 1224.9911103511695,
322
  "children": {
323
  "UnityEnvironment._generate_step_input": {
324
- "total": 875.6204554305878,
325
- "count": 3249604,
326
  "is_parallel": true,
327
- "self": 875.6204554305878
328
  },
329
  "communicator.exchange": {
330
- "total": 16670.457968858536,
331
- "count": 3249604,
332
  "is_parallel": true,
333
- "self": 16670.457968858536
334
  },
335
  "steps_from_proto": {
336
- "total": 3797.9664718655404,
337
- "count": 6499208,
338
  "is_parallel": true,
339
- "self": 776.744841856882,
340
  "children": {
341
  "_process_rank_one_or_two_observation": {
342
- "total": 3021.2216300086584,
343
- "count": 25996832,
344
  "is_parallel": true,
345
- "self": 3021.2216300086584
346
  }
347
  }
348
  }
349
  }
350
  },
351
  "steps_from_proto": {
352
- "total": 0.2944037364795804,
353
- "count": 496,
354
  "is_parallel": true,
355
- "self": 0.0608798258472234,
356
  "children": {
357
  "_process_rank_one_or_two_observation": {
358
- "total": 0.233523910632357,
359
- "count": 1984,
360
  "is_parallel": true,
361
- "self": 0.233523910632357
362
  }
363
  }
364
  }
@@ -369,31 +369,31 @@
369
  }
370
  },
371
  "trainer_advance": {
372
- "total": 14219.211141093867,
373
- "count": 3249605,
374
- "self": 439.10235753725283,
375
  "children": {
376
  "process_trajectory": {
377
- "total": 5498.635246976977,
378
- "count": 3249605,
379
- "self": 5482.912984529277,
380
  "children": {
381
  "RLTrainer._checkpoint": {
382
- "total": 15.722262447699904,
383
  "count": 100,
384
- "self": 15.722262447699904
385
  }
386
  }
387
  },
388
  "_update_policy": {
389
- "total": 8281.473536579637,
390
- "count": 2286,
391
- "self": 5230.548123164568,
392
  "children": {
393
  "TorchPOCAOptimizer.update": {
394
- "total": 3050.9254134150688,
395
- "count": 68580,
396
- "self": 3050.9254134150688
397
  }
398
  }
399
  }
@@ -402,19 +402,19 @@
402
  }
403
  },
404
  "trainer_threads": {
405
- "total": 5.359761416912079e-07,
406
  "count": 1,
407
- "self": 5.359761416912079e-07
408
  },
409
  "TrainerController._save_models": {
410
- "total": 0.15014193789102137,
411
  "count": 1,
412
- "self": 0.000920719001442194,
413
  "children": {
414
  "RLTrainer._checkpoint": {
415
- "total": 0.14922121888957918,
416
  "count": 1,
417
- "self": 0.14922121888957918
418
  }
419
  }
420
  }
 
2
  "name": "root",
3
  "gauges": {
4
  "SoccerTwos.Policy.Entropy.mean": {
5
+ "value": 2.732017993927002,
6
+ "min": 1.4310749769210815,
7
+ "max": 3.295657157897949,
8
  "count": 5000
9
  },
10
  "SoccerTwos.Policy.Entropy.sum": {
11
+ "value": 46859.57421875,
12
+ "min": 17607.2578125,
13
+ "max": 139971.84375,
14
  "count": 5000
15
  },
16
  "SoccerTwos.Environment.EpisodeLength.mean": {
17
+ "value": 88.23214285714286,
18
+ "min": 49.295918367346935,
19
  "max": 999.0,
20
  "count": 5000
21
  },
22
  "SoccerTwos.Environment.EpisodeLength.sum": {
23
+ "value": 19764.0,
24
+ "min": 13172.0,
25
+ "max": 27080.0,
26
  "count": 5000
27
  },
28
  "SoccerTwos.Self-play.ELO.mean": {
29
+ "value": 1328.9875677323002,
30
+ "min": 1067.6681513782962,
31
+ "max": 1425.1669090363382,
32
+ "count": 4978
33
  },
34
  "SoccerTwos.Self-play.ELO.sum": {
35
+ "value": 148846.60758601764,
36
+ "min": 2371.5191713258605,
37
+ "max": 241218.86328195722,
38
+ "count": 4978
39
  },
40
  "SoccerTwos.Step.mean": {
41
+ "value": 49999973.0,
42
+ "min": 9236.0,
43
+ "max": 49999973.0,
44
  "count": 5000
45
  },
46
  "SoccerTwos.Step.sum": {
47
+ "value": 49999973.0,
48
+ "min": 9236.0,
49
+ "max": 49999973.0,
50
  "count": 5000
51
  },
52
  "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
53
+ "value": 0.002145185600966215,
54
+ "min": -0.35611632466316223,
55
+ "max": 1.1934101581573486,
56
  "count": 5000
57
  },
58
  "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
59
+ "value": 0.2402607798576355,
60
+ "min": -49.1440544128418,
61
+ "max": 134.99392700195312,
62
  "count": 5000
63
  },
64
  "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
65
+ "value": 0.006087425164878368,
66
+ "min": -0.3684330880641937,
67
+ "max": 1.1617356538772583,
68
  "count": 5000
69
  },
70
  "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
71
+ "value": 0.6817916035652161,
72
+ "min": -50.84376525878906,
73
+ "max": 131.594482421875,
74
  "count": 5000
75
  },
76
  "SoccerTwos.Environment.CumulativeReward.mean": {
 
86
  "count": 5000
87
  },
88
  "SoccerTwos.Policy.ExtrinsicReward.mean": {
89
+ "value": -0.16895714508635656,
90
+ "min": -0.6427789487336811,
91
+ "max": 0.46163683816006307,
92
  "count": 5000
93
  },
94
  "SoccerTwos.Policy.ExtrinsicReward.sum": {
95
+ "value": -18.923200249671936,
96
+ "min": -56.69440001249313,
97
+ "max": 50.39880013465881,
98
  "count": 5000
99
  },
100
  "SoccerTwos.Environment.GroupCumulativeReward.mean": {
101
+ "value": -0.16895714508635656,
102
+ "min": -0.6427789487336811,
103
+ "max": 0.46163683816006307,
104
  "count": 5000
105
  },
106
  "SoccerTwos.Environment.GroupCumulativeReward.sum": {
107
+ "value": -18.923200249671936,
108
+ "min": -56.69440001249313,
109
+ "max": 50.39880013465881,
110
  "count": 5000
111
  },
112
  "SoccerTwos.IsTraining.mean": {
 
122
  "count": 5000
123
  },
124
  "SoccerTwos.Losses.PolicyLoss.mean": {
125
+ "value": 0.01943461374224474,
126
+ "min": 0.011112430859551143,
127
+ "max": 0.03933954979875125,
128
+ "count": 2412
129
  },
130
  "SoccerTwos.Losses.PolicyLoss.sum": {
131
+ "value": 0.01943461374224474,
132
+ "min": 0.011112430859551143,
133
+ "max": 0.03933954979875125,
134
+ "count": 2412
135
  },
136
  "SoccerTwos.Losses.ValueLoss.mean": {
137
+ "value": 0.058215494453907016,
138
+ "min": 5.623583623067437e-06,
139
+ "max": 17.72092866244105,
140
+ "count": 2412
141
  },
142
  "SoccerTwos.Losses.ValueLoss.sum": {
143
+ "value": 0.058215494453907016,
144
+ "min": 5.623583623067437e-06,
145
+ "max": 17.72092866244105,
146
+ "count": 2412
147
  },
148
  "SoccerTwos.Losses.BaselineLoss.mean": {
149
+ "value": 0.059470121189951895,
150
+ "min": 1.4972809033982533e-05,
151
+ "max": 20.085067470418288,
152
+ "count": 2412
153
  },
154
  "SoccerTwos.Losses.BaselineLoss.sum": {
155
+ "value": 0.059470121189951895,
156
+ "min": 1.4972809033982533e-05,
157
+ "max": 20.085067470418288,
158
+ "count": 2412
159
  },
160
  "SoccerTwos.Policy.LearningRate.mean": {
161
+ "value": 0.003000000000000001,
162
+ "min": 0.003000000000000001,
163
+ "max": 0.003000000000000001,
164
+ "count": 2412
165
  },
166
  "SoccerTwos.Policy.LearningRate.sum": {
167
+ "value": 0.003000000000000001,
168
+ "min": 0.003000000000000001,
169
+ "max": 0.003000000000000001,
170
+ "count": 2412
171
  },
172
  "SoccerTwos.Policy.Epsilon.mean": {
173
  "value": 0.20000000000000007,
174
  "min": 0.20000000000000007,
175
  "max": 0.20000000000000007,
176
+ "count": 2412
177
  },
178
  "SoccerTwos.Policy.Epsilon.sum": {
179
  "value": 0.20000000000000007,
180
  "min": 0.20000000000000007,
181
  "max": 0.20000000000000007,
182
+ "count": 2412
183
  },
184
  "SoccerTwos.Policy.Beta.mean": {
185
  "value": 0.005000000000000001,
186
  "min": 0.005000000000000001,
187
  "max": 0.005000000000000001,
188
+ "count": 2412
189
  },
190
  "SoccerTwos.Policy.Beta.sum": {
191
  "value": 0.005000000000000001,
192
  "min": 0.005000000000000001,
193
  "max": 0.005000000000000001,
194
+ "count": 2412
195
  }
196
  },
197
  "metadata": {
198
  "timer_format_version": "0.1.0",
199
+ "start_time_seconds": "1685605326",
200
  "python_version": "3.9.16 (main, Mar 8 2023, 14:00:05) \n[GCC 11.2.0]",
201
  "command_line_arguments": "/home/wyt/anaconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --force",
202
  "mlagents_version": "0.31.0.dev0",
 
204
  "communication_protocol_version": "1.5.0",
205
  "pytorch_version": "1.11.0+cu102",
206
  "numpy_version": "1.21.2",
207
+ "end_time_seconds": "1685658486"
208
  },
209
+ "total": 53160.56620799401,
210
  "count": 1,
211
+ "self": 0.21856565307825804,
212
  "children": {
213
  "run_training.setup": {
214
+ "total": 0.006621055072173476,
215
  "count": 1,
216
+ "self": 0.006621055072173476
217
  },
218
  "TrainerController.start_learning": {
219
+ "total": 53160.34102128586,
220
  "count": 1,
221
+ "self": 60.36968820123002,
222
  "children": {
223
  "TrainerController._reset_env": {
224
+ "total": 21.765231831697747,
225
+ "count": 995,
226
+ "self": 21.765231831697747
227
  },
228
  "TrainerController.advance": {
229
+ "total": 53078.05931469775,
230
+ "count": 3451096,
231
+ "self": 56.15520123485476,
232
  "children": {
233
  "env_step": {
234
+ "total": 39740.48723199195,
235
+ "count": 3451096,
236
+ "self": 29044.75445716479,
237
  "children": {
238
  "SubprocessEnvManager._take_step": {
239
+ "total": 10660.52838735492,
240
+ "count": 3451096,
241
+ "self": 288.1409287536517,
242
  "children": {
243
  "TorchPolicy.evaluate": {
244
+ "total": 10372.387458601268,
245
+ "count": 6469632,
246
+ "self": 10372.387458601268
247
  }
248
  }
249
  },
250
  "workers": {
251
+ "total": 35.20438747224398,
252
+ "count": 3451096,
253
  "self": 0.0,
254
  "children": {
255
  "worker_root": {
256
+ "total": 53080.893450592645,
257
+ "count": 3451096,
258
  "is_parallel": true,
259
+ "self": 30041.395939050708,
260
  "children": {
261
  "run_training.setup": {
262
  "total": 0.0,
 
265
  "self": 0.0,
266
  "children": {
267
  "steps_from_proto": {
268
+ "total": 0.001561193959787488,
269
  "count": 2,
270
  "is_parallel": true,
271
+ "self": 0.0003742906264960766,
272
  "children": {
273
  "_process_rank_one_or_two_observation": {
274
+ "total": 0.0011869033332914114,
275
  "count": 8,
276
  "is_parallel": true,
277
+ "self": 0.0011869033332914114
278
  }
279
  }
280
  },
281
  "UnityEnvironment.step": {
282
+ "total": 0.016142722917720675,
283
  "count": 1,
284
  "is_parallel": true,
285
+ "self": 0.0003737660590559244,
286
  "children": {
287
  "UnityEnvironment._generate_step_input": {
288
+ "total": 0.0003036879934370518,
289
  "count": 1,
290
  "is_parallel": true,
291
+ "self": 0.0003036879934370518
292
  },
293
  "communicator.exchange": {
294
+ "total": 0.01421576295979321,
295
  "count": 1,
296
  "is_parallel": true,
297
+ "self": 0.01421576295979321
298
  },
299
  "steps_from_proto": {
300
+ "total": 0.0012495059054344893,
301
  "count": 2,
302
  "is_parallel": true,
303
+ "self": 0.00025913421995937824,
304
  "children": {
305
  "_process_rank_one_or_two_observation": {
306
+ "total": 0.000990371685475111,
307
  "count": 8,
308
  "is_parallel": true,
309
+ "self": 0.000990371685475111
310
  }
311
  }
312
  }
 
315
  }
316
  },
317
  "UnityEnvironment.step": {
318
+ "total": 23038.307036999613,
319
+ "count": 3451095,
320
  "is_parallel": true,
321
+ "self": 1248.8022941302042,
322
  "children": {
323
  "UnityEnvironment._generate_step_input": {
324
+ "total": 857.56477716472,
325
+ "count": 3451095,
326
  "is_parallel": true,
327
+ "self": 857.56477716472
328
  },
329
  "communicator.exchange": {
330
+ "total": 17020.58405685937,
331
+ "count": 3451095,
332
  "is_parallel": true,
333
+ "self": 17020.58405685937
334
  },
335
  "steps_from_proto": {
336
+ "total": 3911.3559088453185,
337
+ "count": 6902190,
338
  "is_parallel": true,
339
+ "self": 814.7830948443152,
340
  "children": {
341
  "_process_rank_one_or_two_observation": {
342
+ "total": 3096.5728140010033,
343
+ "count": 27608760,
344
  "is_parallel": true,
345
+ "self": 3096.5728140010033
346
  }
347
  }
348
  }
349
  }
350
  },
351
  "steps_from_proto": {
352
+ "total": 1.1904745423235,
353
+ "count": 1988,
354
  "is_parallel": true,
355
+ "self": 0.2469224245287478,
356
  "children": {
357
  "_process_rank_one_or_two_observation": {
358
+ "total": 0.9435521177947521,
359
+ "count": 7952,
360
  "is_parallel": true,
361
+ "self": 0.9435521177947521
362
  }
363
  }
364
  }
 
369
  }
370
  },
371
  "trainer_advance": {
372
+ "total": 13281.416881470941,
373
+ "count": 3451096,
374
+ "self": 405.0560196649749,
375
  "children": {
376
  "process_trajectory": {
377
+ "total": 4799.068029365502,
378
+ "count": 3451096,
379
+ "self": 4783.627852224512,
380
  "children": {
381
  "RLTrainer._checkpoint": {
382
+ "total": 15.440177140990272,
383
  "count": 100,
384
+ "self": 15.440177140990272
385
  }
386
  }
387
  },
388
  "_update_policy": {
389
+ "total": 8077.292832440464,
390
+ "count": 2412,
391
+ "self": 5181.555676384829,
392
  "children": {
393
  "TorchPOCAOptimizer.update": {
394
+ "total": 2895.737156055635,
395
+ "count": 72360,
396
+ "self": 2895.737156055635
397
  }
398
  }
399
  }
 
402
  }
403
  },
404
  "trainer_threads": {
405
+ "total": 5.59026375412941e-07,
406
  "count": 1,
407
+ "self": 5.59026375412941e-07
408
  },
409
  "TrainerController._save_models": {
410
+ "total": 0.14678599615581334,
411
  "count": 1,
412
+ "self": 0.0011825382243841887,
413
  "children": {
414
  "RLTrainer._checkpoint": {
415
+ "total": 0.14560345793142915,
416
  "count": 1,
417
+ "self": 0.14560345793142915
418
  }
419
  }
420
  }
run_logs/training_status.json CHANGED
@@ -2,59 +2,59 @@
2
  "SoccerTwos": {
3
  "checkpoints": [
4
  {
5
- "steps": 48499680,
6
- "file_path": "results/SoccerTwos/SoccerTwos/SoccerTwos-48499680.onnx",
7
  "reward": 0.0,
8
- "creation_time": 1685582717.6973877,
9
  "auxillary_file_paths": [
10
- "results/SoccerTwos/SoccerTwos/SoccerTwos-48499680.pt"
11
  ]
12
  },
13
  {
14
- "steps": 48999460,
15
- "file_path": "results/SoccerTwos/SoccerTwos/SoccerTwos-48999460.onnx",
16
- "reward": null,
17
- "creation_time": 1685583235.9581857,
18
  "auxillary_file_paths": [
19
- "results/SoccerTwos/SoccerTwos/SoccerTwos-48999460.pt"
20
  ]
21
  },
22
  {
23
- "steps": 49499964,
24
- "file_path": "results/SoccerTwos/SoccerTwos/SoccerTwos-49499964.onnx",
25
  "reward": 0.0,
26
- "creation_time": 1685583750.5695598,
27
  "auxillary_file_paths": [
28
- "results/SoccerTwos/SoccerTwos/SoccerTwos-49499964.pt"
29
  ]
30
  },
31
  {
32
- "steps": 49999430,
33
- "file_path": "results/SoccerTwos/SoccerTwos/SoccerTwos-49999430.onnx",
34
  "reward": 0.0,
35
- "creation_time": 1685584265.6484303,
36
  "auxillary_file_paths": [
37
- "results/SoccerTwos/SoccerTwos/SoccerTwos-49999430.pt"
38
  ]
39
  },
40
  {
41
- "steps": 50000890,
42
- "file_path": "results/SoccerTwos/SoccerTwos/SoccerTwos-50000890.onnx",
43
  "reward": 0.0,
44
- "creation_time": 1685584265.870237,
45
  "auxillary_file_paths": [
46
- "results/SoccerTwos/SoccerTwos/SoccerTwos-50000890.pt"
47
  ]
48
  }
49
  ],
50
- "elo": 1244.212996620815,
51
  "final_checkpoint": {
52
- "steps": 50000890,
53
  "file_path": "results/SoccerTwos/SoccerTwos.onnx",
54
  "reward": 0.0,
55
- "creation_time": 1685584265.870237,
56
  "auxillary_file_paths": [
57
- "results/SoccerTwos/SoccerTwos/SoccerTwos-50000890.pt"
58
  ]
59
  }
60
  },
 
2
  "SoccerTwos": {
3
  "checkpoints": [
4
  {
5
+ "steps": 48499991,
6
+ "file_path": "results/SoccerTwos/SoccerTwos/SoccerTwos-48499991.onnx",
7
  "reward": 0.0,
8
+ "creation_time": 1685656889.272008,
9
  "auxillary_file_paths": [
10
+ "results/SoccerTwos/SoccerTwos/SoccerTwos-48499991.pt"
11
  ]
12
  },
13
  {
14
+ "steps": 48999788,
15
+ "file_path": "results/SoccerTwos/SoccerTwos/SoccerTwos-48999788.onnx",
16
+ "reward": 0.0,
17
+ "creation_time": 1685657426.1574485,
18
  "auxillary_file_paths": [
19
+ "results/SoccerTwos/SoccerTwos/SoccerTwos-48999788.pt"
20
  ]
21
  },
22
  {
23
+ "steps": 49499900,
24
+ "file_path": "results/SoccerTwos/SoccerTwos/SoccerTwos-49499900.onnx",
25
  "reward": 0.0,
26
+ "creation_time": 1685657955.9725094,
27
  "auxillary_file_paths": [
28
+ "results/SoccerTwos/SoccerTwos/SoccerTwos-49499900.pt"
29
  ]
30
  },
31
  {
32
+ "steps": 49999973,
33
+ "file_path": "results/SoccerTwos/SoccerTwos/SoccerTwos-49999973.onnx",
34
  "reward": 0.0,
35
+ "creation_time": 1685658486.5020576,
36
  "auxillary_file_paths": [
37
+ "results/SoccerTwos/SoccerTwos/SoccerTwos-49999973.pt"
38
  ]
39
  },
40
  {
41
+ "steps": 50000004,
42
+ "file_path": "results/SoccerTwos/SoccerTwos/SoccerTwos-50000004.onnx",
43
  "reward": 0.0,
44
+ "creation_time": 1685658486.6577213,
45
  "auxillary_file_paths": [
46
+ "results/SoccerTwos/SoccerTwos/SoccerTwos-50000004.pt"
47
  ]
48
  }
49
  ],
50
+ "elo": 1327.5113176463833,
51
  "final_checkpoint": {
52
+ "steps": 50000004,
53
  "file_path": "results/SoccerTwos/SoccerTwos.onnx",
54
  "reward": 0.0,
55
+ "creation_time": 1685658486.6577213,
56
  "auxillary_file_paths": [
57
+ "results/SoccerTwos/SoccerTwos/SoccerTwos-50000004.pt"
58
  ]
59
  }
60
  },