enaitzb committed on
Commit
5e8725c
1 Parent(s): 1176d05

First push

Browse files
Goalie.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:528fe324137bdefc11d9df1e354ad7332ccf89e147e81e1630e2fd2b2144e9d3
3
+ size 2592006
Goalie/Goalie-1499980.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00ad4187d24ba32a1cb4211213ec9ca33b8c3673b689c62780dd40cd6b64a771
3
+ size 2592006
Goalie/Goalie-1499980.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b0ebb5fee644cbcbcfb2777aef65c79ecb12ecd6a18ce8568e08d662482eabe
3
+ size 32773832
Goalie/Goalie-1807818.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:528fe324137bdefc11d9df1e354ad7332ccf89e147e81e1630e2fd2b2144e9d3
3
+ size 2592006
Goalie/Goalie-1807818.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2078a259efee484acea7aecdd1edfc10ff38d2c593a7e1b8216f530d2ddcddbe
3
+ size 32773832
Goalie/Goalie-499419.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c602068819b97fd30d4e67ffa292827a62dc8f1e761dcd576500c323cb25eb98
3
+ size 2592006
Goalie/Goalie-499419.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c72036980a75642f7e5d9078fcb440161b51df6ad897b8234f14f7eea9a733c
3
+ size 32773714
Goalie/Goalie-999974.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d208149066546b2cbdd5dc275d4df7f34acf0afc5651460bf68dd215bf886dd
3
+ size 2592006
Goalie/Goalie-999974.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b21e5a83916cbbae7958cf2969f95b0cdbb035e7002eaa2f3081f65d7b4162f
3
+ size 32773714
Goalie/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:074530d75a0e425d5c5fed68f395cdbf32b1f613aa92204ffa14015f46de5e21
3
+ size 32773360
Goalie/events.out.tfevents.1702974737.wkm0572l.17640.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04fe232bbbb85f35981575fc99b0e2d36b77b350830a095f1d37d4c732748bda
3
+ size 878789
README.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: ml-agents
3
+ tags:
4
+ - Goalie
5
+ - deep-reinforcement-learning
6
+ - reinforcement-learning
7
+ - ML-Agents-Goalie
8
+ ---
9
+
10
+ # **poca** Agent playing **Goalie**
11
+ This is a trained model of a **poca** agent playing **Goalie**
12
+ using the [Unity ML-Agents Library](https://github.com/Unity-Technologies/ml-agents).
13
+
14
+ ## Usage (with ML-Agents)
15
+ The Documentation: https://unity-technologies.github.io/ml-agents/ML-Agents-Toolkit-Documentation/
16
+
17
+ We wrote a complete tutorial that teaches you how to train your first agent using ML-Agents and publish it to the Hub:
18
+ - A *short tutorial* where you teach Huggy the Dog 🐶 to fetch the stick and then play with him directly in your
19
+ browser: https://huggingface.co/learn/deep-rl-course/unitbonus1/introduction
20
+ - A *longer tutorial* to understand how ML-Agents works:
21
+ https://huggingface.co/learn/deep-rl-course/unit5/introduction
22
+
23
+ ### Resume the training
24
+ ```bash
25
+ mlagents-learn <your_configuration_file_path.yaml> --run-id=<run_id> --resume
26
+ ```
27
+
28
+ ### Watch your Agent play
29
+ You can watch your agent **playing directly in your browser**
30
+
31
+ 1. If the environment is part of ML-Agents official environments, go to https://huggingface.co/unity
32
+ 2. Find your model_id: enaitzb/poca-SoccerTwos
33
+ 3. Select your `*.nn` / `*.onnx` file
34
+ 4. Click on Watch the agent play 👀
35
+
Striker.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:946847c50a17e2840a72382dc711f3fdcd0d97de14359f55803a7945f811f1fd
3
+ size 1682731
Striker/Striker-1499979.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c591fe1c4c4ebdcb6ec249edc885bc6fa764eb3f77a0aa296f749d21da44e7d
3
+ size 1682731
Striker/Striker-1499979.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78f5abe4f082023ffc70e96edbdb195d4dc584b7b05f6502c21a598f241f43ca
3
+ size 27655842
Striker/Striker-1802718.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:946847c50a17e2840a72382dc711f3fdcd0d97de14359f55803a7945f811f1fd
3
+ size 1682731
Striker/Striker-1802718.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8516c2db87188995f97de48da6ed7b1b22d9b66e72513be52b52c4d567df1b43
3
+ size 27655842
Striker/Striker-499317.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5adf593f9c5f99684ad2fc36409bdf2a594b7a8407eb1e974c916b8ef9c3bc38
3
+ size 1682731
Striker/Striker-499317.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc4403febc48a359528d6a19b13d57fdb776e5f6b2bf886ace3862a3f1bdea21
3
+ size 27655718
Striker/Striker-999985.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:595a72b959b2229f57071d55522ad2ac455c8ec13b01054c80ccb843704a966c
3
+ size 1682731
Striker/Striker-999985.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:353d639e252d1faf4aee5fb6782b0a804089344ac0d473fed3db3e42869fcb2b
3
+ size 27655718
Striker/checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0e73e1cc5ad60c39a640fd4739c76772a2f459e59e04cf4d7adf493e734a8bd
3
+ size 27655222
Striker/events.out.tfevents.1702974737.wkm0572l.17640.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1b794a56df21ba4337fe15ff05d9e584eaf1a6425b5a7b5c8ec837b5e1b2f6d
3
+ size 731996
config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"default_settings": null, "behaviors": {"Goalie": {"trainer_type": "poca", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "constant", "beta_schedule": "constant", "epsilon_schedule": "constant"}, "checkpoint_interval": 500000, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 5, "even_checkpoints": false, "max_steps": 30000000, "time_horizon": 1000, "summary_freq": 10000, "threaded": false, "self_play": {"save_steps": 50000, "team_change": 200000, "swap_steps": 1000, "window": 10, "play_against_latest_model_ratio": 0.5, "initial_elo": 1200.0}, "behavioral_cloning": null}, "Striker": {"trainer_type": "poca", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "constant", "beta_schedule": "constant", "epsilon_schedule": "constant"}, "checkpoint_interval": 500000, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 5, "even_checkpoints": false, "max_steps": 30000000, "time_horizon": 1000, 
"summary_freq": 10000, "threaded": false, "self_play": {"save_steps": 50000, "team_change": 200000, "swap_steps": 4000, "window": 10, "play_against_latest_model_ratio": 0.5, "initial_elo": 1200.0}, "behavioral_cloning": null}}, "env_settings": {"env_path": null, "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "Proba", "initialize_from": null, "load_model": false, "resume": false, "force": false, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}
configuration.yaml ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ default_settings: null
2
+ behaviors:
3
+ Goalie:
4
+ trainer_type: poca
5
+ hyperparameters:
6
+ batch_size: 2048
7
+ buffer_size: 20480
8
+ learning_rate: 0.0003
9
+ beta: 0.005
10
+ epsilon: 0.2
11
+ lambd: 0.95
12
+ num_epoch: 3
13
+ learning_rate_schedule: constant
14
+ beta_schedule: constant
15
+ epsilon_schedule: constant
16
+ checkpoint_interval: 500000
17
+ network_settings:
18
+ normalize: false
19
+ hidden_units: 512
20
+ num_layers: 2
21
+ vis_encode_type: simple
22
+ memory: null
23
+ goal_conditioning_type: hyper
24
+ deterministic: false
25
+ reward_signals:
26
+ extrinsic:
27
+ gamma: 0.99
28
+ strength: 1.0
29
+ network_settings:
30
+ normalize: false
31
+ hidden_units: 128
32
+ num_layers: 2
33
+ vis_encode_type: simple
34
+ memory: null
35
+ goal_conditioning_type: hyper
36
+ deterministic: false
37
+ init_path: null
38
+ keep_checkpoints: 5
39
+ even_checkpoints: false
40
+ max_steps: 30000000
41
+ time_horizon: 1000
42
+ summary_freq: 10000
43
+ threaded: false
44
+ self_play:
45
+ save_steps: 50000
46
+ team_change: 200000
47
+ swap_steps: 1000
48
+ window: 10
49
+ play_against_latest_model_ratio: 0.5
50
+ initial_elo: 1200.0
51
+ behavioral_cloning: null
52
+ Striker:
53
+ trainer_type: poca
54
+ hyperparameters:
55
+ batch_size: 2048
56
+ buffer_size: 20480
57
+ learning_rate: 0.0003
58
+ beta: 0.005
59
+ epsilon: 0.2
60
+ lambd: 0.95
61
+ num_epoch: 3
62
+ learning_rate_schedule: constant
63
+ beta_schedule: constant
64
+ epsilon_schedule: constant
65
+ checkpoint_interval: 500000
66
+ network_settings:
67
+ normalize: false
68
+ hidden_units: 512
69
+ num_layers: 2
70
+ vis_encode_type: simple
71
+ memory: null
72
+ goal_conditioning_type: hyper
73
+ deterministic: false
74
+ reward_signals:
75
+ extrinsic:
76
+ gamma: 0.99
77
+ strength: 1.0
78
+ network_settings:
79
+ normalize: false
80
+ hidden_units: 128
81
+ num_layers: 2
82
+ vis_encode_type: simple
83
+ memory: null
84
+ goal_conditioning_type: hyper
85
+ deterministic: false
86
+ init_path: null
87
+ keep_checkpoints: 5
88
+ even_checkpoints: false
89
+ max_steps: 30000000
90
+ time_horizon: 1000
91
+ summary_freq: 10000
92
+ threaded: false
93
+ self_play:
94
+ save_steps: 50000
95
+ team_change: 200000
96
+ swap_steps: 4000
97
+ window: 10
98
+ play_against_latest_model_ratio: 0.5
99
+ initial_elo: 1200.0
100
+ behavioral_cloning: null
101
+ env_settings:
102
+ env_path: null
103
+ env_args: null
104
+ base_port: 5005
105
+ num_envs: 1
106
+ num_areas: 1
107
+ timeout_wait: 60
108
+ seed: -1
109
+ max_lifetime_restarts: 10
110
+ restarts_rate_limit_n: 1
111
+ restarts_rate_limit_period_s: 60
112
+ engine_settings:
113
+ width: 84
114
+ height: 84
115
+ quality_level: 5
116
+ time_scale: 20
117
+ target_frame_rate: -1
118
+ capture_frame_rate: 60
119
+ no_graphics: false
120
+ environment_parameters: null
121
+ checkpoint_settings:
122
+ run_id: Proba
123
+ initialize_from: null
124
+ load_model: false
125
+ resume: false
126
+ force: false
127
+ train_model: false
128
+ inference: false
129
+ results_dir: results
130
+ torch_settings:
131
+ device: null
132
+ debug: false
run_logs/timers.json ADDED
@@ -0,0 +1,611 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "root",
3
+ "gauges": {
4
+ "Goalie.Policy.Entropy.mean": {
5
+ "value": 2.6350436210632324,
6
+ "min": 2.6350436210632324,
7
+ "max": 3.295673131942749,
8
+ "count": 180
9
+ },
10
+ "Goalie.Policy.Entropy.sum": {
11
+ "value": 25570.462890625,
12
+ "min": 21048.62109375,
13
+ "max": 376798.96875,
14
+ "count": 180
15
+ },
16
+ "Goalie.Environment.EpisodeLength.mean": {
17
+ "value": 56.851190476190474,
18
+ "min": 46.45891003460208,
19
+ "max": 999.0,
20
+ "count": 180
21
+ },
22
+ "Goalie.Environment.EpisodeLength.sum": {
23
+ "value": 9551.0,
24
+ "min": 9013.0,
25
+ "max": 110862.0,
26
+ "count": 180
27
+ },
28
+ "Goalie.Self-play.ELO.mean": {
29
+ "value": 780.0279731783617,
30
+ "min": 780.0279731783617,
31
+ "max": 1197.0217993396393,
32
+ "count": 178
33
+ },
34
+ "Goalie.Self-play.ELO.sum": {
35
+ "value": 131044.69949396476,
36
+ "min": 1118.4588168213602,
37
+ "max": 170427.22083595095,
38
+ "count": 178
39
+ },
40
+ "Goalie.Step.mean": {
41
+ "value": 1799995.0,
42
+ "min": 9571.0,
43
+ "max": 1799995.0,
44
+ "count": 180
45
+ },
46
+ "Goalie.Step.sum": {
47
+ "value": 1799995.0,
48
+ "min": 9571.0,
49
+ "max": 1799995.0,
50
+ "count": 180
51
+ },
52
+ "Goalie.Policy.ExtrinsicBaselineEstimate.mean": {
53
+ "value": -0.6391041278839111,
54
+ "min": -0.6458126306533813,
55
+ "max": 0.07203566282987595,
56
+ "count": 180
57
+ },
58
+ "Goalie.Policy.ExtrinsicBaselineEstimate.sum": {
59
+ "value": -107.36949157714844,
60
+ "min": -133.0373992919922,
61
+ "max": 1.2246062755584717,
62
+ "count": 180
63
+ },
64
+ "Goalie.Policy.ExtrinsicValueEstimate.mean": {
65
+ "value": -0.6391041278839111,
66
+ "min": -0.6458126306533813,
67
+ "max": 0.07203566282987595,
68
+ "count": 180
69
+ },
70
+ "Goalie.Policy.ExtrinsicValueEstimate.sum": {
71
+ "value": -107.36949157714844,
72
+ "min": -133.0373992919922,
73
+ "max": 1.2246062755584717,
74
+ "count": 180
75
+ },
76
+ "Goalie.Environment.CumulativeReward.mean": {
77
+ "value": 0.059623811895670814,
78
+ "min": 0.045828039333589865,
79
+ "max": 0.9996801018714905,
80
+ "count": 180
81
+ },
82
+ "Goalie.Environment.CumulativeReward.sum": {
83
+ "value": 10.016800398472697,
84
+ "min": 9.021400986239314,
85
+ "max": 10.917801141738892,
86
+ "count": 180
87
+ },
88
+ "Goalie.Policy.ExtrinsicReward.mean": {
89
+ "value": -0.9403761930408931,
90
+ "min": -0.954171972018536,
91
+ "max": 0.9996801018714905,
92
+ "count": 180
93
+ },
94
+ "Goalie.Policy.ExtrinsicReward.sum": {
95
+ "value": -157.98320043087006,
96
+ "min": -204.1928020119667,
97
+ "max": 9.996801018714905,
98
+ "count": 180
99
+ },
100
+ "Goalie.Environment.GroupCumulativeReward.mean": {
101
+ "value": -1.0,
102
+ "min": -1.0,
103
+ "max": 0.0,
104
+ "count": 180
105
+ },
106
+ "Goalie.Environment.GroupCumulativeReward.sum": {
107
+ "value": -168.0,
108
+ "min": -214.0,
109
+ "max": 0.0,
110
+ "count": 180
111
+ },
112
+ "Goalie.IsTraining.mean": {
113
+ "value": 1.0,
114
+ "min": 1.0,
115
+ "max": 1.0,
116
+ "count": 180
117
+ },
118
+ "Goalie.IsTraining.sum": {
119
+ "value": 1.0,
120
+ "min": 1.0,
121
+ "max": 1.0,
122
+ "count": 180
123
+ },
124
+ "Goalie.Losses.PolicyLoss.mean": {
125
+ "value": 0.016466737778197665,
126
+ "min": 0.012115506251575425,
127
+ "max": 0.02352036564067627,
128
+ "count": 86
129
+ },
130
+ "Goalie.Losses.PolicyLoss.sum": {
131
+ "value": 0.016466737778197665,
132
+ "min": 0.012115506251575425,
133
+ "max": 0.02352036564067627,
134
+ "count": 86
135
+ },
136
+ "Goalie.Losses.ValueLoss.mean": {
137
+ "value": 0.022342384917040665,
138
+ "min": 0.0004670941584966689,
139
+ "max": 0.033746559917926786,
140
+ "count": 86
141
+ },
142
+ "Goalie.Losses.ValueLoss.sum": {
143
+ "value": 0.022342384917040665,
144
+ "min": 0.0004670941584966689,
145
+ "max": 0.033746559917926786,
146
+ "count": 86
147
+ },
148
+ "Goalie.Losses.BaselineLoss.mean": {
149
+ "value": 0.02234272056569656,
150
+ "min": 0.0004670941584966689,
151
+ "max": 0.033895690863331156,
152
+ "count": 86
153
+ },
154
+ "Goalie.Losses.BaselineLoss.sum": {
155
+ "value": 0.02234272056569656,
156
+ "min": 0.0004670941584966689,
157
+ "max": 0.033895690863331156,
158
+ "count": 86
159
+ },
160
+ "Goalie.Policy.LearningRate.mean": {
161
+ "value": 0.0003,
162
+ "min": 0.0003,
163
+ "max": 0.0003,
164
+ "count": 86
165
+ },
166
+ "Goalie.Policy.LearningRate.sum": {
167
+ "value": 0.0003,
168
+ "min": 0.0003,
169
+ "max": 0.0003,
170
+ "count": 86
171
+ },
172
+ "Goalie.Policy.Epsilon.mean": {
173
+ "value": 0.20000000000000007,
174
+ "min": 0.20000000000000007,
175
+ "max": 0.20000000000000007,
176
+ "count": 86
177
+ },
178
+ "Goalie.Policy.Epsilon.sum": {
179
+ "value": 0.20000000000000007,
180
+ "min": 0.20000000000000007,
181
+ "max": 0.20000000000000007,
182
+ "count": 86
183
+ },
184
+ "Goalie.Policy.Beta.mean": {
185
+ "value": 0.005000000000000001,
186
+ "min": 0.005000000000000001,
187
+ "max": 0.005000000000000001,
188
+ "count": 86
189
+ },
190
+ "Goalie.Policy.Beta.sum": {
191
+ "value": 0.005000000000000001,
192
+ "min": 0.005000000000000001,
193
+ "max": 0.005000000000000001,
194
+ "count": 86
195
+ },
196
+ "Striker.Policy.Entropy.mean": {
197
+ "value": 2.3203654289245605,
198
+ "min": 2.2051618099212646,
199
+ "max": 3.295681953430176,
200
+ "count": 180
201
+ },
202
+ "Striker.Policy.Entropy.sum": {
203
+ "value": 23760.54296875,
204
+ "min": 15216.9345703125,
205
+ "max": 1400902.125,
206
+ "count": 180
207
+ },
208
+ "Striker.Environment.EpisodeLength.mean": {
209
+ "value": 50.415841584158414,
210
+ "min": 35.613138686131386,
211
+ "max": 999.0,
212
+ "count": 180
213
+ },
214
+ "Striker.Environment.EpisodeLength.sum": {
215
+ "value": 10184.0,
216
+ "min": 8382.0,
217
+ "max": 412654.0,
218
+ "count": 180
219
+ },
220
+ "Striker.Step.mean": {
221
+ "value": 1799842.0,
222
+ "min": 9512.0,
223
+ "max": 1799842.0,
224
+ "count": 180
225
+ },
226
+ "Striker.Step.sum": {
227
+ "value": 1799842.0,
228
+ "min": 9512.0,
229
+ "max": 1799842.0,
230
+ "count": 180
231
+ },
232
+ "Striker.Policy.ExtrinsicBaselineEstimate.mean": {
233
+ "value": 0.6013904213905334,
234
+ "min": -0.16535364091396332,
235
+ "max": 0.6533077955245972,
236
+ "count": 180
237
+ },
238
+ "Striker.Policy.ExtrinsicBaselineEstimate.sum": {
239
+ "value": 120.87947082519531,
240
+ "min": -4.340723991394043,
241
+ "max": 179.00633239746094,
242
+ "count": 180
243
+ },
244
+ "Striker.Policy.ExtrinsicValueEstimate.mean": {
245
+ "value": 0.601508378982544,
246
+ "min": -0.16497276723384857,
247
+ "max": 0.6537038087844849,
248
+ "count": 180
249
+ },
250
+ "Striker.Policy.ExtrinsicValueEstimate.sum": {
251
+ "value": 120.90318298339844,
252
+ "min": -4.237868785858154,
253
+ "max": 179.11483764648438,
254
+ "count": 180
255
+ },
256
+ "Striker.Environment.CumulativeReward.mean": {
257
+ "value": -0.04979403170094413,
258
+ "min": -0.9997200965881348,
259
+ "max": -0.035969343590448156,
260
+ "count": 180
261
+ },
262
+ "Striker.Environment.CumulativeReward.sum": {
263
+ "value": -10.00860037188977,
264
+ "min": -10.743601087480783,
265
+ "max": -9.09960101544857,
266
+ "count": 180
267
+ },
268
+ "Striker.Policy.ExtrinsicReward.mean": {
269
+ "value": 0.8502029683162917,
270
+ "min": -1.9994401931762695,
271
+ "max": 0.891693424351894,
272
+ "count": 180
273
+ },
274
+ "Striker.Policy.ExtrinsicReward.sum": {
275
+ "value": 170.89079663157463,
276
+ "min": -20.052401542663574,
277
+ "max": 244.32399827241898,
278
+ "count": 180
279
+ },
280
+ "Striker.Environment.GroupCumulativeReward.mean": {
281
+ "value": 0.949791041772757,
282
+ "min": 0.0,
283
+ "max": 0.9636321185279066,
284
+ "count": 180
285
+ },
286
+ "Striker.Environment.GroupCumulativeReward.sum": {
287
+ "value": 190.90799939632416,
288
+ "min": 0.0,
289
+ "max": 264.0352004766464,
290
+ "count": 180
291
+ },
292
+ "Striker.Self-play.ELO.mean": {
293
+ "value": 1754.8676722052517,
294
+ "min": 1220.241356428936,
295
+ "max": 1754.8676722052517,
296
+ "count": 174
297
+ },
298
+ "Striker.Self-play.ELO.sum": {
299
+ "value": 354483.26978546084,
300
+ "min": 2454.501043758067,
301
+ "max": 477024.1997551883,
302
+ "count": 174
303
+ },
304
+ "Striker.IsTraining.mean": {
305
+ "value": 1.0,
306
+ "min": 1.0,
307
+ "max": 1.0,
308
+ "count": 180
309
+ },
310
+ "Striker.IsTraining.sum": {
311
+ "value": 1.0,
312
+ "min": 1.0,
313
+ "max": 1.0,
314
+ "count": 180
315
+ },
316
+ "Striker.Losses.PolicyLoss.mean": {
317
+ "value": 0.016374270856613294,
318
+ "min": 0.011526005501703669,
319
+ "max": 0.02352414818791052,
320
+ "count": 85
321
+ },
322
+ "Striker.Losses.PolicyLoss.sum": {
323
+ "value": 0.016374270856613294,
324
+ "min": 0.011526005501703669,
325
+ "max": 0.02352414818791052,
326
+ "count": 85
327
+ },
328
+ "Striker.Losses.ValueLoss.mean": {
329
+ "value": 0.023624402284622193,
330
+ "min": 0.0006618854221111784,
331
+ "max": 0.0355209739257892,
332
+ "count": 85
333
+ },
334
+ "Striker.Losses.ValueLoss.sum": {
335
+ "value": 0.023624402284622193,
336
+ "min": 0.0006618854221111784,
337
+ "max": 0.0355209739257892,
338
+ "count": 85
339
+ },
340
+ "Striker.Losses.BaselineLoss.mean": {
341
+ "value": 0.02373644212881724,
342
+ "min": 0.0006652884049496304,
343
+ "max": 0.03594138734042644,
344
+ "count": 85
345
+ },
346
+ "Striker.Losses.BaselineLoss.sum": {
347
+ "value": 0.02373644212881724,
348
+ "min": 0.0006652884049496304,
349
+ "max": 0.03594138734042644,
350
+ "count": 85
351
+ },
352
+ "Striker.Policy.LearningRate.mean": {
353
+ "value": 0.0003,
354
+ "min": 0.0003,
355
+ "max": 0.0003,
356
+ "count": 85
357
+ },
358
+ "Striker.Policy.LearningRate.sum": {
359
+ "value": 0.0003,
360
+ "min": 0.0003,
361
+ "max": 0.0003,
362
+ "count": 85
363
+ },
364
+ "Striker.Policy.Epsilon.mean": {
365
+ "value": 0.20000000000000007,
366
+ "min": 0.20000000000000007,
367
+ "max": 0.20000000000000007,
368
+ "count": 85
369
+ },
370
+ "Striker.Policy.Epsilon.sum": {
371
+ "value": 0.20000000000000007,
372
+ "min": 0.20000000000000007,
373
+ "max": 0.20000000000000007,
374
+ "count": 85
375
+ },
376
+ "Striker.Policy.Beta.mean": {
377
+ "value": 0.005000000000000001,
378
+ "min": 0.005000000000000001,
379
+ "max": 0.005000000000000001,
380
+ "count": 85
381
+ },
382
+ "Striker.Policy.Beta.sum": {
383
+ "value": 0.005000000000000001,
384
+ "min": 0.005000000000000001,
385
+ "max": 0.005000000000000001,
386
+ "count": 85
387
+ }
388
+ },
389
+ "metadata": {
390
+ "timer_format_version": "0.1.0",
391
+ "start_time_seconds": "1702974733",
392
+ "python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]",
393
+ "command_line_arguments": "/home/operario/anaconda3/envs/mlagents/bin/mlagents-learn ml-agents/config/poca/StrikersVsGoalie.yaml --run-id=Proba",
394
+ "mlagents_version": "1.0.0",
395
+ "mlagents_envs_version": "1.0.0",
396
+ "communication_protocol_version": "1.5.0",
397
+ "pytorch_version": "2.1.1+cu121",
398
+ "numpy_version": "1.21.2",
399
+ "end_time_seconds": "1702980564"
400
+ },
401
+ "total": 5831.547219161999,
402
+ "count": 1,
403
+ "self": 0.004055126997627667,
404
+ "children": {
405
+ "run_training.setup": {
406
+ "total": 0.010500086000320152,
407
+ "count": 1,
408
+ "self": 0.010500086000320152
409
+ },
410
+ "TrainerController.start_learning": {
411
+ "total": 5831.532663949001,
412
+ "count": 1,
413
+ "self": 5.20653796030183,
414
+ "children": {
415
+ "TrainerController._reset_env": {
416
+ "total": 5.0229465030006395,
417
+ "count": 19,
418
+ "self": 5.0229465030006395
419
+ },
420
+ "TrainerController.advance": {
421
+ "total": 5821.1293753806995,
422
+ "count": 360326,
423
+ "self": 6.000143237119119,
424
+ "children": {
425
+ "env_step": {
426
+ "total": 3709.103530842298,
427
+ "count": 360326,
428
+ "self": 3106.020965303721,
429
+ "children": {
430
+ "SubprocessEnvManager._take_step": {
431
+ "total": 599.6483402995009,
432
+ "count": 360326,
433
+ "self": 29.682954025429353,
434
+ "children": {
435
+ "TorchPolicy.evaluate": {
436
+ "total": 569.9653862740715,
437
+ "count": 685680,
438
+ "self": 569.9653862740715
439
+ }
440
+ }
441
+ },
442
+ "workers": {
443
+ "total": 3.434225239076113,
444
+ "count": 360325,
445
+ "self": 0.0,
446
+ "children": {
447
+ "worker_root": {
448
+ "total": 5702.365667279509,
449
+ "count": 360325,
450
+ "is_parallel": true,
451
+ "self": 3209.9978119108673,
452
+ "children": {
453
+ "run_training.setup": {
454
+ "total": 0.0,
455
+ "count": 0,
456
+ "is_parallel": true,
457
+ "self": 0.0,
458
+ "children": {
459
+ "steps_from_proto": {
460
+ "total": 0.0012914009994346998,
461
+ "count": 2,
462
+ "is_parallel": true,
463
+ "self": 0.0003063059994019568,
464
+ "children": {
465
+ "_process_rank_one_or_two_observation": {
466
+ "total": 0.000985095000032743,
467
+ "count": 6,
468
+ "is_parallel": true,
469
+ "self": 0.000985095000032743
470
+ }
471
+ }
472
+ },
473
+ "UnityEnvironment.step": {
474
+ "total": 0.020311455999944883,
475
+ "count": 1,
476
+ "is_parallel": true,
477
+ "self": 0.0004727990008177585,
478
+ "children": {
479
+ "UnityEnvironment._generate_step_input": {
480
+ "total": 0.00023028099985822337,
481
+ "count": 1,
482
+ "is_parallel": true,
483
+ "self": 0.00023028099985822337
484
+ },
485
+ "communicator.exchange": {
486
+ "total": 0.01830136799981119,
487
+ "count": 1,
488
+ "is_parallel": true,
489
+ "self": 0.01830136799981119
490
+ },
491
+ "steps_from_proto": {
492
+ "total": 0.00130700799945771,
493
+ "count": 2,
494
+ "is_parallel": true,
495
+ "self": 0.0002533939996283152,
496
+ "children": {
497
+ "_process_rank_one_or_two_observation": {
498
+ "total": 0.0010536139998293947,
499
+ "count": 6,
500
+ "is_parallel": true,
501
+ "self": 0.0010536139998293947
502
+ }
503
+ }
504
+ }
505
+ }
506
+ }
507
+ }
508
+ },
509
+ "UnityEnvironment.step": {
510
+ "total": 2492.3496092696414,
511
+ "count": 360324,
512
+ "is_parallel": true,
513
+ "self": 116.63121924221014,
514
+ "children": {
515
+ "UnityEnvironment._generate_step_input": {
516
+ "total": 65.49518589350373,
517
+ "count": 360324,
518
+ "is_parallel": true,
519
+ "self": 65.49518589350373
520
+ },
521
+ "communicator.exchange": {
522
+ "total": 1983.5648633142227,
523
+ "count": 360324,
524
+ "is_parallel": true,
525
+ "self": 1983.5648633142227
526
+ },
527
+ "steps_from_proto": {
528
+ "total": 326.6583408197048,
529
+ "count": 720648,
530
+ "is_parallel": true,
531
+ "self": 63.17436062367142,
532
+ "children": {
533
+ "_process_rank_one_or_two_observation": {
534
+ "total": 263.4839801960334,
535
+ "count": 2161944,
536
+ "is_parallel": true,
537
+ "self": 263.4839801960334
538
+ }
539
+ }
540
+ }
541
+ }
542
+ },
543
+ "steps_from_proto": {
544
+ "total": 0.018246098999952665,
545
+ "count": 36,
546
+ "is_parallel": true,
547
+ "self": 0.003693893993840902,
548
+ "children": {
549
+ "_process_rank_one_or_two_observation": {
550
+ "total": 0.014552205006111762,
551
+ "count": 108,
552
+ "is_parallel": true,
553
+ "self": 0.014552205006111762
554
+ }
555
+ }
556
+ }
557
+ }
558
+ }
559
+ }
560
+ }
561
+ }
562
+ },
563
+ "trainer_advance": {
564
+ "total": 2106.0257013012824,
565
+ "count": 720650,
566
+ "self": 37.160836643673065,
567
+ "children": {
568
+ "process_trajectory": {
569
+ "total": 342.6532845196207,
570
+ "count": 720650,
571
+ "self": 341.9924087116178,
572
+ "children": {
573
+ "RLTrainer._checkpoint": {
574
+ "total": 0.6608758080028565,
575
+ "count": 6,
576
+ "self": 0.6608758080028565
577
+ }
578
+ }
579
+ },
580
+ "_update_policy": {
581
+ "total": 1726.2115801379887,
582
+ "count": 173,
583
+ "self": 296.8522167799774,
584
+ "children": {
585
+ "TorchPOCAOptimizer.update": {
586
+ "total": 1429.3593633580113,
587
+ "count": 5190,
588
+ "self": 1429.3593633580113
589
+ }
590
+ }
591
+ }
592
+ }
593
+ }
594
+ }
595
+ },
596
+ "TrainerController._save_models": {
597
+ "total": 0.1738041049993626,
598
+ "count": 1,
599
+ "self": 0.00232954700186383,
600
+ "children": {
601
+ "RLTrainer._checkpoint": {
602
+ "total": 0.17147455799749878,
603
+ "count": 2,
604
+ "self": 0.17147455799749878
605
+ }
606
+ }
607
+ }
608
+ }
609
+ }
610
+ }
611
+ }
run_logs/training_status.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Goalie": {
3
+ "checkpoints": [
4
+ {
5
+ "steps": 499419,
6
+ "file_path": "results/Proba/Goalie/Goalie-499419.onnx",
7
+ "reward": 0.7520077661252939,
8
+ "creation_time": 1702976047.3382895,
9
+ "auxillary_file_paths": [
10
+ "results/Proba/Goalie/Goalie-499419.pt"
11
+ ]
12
+ },
13
+ {
14
+ "steps": 999974,
15
+ "file_path": "results/Proba/Goalie/Goalie-999974.onnx",
16
+ "reward": 0.3689773957824932,
17
+ "creation_time": 1702977402.499321,
18
+ "auxillary_file_paths": [
19
+ "results/Proba/Goalie/Goalie-999974.pt"
20
+ ]
21
+ },
22
+ {
23
+ "steps": 1499980,
24
+ "file_path": "results/Proba/Goalie/Goalie-1499980.onnx",
25
+ "reward": 0.042800000247855984,
26
+ "creation_time": 1702979254.7808285,
27
+ "auxillary_file_paths": [
28
+ "results/Proba/Goalie/Goalie-1499980.pt"
29
+ ]
30
+ },
31
+ {
32
+ "steps": 1807818,
33
+ "file_path": "results/Proba/Goalie/Goalie-1807818.onnx",
34
+ "reward": null,
35
+ "creation_time": 1702980564.4739382,
36
+ "auxillary_file_paths": [
37
+ "results/Proba/Goalie/Goalie-1807818.pt"
38
+ ]
39
+ }
40
+ ],
41
+ "elo": 771.3258240415493,
42
+ "final_checkpoint": {
43
+ "steps": 1807818,
44
+ "file_path": "results/Proba/Goalie.onnx",
45
+ "reward": null,
46
+ "creation_time": 1702980564.4739382,
47
+ "auxillary_file_paths": [
48
+ "results/Proba/Goalie/Goalie-1807818.pt"
49
+ ]
50
+ }
51
+ },
52
+ "Striker": {
53
+ "checkpoints": [
54
+ {
55
+ "steps": 499317,
56
+ "file_path": "results/Proba/Striker/Striker-499317.onnx",
57
+ "reward": -0.5727692818125854,
58
+ "creation_time": 1702976347.5755029,
59
+ "auxillary_file_paths": [
60
+ "results/Proba/Striker/Striker-499317.pt"
61
+ ]
62
+ },
63
+ {
64
+ "steps": 999985,
65
+ "file_path": "results/Proba/Striker/Striker-999985.onnx",
66
+ "reward": -0.07280247349200057,
67
+ "creation_time": 1702977703.5105634,
68
+ "auxillary_file_paths": [
69
+ "results/Proba/Striker/Striker-999985.pt"
70
+ ]
71
+ },
72
+ {
73
+ "steps": 1499979,
74
+ "file_path": "results/Proba/Striker/Striker-1499979.onnx",
75
+ "reward": -0.05407068019914896,
76
+ "creation_time": 1702979607.5258744,
77
+ "auxillary_file_paths": [
78
+ "results/Proba/Striker/Striker-1499979.pt"
79
+ ]
80
+ },
81
+ {
82
+ "steps": 1802718,
83
+ "file_path": "results/Proba/Striker/Striker-1802718.onnx",
84
+ "reward": -0.03104210407228062,
85
+ "creation_time": 1702980564.5476835,
86
+ "auxillary_file_paths": [
87
+ "results/Proba/Striker/Striker-1802718.pt"
88
+ ]
89
+ }
90
+ ],
91
+ "elo": 1755.7615527828686,
92
+ "final_checkpoint": {
93
+ "steps": 1802718,
94
+ "file_path": "results/Proba/Striker.onnx",
95
+ "reward": -0.03104210407228062,
96
+ "creation_time": 1702980564.5476835,
97
+ "auxillary_file_paths": [
98
+ "results/Proba/Striker/Striker-1802718.pt"
99
+ ]
100
+ }
101
+ },
102
+ "metadata": {
103
+ "stats_format_version": "0.3.0",
104
+ "mlagents_version": "1.0.0",
105
+ "torch_version": "2.1.1+cu121"
106
+ }
107
+ }