First push

Browse files

Files changed (10) hide show

README.md +4 -4
SoccerTwos.onnx +3 -0
SoccerTwos/SoccerTwos-10226.onnx +3 -0
SoccerTwos/SoccerTwos-10226.pt +3 -0
SoccerTwos/checkpoint.pt +3 -0
SoccerTwos/events.out.tfevents.1703231756.wkm0572l.11238.0 +3 -0
config.json +1 -1
configuration.yaml +4 -53
run_logs/timers.json +185 -477
run_logs/training_status.json +12 -89

README.md CHANGED Viewed

@@ -1,14 +1,14 @@
 ---
 library_name: ml-agents
 tags:
-- Goalie
 - deep-reinforcement-learning
 - reinforcement-learning
-- ML-Agents-Goalie
 ---
-  # **poca** Agent playing **Goalie**
-  This is a trained model of a **poca** agent playing **Goalie**
   using the [Unity ML-Agents Library](https://github.com/Unity-Technologies/ml-agents).
   ## Usage (with ML-Agents)

 ---
 library_name: ml-agents
 tags:
+- SoccerTwos
 - deep-reinforcement-learning
 - reinforcement-learning
+- ML-Agents-SoccerTwos
 ---
+  # **poca** Agent playing **SoccerTwos**
+  This is a trained model of a **poca** agent playing **SoccerTwos**
   using the [Unity ML-Agents Library](https://github.com/Unity-Technologies/ml-agents).
   ## Usage (with ML-Agents)

SoccerTwos.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:275a0485aca002cd3c8515f9b79701644da1dbe14f450c70b6957227bbb42bc7
+size 1766874

SoccerTwos/SoccerTwos-10226.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:275a0485aca002cd3c8515f9b79701644da1dbe14f450c70b6957227bbb42bc7
+size 1766874

SoccerTwos/SoccerTwos-10226.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:009fbab2d9a53a6ef264f5e2d17c20c0b3acf7cdee5325cfc1fbba3ebab35e4f
+size 9479442

SoccerTwos/checkpoint.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a06df9de48ce32dab3fb02eca880b901924b24aa7e3d2521f391886040579c0
+size 9479202

SoccerTwos/events.out.tfevents.1703231756.wkm0572l.11238.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d85cfd3b8131f4551f237a5491837ff6a05060c2438e5f77a25d88095bb1e24
+size 11296

config.json CHANGED Viewed

@@ -1 +1 @@

- {"default_settings": null, "behaviors": {"~~Goalie~~": {"trainer_type": "poca", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "constant", "beta_schedule": "constant", "epsilon_schedule": "constant"}, "checkpoint_interval": 500000, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 5, "even_checkpoints": false, "max_steps": ~~30000000, "time_horizon": 1000, "summary_freq":~~ 10000, "threaded": false, "self_play": {"save_steps": 50000, "team_change": 200000, "swap_steps": 1000, "window": 10, "play_against_latest_model_ratio": 0.5, "initial_elo": 1200.0}, "behavioral_cloning": null}, "Striker": {"trainer_type": "poca", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "constant", "beta_schedule": "constant", "epsilon_schedule": "constant"}, "checkpoint_interval": 500000, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 5, "even_checkpoints": false, "max_steps": 30000000, "time_horizon": 1000, "summary_freq": 10000, "threaded": false, "self_play": {"save_steps": 50000, "team_change": 200000, "swap_steps": ~~4000~~, "window": 10, "play_against_latest_model_ratio": 0.5, "initial_elo": 1200.0}, "behavioral_cloning": null}}, "env_settings": {"env_path": null, "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "~~Proba~~", "initialize_from": null, "load_model": false, "resume": false, "force": false, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}

+ {"default_settings": null, "behaviors": {"SoccerTwos": {"trainer_type": "poca", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "constant", "beta_schedule": "constant", "epsilon_schedule": "constant"}, "checkpoint_interval": 500000, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 5, "even_checkpoints": false, "max_steps": 10000, "time_horizon": 1000, "summary_freq": 10000, "threaded": false, "self_play": {"save_steps": 50000, "team_change": 200000, "swap_steps": 2000, "window": 10, "play_against_latest_model_ratio": 0.5, "initial_elo": 1200.0}, "behavioral_cloning": null}}, "env_settings": {"env_path": null, "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "Soccer", "initialize_from": null, "load_model": false, "resume": false, "force": false, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}

configuration.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 default_settings: null
 behaviors:
-  Goalie:
     trainer_type: poca
     hyperparameters:
       batch_size: 2048
@@ -37,63 +37,14 @@ behaviors:
     init_path: null
     keep_checkpoints: 5
     even_checkpoints: false
-    max_steps: 30000000
     time_horizon: 1000
     summary_freq: 10000
     threaded: false
     self_play:
       save_steps: 50000
       team_change: 200000
-      swap_steps: 1000
-      window: 10
-      play_against_latest_model_ratio: 0.5
-      initial_elo: 1200.0
-    behavioral_cloning: null
-  Striker:
-    trainer_type: poca
-    hyperparameters:
-      batch_size: 2048
-      buffer_size: 20480
-      learning_rate: 0.0003
-      beta: 0.005
-      epsilon: 0.2
-      lambd: 0.95
-      num_epoch: 3
-      learning_rate_schedule: constant
-      beta_schedule: constant
-      epsilon_schedule: constant
-    checkpoint_interval: 500000
-    network_settings:
-      normalize: false
-      hidden_units: 512
-      num_layers: 2
-      vis_encode_type: simple
-      memory: null
-      goal_conditioning_type: hyper
-      deterministic: false
-    reward_signals:
-      extrinsic:
-        gamma: 0.99
-        strength: 1.0
-        network_settings:
-          normalize: false
-          hidden_units: 128
-          num_layers: 2
-          vis_encode_type: simple
-          memory: null
-          goal_conditioning_type: hyper
-          deterministic: false
-    init_path: null
-    keep_checkpoints: 5
-    even_checkpoints: false
-    max_steps: 30000000
-    time_horizon: 1000
-    summary_freq: 10000
-    threaded: false
-    self_play:
-      save_steps: 50000
-      team_change: 200000
-      swap_steps: 4000
       window: 10
       play_against_latest_model_ratio: 0.5
       initial_elo: 1200.0
@@ -119,7 +70,7 @@ engine_settings:
   no_graphics: false
 environment_parameters: null
 checkpoint_settings:
-  run_id: Proba
   initialize_from: null
   load_model: false
   resume: false

 default_settings: null
 behaviors:
+  SoccerTwos:
     trainer_type: poca
     hyperparameters:
       batch_size: 2048
     init_path: null
     keep_checkpoints: 5
     even_checkpoints: false
+    max_steps: 10000
     time_horizon: 1000
     summary_freq: 10000
     threaded: false
     self_play:
       save_steps: 50000
       team_change: 200000
+      swap_steps: 2000
       window: 10
       play_against_latest_model_ratio: 0.5
       initial_elo: 1200.0
   no_graphics: false
 environment_parameters: null
 checkpoint_settings:
+  run_id: Soccer
   initialize_from: null
   load_model: false
   resume: false

run_logs/timers.json CHANGED Viewed

@@ -1,454 +1,190 @@
 {
     "name": "root",
     "gauges": {
-        "Goalie.Policy.Entropy.mean": {
-            "value": 2.6350436210632324,
-            "min": 2.6350436210632324,
-            "max": 3.295673131942749,
-            "count": 180
-        },
-        "Goalie.Policy.Entropy.sum": {
-            "value": 25570.462890625,
-            "min": 21048.62109375,
-            "max": 376798.96875,
-            "count": 180
-        },
-        "Goalie.Environment.EpisodeLength.mean": {
-            "value": 56.851190476190474,
-            "min": 46.45891003460208,
-            "max": 999.0,
-            "count": 180
-        },
-        "Goalie.Environment.EpisodeLength.sum": {
-            "value": 9551.0,
-            "min": 9013.0,
-            "max": 110862.0,
-            "count": 180
-        },
-        "Goalie.Self-play.ELO.mean": {
-            "value": 780.0279731783617,
-            "min": 780.0279731783617,
-            "max": 1197.0217993396393,
-            "count": 178
-        },
-        "Goalie.Self-play.ELO.sum": {
-            "value": 131044.69949396476,
-            "min": 1118.4588168213602,
-            "max": 170427.22083595095,
-            "count": 178
-        },
-        "Goalie.Step.mean": {
-            "value": 1799995.0,
-            "min": 9571.0,
-            "max": 1799995.0,
-            "count": 180
-        },
-        "Goalie.Step.sum": {
-            "value": 1799995.0,
-            "min": 9571.0,
-            "max": 1799995.0,
-            "count": 180
-        },
-        "Goalie.Policy.ExtrinsicBaselineEstimate.mean": {
-            "value": -0.6391041278839111,
-            "min": -0.6458126306533813,
-            "max": 0.07203566282987595,
-            "count": 180
-        },
-        "Goalie.Policy.ExtrinsicBaselineEstimate.sum": {
-            "value": -107.36949157714844,
-            "min": -133.0373992919922,
-            "max": 1.2246062755584717,
-            "count": 180
-        },
-        "Goalie.Policy.ExtrinsicValueEstimate.mean": {
-            "value": -0.6391041278839111,
-            "min": -0.6458126306533813,
-            "max": 0.07203566282987595,
-            "count": 180
-        },
-        "Goalie.Policy.ExtrinsicValueEstimate.sum": {
-            "value": -107.36949157714844,
-            "min": -133.0373992919922,
-            "max": 1.2246062755584717,
-            "count": 180
-        },
-        "Goalie.Environment.CumulativeReward.mean": {
-            "value": 0.059623811895670814,
-            "min": 0.045828039333589865,
-            "max": 0.9996801018714905,
-            "count": 180
-        },
-        "Goalie.Environment.CumulativeReward.sum": {
-            "value": 10.016800398472697,
-            "min": 9.021400986239314,
-            "max": 10.917801141738892,
-            "count": 180
-        },
-        "Goalie.Policy.ExtrinsicReward.mean": {
-            "value": -0.9403761930408931,
-            "min": -0.954171972018536,
-            "max": 0.9996801018714905,
-            "count": 180
-        },
-        "Goalie.Policy.ExtrinsicReward.sum": {
-            "value": -157.98320043087006,
-            "min": -204.1928020119667,
-            "max": 9.996801018714905,
-            "count": 180
-        },
-        "Goalie.Environment.GroupCumulativeReward.mean": {
-            "value": -1.0,
-            "min": -1.0,
-            "max": 0.0,
-            "count": 180
-        },
-        "Goalie.Environment.GroupCumulativeReward.sum": {
-            "value": -168.0,
-            "min": -214.0,
-            "max": 0.0,
-            "count": 180
-        },
-        "Goalie.IsTraining.mean": {
-            "value": 1.0,
-            "min": 1.0,
-            "max": 1.0,
-            "count": 180
-        },
-        "Goalie.IsTraining.sum": {
-            "value": 1.0,
-            "min": 1.0,
-            "max": 1.0,
-            "count": 180
-        },
-        "Goalie.Losses.PolicyLoss.mean": {
-            "value": 0.016466737778197665,
-            "min": 0.012115506251575425,
-            "max": 0.02352036564067627,
-            "count": 86
-        },
-        "Goalie.Losses.PolicyLoss.sum": {
-            "value": 0.016466737778197665,
-            "min": 0.012115506251575425,
-            "max": 0.02352036564067627,
-            "count": 86
-        },
-        "Goalie.Losses.ValueLoss.mean": {
-            "value": 0.022342384917040665,
-            "min": 0.0004670941584966689,
-            "max": 0.033746559917926786,
-            "count": 86
-        },
-        "Goalie.Losses.ValueLoss.sum": {
-            "value": 0.022342384917040665,
-            "min": 0.0004670941584966689,
-            "max": 0.033746559917926786,
-            "count": 86
-        },
-        "Goalie.Losses.BaselineLoss.mean": {
-            "value": 0.02234272056569656,
-            "min": 0.0004670941584966689,
-            "max": 0.033895690863331156,
-            "count": 86
-        },
-        "Goalie.Losses.BaselineLoss.sum": {
-            "value": 0.02234272056569656,
-            "min": 0.0004670941584966689,
-            "max": 0.033895690863331156,
-            "count": 86
-        },
-        "Goalie.Policy.LearningRate.mean": {
-            "value": 0.0003,
-            "min": 0.0003,
-            "max": 0.0003,
-            "count": 86
-        },
-        "Goalie.Policy.LearningRate.sum": {
-            "value": 0.0003,
-            "min": 0.0003,
-            "max": 0.0003,
-            "count": 86
-        },
-        "Goalie.Policy.Epsilon.mean": {
-            "value": 0.20000000000000007,
-            "min": 0.20000000000000007,
-            "max": 0.20000000000000007,
-            "count": 86
-        },
-        "Goalie.Policy.Epsilon.sum": {
-            "value": 0.20000000000000007,
-            "min": 0.20000000000000007,
-            "max": 0.20000000000000007,
-            "count": 86
-        },
-        "Goalie.Policy.Beta.mean": {
-            "value": 0.005000000000000001,
-            "min": 0.005000000000000001,
-            "max": 0.005000000000000001,
-            "count": 86
-        },
-        "Goalie.Policy.Beta.sum": {
-            "value": 0.005000000000000001,
-            "min": 0.005000000000000001,
-            "max": 0.005000000000000001,
-            "count": 86
-        },
-        "Striker.Policy.Entropy.mean": {
-            "value": 2.3203654289245605,
-            "min": 2.2051618099212646,
-            "max": 3.295681953430176,
-            "count": 180
-        },
-        "Striker.Policy.Entropy.sum": {
-            "value": 23760.54296875,
-            "min": 15216.9345703125,
-            "max": 1400902.125,
-            "count": 180
-        },
-        "Striker.Environment.EpisodeLength.mean": {
-            "value": 50.415841584158414,
-            "min": 35.613138686131386,
-            "max": 999.0,
-            "count": 180
-        },
-        "Striker.Environment.EpisodeLength.sum": {
-            "value": 10184.0,
-            "min": 8382.0,
-            "max": 412654.0,
-            "count": 180
-        },
-        "Striker.Step.mean": {
-            "value": 1799842.0,
-            "min": 9512.0,
-            "max": 1799842.0,
-            "count": 180
-        },
-        "Striker.Step.sum": {
-            "value": 1799842.0,
-            "min": 9512.0,
-            "max": 1799842.0,
-            "count": 180
-        },
-        "Striker.Policy.ExtrinsicBaselineEstimate.mean": {
-            "value": 0.6013904213905334,
-            "min": -0.16535364091396332,
-            "max": 0.6533077955245972,
-            "count": 180
-        },
-        "Striker.Policy.ExtrinsicBaselineEstimate.sum": {
-            "value": 120.87947082519531,
-            "min": -4.340723991394043,
-            "max": 179.00633239746094,
-            "count": 180
-        },
-        "Striker.Policy.ExtrinsicValueEstimate.mean": {
-            "value": 0.601508378982544,
-            "min": -0.16497276723384857,
-            "max": 0.6537038087844849,
-            "count": 180
-        },
-        "Striker.Policy.ExtrinsicValueEstimate.sum": {
-            "value": 120.90318298339844,
-            "min": -4.237868785858154,
-            "max": 179.11483764648438,
-            "count": 180
-        },
-        "Striker.Environment.CumulativeReward.mean": {
-            "value": -0.04979403170094413,
-            "min": -0.9997200965881348,
-            "max": -0.035969343590448156,
-            "count": 180
-        },
-        "Striker.Environment.CumulativeReward.sum": {
-            "value": -10.00860037188977,
-            "min": -10.743601087480783,
-            "max": -9.09960101544857,
-            "count": 180
-        },
-        "Striker.Policy.ExtrinsicReward.mean": {
-            "value": 0.8502029683162917,
-            "min": -1.9994401931762695,
-            "max": 0.891693424351894,
-            "count": 180
-        },
-        "Striker.Policy.ExtrinsicReward.sum": {
-            "value": 170.89079663157463,
-            "min": -20.052401542663574,
-            "max": 244.32399827241898,
-            "count": 180
-        },
-        "Striker.Environment.GroupCumulativeReward.mean": {
-            "value": 0.949791041772757,
             "min": 0.0,
-            "max": 0.9636321185279066,
-            "count": 180
         },
-        "Striker.Environment.GroupCumulativeReward.sum": {
-            "value": 190.90799939632416,
             "min": 0.0,
-            "max": 264.0352004766464,
-            "count": 180
-        },
-        "Striker.Self-play.ELO.mean": {
-            "value": 1754.8676722052517,
-            "min": 1220.241356428936,
-            "max": 1754.8676722052517,
-            "count": 174
-        },
-        "Striker.Self-play.ELO.sum": {
-            "value": 354483.26978546084,
-            "min": 2454.501043758067,
-            "max": 477024.1997551883,
-            "count": 174
-        },
-        "Striker.IsTraining.mean": {
             "value": 1.0,
             "min": 1.0,
             "max": 1.0,
-            "count": 180
         },
-        "Striker.IsTraining.sum": {
             "value": 1.0,
             "min": 1.0,
             "max": 1.0,
-            "count": 180
-        },
-        "Striker.Losses.PolicyLoss.mean": {
-            "value": 0.016374270856613294,
-            "min": 0.011526005501703669,
-            "max": 0.02352414818791052,
-            "count": 85
-        },
-        "Striker.Losses.PolicyLoss.sum": {
-            "value": 0.016374270856613294,
-            "min": 0.011526005501703669,
-            "max": 0.02352414818791052,
-            "count": 85
-        },
-        "Striker.Losses.ValueLoss.mean": {
-            "value": 0.023624402284622193,
-            "min": 0.0006618854221111784,
-            "max": 0.0355209739257892,
-            "count": 85
-        },
-        "Striker.Losses.ValueLoss.sum": {
-            "value": 0.023624402284622193,
-            "min": 0.0006618854221111784,
-            "max": 0.0355209739257892,
-            "count": 85
-        },
-        "Striker.Losses.BaselineLoss.mean": {
-            "value": 0.02373644212881724,
-            "min": 0.0006652884049496304,
-            "max": 0.03594138734042644,
-            "count": 85
-        },
-        "Striker.Losses.BaselineLoss.sum": {
-            "value": 0.02373644212881724,
-            "min": 0.0006652884049496304,
-            "max": 0.03594138734042644,
-            "count": 85
-        },
-        "Striker.Policy.LearningRate.mean": {
-            "value": 0.0003,
-            "min": 0.0003,
-            "max": 0.0003,
-            "count": 85
-        },
-        "Striker.Policy.LearningRate.sum": {
-            "value": 0.0003,
-            "min": 0.0003,
-            "max": 0.0003,
-            "count": 85
-        },
-        "Striker.Policy.Epsilon.mean": {
-            "value": 0.20000000000000007,
-            "min": 0.20000000000000007,
-            "max": 0.20000000000000007,
-            "count": 85
-        },
-        "Striker.Policy.Epsilon.sum": {
-            "value": 0.20000000000000007,
-            "min": 0.20000000000000007,
-            "max": 0.20000000000000007,
-            "count": 85
-        },
-        "Striker.Policy.Beta.mean": {
-            "value": 0.005000000000000001,
-            "min": 0.005000000000000001,
-            "max": 0.005000000000000001,
-            "count": 85
-        },
-        "Striker.Policy.Beta.sum": {
-            "value": 0.005000000000000001,
-            "min": 0.005000000000000001,
-            "max": 0.005000000000000001,
-            "count": 85
         }
     },
     "metadata": {
         "timer_format_version": "0.1.0",
-        "start_time_seconds": "1702974733",
         "python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]",
-        "command_line_arguments": "/home/operario/anaconda3/envs/mlagents/bin/mlagents-learn ml-agents/config/poca/StrikersVsGoalie.yaml --run-id=Proba",
         "mlagents_version": "1.0.0",
         "mlagents_envs_version": "1.0.0",
         "communication_protocol_version": "1.5.0",
         "pytorch_version": "2.1.1+cu121",
         "numpy_version": "1.21.2",
-        "end_time_seconds": "1702980564"
     },
-    "total": 5831.547219161999,
     "count": 1,
-    "self": 0.004055126997627667,
     "children": {
         "run_training.setup": {
-            "total": 0.010500086000320152,
             "count": 1,
-            "self": 0.010500086000320152
         },
         "TrainerController.start_learning": {
-            "total": 5831.532663949001,
             "count": 1,
-            "self": 5.20653796030183,
             "children": {
                 "TrainerController._reset_env": {
-                    "total": 5.0229465030006395,
-                    "count": 19,
-                    "self": 5.0229465030006395
                 },
                 "TrainerController.advance": {
-                    "total": 5821.1293753806995,
-                    "count": 360326,
-                    "self": 6.000143237119119,
                     "children": {
                         "env_step": {
-                            "total": 3709.103530842298,
-                            "count": 360326,
-                            "self": 3106.020965303721,
                             "children": {
                                 "SubprocessEnvManager._take_step": {
-                                    "total": 599.6483402995009,
-                                    "count": 360326,
-                                    "self": 29.682954025429353,
                                     "children": {
                                         "TorchPolicy.evaluate": {
-                                            "total": 569.9653862740715,
-                                            "count": 685680,
-                                            "self": 569.9653862740715
                                         }
                                     }
                                 },
                                 "workers": {
-                                    "total": 3.434225239076113,
-                                    "count": 360325,
                                     "self": 0.0,
                                     "children": {
                                         "worker_root": {
-                                            "total": 5702.365667279509,
-                                            "count": 360325,
                                             "is_parallel": true,
-                                            "self": 3209.9978119108673,
                                             "children": {
                                                 "run_training.setup": {
                                                     "total": 0.0,
@@ -457,48 +193,48 @@
                                                     "self": 0.0,
                                                     "children": {
                                                         "steps_from_proto": {
-                                                            "total": 0.0012914009994346998,
                                                             "count": 2,
                                                             "is_parallel": true,
-                                                            "self": 0.0003063059994019568,
                                                             "children": {
                                                                 "_process_rank_one_or_two_observation": {
-                                                                    "total": 0.000985095000032743,
-                                                                    "count": 6,
                                                                     "is_parallel": true,
-                                                                    "self": 0.000985095000032743
                                                                 }
                                                             }
                                                         },
                                                         "UnityEnvironment.step": {
-                                                            "total": 0.020311455999944883,
                                                             "count": 1,
                                                             "is_parallel": true,
-                                                            "self": 0.0004727990008177585,
                                                             "children": {
                                                                 "UnityEnvironment._generate_step_input": {
-                                                                    "total": 0.00023028099985822337,
                                                                     "count": 1,
                                                                     "is_parallel": true,
-                                                                    "self": 0.00023028099985822337
                                                                 },
                                                                 "communicator.exchange": {
-                                                                    "total": 0.01830136799981119,
                                                                     "count": 1,
                                                                     "is_parallel": true,
-                                                                    "self": 0.01830136799981119
                                                                 },
                                                                 "steps_from_proto": {
-                                                                    "total": 0.00130700799945771,
                                                                     "count": 2,
                                                                     "is_parallel": true,
-                                                                    "self": 0.0002533939996283152,
                                                                     "children": {
                                                                         "_process_rank_one_or_two_observation": {
-                                                                            "total": 0.0010536139998293947,
-                                                                            "count": 6,
                                                                             "is_parallel": true,
-                                                                            "self": 0.0010536139998293947
                                                                         }
                                                                     }
                                                                 }
@@ -507,52 +243,38 @@
                                                     }
                                                 },
                                                 "UnityEnvironment.step": {
-                                                    "total": 2492.3496092696414,
-                                                    "count": 360324,
                                                     "is_parallel": true,
-                                                    "self": 116.63121924221014,
                                                     "children": {
                                                         "UnityEnvironment._generate_step_input": {
-                                                            "total": 65.49518589350373,
-                                                            "count": 360324,
                                                             "is_parallel": true,
-                                                            "self": 65.49518589350373
                                                         },
                                                         "communicator.exchange": {
-                                                            "total": 1983.5648633142227,
-                                                            "count": 360324,
                                                             "is_parallel": true,
-                                                            "self": 1983.5648633142227
                                                         },
                                                         "steps_from_proto": {
-                                                            "total": 326.6583408197048,
-                                                            "count": 720648,
                                                             "is_parallel": true,
-                                                            "self": 63.17436062367142,
                                                             "children": {
                                                                 "_process_rank_one_or_two_observation": {
-                                                                    "total": 263.4839801960334,
-                                                                    "count": 2161944,
                                                                     "is_parallel": true,
-                                                                    "self": 263.4839801960334
                                                                 }
                                                             }
                                                         }
                                                     }
-                                                },
-                                                "steps_from_proto": {
-                                                    "total": 0.018246098999952665,
-                                                    "count": 36,
-                                                    "is_parallel": true,
-                                                    "self": 0.003693893993840902,
-                                                    "children": {
-                                                        "_process_rank_one_or_two_observation": {
-                                                            "total": 0.014552205006111762,
-                                                            "count": 108,
-                                                            "is_parallel": true,
-                                                            "self": 0.014552205006111762
-                                                        }
-                                                    }
                                                 }
                                             }
                                         }
@@ -561,47 +283,33 @@
                             }
                         },
                         "trainer_advance": {
-                            "total": 2106.0257013012824,
-                            "count": 720650,
-                            "self": 37.160836643673065,
                             "children": {
                                 "process_trajectory": {
-                                    "total": 342.6532845196207,
-                                    "count": 720650,
-                                    "self": 341.9924087116178,
-                                    "children": {
-                                        "RLTrainer._checkpoint": {
-                                            "total": 0.6608758080028565,
-                                            "count": 6,
-                                            "self": 0.6608758080028565
-                                        }
-                                    }
-                                },
-                                "_update_policy": {
-                                    "total": 1726.2115801379887,
-                                    "count": 173,
-                                    "self": 296.8522167799774,
-                                    "children": {
-                                        "TorchPOCAOptimizer.update": {
-                                            "total": 1429.3593633580113,
-                                            "count": 5190,
-                                            "self": 1429.3593633580113
-                                        }
-                                    }
                                 }
                             }
                         }
                     }
                 },
                 "TrainerController._save_models": {
-                    "total": 0.1738041049993626,
                     "count": 1,
-                    "self": 0.00232954700186383,
                     "children": {
                         "RLTrainer._checkpoint": {
-                            "total": 0.17147455799749878,
-                            "count": 2,
-                            "self": 0.17147455799749878
                         }
                     }
                 }

 {
     "name": "root",
     "gauges": {
+        "SoccerTwos.Policy.Entropy.mean": {
+            "value": 3.295698404312134,
+            "min": 3.295698404312134,
+            "max": 3.295698404312134,
+            "count": 1
+        },
+        "SoccerTwos.Policy.Entropy.sum": {
+            "value": 105462.3515625,
+            "min": 105462.3515625,
+            "max": 105462.3515625,
+            "count": 1
+        },
+        "SoccerTwos.Environment.EpisodeLength.mean": {
+            "value": 510.3,
+            "min": 510.3,
+            "max": 510.3,
+            "count": 1
+        },
+        "SoccerTwos.Environment.EpisodeLength.sum": {
+            "value": 20412.0,
+            "min": 20412.0,
+            "max": 20412.0,
+            "count": 1
+        },
+        "SoccerTwos.Self-play.ELO.mean": {
+            "value": 1198.6983882804839,
+            "min": 1198.6983882804839,
+            "max": 1198.6983882804839,
+            "count": 1
+        },
+        "SoccerTwos.Self-play.ELO.sum": {
+            "value": 16781.777435926775,
+            "min": 16781.777435926775,
+            "max": 16781.777435926775,
+            "count": 1
+        },
+        "SoccerTwos.Step.mean": {
+            "value": 9226.0,
+            "min": 9226.0,
+            "max": 9226.0,
+            "count": 1
+        },
+        "SoccerTwos.Step.sum": {
+            "value": 9226.0,
+            "min": 9226.0,
+            "max": 9226.0,
+            "count": 1
+        },
+        "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
+            "value": 0.06891250610351562,
+            "min": 0.06891250610351562,
+            "max": 0.06891250610351562,
+            "count": 1
+        },
+        "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
+            "value": 1.3093376159667969,
+            "min": 1.3093376159667969,
+            "max": 1.3093376159667969,
+            "count": 1
+        },
+        "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
+            "value": 0.06894762814044952,
+            "min": 0.06894762814044952,
+            "max": 0.06894762814044952,
+            "count": 1
+        },
+        "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
+            "value": 1.3100049495697021,
+            "min": 1.3100049495697021,
+            "max": 1.3100049495697021,
+            "count": 1
+        },
+        "SoccerTwos.Environment.CumulativeReward.mean": {
+            "value": 0.0,
             "min": 0.0,
+            "max": 0.0,
+            "count": 1
         },
+        "SoccerTwos.Environment.CumulativeReward.sum": {
+            "value": 0.0,
             "min": 0.0,
+            "max": 0.0,
+            "count": 1
+        },
+        "SoccerTwos.Policy.ExtrinsicReward.mean": {
+            "value": -0.35837894991824504,
+            "min": -0.35837894991824504,
+            "max": -0.35837894991824504,
+            "count": 1
+        },
+        "SoccerTwos.Policy.ExtrinsicReward.sum": {
+            "value": -6.809200048446655,
+            "min": -6.809200048446655,
+            "max": -6.809200048446655,
+            "count": 1
+        },
+        "SoccerTwos.Environment.GroupCumulativeReward.mean": {
+            "value": -0.35837894991824504,
+            "min": -0.35837894991824504,
+            "max": -0.35837894991824504,
+            "count": 1
+        },
+        "SoccerTwos.Environment.GroupCumulativeReward.sum": {
+            "value": -6.809200048446655,
+            "min": -6.809200048446655,
+            "max": -6.809200048446655,
+            "count": 1
+        },
+        "SoccerTwos.IsTraining.mean": {
             "value": 1.0,
             "min": 1.0,
             "max": 1.0,
+            "count": 1
         },
+        "SoccerTwos.IsTraining.sum": {
             "value": 1.0,
             "min": 1.0,
             "max": 1.0,
+            "count": 1
         }
     },
     "metadata": {
         "timer_format_version": "0.1.0",
+        "start_time_seconds": "1703231750",
         "python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]",
+        "command_line_arguments": "/home/operario/anaconda3/envs/mlagents/bin/mlagents-learn SoccerTwos.yaml --run-id=Soccer",
         "mlagents_version": "1.0.0",
         "mlagents_envs_version": "1.0.0",
         "communication_protocol_version": "1.5.0",
         "pytorch_version": "2.1.1+cu121",
         "numpy_version": "1.21.2",
+        "end_time_seconds": "1703231769"
     },
+    "total": 18.828908071000114,
     "count": 1,
+    "self": 0.0038752350001232116,
     "children": {
         "run_training.setup": {
+            "total": 0.011371966999831784,
             "count": 1,
+            "self": 0.011371966999831784
         },
         "TrainerController.start_learning": {
+            "total": 18.81366086900016,
             "count": 1,
+            "self": 0.012658913007271622,
             "children": {
                 "TrainerController._reset_env": {
+                    "total": 6.519004453999969,
+                    "count": 1,
+                    "self": 6.519004453999969
                 },
                 "TrainerController.advance": {
+                    "total": 12.224295807993258,
+                    "count": 1005,
+                    "self": 0.012754022996887215,
                     "children": {
                         "env_step": {
+                            "total": 11.209075801001745,
+                            "count": 1005,
+                            "self": 9.642230745010238,
                             "children": {
                                 "SubprocessEnvManager._take_step": {
+                                    "total": 1.558794789998501,
+                                    "count": 1005,
+                                    "self": 0.07714688002579351,
                                     "children": {
                                         "TorchPolicy.evaluate": {
+                                            "total": 1.4816479099727076,
+                                            "count": 2000,
+                                            "self": 1.4816479099727076
                                         }
                                     }
                                 },
                                 "workers": {
+                                    "total": 0.008050265993006178,
+                                    "count": 1005,
                                     "self": 0.0,
                                     "children": {
                                         "worker_root": {
+                                            "total": 18.191109271007917,
+                                            "count": 1005,
                                             "is_parallel": true,
+                                            "self": 9.978554264029299,
                                             "children": {
                                                 "run_training.setup": {
                                                     "total": 0.0,
                                                     "self": 0.0,
                                                     "children": {
                                                         "steps_from_proto": {
+                                                            "total": 0.0016817149999042158,
                                                             "count": 2,
                                                             "is_parallel": true,
+                                                            "self": 0.0005443020008897292,
                                                             "children": {
                                                                 "_process_rank_one_or_two_observation": {
+                                                                    "total": 0.0011374129990144866,
+                                                                    "count": 8,
                                                                     "is_parallel": true,
+                                                                    "self": 0.0011374129990144866
                                                                 }
                                                             }
                                                         },
                                                         "UnityEnvironment.step": {
+                                                            "total": 0.0247552379996705,
                                                             "count": 1,
                                                             "is_parallel": true,
+                                                            "self": 0.0005118170001878752,
                                                             "children": {
                                                                 "UnityEnvironment._generate_step_input": {
+                                                                    "total": 0.0006443479996960377,
                                                                     "count": 1,
                                                                     "is_parallel": true,
+                                                                    "self": 0.0006443479996960377
                                                                 },
                                                                 "communicator.exchange": {
+                                                                    "total": 0.02191044499977579,
                                                                     "count": 1,
                                                                     "is_parallel": true,
+                                                                    "self": 0.02191044499977579
                                                                 },
                                                                 "steps_from_proto": {
+                                                                    "total": 0.0016886280000107945,
                                                                     "count": 2,
                                                                     "is_parallel": true,
+                                                                    "self": 0.0003622190006353776,
                                                                     "children": {
                                                                         "_process_rank_one_or_two_observation": {
+                                                                            "total": 0.001326408999375417,
+                                                                            "count": 8,
                                                                             "is_parallel": true,
+                                                                            "self": 0.001326408999375417
                                                                         }
                                                                     }
                                                                 }
                                                     }
                                                 },
                                                 "UnityEnvironment.step": {
+                                                    "total": 8.212555006978619,
+                                                    "count": 1004,
                                                     "is_parallel": true,
+                                                    "self": 0.3458426219631292,
                                                     "children": {
                                                         "UnityEnvironment._generate_step_input": {
+                                                            "total": 0.20143538798947702,
+                                                            "count": 1004,
                                                             "is_parallel": true,
+                                                            "self": 0.20143538798947702
                                                         },
                                                         "communicator.exchange": {
+                                                            "total": 6.67237666901201,
+                                                            "count": 1004,
                                                             "is_parallel": true,
+                                                            "self": 6.67237666901201
                                                         },
                                                         "steps_from_proto": {
+                                                            "total": 0.9929003280140023,
+                                                            "count": 2008,
                                                             "is_parallel": true,
+                                                            "self": 0.18314762201407575,
                                                             "children": {
                                                                 "_process_rank_one_or_two_observation": {
+                                                                    "total": 0.8097527059999265,
+                                                                    "count": 8032,
                                                                     "is_parallel": true,
+                                                                    "self": 0.8097527059999265
                                                                 }
                                                             }
                                                         }
                                                     }
                                                 }
                                             }
                                         }
                             }
                         },
                         "trainer_advance": {
+                            "total": 1.0024659839946253,
+                            "count": 1005,
+                            "self": 0.05726587000845029,
                             "children": {
                                 "process_trajectory": {
+                                    "total": 0.945200113986175,
+                                    "count": 1005,
+                                    "self": 0.945200113986175
                                 }
                             }
                         }
                     }
                 },
+                "trainer_threads": {
+                    "total": 6.089994712965563e-07,
+                    "count": 1,
+                    "self": 6.089994712965563e-07
+                },
                 "TrainerController._save_models": {
+                    "total": 0.05770108500018978,
                     "count": 1,
+                    "self": 0.0008857650000209105,
                     "children": {
                         "RLTrainer._checkpoint": {
+                            "total": 0.05681532000016887,
+                            "count": 1,
+                            "self": 0.05681532000016887
                         }
                     }
                 }

run_logs/training_status.json CHANGED Viewed

@@ -1,101 +1,24 @@
 {
-    "Goalie": {
         "checkpoints": [
             {
-                "steps": 499419,
-                "file_path": "results/Proba/Goalie/Goalie-499419.onnx",
-                "reward": 0.7520077661252939,
-                "creation_time": 1702976047.3382895,
                 "auxillary_file_paths": [
-                    "results/Proba/Goalie/Goalie-499419.pt"
-                ]
-            },
-            {
-                "steps": 999974,
-                "file_path": "results/Proba/Goalie/Goalie-999974.onnx",
-                "reward": 0.3689773957824932,
-                "creation_time": 1702977402.499321,
-                "auxillary_file_paths": [
-                    "results/Proba/Goalie/Goalie-999974.pt"
-                ]
-            },
-            {
-                "steps": 1499980,
-                "file_path": "results/Proba/Goalie/Goalie-1499980.onnx",
-                "reward": 0.042800000247855984,
-                "creation_time": 1702979254.7808285,
-                "auxillary_file_paths": [
-                    "results/Proba/Goalie/Goalie-1499980.pt"
-                ]
-            },
-            {
-                "steps": 1807818,
-                "file_path": "results/Proba/Goalie/Goalie-1807818.onnx",
-                "reward": null,
-                "creation_time": 1702980564.4739382,
-                "auxillary_file_paths": [
-                    "results/Proba/Goalie/Goalie-1807818.pt"
-                ]
-            }
-        ],
-        "elo": 771.3258240415493,
-        "final_checkpoint": {
-            "steps": 1807818,
-            "file_path": "results/Proba/Goalie.onnx",
-            "reward": null,
-            "creation_time": 1702980564.4739382,
-            "auxillary_file_paths": [
-                "results/Proba/Goalie/Goalie-1807818.pt"
-            ]
-        }
-    },
-    "Striker": {
-        "checkpoints": [
-            {
-                "steps": 499317,
-                "file_path": "results/Proba/Striker/Striker-499317.onnx",
-                "reward": -0.5727692818125854,
-                "creation_time": 1702976347.5755029,
-                "auxillary_file_paths": [
-                    "results/Proba/Striker/Striker-499317.pt"
-                ]
-            },
-            {
-                "steps": 999985,
-                "file_path": "results/Proba/Striker/Striker-999985.onnx",
-                "reward": -0.07280247349200057,
-                "creation_time": 1702977703.5105634,
-                "auxillary_file_paths": [
-                    "results/Proba/Striker/Striker-999985.pt"
-                ]
-            },
-            {
-                "steps": 1499979,
-                "file_path": "results/Proba/Striker/Striker-1499979.onnx",
-                "reward": -0.05407068019914896,
-                "creation_time": 1702979607.5258744,
-                "auxillary_file_paths": [
-                    "results/Proba/Striker/Striker-1499979.pt"
-                ]
-            },
-            {
-                "steps": 1802718,
-                "file_path": "results/Proba/Striker/Striker-1802718.onnx",
-                "reward": -0.03104210407228062,
-                "creation_time": 1702980564.5476835,
-                "auxillary_file_paths": [
-                    "results/Proba/Striker/Striker-1802718.pt"
                 ]
             }
         ],
-        "elo": 1755.7615527828686,
         "final_checkpoint": {
-            "steps": 1802718,
-            "file_path": "results/Proba/Striker.onnx",
-            "reward": -0.03104210407228062,
-            "creation_time": 1702980564.5476835,
             "auxillary_file_paths": [
-                "results/Proba/Striker/Striker-1802718.pt"
             ]
         }
     },

 {
+    "SoccerTwos": {
+        "elo": 1197.0439391453008,
         "checkpoints": [
             {
+                "steps": 10226,
+                "file_path": "results/Soccer/SoccerTwos/SoccerTwos-10226.onnx",
+                "reward": 0.0,
+                "creation_time": 1703231769.47281,
                 "auxillary_file_paths": [
+                    "results/Soccer/SoccerTwos/SoccerTwos-10226.pt"
                 ]
             }
         ],
         "final_checkpoint": {
+            "steps": 10226,
+            "file_path": "results/Soccer/SoccerTwos.onnx",
+            "reward": 0.0,
+            "creation_time": 1703231769.47281,
             "auxillary_file_paths": [
+                "results/Soccer/SoccerTwos/SoccerTwos-10226.pt"
             ]
         }
     },