{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.638219952583313, "min": 1.6089037656784058, "max": 3.2957236766815186, "count": 1011 }, "SoccerTwos.Policy.Entropy.sum": { "value": 30982.015625, "min": 22959.16015625, "max": 148542.90625, "count": 1011 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 57.593023255813954, "min": 40.107438016528924, "max": 999.0, "count": 1011 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19812.0, "min": 14156.0, "max": 26768.0, "count": 1011 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1613.0008561851519, "min": 1179.2735291350461, "max": 1647.6743100257872, "count": 983 }, "SoccerTwos.Self-play.ELO.sum": { "value": 277436.1472638461, "min": 2359.847192796909, "max": 394732.96866034204, "count": 983 }, "SoccerTwos.Step.mean": { "value": 10109949.0, "min": 9402.0, "max": 10109949.0, "count": 1011 }, "SoccerTwos.Step.sum": { "value": 10109949.0, "min": 9402.0, "max": 10109949.0, "count": 1011 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.0005055301007814705, "min": -0.1182592436671257, "max": 0.2634982466697693, "count": 1011 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 0.08745670318603516, "min": -20.22408676147461, "max": 32.67378234863281, "count": 1011 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.0035853772424161434, "min": -0.12163829803466797, "max": 0.26015952229499817, "count": 1011 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 0.6202702522277832, "min": -20.517311096191406, "max": 32.25978088378906, "count": 1011 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1011 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1011 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.0023271686079874205, "min": -0.5673882347695968, "max": 0.5339767441673334, "count": 1011 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -0.40260016918182373, "min": -59.69360011816025, "max": 58.12560027837753, "count": 1011 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.0023271686079874205, "min": -0.5673882347695968, "max": 0.5339767441673334, "count": 1011 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -0.40260016918182373, "min": -59.69360011816025, "max": 58.12560027837753, "count": 1011 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1011 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1011 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.013799193984110995, "min": 0.01064961291073511, "max": 0.025349935970734806, "count": 486 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.013799193984110995, "min": 0.01064961291073511, "max": 0.025349935970734806, "count": 486 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.10146005029479663, "min": 2.4162537101801716e-06, "max": 0.12898998806873957, "count": 486 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.10146005029479663, "min": 2.4162537101801716e-06, "max": 0.12898998806873957, "count": 486 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.1026929795742035, "min": 2.4604478691495992e-06, "max": 0.13161144281427065, "count": 486 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.1026929795742035, "min": 2.4604478691495992e-06, "max": 0.13161144281427065, "count": 486 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 486 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 486 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 486 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 486 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 486 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 486 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1715519873", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\Rafae\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.3.0+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1715535465" }, "total": 15591.33269739995, "count": 1, "self": 0.091457299888134, "children": { "run_training.setup": { "total": 0.058069500024430454, "count": 1, "self": 0.058069500024430454 }, "TrainerController.start_learning": { "total": 15591.183170600038, "count": 1, "self": 9.14924971171422, "children": { "TrainerController._reset_env": { "total": 6.0616550002014264, "count": 51, "self": 6.0616550002014264 }, "TrainerController.advance": { "total": 15575.892421888071, "count": 689472, "self": 9.409835662168916, "children": { "env_step": { "total": 6583.059942214342, "count": 689472, "self": 5184.5185684148455, "children": { "SubprocessEnvManager._take_step": { "total": 1392.9289004872553, "count": 689472, "self": 58.66539047745755, "children": { "TorchPolicy.evaluate": { "total": 1334.2635100097978, "count": 1276906, "self": 1334.2635100097978 } } }, "workers": { "total": 5.612473312241491, "count": 689471, "self": 0.0, "children": { "worker_root": { "total": 15575.050148127077, "count": 689471, "is_parallel": true, "self": 11396.621896038007, "children": { "steps_from_proto": { "total": 0.051714500470552593, "count": 102, "is_parallel": true, "self": 0.01063710026210174, "children": { "_process_rank_one_or_two_observation": { "total": 0.041077400208450854, "count": 408, "is_parallel": true, "self": 0.041077400208450854 } } }, "UnityEnvironment.step": { "total": 4178.3765375886, "count": 689471, "is_parallel": true, "self": 195.98685446876334, "children": { "UnityEnvironment._generate_step_input": { "total": 138.73867431184044, "count": 689471, "is_parallel": true, "self": 138.73867431184044 }, "communicator.exchange": { "total": 3228.0042822958203, "count": 689471, "is_parallel": true, "self": 3228.0042822958203 }, "steps_from_proto": { "total": 615.6467265121755, "count": 1378942, "is_parallel": true, "self": 125.21249386359705, "children": { "_process_rank_one_or_two_observation": { "total": 490.4342326485785, "count": 5515768, "is_parallel": true, "self": 490.4342326485785 } } } } } } } } } } }, "trainer_advance": { "total": 8983.42264401156, "count": 689471, "self": 76.34780337876873, "children": { "process_trajectory": { "total": 1440.100434932916, "count": 689471, "self": 1438.5187913328991, "children": { "RLTrainer._checkpoint": { "total": 1.5816436000168324, "count": 20, "self": 1.5816436000168324 } } }, "_update_policy": { "total": 7466.974405699875, "count": 487, "self": 866.51175010507, "children": { "TorchPOCAOptimizer.update": { "total": 6600.462655594805, "count": 14610, "self": 6600.462655594805 } } } } } } }, "trainer_threads": { "total": 7.00005330145359e-07, "count": 1, "self": 7.00005330145359e-07 }, "TrainerController._save_models": { "total": 0.07984330004546791, "count": 1, "self": 0.0018597000162117183, "children": { "RLTrainer._checkpoint": { "total": 0.0779836000292562, "count": 1, "self": 0.0779836000292562 } } } } } } }