{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.355676531791687, "min": 1.2595282793045044, "max": 3.2957334518432617, "count": 5000 }, "SoccerTwos.Policy.Entropy.sum": { "value": 27503.96484375, "min": 18434.8828125, "max": 136131.90625, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 63.753246753246756, "min": 38.04651162790697, "max": 999.0, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19636.0, "min": 16352.0, "max": 26792.0, "count": 5000 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1753.9429907197841, "min": 1202.1977274614337, "max": 1760.1562520316327, "count": 4997 }, "SoccerTwos.Self-play.ELO.sum": { "value": 270107.22057084675, "min": 2423.3702738040024, "max": 433800.0533564773, "count": 4997 }, "SoccerTwos.Step.mean": { "value": 49999960.0, "min": 9418.0, "max": 49999960.0, "count": 5000 }, "SoccerTwos.Step.sum": { "value": 49999960.0, "min": 9418.0, "max": 49999960.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.011028742417693138, "min": -0.14795070886611938, "max": 0.2025386244058609, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 1.709455132484436, "min": -27.66678237915039, "max": 36.051876068115234, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.0136345736682415, "min": -0.1496773511171341, "max": 0.20268335938453674, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 2.113358974456787, "min": -27.98966407775879, "max": 36.07763671875, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.0886129040871897, "min": -0.4509333305888706, "max": 0.4753636428804109, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 13.735000133514404, "min": -65.7037997841835, "max": 78.1201999783516, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.0886129040871897, "min": -0.4509333305888706, "max": 0.4753636428804109, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 13.735000133514404, "min": -65.7037997841835, "max": 78.1201999783516, "count": 5000 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.019243442461205025, "min": 0.010030461488349829, "max": 0.02729246802239989, "count": 2425 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.019243442461205025, "min": 0.010030461488349829, "max": 0.02729246802239989, "count": 2425 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.10468184426426888, "min": 2.6213669601323395e-05, "max": 0.13021189173062642, "count": 2425 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.10468184426426888, "min": 2.6213669601323395e-05, "max": 0.13021189173062642, "count": 2425 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.10552370697259902, "min": 2.5664739465961852e-05, "max": 0.1335003413259983, "count": 2425 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.10552370697259902, "min": 2.5664739465961852e-05, "max": 0.1335003413259983, "count": 2425 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2425 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2425 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 2425 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 2425 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 2425 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 2425 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1700343720", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\Bart\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.1.1+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1700541470" }, "total": 197750.01964850002, "count": 1, "self": 2.93624120001914, "children": { "run_training.setup": { "total": 0.16958929999964312, "count": 1, "self": 0.16958929999964312 }, "TrainerController.start_learning": { "total": 197746.913818, "count": 1, "self": 114.08844991368824, "children": { "TrainerController._reset_env": { "total": 15.87249729965697, "count": 250, "self": 15.87249729965697 }, "TrainerController.advance": { "total": 197616.63309128664, "count": 3458165, "self": 121.2730981123168, "children": { "env_step": { "total": 92693.30244368566, "count": 3458165, "self": 68691.84114710255, "children": { "SubprocessEnvManager._take_step": { "total": 23931.51071588529, "count": 3458165, "self": 664.9480715348036, "children": { "TorchPolicy.evaluate": { "total": 23266.562644350488, "count": 6278838, "self": 23266.562644350488 } } }, "workers": { "total": 69.95058069782681, "count": 3458165, "self": 0.0, "children": { "worker_root": { "total": 197573.14665168486, "count": 3458165, "is_parallel": true, "self": 145118.66756776237, "children": { "steps_from_proto": { "total": 0.5794901004701387, "count": 500, "is_parallel": true, "self": 0.12235650054935832, "children": { "_process_rank_one_or_two_observation": { "total": 0.4571335999207804, "count": 2000, "is_parallel": true, "self": 0.4571335999207804 } } }, "UnityEnvironment.step": { "total": 52453.899593822, "count": 3458165, "is_parallel": true, "self": 2489.8652344180155, "children": { "UnityEnvironment._generate_step_input": { "total": 2045.738513398479, "count": 3458165, "is_parallel": true, "self": 2045.738513398479 }, "communicator.exchange": { "total": 39605.51885173192, "count": 3458165, "is_parallel": true, "self": 39605.51885173192 }, "steps_from_proto": { "total": 8312.77699427359, "count": 6916330, "is_parallel": true, "self": 1717.371448144404, "children": { "_process_rank_one_or_two_observation": { "total": 6595.405546129186, "count": 27665320, "is_parallel": true, "self": 6595.405546129186 } } } } } } } } } } }, "trainer_advance": { "total": 104802.05754948867, "count": 3458165, "self": 1222.6277214249421, "children": { "process_trajectory": { "total": 24035.652533464032, "count": 3458165, "self": 24009.69206736404, "children": { "RLTrainer._checkpoint": { "total": 25.9604660999903, "count": 100, "self": 25.9604660999903 } } }, "_update_policy": { "total": 79543.7772945997, "count": 2425, "self": 9002.741671698313, "children": { "TorchPOCAOptimizer.update": { "total": 70541.03562290139, "count": 72750, "self": 70541.03562290139 } } } } } } }, "trainer_threads": { "total": 1.100008375942707e-06, "count": 1, "self": 1.100008375942707e-06 }, "TrainerController._save_models": { "total": 0.31977840000763535, "count": 1, "self": 0.051443700038362294, "children": { "RLTrainer._checkpoint": { "total": 0.26833469996927306, "count": 1, "self": 0.26833469996927306 } } } } } } }