{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.8589556217193604, "min": 1.7968449592590332, "max": 3.2958035469055176, "count": 1000 }, "SoccerTwos.Policy.Entropy.sum": { "value": 44614.93359375, "min": 17576.984375, "max": 158781.328125, "count": 1000 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 63.19753086419753, "min": 43.389380530973455, "max": 999.0, "count": 1000 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 20476.0, "min": 11940.0, "max": 30392.0, "count": 1000 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1526.3366739403805, "min": 1174.8256778602927, "max": 1550.0887477943952, "count": 948 }, "SoccerTwos.Self-play.ELO.sum": { "value": 247266.54117834163, "min": 2350.0433218615703, "max": 326047.0684427479, "count": 948 }, "SoccerTwos.Step.mean": { "value": 9999862.0, "min": 9212.0, "max": 9999862.0, "count": 1000 }, "SoccerTwos.Step.sum": { "value": 9999862.0, "min": 9212.0, "max": 9999862.0, "count": 1000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.03417586162686348, "min": -1.004157304763794, "max": 0.15304332971572876, "count": 1000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -5.502313613891602, "min": -20.68668556213379, "max": 20.247821807861328, "count": 1000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.032918691635131836, "min": -0.911669909954071, "max": 0.15981008112430573, "count": 1000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -5.2999091148376465, "min": -20.943920135498047, "max": 19.441890716552734, "count": 1000 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1000 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1000 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.08191055836884872, "min": -0.5454545454545454, "max": 0.45976000513349263, "count": 1000 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -13.187599897384644, "min": -57.70120018720627, "max": 53.491599917411804, "count": 1000 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.08191055836884872, "min": -0.5454545454545454, "max": 0.45976000513349263, "count": 1000 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -13.187599897384644, "min": -57.70120018720627, "max": 53.491599917411804, "count": 1000 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1000 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1000 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.019980312860570847, "min": 0.009590227813168895, "max": 0.025493065896444022, "count": 478 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.019980312860570847, "min": 0.009590227813168895, "max": 0.025493065896444022, "count": 478 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.0832578254242738, "min": 4.7653056422329125e-07, "max": 0.1161196747329086, "count": 478 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.0832578254242738, "min": 4.7653056422329125e-07, "max": 0.1161196747329086, "count": 478 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.08481532682975133, "min": 4.451297523170676e-07, "max": 0.12645079990228017, "count": 478 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.08481532682975133, "min": 4.451297523170676e-07, "max": 0.12645079990228017, "count": 478 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0005000000000000001, "min": 0.0005000000000000001, 
"max": 0.0005000000000000001, "count": 478 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0005000000000000001, "min": 0.0005000000000000001, "max": 0.0005000000000000001, "count": 478 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 478 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 478 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 478 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 478 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1701793030", "python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:34:57) [MSC v.1936 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\matan\\miniconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.1.1+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1701844919" }, "total": 51890.59303139988, "count": 1, "self": 3.2154135999735445, "children": { "run_training.setup": { "total": 0.16921620001085103, "count": 1, "self": 0.16921620001085103 }, "TrainerController.start_learning": { "total": 51887.2084015999, "count": 1, "self": 19.635966478148475, "children": { "TrainerController._reset_env": { "total": 8.817512499401346, "count": 50, "self": 8.817512499401346 }, "TrainerController.advance": { "total": 51858.4699838222, "count": 674615, "self": 21.030452833510935, "children": { "env_step": { "total": 17000.4457803627, "count": 674615, "self": 12946.687948333798, "children": { "SubprocessEnvManager._take_step": { "total": 4039.4298561678734, "count": 674615, "self": 143.63739571580663, "children": { "TorchPolicy.evaluate": { "total": 3895.7924604520667, "count": 1268924, "self": 3895.7924604520667 } } }, "workers": { "total": 14.3279758610297, "count": 674615, "self": 0.0, "children": { "worker_root": { "total": 51847.647360827774, "count": 674615, "is_parallel": true, "self": 41657.57638210198, "children": { "steps_from_proto": { "total": 0.12065709917806089, "count": 100, "is_parallel": true, "self": 0.023648294620215893, "children": { "_process_rank_one_or_two_observation": { "total": 0.097008804557845, "count": 400, "is_parallel": true, "self": 0.097008804557845 } } }, "UnityEnvironment.step": { "total": 10189.950321626617, "count": 674615, "is_parallel": true, "self": 522.9825835197698, "children": { "UnityEnvironment._generate_step_input": { "total": 506.43297991901636, "count": 674615, "is_parallel": true, "self": 506.43297991901636 }, "communicator.exchange": { "total": 7511.215284928912, "count": 674615, "is_parallel": true, "self": 7511.215284928912 }, "steps_from_proto": { "total": 1649.3194732589182, "count": 1349230, "is_parallel": true, "self": 330.0740717528388, "children": { "_process_rank_one_or_two_observation": { "total": 1319.2454015060794, "count": 5396920, "is_parallel": true, "self": 1319.2454015060794 } } } } } } } } } } }, "trainer_advance": { "total": 34836.99375062599, "count": 674615, "self": 152.04400805849582, "children": { "process_trajectory": { "total": 4999.631665664958, "count": 
674615, "self": 4995.5961233654525, "children": { "RLTrainer._checkpoint": { "total": 4.035542299505323, "count": 20, "self": 4.035542299505323 } } }, "_update_policy": { "total": 29685.318076902535, "count": 478, "self": 1836.6619248155039, "children": { "TorchPOCAOptimizer.update": { "total": 27848.65615208703, "count": 14340, "self": 27848.65615208703 } } } } } } }, "trainer_threads": { "total": 1.400010660290718e-06, "count": 1, "self": 1.400010660290718e-06 }, "TrainerController._save_models": { "total": 0.2849374001380056, "count": 1, "self": 0.015972800087183714, "children": { "RLTrainer._checkpoint": { "total": 0.2689646000508219, "count": 1, "self": 0.2689646000508219 } } } } } } }