{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.7217804193496704, "min": 1.6875516176223755, "max": 3.295717716217041, "count": 699 }, "SoccerTwos.Policy.Entropy.sum": { "value": 34380.51171875, "min": 24468.837890625, "max": 123590.421875, "count": 699 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 47.14563106796116, "min": 42.75221238938053, "max": 999.0, "count": 699 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19424.0, "min": 6876.0, "max": 33260.0, "count": 699 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1578.3195672727065, "min": 1197.28074009066, "max": 1617.8895409730983, "count": 689 }, "SoccerTwos.Self-play.ELO.sum": { "value": 325133.83085817756, "min": 2394.56148018132, "max": 361897.8473158516, "count": 689 }, "SoccerTwos.Step.mean": { "value": 6999969.0, "min": 9886.0, "max": 6999969.0, "count": 700 }, "SoccerTwos.Step.sum": { "value": 6999969.0, "min": 9886.0, "max": 6999969.0, "count": 700 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.02631920948624611, "min": -0.14210456609725952, "max": 0.1985418200492859, "count": 700 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 5.421757221221924, "min": -26.43144989013672, "max": 22.401569366455078, "count": 700 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.024701273068785667, "min": -0.15171635150909424, "max": 0.20422151684761047, "count": 700 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 5.0884623527526855, "min": -28.219242095947266, "max": 21.444114685058594, "count": 700 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 700 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 700 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.06302815762538354, "min": -0.6428571428571429, "max": 0.6339868859189456, "count": 700 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 12.98380047082901, "min": -56.424800157547, "max": 56.126399993896484, "count": 700 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.06302815762538354, "min": -0.6428571428571429, "max": 0.6339868859189456, "count": 700 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 12.98380047082901, "min": -56.424800157547, "max": 56.126399993896484, "count": 700 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 700 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 700 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.018832366993107523, "min": 0.009509839072901134, "max": 0.024497803673148154, "count": 337 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.018832366993107523, "min": 0.009509839072901134, "max": 0.024497803673148154, "count": 337 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.1446676621834437, "min": 8.033821553302309e-05, "max": 0.1567095915476481, "count": 337 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.1446676621834437, "min": 8.033821553302309e-05, "max": 0.1567095915476481, "count": 337 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.14640799860159556, "min": 7.992449051622923e-05, "max": 0.15997838725646338, "count": 337 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.14640799860159556, "min": 7.992449051622923e-05, "max": 0.15997838725646338, "count": 337 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 337 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 337 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.19999999999999993, "max": 0.20000000000000007, "count": 337 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.19999999999999993, "max": 0.20000000000000007, "count": 337 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 337 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 337 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1717717876", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\wte42\\miniconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwosV3 --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.3.0+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1717765560" }, "total": 47684.713077099994, "count": 1, "self": 1.451411599991843, "children": { "run_training.setup": { "total": 0.17478389997268096, "count": 1, "self": 0.17478389997268096 }, "TrainerController.start_learning": { "total": 47683.08688160003, "count": 1, "self": 18.834646819450427, "children": { "TrainerController._reset_env": { "total": 13.741266000142787, "count": 35, "self": 13.741266000142787 }, "TrainerController.advance": { "total": 47650.20483378047, "count": 479422, "self": 16.988993382430635, "children": { "env_step": { "total": 17777.007047212042, "count": 479422, "self": 13457.020940388378, "children": { "SubprocessEnvManager._take_step": { "total": 4307.962841312983, "count": 479422, "self": 110.12339973135386, "children": { "TorchPolicy.evaluate": { "total": 4197.839441581629, "count": 881662, "self": 4197.839441581629 } } }, "workers": { "total": 12.023265510681085, "count": 479422, "self": 0.0, "children": { "worker_root": { "total": 47647.30676872132, "count": 479422, "is_parallel": true, "self": 37017.34267190518, "children": { "steps_from_proto": { "total": 0.13943679991643876, "count": 70, "is_parallel": true, "self": 0.02416329929837957, "children": { "_process_rank_one_or_two_observation": { "total": 0.11527350061805919, "count": 280, "is_parallel": true, "self": 0.11527350061805919 } } }, "UnityEnvironment.step": { "total": 10629.824660016224, "count": 479422, "is_parallel": true, "self": 527.9875359146972, "children": { "UnityEnvironment._generate_step_input": { "total": 584.1747871130356, "count": 479422, "is_parallel": true, "self": 584.1747871130356 }, "communicator.exchange": { "total": 7449.744073592068, "count": 479422, "is_parallel": true, "self": 7449.744073592068 }, "steps_from_proto": { "total": 2067.918263396423, "count": 958844, "is_parallel": true, "self": 351.15841074171476, "children": { "_process_rank_one_or_two_observation": { "total": 1716.759852654708, "count": 3835376, "is_parallel": true, "self": 1716.759852654708 } } } } } } } } } } }, "trainer_advance": { "total": 29856.208793186, "count": 479422, "self": 130.85290888464078, "children": { "process_trajectory": { "total": 4982.646843700728, "count": 479422, "self": 4979.123502500646, "children": { "RLTrainer._checkpoint": { "total": 3.5233412000816315, "count": 14, "self": 3.5233412000816315 } } }, "_update_policy": { "total": 24742.709040600632, "count": 337, "self": 2444.509963997174, "children": { "TorchPOCAOptimizer.update": { "total": 22298.199076603458, "count": 10128, "self": 22298.199076603458 } } } } } } }, "trainer_threads": { "total": 1.6999547369778156e-06, "count": 1, "self": 1.6999547369778156e-06 }, "TrainerController._save_models": { "total": 0.3061333000077866, "count": 1, "self": 0.08921610005199909, "children": { "RLTrainer._checkpoint": { "total": 0.2169171999557875, "count": 1, "self": 0.2169171999557875 } } } } } } }