{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.798728108406067, "min": 1.798728108406067, "max": 3.2957170009613037, "count": 585 }, "SoccerTwos.Policy.Entropy.sum": { "value": 35974.5625, "min": 15857.1796875, "max": 119581.6484375, "count": 585 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 54.46666666666667, "min": 43.642857142857146, "max": 999.0, "count": 585 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19608.0, "min": 15308.0, "max": 24940.0, "count": 585 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1517.7656562104169, "min": 1196.156832315803, "max": 1536.9369848336548, "count": 568 }, "SoccerTwos.Self-play.ELO.sum": { "value": 273197.818117875, "min": 2392.313664631606, "max": 326380.3934481398, "count": 568 }, "SoccerTwos.Step.mean": { "value": 5849901.0, "min": 9490.0, "max": 5849901.0, "count": 585 }, "SoccerTwos.Step.sum": { "value": 5849901.0, "min": 9490.0, "max": 5849901.0, "count": 585 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.012372388504445553, "min": -0.08308380842208862, "max": 0.13892248272895813, "count": 585 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -2.2394022941589355, "min": -14.955085754394531, "max": 26.575284957885742, "count": 585 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.008933898992836475, "min": -0.08326149731874466, "max": 0.13363541662693024, "count": 585 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -1.6170357465744019, "min": -14.987069129943848, "max": 26.994354248046875, "count": 585 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 585 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 585 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.07342320647687543, "min": -0.6237818165258928, "max": 0.48822608719701355, "count": 585 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 13.289600372314453, "min": -50.34759998321533, "max": 52.97639977931976, "count": 585 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.07342320647687543, "min": -0.6237818165258928, "max": 0.48822608719701355, "count": 585 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 13.289600372314453, "min": -50.34759998321533, "max": 52.97639977931976, "count": 585 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 585 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 585 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.016741516183704636, "min": 0.011226411445143943, "max": 0.025212356952639917, "count": 280 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.016741516183704636, "min": 0.011226411445143943, "max": 0.025212356952639917, "count": 280 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.1160004697740078, "min": 6.358821580458122e-05, "max": 0.1160004697740078, "count": 280 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.1160004697740078, "min": 6.358821580458122e-05, "max": 0.1160004697740078, "count": 280 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.1187271848320961, "min": 6.622356292306601e-05, "max": 0.1187271848320961, "count": 280 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.1187271848320961, "min": 6.622356292306601e-05, "max": 0.1187271848320961, "count": 280 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 280 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 280 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 280 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 280 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 280 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 280 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1677771483", "python_version": "3.9.16 (main, Mar 1 2023, 18:30:21) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "C:\\Users\\TomZh\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "1.13.1+cpu", "numpy_version": "1.21.2", "end_time_seconds": "1677801456" }, "total": 29972.835507, "count": 1, "self": 2.6154442000006384, "children": { "run_training.setup": { "total": 0.18303540000000051, "count": 1, "self": 0.18303540000000051 }, "TrainerController.start_learning": { "total": 29970.037027399998, "count": 1, "self": 16.058169298830762, "children": { "TrainerController._reset_env": { "total": 7.600575699990017, "count": 30, "self": 7.600575699990017 }, "TrainerController.advance": { "total": 29946.003335301175, "count": 397307, "self": 17.125882601168996, "children": { "env_step": { "total": 12905.713010200048, "count": 397307, "self": 10114.983236501166, "children": { "SubprocessEnvManager._take_step": { "total": 2780.0445279996893, "count": 397307, "self": 97.12233600213312, "children": { "TorchPolicy.evaluate": { "total": 2682.922191997556, "count": 739362, "self": 2682.922191997556 } } }, "workers": { "total": 10.685245699193267, "count": 397307, "self": 0.0, "children": { "worker_root": { "total": 29906.11344050029, "count": 397307, "is_parallel": true, "self": 21671.860640298626, "children": { "steps_from_proto": { "total": 0.0923370000031083, "count": 60, "is_parallel": true, "self": 0.018933799997398637, "children": { "_process_rank_one_or_two_observation": { "total": 0.07340320000570966, "count": 240, "is_parallel": true, "self": 0.07340320000570966 } } }, "UnityEnvironment.step": { "total": 8234.160463201659, "count": 397307, "is_parallel": true, "self": 413.37905410003896, "children": { "UnityEnvironment._generate_step_input": { "total": 333.058526001064, "count": 397307, "is_parallel": true, "self": 333.058526001064 }, "communicator.exchange": { "total": 6212.587333700833, "count": 397307, "is_parallel": true, "self": 6212.587333700833 }, "steps_from_proto": { "total": 1275.1355493997225, "count": 794614, "is_parallel": true, "self": 266.60938360031696, "children": { "_process_rank_one_or_two_observation": { "total": 1008.5261657994056, "count": 3178456, "is_parallel": true, "self": 1008.5261657994056 } } } } } } } } } } }, "trainer_advance": { "total": 17023.164442499958, "count": 397307, "self": 109.81007539940038, "children": { "process_trajectory": { "total": 2794.4918756005545, "count": 397307, "self": 2791.7885615005584, "children": { "RLTrainer._checkpoint": { "total": 2.703314099996078, "count": 11, "self": 2.703314099996078 } } }, "_update_policy": { "total": 14118.862491500002, "count": 281, "self": 1317.6930475999707, "children": { "TorchPOCAOptimizer.update": { "total": 12801.169443900031, "count": 8415, "self": 12801.169443900031 } } } } } } }, "trainer_threads": { "total": 2.7999994927085936e-06, "count": 1, "self": 2.7999994927085936e-06 }, "TrainerController._save_models": { "total": 0.37494430000151624, "count": 1, "self": 0.01606280000487459, "children": { "RLTrainer._checkpoint": { "total": 0.35888149999664165, "count": 1, "self": 0.35888149999664165 } } } } } } }