{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.6342812776565552, "min": 1.5662215948104858, "max": 3.2957284450531006, "count": 1000 }, "SoccerTwos.Policy.Entropy.sum": { "value": 33679.26953125, "min": 17119.169921875, "max": 139861.90625, "count": 1000 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 67.76, "min": 40.54545454545455, "max": 999.0, "count": 1000 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 20328.0, "min": 10232.0, "max": 30160.0, "count": 1000 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1584.2755936150656, "min": 1193.2407824035395, "max": 1608.146968486107, "count": 981 }, "SoccerTwos.Self-play.ELO.sum": { "value": 237641.33904225985, "min": 2386.481564807079, "max": 347504.5448226008, "count": 981 }, "SoccerTwos.Step.mean": { "value": 9999954.0, "min": 9096.0, "max": 9999954.0, "count": 1000 }, "SoccerTwos.Step.sum": { "value": 9999954.0, "min": 9096.0, "max": 9999954.0, "count": 1000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.0016860488103702664, "min": -0.1444861739873886, "max": 0.18350601196289062, "count": 1000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -0.25122126936912537, "min": -27.30788803100586, "max": 35.41666030883789, "count": 1000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.0005662887124344707, "min": -0.14453111588954926, "max": 0.18132120370864868, "count": 1000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 0.08437702059745789, "min": -27.316381454467773, "max": 34.635433197021484, "count": 1000 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1000 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1000 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.10547382559552289, "min": -0.5625, "max": 0.395371212200685, "count": 1000 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -15.71560001373291, "min": -71.15400016307831, "max": 60.65840017795563, "count": 1000 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.10547382559552289, "min": -0.5625, "max": 0.395371212200685, "count": 1000 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -15.71560001373291, "min": -71.15400016307831, "max": 60.65840017795563, "count": 1000 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1000 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1000 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.014029478260393564, "min": 0.008492918137926608, "max": 0.023828427963114032, "count": 481 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.014029478260393564, "min": 0.008492918137926608, "max": 0.023828427963114032, "count": 481 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.11306864693760872, "min": 2.0196957909017025e-05, "max": 0.12350079442063967, "count": 481 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.11306864693760872, "min": 2.0196957909017025e-05, "max": 0.12350079442063967, "count": 481 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.11507400696476301, "min": 2.0780706017831106e-05, "max": 0.12515864272912344, "count": 481 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.11507400696476301, "min": 2.0780706017831106e-05, "max": 0.12515864272912344, "count": 481 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 481 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 481 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 481 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 481 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 481 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 481 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1715069201", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\Roland\\.conda\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics --force", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.3.0+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1715091594" }, "total": 22393.599547899998, "count": 1, "self": 0.23282859999744687, "children": { "run_training.setup": { "total": 0.09124540000630077, "count": 1, "self": 0.09124540000630077 }, "TrainerController.start_learning": { "total": 22393.275473899994, "count": 1, "self": 16.00719990253856, "children": { "TrainerController._reset_env": { "total": 6.862254000006942, "count": 50, "self": 6.862254000006942 }, "TrainerController.advance": { "total": 22370.295933897432, "count": 683404, "self": 13.25511979496514, "children": { "env_step": { "total": 9218.389595900779, "count": 683404, "self": 7027.691942584046, "children": { "SubprocessEnvManager._take_step": { "total": 2181.25434030866, "count": 683404, "self": 81.52678240906971, "children": { "TorchPolicy.evaluate": { "total": 2099.7275578995905, "count": 1260838, "self": 2099.7275578995905 } } }, "workers": { "total": 9.443313008072437, "count": 683404, "self": 0.0, "children": { "worker_root": { "total": 22371.271692193666, "count": 683404, "is_parallel": true, "self": 17055.29984119827, "children": { "steps_from_proto": { "total": 0.08420680000563152, "count": 100, "is_parallel": true, "self": 0.016182699997443706, "children": { "_process_rank_one_or_two_observation": { "total": 0.06802410000818782, "count": 400, "is_parallel": true, "self": 0.06802410000818782 } } }, "UnityEnvironment.step": { "total": 5315.887644195391, "count": 683404, "is_parallel": true, "self": 317.1790888785763, "children": { "UnityEnvironment._generate_step_input": { "total": 246.76691750533064, "count": 683404, "is_parallel": true, "self": 246.76691750533064 }, "communicator.exchange": { "total": 3737.981215006963, "count": 683404, "is_parallel": true, "self": 3737.981215006963 }, "steps_from_proto": { "total": 1013.9604228045209, "count": 1366808, "is_parallel": true, "self": 197.48299331474118, "children": { "_process_rank_one_or_two_observation": { "total": 816.4774294897798, "count": 5467232, "is_parallel": true, "self": 816.4774294897798 } } } } } } } } } } }, "trainer_advance": { "total": 13138.651218201689, "count": 683404, "self": 137.16260030404374, "children": { "process_trajectory": { "total": 2367.0253823976673, "count": 683404, "self": 2364.7300185976637, "children": { "RLTrainer._checkpoint": { "total": 2.2953638000035426, "count": 20, "self": 2.2953638000035426 } } }, "_update_policy": { "total": 10634.463235499978, "count": 481, "self": 1384.975474700972, "children": { "TorchPOCAOptimizer.update": { "total": 9249.487760799006, "count": 14439, "self": 9249.487760799006 } } } } } } }, "trainer_threads": { "total": 8.00006091594696e-07, "count": 1, "self": 8.00006091594696e-07 }, "TrainerController._save_models": { "total": 0.11008530000981409, "count": 1, "self": 0.013254200006485917, "children": { "RLTrainer._checkpoint": { "total": 0.09683110000332817, "count": 1, "self": 0.09683110000332817 } } } } } } }