{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.5932320356369019, "min": 1.5016505718231201, "max": 1.745996356010437, "count": 1206 }, "SoccerTwos.Policy.Entropy.sum": { "value": 31048.90625, "min": 25631.4140625, "max": 41233.62890625, "count": 1206 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 71.3768115942029, "min": 45.28440366972477, "max": 119.14285714285714, "count": 1206 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19700.0, "min": 13156.0, "max": 22016.0, "count": 1206 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1529.4984329634783, "min": 1477.5859388363544, "max": 1612.6695298792558, "count": 1206 }, "SoccerTwos.Self-play.ELO.sum": { "value": 211070.78374896, "min": 128689.4620998312, "max": 329568.675261479, "count": 1206 }, "SoccerTwos.Step.mean": { "value": 36119914.0, "min": 24069963.0, "max": 36119914.0, "count": 1206 }, "SoccerTwos.Step.sum": { "value": 36119914.0, "min": 24069963.0, "max": 36119914.0, "count": 1206 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.028788791969418526, "min": -0.14242351055145264, "max": 0.08708593994379044, "count": 1206 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -3.972853183746338, "min": -20.651409149169922, "max": 10.431331634521484, "count": 1206 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.03223952651023865, "min": -0.14488904178142548, "max": 0.08526767045259476, "count": 1206 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -4.449054718017578, "min": -21.0089111328125, "max": 11.612297058105469, "count": 1206 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1206 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1206 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.18953768109929733, "min": -0.45386880111694333, "max": 0.33275094234718466, "count": 1206 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -26.156199991703033, "min": -57.70319998264313, "max": 43.89559996128082, "count": 1206 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.18953768109929733, "min": -0.45386880111694333, "max": 0.33275094234718466, "count": 1206 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -26.156199991703033, "min": -57.70319998264313, "max": 43.89559996128082, "count": 1206 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1206 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1206 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.02783555166485409, "min": 0.022215450095245615, "max": 0.03516377701113622, "count": 584 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.02783555166485409, "min": 0.022215450095245615, "max": 0.03516377701113622, "count": 584 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.08402746667464574, "min": 0.05958654967447122, "max": 0.10001727566123009, "count": 584 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.08402746667464574, "min": 0.05958654967447122, "max": 0.10001727566123009, "count": 584 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.08519837260246277, "min": 0.06038877206544081, "max": 0.10178241382042567, "count": 584 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.08519837260246277, "min": 0.06038877206544081, "max": 0.10178241382042567, "count": 584 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0005000000000000001, "min": 0.0005000000000000001, "max": 0.0005000000000000001, "count": 584 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0005000000000000001, "min": 0.0005000000000000001, "max": 0.0005000000000000001, "count": 584 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 584 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 584 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 584 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 584 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1717346107", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\Jeste\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos-v2 --no-graphics --torch-device cuda --resume", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.3.0", "numpy_version": "1.24.3", "end_time_seconds": "1717363014" }, "total": 16907.010431599992, "count": 1, "self": 0.0491213999921456, "children": { "run_training.setup": { "total": 0.08449160000600386, "count": 1, "self": 0.08449160000600386 }, "TrainerController.start_learning": { "total": 16906.876818599994, "count": 1, "self": 10.754153696529102, "children": { "TrainerController._reset_env": { "total": 15.293022299854783, "count": 62, "self": 15.293022299854783 }, "TrainerController.advance": { "total": 16880.695971903595, "count": 819248, "self": 10.736753806617344, "children": { "env_step": { "total": 12331.277499803007, "count": 819248, "self": 6652.798938405846, "children": { "SubprocessEnvManager._take_step": { "total": 5671.828558090914, "count": 819248, "self": 76.0575644809287, "children": { "TorchPolicy.evaluate": { "total": 5595.770993609985, "count": 1514862, "self": 5595.770993609985 } } }, "workers": { "total": 6.650003306247527, "count": 819247, "self": 0.0, "children": { "worker_root": { "total": 16879.08530110467, "count": 819247, "is_parallel": true, "self": 11573.817548899926, "children": { "steps_from_proto": { "total": 0.08027969991962891, "count": 124, "is_parallel": true, "self": 0.014920400179107673, "children": { "_process_rank_one_or_two_observation": { "total": 0.06535929974052124, "count": 496, "is_parallel": true, "self": 0.06535929974052124 } } }, "UnityEnvironment.step": { "total": 5305.187472504826, "count": 819247, "is_parallel": true, "self": 301.55195310743875, "children": { "UnityEnvironment._generate_step_input": { "total": 253.8752837996144, "count": 819247, "is_parallel": true, "self": 253.8752837996144 }, "communicator.exchange": { "total": 3780.561313590748, "count": 819247, "is_parallel": true, "self": 3780.561313590748 }, "steps_from_proto": { "total": 969.1989220070245, "count": 1638494, "is_parallel": true, "self": 185.13892348716035, "children": { "_process_rank_one_or_two_observation": { "total": 784.0599985198642, "count": 6553976, "is_parallel": true, "self": 784.0599985198642 } } } } } } } } } } }, "trainer_advance": { "total": 4538.68171829397, "count": 819247, "self": 105.74872958290507, "children": { "process_trajectory": { "total": 2295.629787311045, "count": 819247, "self": 2291.5712614110817, "children": { "RLTrainer._checkpoint": { "total": 4.05852589996357, "count": 24, "self": 4.05852589996357 } } }, "_update_policy": { "total": 2137.30320140002, "count": 585, "self": 1320.8601817983872, "children": { "TorchPOCAOptimizer.update": { "total": 816.4430196016328, "count": 17550, "self": 816.4430196016328 } } } } } } }, "trainer_threads": { "total": 9.00006853044033e-07, "count": 1, "self": 9.00006853044033e-07 }, "TrainerController._save_models": { "total": 0.13366980000864714, "count": 1, "self": 0.006770200008759275, "children": { "RLTrainer._checkpoint": { "total": 0.12689959999988787, "count": 1, "self": 0.12689959999988787 } } } } } } }