{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.4872379302978516, "min": 1.3691848516464233, "max": 1.5180870294570923, "count": 500 }, "SoccerTwos.Policy.Entropy.sum": { "value": 29126.068359375, "min": 26924.76953125, "max": 31682.85546875, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 55.17204301075269, "min": 43.629629629629626, "max": 89.61290322580645, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 20524.0, "min": 16228.0, "max": 22224.0, "count": 500 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1616.4376838638466, "min": 1579.196744553692, "max": 1700.385513235949, "count": 500 }, "SoccerTwos.Self-play.ELO.sum": { "value": 300657.4091986755, "min": 172213.5726064411, "max": 364738.29520057444, "count": 500 }, "SoccerTwos.Step.mean": { "value": 19999978.0, "min": 15009988.0, "max": 19999978.0, "count": 500 }, "SoccerTwos.Step.sum": { "value": 19999978.0, "min": 15009988.0, "max": 19999978.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.019980132579803467, "min": -0.1321440488100052, "max": 0.061585795134305954, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -5.054973602294922, "min": -30.52527618408203, "max": 15.7752046585083, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.02246411144733429, "min": -0.13097290694713593, "max": 0.061111632734537125, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -5.683420181274414, "min": -30.254741668701172, "max": 15.583466529846191, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.05270054082612734, "min": -0.40565573778308806, "max": 0.30312405130531217, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 9.749600052833557, "min": -54.468600153923035, "max": 47.89360010623932, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.05270054082612734, "min": -0.40565573778308806, "max": 0.30312405130531217, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 9.749600052833557, "min": -54.468600153923035, "max": 47.89360010623932, "count": 500 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.021534866566071287, "min": 0.020611216948501827, "max": 0.02840943965714097, "count": 82 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.021534866566071287, "min": 0.020611216948501827, "max": 0.02840943965714097, "count": 82 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.09841666143324415, "min": 0.08451716904923068, "max": 0.10750022770489677, "count": 82 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.09841666143324415, "min": 0.08451716904923068, "max": 0.10750022770489677, "count": 82 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.09949219896126602, "min": 0.08530456951614153, "max": 0.10901509269566859, "count": 82 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.09949219896126602, "min": 0.08530456951614153, "max": 0.10901509269566859, "count": 82 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 9.999999999999999e-05, "min": 9.999999999999999e-05, "max": 9.999999999999999e-05, "count": 82 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 9.999999999999999e-05, "min": 9.999999999999999e-05, "max": 9.999999999999999e-05, "count": 82 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.19999999999999998, "min": 0.19999999999999998, "max": 0.19999999999999998, "count": 82 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.19999999999999998, "min": 0.19999999999999998, "max": 0.19999999999999998, "count": 82 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005, "min": 0.005, "max": 0.005, "count": 82 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005, "min": 0.005, "max": 0.005, "count": 82 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1678088896", "python_version": "3.8.16 (default, Jan 17 2023, 22:25:28) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "C:\\Users\\Alex\\.conda\\envs\\rl\\Scripts\\mlagents-learn config\\poca\\SoccerTwos-v4.yaml --env=training-envs-executables\\SoccerTwos\\SoccerTwos.exe --run-id SoccerTwos --no-graphics --resume", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "1.13.1+cpu", "numpy_version": "1.21.2", "end_time_seconds": "1678103026" }, "total": 14129.332310592, "count": 1, "self": 1.1111714860016946, "children": { "run_training.setup": { "total": 0.19717049600000003, "count": 1, "self": 0.19717049600000003 }, "TrainerController.start_learning": { "total": 14128.02396861, "count": 1, "self": 9.5890246804247, "children": { "TrainerController._reset_env": { "total": 4.5362276140002935, "count": 26, "self": 4.5362276140002935 }, "TrainerController.advance": { "total": 14113.775025608575, "count": 344478, "self": 8.740378945574776, "children": { "env_step": { "total": 6499.026367676286, "count": 344478, "self": 5181.30116057813, "children": { "SubprocessEnvManager._take_step": { "total": 1312.0876138652366, "count": 344478, "self": 45.67449664603737, "children": { "TorchPolicy.evaluate": { "total": 1266.4131172191992, "count": 626106, "self": 1266.4131172191992 } } }, "workers": { "total": 5.637593232919463, "count": 344478, "self": 0.0, "children": { "worker_root": { "total": 14111.950555176158, "count": 344478, "is_parallel": true, "self": 9999.969528788399, "children": { "steps_from_proto": { "total": 0.05718324399976371, "count": 52, "is_parallel": true, "self": 0.010959142993296034, "children": { "_process_rank_one_or_two_observation": { "total": 0.04622410100646768, "count": 208, "is_parallel": true, "self": 0.04622410100646768 } } }, "UnityEnvironment.step": { "total": 4111.923843143759, "count": 344478, "is_parallel": true, "self": 212.07101066499308, "children": { "UnityEnvironment._generate_step_input": { "total": 196.16795433004822, "count": 344478, "is_parallel": true, "self": 196.16795433004822 }, "communicator.exchange": { "total": 2925.141056170298, "count": 344478, "is_parallel": true, "self": 2925.141056170298 }, "steps_from_proto": { "total": 778.5438219784198, "count": 688956, "is_parallel": true, "self": 151.49830487435952, "children": { "_process_rank_one_or_two_observation": { "total": 627.0455171040603, "count": 2755824, "is_parallel": true, "self": 627.0455171040603 } } } } } } } } } } }, "trainer_advance": { "total": 7606.008278986715, "count": 344478, "self": 53.78237858206376, "children": { "process_trajectory": { "total": 1932.6900048486557, "count": 344478, "self": 1931.339307254654, "children": { "RLTrainer._checkpoint": { "total": 1.3506975940017583, "count": 10, "self": 1.3506975940017583 } } }, "_update_policy": { "total": 5619.535895555995, "count": 82, "self": 646.1680682530086, "children": { "TorchPOCAOptimizer.update": { "total": 4973.367827302986, "count": 9676, "self": 4973.367827302986 } } } } } } }, "trainer_threads": { "total": 9.049999789567664e-07, "count": 1, "self": 9.049999789567664e-07 }, "TrainerController._save_models": { "total": 0.12368980199971702, "count": 1, "self": 0.0024554240008001216, "children": { "RLTrainer._checkpoint": { "total": 0.1212343779989169, "count": 1, "self": 0.1212343779989169 } } } } } } }