{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 3.226175308227539, "min": 3.226175308227539, "max": 3.295647382736206, "count": 24 }, "SoccerTwos.Policy.Entropy.sum": { "value": 51412.328125, "min": 6199.6796875, "max": 105460.71875, "count": 24 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 731.0, "min": 412.3333333333333, "max": 999.0, "count": 24 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 20468.0, "min": 3996.0, "max": 30704.0, "count": 24 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1202.967596949221, "min": 1197.1979931128012, "max": 1202.967596949221, "count": 21 }, "SoccerTwos.Self-play.ELO.sum": { "value": 9623.740775593767, "min": 2398.0030090611704, "max": 14411.361186046594, "count": 21 }, "SoccerTwos.Step.mean": { "value": 239844.0, "min": 9118.0, "max": 239844.0, "count": 24 }, "SoccerTwos.Step.sum": { "value": 239844.0, "min": 9118.0, "max": 239844.0, "count": 24 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.0005443634581752121, "min": -0.020596221089363098, "max": 0.04275628179311752, "count": 24 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -0.0076210880652070045, "min": -0.2693004012107849, "max": 0.5258617401123047, "count": 24 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.00023066645371727645, "min": -0.02258630283176899, "max": 0.042664770036935806, "count": 24 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -0.0032293302938342094, "min": -0.2920090854167938, "max": 0.49306783080101013, "count": 24 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 24 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 24 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.13174285633223398, "min": -0.5714285714285714, "max": 0.2171000043551127, "count": 24 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -1.8443999886512756, "min": -8.0, "max": 2.6052000522613525, "count": 24 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.13174285633223398, "min": -0.5714285714285714, "max": 0.2171000043551127, "count": 24 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -1.8443999886512756, "min": -8.0, "max": 2.6052000522613525, "count": 24 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 24 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 24 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.015392292079438145, "min": 0.012119114518282004, "max": 0.021384467855871964, "count": 10 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.015392292079438145, "min": 0.012119114518282004, "max": 0.021384467855871964, "count": 10 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.0032874997705221176, "min": 0.0009290036687161773, "max": 0.004932885508363446, "count": 10 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.0032874997705221176, "min": 0.0009290036687161773, "max": 0.004932885508363446, "count": 10 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.003340853110421449, "min": 0.0009316830431943703, "max": 0.004962614985803763, "count": 10 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.003340853110421449, "min": 0.0009316830431943703, "max": 0.004962614985803763, "count": 10 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 10 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 10 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.19999999999999996, "max": 0.20000000000000007, "count": 10 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.19999999999999996, "max": 0.20000000000000007, "count": 10 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 10 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 10 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1715539664", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\filiz\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.3.0+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1715540572" }, "total": 908.4570849000011, "count": 1, "self": 2.229777199914679, "children": { "run_training.setup": { "total": 0.21335080009885132, "count": 1, "self": 0.21335080009885132 }, "TrainerController.start_learning": { "total": 906.0139568999875, "count": 1, "self": 0.7225895945448428, "children": { "TrainerController._reset_env": { "total": 9.232590700034052, "count": 2, "self": 9.232590700034052 }, "TrainerController.advance": { "total": 895.67689510528, "count": 16151, "self": 0.9127869671210647, "children": { "env_step": { "total": 441.53616642113775, "count": 16151, "self": 329.4912768241484, "children": { "SubprocessEnvManager._take_step": { "total": 111.61974380165339, "count": 16151, "self": 4.349910187069327, "children": { "TorchPolicy.evaluate": { "total": 107.26983361458406, "count": 32102, "self": 107.26983361458406 } } }, "workers": { "total": 0.42514579533599317, "count": 16151, "self": 0.0, "children": { "worker_root": { "total": 870.9920613132417, "count": 16151, "is_parallel": true, "self": 621.0913708154112, "children": { "steps_from_proto": { "total": 0.005008700070902705, "count": 4, "is_parallel": true, "self": 0.0013208999298512936, "children": { "_process_rank_one_or_two_observation": { "total": 0.0036878001410514116, "count": 16, "is_parallel": true, "self": 0.0036878001410514116 } } }, "UnityEnvironment.step": { "total": 249.8956817977596, "count": 16151, "is_parallel": true, "self": 13.191661384887993, "children": { "UnityEnvironment._generate_step_input": { "total": 10.73485070397146, "count": 16151, "is_parallel": true, "self": 10.73485070397146 }, "communicator.exchange": { "total": 183.78361381031573, "count": 16151, "is_parallel": true, "self": 183.78361381031573 }, "steps_from_proto": { "total": 42.185555898584425, "count": 32302, "is_parallel": true, "self": 9.241883121198043, "children": { "_process_rank_one_or_two_observation": { "total": 32.94367277738638, "count": 129208, "is_parallel": true, "self": 32.94367277738638 } } } } } } } } } } }, "trainer_advance": { "total": 453.22794171702117, "count": 16151, "self": 5.210769797442481, "children": { "process_trajectory": { "total": 63.35066211945377, "count": 16151, "self": 63.35066211945377 }, "_update_policy": { "total": 384.6665098001249, "count": 11, "self": 60.40139749785885, "children": { "TorchPOCAOptimizer.update": { "total": 324.26511230226606, "count": 345, "self": 324.26511230226606 } } } } } } }, "trainer_threads": { "total": 9.00006853044033e-06, "count": 1, "self": 9.00006853044033e-06 }, "TrainerController._save_models": { "total": 0.3818725000601262, "count": 1, "self": 0.012093800120055676, "children": { "RLTrainer._checkpoint": { "total": 0.3697786999400705, "count": 1, "self": 0.3697786999400705 } } } } } } }