{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.516158938407898, "min": 1.448338270187378, "max": 3.2401530742645264, "count": 2127 }, "SoccerTwos.Policy.Entropy.sum": { "value": 29304.3203125, "min": 19510.81640625, "max": 126053.265625, "count": 2127 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 61.63291139240506, "min": 38.736, "max": 999.0, "count": 2127 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19476.0, "min": 10068.0, "max": 30996.0, "count": 2127 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1591.4299410886467, "min": 1191.1008011682009, "max": 1626.5368108872508, "count": 2098 }, "SoccerTwos.Self-play.ELO.sum": { "value": 251445.9306920062, "min": 2382.2016023364017, "max": 356234.31472028594, "count": 2098 }, "SoccerTwos.Step.mean": { "value": 21599978.0, "min": 329914.0, "max": 21599978.0, "count": 2128 }, "SoccerTwos.Step.sum": { "value": 21599978.0, "min": 329914.0, "max": 21599978.0, "count": 2128 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.033536143600940704, "min": -0.13744910061359406, "max": 0.1891539841890335, "count": 2128 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -5.298710823059082, "min": -23.503795623779297, "max": 25.52860450744629, "count": 2128 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.037157971411943436, "min": -0.1399516612291336, "max": 0.19412744045257568, "count": 2128 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -5.870959281921387, "min": -23.931734085083008, "max": 25.963455200195312, "count": 2128 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 2128 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 2128 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.10256202613251118, "min": -0.5714285714285714, "max": 0.443529404261533, "count": 2128 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -16.204800128936768, "min": -68.07479977607727, "max": 55.54260015487671, "count": 2128 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.10256202613251118, "min": -0.5714285714285714, "max": 0.443529404261533, "count": 2128 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -16.204800128936768, "min": -68.07479977607727, "max": 55.54260015487671, "count": 2128 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 2128 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 2128 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.016245158187424145, "min": 0.009610125252705378, "max": 0.02519673923185716, "count": 1028 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.016245158187424145, "min": 0.009610125252705378, "max": 0.02519673923185716, "count": 1028 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.10199188242355982, "min": 5.646665780053203e-06, "max": 0.12414719959100087, "count": 1028 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.10199188242355982, "min": 5.646665780053203e-06, "max": 0.12414719959100087, "count": 1028 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.10414373353123665, "min": 5.847689984269285e-06, "max": 0.12693018863598507, "count": 1028 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.10414373353123665, "min": 5.847689984269285e-06, "max": 0.12693018863598507, "count": 1028 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 1028 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 1028 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.19999999999999996, "max": 0.20000000000000007, "count": 1028 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.19999999999999996, "max": 0.20000000000000007, "count": 1028 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 1028 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 1028 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1700159813", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\D:\\miniconda\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics --resume", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.1.1+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1700208708" }, "total": 48895.5697197, "count": 1, "self": 0.4110450000152923, "children": { "run_training.setup": { "total": 0.13796769999316894, "count": 1, "self": 0.13796769999316894 }, "TrainerController.start_learning": { "total": 48895.02070699999, "count": 1, "self": 23.887236402340932, "children": { "TrainerController._reset_env": { "total": 7.713660199922742, "count": 108, "self": 7.713660199922742 }, "TrainerController.advance": { "total": 48863.3042896977, "count": 1464850, "self": 22.73807657911675, "children": { "env_step": { "total": 17230.106904804707, "count": 1464850, "self": 13183.983838003565, "children": { "SubprocessEnvManager._take_step": { "total": 4031.84877231461, "count": 1464850, "self": 147.05162680894136, "children": { "TorchPolicy.evaluate": { "total": 3884.7971455056686, "count": 2677068, "self": 3884.7971455056686 } } }, "workers": { "total": 14.27429448653129, "count": 1464850, "self": 0.0, "children": { "worker_root": { "total": 48845.03136850911, "count": 1464850, "is_parallel": true, "self": 38487.16090651386, "children": { "steps_from_proto": { "total": 0.15183669986436144, "count": 216, "is_parallel": true, "self": 0.03025069975410588, "children": { "_process_rank_one_or_two_observation": { "total": 0.12158600011025555, "count": 864, "is_parallel": true, "self": 0.12158600011025555 } } }, "UnityEnvironment.step": { "total": 10357.718625295383, "count": 1464850, "is_parallel": true, "self": 560.9129239993636, "children": { "UnityEnvironment._generate_step_input": { "total": 507.1465280097036, "count": 1464850, "is_parallel": true, "self": 507.1465280097036 }, "communicator.exchange": { "total": 7548.055724506266, "count": 1464850, "is_parallel": true, "self": 7548.055724506266 }, "steps_from_proto": { "total": 1741.6034487800498, "count": 2929700, "is_parallel": true, "self": 347.34043145005126, "children": { "_process_rank_one_or_two_observation": { "total": 1394.2630173299985, "count": 11718800, "is_parallel": true, "self": 1394.2630173299985 } } } } } } } } } } }, "trainer_advance": { "total": 31610.459308313875, "count": 1464850, "self": 213.4314369407657, "children": { "process_trajectory": { "total": 5405.310427572869, "count": 1464850, "self": 5400.676771472936, "children": { "RLTrainer._checkpoint": { "total": 4.633656099933432, "count": 43, "self": 4.633656099933432 } } }, "_update_policy": { "total": 25991.71744380024, "count": 1029, "self": 2498.212324496766, "children": { "TorchPOCAOptimizer.update": { "total": 23493.505119303474, "count": 30877, "self": 23493.505119303474 } } } } } } }, "trainer_threads": { "total": 1.300009898841381e-06, "count": 1, "self": 1.300009898841381e-06 }, "TrainerController._save_models": { "total": 0.11551940001663752, "count": 1, "self": 0.008432500035269186, "children": { "RLTrainer._checkpoint": { "total": 0.10708689998136833, "count": 1, "self": 0.10708689998136833 } } } } } } }