{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.8351835012435913, "min": 1.7619726657867432, "max": 3.2957541942596436, "count": 943 }, "SoccerTwos.Policy.Entropy.sum": { "value": 39170.15625, "min": 10430.99609375, "max": 125923.859375, "count": 943 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 56.27777777777778, "min": 42.424778761061944, "max": 999.0, "count": 943 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 20260.0, "min": 11056.0, "max": 31052.0, "count": 943 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1548.496950492456, "min": 1180.1236578844307, "max": 1565.0653219843734, "count": 899 }, "SoccerTwos.Self-play.ELO.sum": { "value": 278729.4510886421, "min": 2360.2473157688614, "max": 344267.333247176, "count": 899 }, "SoccerTwos.Step.mean": { "value": 9429998.0, "min": 9542.0, "max": 9429998.0, "count": 943 }, "SoccerTwos.Step.sum": { "value": 9429998.0, "min": 9542.0, "max": 9429998.0, "count": 943 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.09211795777082443, "min": -0.13512776792049408, "max": 0.24034865200519562, "count": 943 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -16.58123207092285, "min": -24.322998046875, "max": 38.74200439453125, "count": 943 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.08809732645750046, "min": -0.13800619542598724, "max": 0.2362421452999115, "count": 943 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -15.857519149780273, "min": -24.841114044189453, "max": 39.339271545410156, "count": 943 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 943 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 943 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.014417781432469686, "min": -0.5714285714285714, "max": 0.6048038452863693, "count": 943 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 2.5952006578445435, "min": -68.30320000648499, "max": 92.59899979829788, "count": 943 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.014417781432469686, "min": -0.5714285714285714, "max": 0.6048038452863693, "count": 943 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 2.5952006578445435, "min": -68.30320000648499, "max": 92.59899979829788, "count": 943 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 943 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 943 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.013937272881351721, "min": 0.009750445959313462, "max": 0.024288216698914768, "count": 450 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.013937272881351721, "min": 0.009750445959313462, "max": 0.024288216698914768, "count": 450 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.11118999322255453, "min": 1.778935893526068e-05, "max": 0.12314764087398847, "count": 450 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.11118999322255453, "min": 1.778935893526068e-05, "max": 0.12314764087398847, "count": 450 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.11287796373168628, "min": 1.7486079195805358e-05, "max": 0.12482764149705569, "count": 450 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.11287796373168628, "min": 1.7486079195805358e-05, "max": 0.12482764149705569, "count": 450 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 450 }, 
"SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 450 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 450 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 450 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 450 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 450 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1716338528", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "C:\\Users\\zakyz\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.3.0+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1716378319" }, "total": 39793.9377009999, "count": 1, "self": 0.8665592996403575, "children": { "run_training.setup": { "total": 0.2729110000655055, "count": 1, "self": 0.2729110000655055 }, "TrainerController.start_learning": { "total": 39792.798230700195, "count": 1, "self": 20.20682390173897, "children": { "TrainerController._reset_env": { "total": 23.2952534975484, "count": 32, "self": 23.2952534975484 }, "TrainerController.advance": { "total": 39749.05335520068, "count": 634972, "self": 20.16771548707038, "children": { "env_step": { "total": 15679.14878873853, "count": 634972, "self": 12152.196535510942, "children": { "SubprocessEnvManager._take_step": { "total": 3513.7960403021425, "count": 634972, "self": 138.55363113852218, "children": { "TorchPolicy.evaluate": { "total": 3375.2424091636203, "count": 1190114, "self": 3375.2424091636203 } } }, "workers": { "total": 13.156212925445288, "count": 634972, "self": 0.0, "children": { "worker_root": { "total": 39725.1944738701, "count": 634972, "is_parallel": true, "self": 30145.001811450347, "children": { "steps_from_proto": { "total": 0.08248470118269324, "count": 64, "is_parallel": true, "self": 0.015484603121876717, "children": { "_process_rank_one_or_two_observation": { "total": 0.06700009806081653, "count": 256, "is_parallel": true, "self": 0.06700009806081653 } } }, "UnityEnvironment.step": { "total": 9580.110177718569, "count": 634972, "is_parallel": true, "self": 527.6923948265612, "children": { "UnityEnvironment._generate_step_input": { "total": 506.2595134936273, "count": 634972, "is_parallel": true, "self": 506.2595134936273 }, "communicator.exchange": { "total": 6880.880738124717, "count": 634972, "is_parallel": true, "self": 6880.880738124717 }, "steps_from_proto": { "total": 1665.277531273663, "count": 1269944, "is_parallel": true, "self": 313.94569505471736, "children": { "_process_rank_one_or_two_observation": { "total": 1351.3318362189457, "count": 5079776, "is_parallel": true, "self": 1351.3318362189457 } } } } } } } } } } }, "trainer_advance": { "total": 24049.73685097508, "count": 634972, "self": 149.7493197284639, "children": { "process_trajectory": { "total": 3702.828004044015, "count": 634972, "self": 3699.636707644444, "children": { "RLTrainer._checkpoint": { "total": 
3.1912963995710015, "count": 18, "self": 3.1912963995710015 } } }, "_update_policy": { "total": 20197.159527202602, "count": 451, "self": 1883.811665987596, "children": { "TorchPOCAOptimizer.update": { "total": 18313.347861215007, "count": 13520, "self": 18313.347861215007 } } } } } } }, "trainer_threads": { "total": 1.80024653673172e-06, "count": 1, "self": 1.80024653673172e-06 }, "TrainerController._save_models": { "total": 0.2427962999790907, "count": 1, "self": 0.05353789962828159, "children": { "RLTrainer._checkpoint": { "total": 0.1892584003508091, "count": 1, "self": 0.1892584003508091 } } } } } } }
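The JSON above is the raw ML-Agents run summary for this SoccerTwos POCA training run: a "gauges" map where each entry tracks value/min/max/count for one statistic, a "metadata" block with the run configuration, and a hierarchical wall-clock timer tree (total/count/self/children per node). As a minimal sketch, and assuming the blob is saved to disk as a JSON file (the path below is hypothetical; adjust it to wherever these run logs actually live), it can be inspected like this:

# Minimal sketch (not part of the original run logs): load the run summary
# above and print a few headline numbers plus the timer breakdown.
import json

# Assumed path, following the usual ML-Agents results layout; change as needed.
with open("run_logs/timers.json") as f:
    run = json.load(f)

# Each gauge records value/min/max/count for one tracked statistic.
gauges = run["gauges"]
elo = gauges["SoccerTwos.Self-play.ELO.mean"]
steps = gauges["SoccerTwos.Step.mean"]
print(f"Final ELO (mean): {elo['value']:.1f} (min {elo['min']:.1f}, max {elo['max']:.1f})")
print(f"Training steps:   {steps['value']:.0f}")

# The rest of the file is a timer tree: every node has total/count/self and
# optional children. Walk it to see where wall-clock time went.
def walk(name, node, depth=0):
    print(f"{'  ' * depth}{name}: {node.get('total', 0.0):.1f}s over {node.get('count', 0)} call(s)")
    for child_name, child in node.get("children", {}).items():
        walk(child_name, child, depth + 1)

walk(run.get("name", "root"), run)

Nothing above depends on ML-Agents itself; the standard-library json module is enough, since the run summary is plain JSON.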