{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 2.950676441192627, "min": 2.9117202758789062, "max": 3.295745372772217, "count": 500 }, "SoccerTwos.Policy.Entropy.sum": { "value": 57691.625, "min": 13246.7890625, "max": 122224.5859375, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 143.61111111111111, "min": 112.06666666666666, "max": 999.0, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 20680.0, "min": 13536.0, "max": 28884.0, "count": 500 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1302.7872894631637, "min": 1192.066323604376, "max": 1305.7186883267184, "count": 388 }, "SoccerTwos.Self-play.ELO.sum": { "value": 93800.68484134779, "min": 2384.132647208752, "max": 117325.9542187593, "count": 388 }, "SoccerTwos.Step.mean": { "value": 4999630.0, "min": 9458.0, "max": 4999630.0, "count": 500 }, "SoccerTwos.Step.sum": { "value": 4999630.0, "min": 9458.0, "max": 4999630.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.022456247359514236, "min": -0.045314107090234756, "max": 0.09331739693880081, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 1.6168497800827026, "min": -0.7702248692512512, "max": 3.919330596923828, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.022108759731054306, "min": -0.04532609134912491, "max": 0.0941377505660057, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 1.5918307304382324, "min": -0.7703975439071655, "max": 3.9537856578826904, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.00044444368945227727, "min": -0.5662444432576498, "max": 0.4368666609128316, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 0.031999945640563965, "min": -11.32039999961853, "max": 19.597599804401398, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.00044444368945227727, "min": -0.5662444432576498, "max": 0.4368666609128316, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 0.031999945640563965, "min": -11.32039999961853, "max": 19.597599804401398, "count": 500 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.01807358275691513, "min": 0.010362477377445127, "max": 0.02287560102219383, "count": 233 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.01807358275691513, "min": 0.010362477377445127, "max": 0.02287560102219383, "count": 233 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.043964517613252006, "min": 1.3621115044770704e-07, "max": 0.043964517613252006, "count": 233 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.043964517613252006, "min": 1.3621115044770704e-07, "max": 0.043964517613252006, "count": 233 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.04480604628721873, "min": 1.5980604146648146e-07, "max": 0.04480604628721873, "count": 233 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.04480604628721873, "min": 1.5980604146648146e-07, "max": 0.04480604628721873, "count": 233 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 233 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 233 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 233 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 233 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 233 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 233 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1707750121", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\romua\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.2.0+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1707772965" }, "total": 22843.73997580004, "count": 1, "self": 0.5188202001154423, "children": { "run_training.setup": { "total": 0.14712119987234473, "count": 1, "self": 0.14712119987234473 }, "TrainerController.start_learning": { "total": 22843.07403440005, "count": 1, "self": 10.158057366497815, "children": { "TrainerController._reset_env": { "total": 9.921969200950116, "count": 25, "self": 9.921969200950116 }, "TrainerController.advance": { "total": 22822.824496632675, "count": 324740, "self": 10.456424907781184, "children": { "env_step": { "total": 7768.179501714418, "count": 324740, "self": 5913.668538457248, "children": { "SubprocessEnvManager._take_step": { "total": 1847.7423227129038, "count": 324740, "self": 63.068200608482584, "children": { "TorchPolicy.evaluate": { "total": 1784.6741221044213, "count": 643580, "self": 1784.6741221044213 } } }, "workers": { "total": 6.768640544265509, "count": 324740, "self": 0.0, "children": { "worker_root": { "total": 22820.900319712004, "count": 324740, "is_parallel": true, "self": 18283.300610938575, "children": { "steps_from_proto": { "total": 0.06490549840964377, "count": 50, "is_parallel": true, "self": 0.013156597735360265, "children": { "_process_rank_one_or_two_observation": { "total": 0.051748900674283504, "count": 200, "is_parallel": true, "self": 0.051748900674283504 } } }, "UnityEnvironment.step": { "total": 4537.534803275019, "count": 324740, "is_parallel": true, "self": 260.1497614134569, "children": { "UnityEnvironment._generate_step_input": { "total": 242.45066723087803, "count": 324740, "is_parallel": true, "self": 242.45066723087803 }, "communicator.exchange": { "total": 3196.430697218515, "count": 324740, "is_parallel": true, "self": 3196.430697218515 }, "steps_from_proto": { "total": 838.503677412169, "count": 649480, "is_parallel": true, "self": 169.40999462315813, "children": { "_process_rank_one_or_two_observation": { "total": 669.0936827890109, "count": 2597920, "is_parallel": true, "self": 669.0936827890109 } } } } } } } } } } }, "trainer_advance": { "total": 15044.188570010476, "count": 324740, "self": 80.26394582935609, "children": { "process_trajectory": { "total": 1937.9987270799465, "count": 324740, "self": 1936.1144363800995, "children": { "RLTrainer._checkpoint": { "total": 1.8842906998470426, "count": 10, "self": 1.8842906998470426 } } }, "_update_policy": { "total": 13025.925897101173, "count": 233, "self": 1028.4865490051452, "children": { "TorchPOCAOptimizer.update": { "total": 11997.439348096028, "count": 6990, "self": 11997.439348096028 } } } } } } }, "trainer_threads": { "total": 1.2998934835195541e-06, "count": 1, "self": 1.2998934835195541e-06 }, "TrainerController._save_models": { "total": 0.1695099000353366, "count": 1, "self": 0.002812399994581938, "children": { "RLTrainer._checkpoint": { "total": 0.16669750004075468, "count": 1, "self": 0.16669750004075468 } } } } } } }