{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 2.9030113220214844, "min": 2.0336380004882812, "max": 3.295714855194092, "count": 500 }, "SoccerTwos.Policy.Entropy.sum": { "value": 36508.26953125, "min": 11147.744140625, "max": 182223.890625, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 999.0, "min": 409.3, "max": 999.0, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19980.0, "min": 10548.0, "max": 29564.0, "count": 500 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1182.4339183981829, "min": 1178.4425282934696, "max": 1198.7535925691843, "count": 93 }, "SoccerTwos.Self-play.ELO.sum": { "value": 2364.8678367963657, "min": 2356.8850565869393, "max": 9535.565921712896, "count": 93 }, "SoccerTwos.Step.mean": { "value": 4999014.0, "min": 9798.0, "max": 4999014.0, "count": 500 }, "SoccerTwos.Step.sum": { "value": 4999014.0, "min": 9798.0, "max": 4999014.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.0064850314520299435, "min": -0.0413593128323555, "max": 0.023214448243379593, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 0.06485031545162201, "min": -0.6596869230270386, "max": 0.29968172311782837, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.0056223622523248196, "min": -0.04110932722687721, "max": 0.025155019015073776, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 0.05622362345457077, "min": -0.6306700110435486, "max": 0.29314225912094116, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.1, "min": -0.4166666666666667, "max": 0.17608333627382913, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -1.0, "min": -5.0, "max": 2.1130000352859497, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.1, "min": -0.4166666666666667, "max": 0.17608333627382913, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -1.0, "min": -5.0, "max": 2.1130000352859497, "count": 500 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.01572269879591962, "min": 0.009883445129768612, "max": 0.025067056366242468, "count": 229 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.01572269879591962, "min": 0.009883445129768612, "max": 0.025067056366242468, "count": 229 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.0007847993865046494, "min": 2.3100934424628387e-09, "max": 0.0047873390295232335, "count": 229 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.0007847993865046494, "min": 2.3100934424628387e-09, "max": 0.0047873390295232335, "count": 229 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.0007887554895811869, "min": 2.9675129538849393e-09, "max": 0.004578714010616144, "count": 229 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.0007887554895811869, "min": 2.9675129538849393e-09, "max": 0.004578714010616144, "count": 229 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 229 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 229 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.2, "max": 0.20000000000000007, "count": 229 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.2, "max": 0.20000000000000007, "count": 229 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 229 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 229 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1689515243", "python_version": "3.9.17 (main, Jul 5 2023, 20:47:11) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "C:\\Users\\sunfi\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.0.1+cpu", "numpy_version": "1.19.5", "end_time_seconds": "1689534452" }, "total": 19208.5823395, "count": 1, "self": 0.8866258000016387, "children": { "run_training.setup": { "total": 0.21185169999999953, "count": 1, "self": 0.21185169999999953 }, "TrainerController.start_learning": { "total": 19207.483861999997, "count": 1, "self": 10.57180819995483, "children": { "TrainerController._reset_env": { "total": 17.462689599991908, "count": 50, "self": 17.462689599991908 }, "TrainerController.advance": { "total": 19179.25673830005, "count": 344275, "self": 11.360243499417265, "children": { "env_step": { "total": 9463.100118600454, "count": 344275, "self": 7612.4956442001685, "children": { "SubprocessEnvManager._take_step": { "total": 1843.4619058000512, "count": 344275, "self": 69.18349509872951, "children": { "TorchPolicy.evaluate": { "total": 1774.2784107013217, "count": 684270, "self": 1774.2784107013217 } } }, "workers": { "total": 7.14256860023416, "count": 344275, "self": 0.0, "children": { "worker_root": { "total": 19178.977480199846, "count": 344275, "is_parallel": true, "self": 12998.933225298833, "children": { "steps_from_proto": { "total": 0.1853417000062425, "count": 100, "is_parallel": true, "self": 0.02571290000403792, "children": { "_process_rank_one_or_two_observation": { "total": 0.15962880000220458, "count": 400, "is_parallel": true, "self": 0.15962880000220458 } } }, "UnityEnvironment.step": { "total": 6179.8589132010065, "count": 344275, "is_parallel": true, "self": 281.6747465000899, "children": { "UnityEnvironment._generate_step_input": { "total": 264.9148281006053, "count": 344275, "is_parallel": true, "self": 264.9148281006053 }, "communicator.exchange": { "total": 4183.226496600134, "count": 344275, "is_parallel": true, "self": 4183.226496600134 }, "steps_from_proto": { "total": 1450.0428420001772, "count": 688550, "is_parallel": true, "self": 193.88153099756482, "children": { "_process_rank_one_or_two_observation": { "total": 1256.1613110026124, "count": 2754200, "is_parallel": true, "self": 1256.1613110026124 } } } } } } } } } } }, "trainer_advance": { "total": 9704.79637620018, "count": 344275, "self": 72.86080160026177, "children": { "process_trajectory": { "total": 1373.3680769999087, "count": 344275, "self": 1371.400627499907, "children": { "RLTrainer._checkpoint": { "total": 1.9674495000017487, "count": 10, "self": 1.9674495000017487 } } }, "_update_policy": { "total": 8258.56749760001, "count": 229, "self": 1254.9338376999676, "children": { "TorchPOCAOptimizer.update": { "total": 7003.633659900042, "count": 6873, "self": 7003.633659900042 } } } } } } }, "trainer_threads": { "total": 1.0000003385357559e-06, "count": 1, "self": 1.0000003385357559e-06 }, "TrainerController._save_models": { "total": 0.1926248999989184, "count": 1, "self": 0.007822600000508828, "children": { "RLTrainer._checkpoint": { "total": 0.18480229999840958, "count": 1, "self": 0.18480229999840958 } } } } } } }