{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.4993818998336792, "min": 1.4761028289794922, "max": 3.2957253456115723, "count": 1503 }, "SoccerTwos.Policy.Entropy.sum": { "value": 33394.234375, "min": 8805.59765625, "max": 137453.59375, "count": 1503 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 56.67816091954023, "min": 36.28148148148148, "max": 999.0, "count": 1503 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19724.0, "min": 11036.0, "max": 29364.0, "count": 1503 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1569.5431821869038, "min": 1172.6436559699075, "max": 1580.7630775319462, "count": 1348 }, "SoccerTwos.Self-play.ELO.sum": { "value": 273100.51370052126, "min": 2348.8145162520595, "max": 412056.54571461055, "count": 1348 }, "SoccerTwos.Step.mean": { "value": 15029797.0, "min": 9698.0, "max": 15029797.0, "count": 1503 }, "SoccerTwos.Step.sum": { "value": 15029797.0, "min": 9698.0, "max": 15029797.0, "count": 1503 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.0333280973136425, "min": -0.13251015543937683, "max": 0.16841717064380646, "count": 1503 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 5.799088954925537, "min": -31.934947967529297, "max": 31.200580596923828, "count": 1503 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.027952956035733223, "min": -0.13822214305400848, "max": 0.17876125872135162, "count": 1503 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 4.863814353942871, "min": -33.3115348815918, "max": 31.62996482849121, "count": 1503 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1503 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1503 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.0699563214833709, "min": -0.75, "max": 0.47833939541526366, "count": 1503 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 12.172399938106537, "min": -62.77059978246689, "max": 53.74360013008118, "count": 1503 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.0699563214833709, "min": -0.75, "max": 0.47833939541526366, "count": 1503 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 12.172399938106537, "min": -62.77059978246689, "max": 53.74360013008118, "count": 1503 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1503 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1503 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.015542845497839153, "min": 0.009906640709959901, "max": 0.02692531718251606, "count": 720 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.015542845497839153, "min": 0.009906640709959901, "max": 0.02692531718251606, "count": 720 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.10501614883542061, "min": 3.3772016353831687e-08, "max": 0.13716114684939384, "count": 720 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.10501614883542061, "min": 3.3772016353831687e-08, "max": 0.13716114684939384, "count": 720 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.10641206701596578, "min": 3.6074097190900524e-08, "max": 0.1394977351029714, "count": 720 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.10641206701596578, "min": 3.6074097190900524e-08, "max": 0.1394977351029714, "count": 720 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 720 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 720 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 720 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 720 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 720 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 720 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1686831289", "python_version": "3.9.16 (main, May 17 2023, 17:49:16) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "C:\\Users\\user\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics --force", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.0.1+cpu", "numpy_version": "1.21.2", "end_time_seconds": "1687020767" }, "total": 189478.32581049998, "count": 1, "self": 10.097653099976014, "children": { "run_training.setup": { "total": 0.2547436000000003, "count": 1, "self": 0.2547436000000003 }, "TrainerController.start_learning": { "total": 189467.9734138, "count": 1, "self": 80.08307909406722, "children": { "TrainerController._reset_env": { "total": 14.846915599920031, "count": 76, "self": 14.846915599920031 }, "TrainerController.advance": { "total": 189370.691320906, "count": 1030100, "self": 91.2220391977462, "children": { "env_step": { "total": 68703.11599500524, "count": 1030100, "self": 54689.35356180647, "children": { "SubprocessEnvManager._take_step": { "total": 13965.146060000296, "count": 1030100, "self": 513.0806386912063, "children": { "TorchPolicy.evaluate": { "total": 13452.06542130909, "count": 1901394, "self": 13452.06542130909 } } }, "workers": { "total": 48.61637319847291, "count": 1030100, "self": 0.0, "children": { "worker_root": { "total": 189327.458764, "count": 1030100, "is_parallel": true, "self": 144562.9227191959, "children": { "steps_from_proto": { "total": 0.514742100045626, "count": 152, "is_parallel": true, "self": 0.11642250034527013, "children": { "_process_rank_one_or_two_observation": { "total": 0.3983195997003559, "count": 608, "is_parallel": true, "self": 0.3983195997003559 } } }, "UnityEnvironment.step": { "total": 44764.02130270407, "count": 1030100, "is_parallel": true, "self": 2582.350405013014, "children": { "UnityEnvironment._generate_step_input": { "total": 2368.1888878930604, "count": 1030100, "is_parallel": true, "self": 2368.1888878930604 }, "communicator.exchange": { "total": 31451.65345590269, "count": 1030100, "is_parallel": true, "self": 31451.65345590269 }, "steps_from_proto": { "total": 8361.828553895308, "count": 2060200, "is_parallel": true, "self": 1773.611002981178, "children": { "_process_rank_one_or_two_observation": { "total": 6588.21755091413, "count": 8240800, "is_parallel": true, "self": 6588.21755091413 } } } } } } } } } } }, "trainer_advance": { "total": 120576.35328670302, "count": 1030099, "self": 524.5858657971839, "children": { "process_trajectory": { "total": 16936.068953705922, "count": 1030099, "self": 16885.13393480595, "children": { "RLTrainer._checkpoint": { "total": 50.93501889997515, "count": 30, "self": 50.93501889997515 } } }, "_update_policy": { "total": 103115.69846719991, "count": 721, "self": 8654.884043800528, "children": { "TorchPOCAOptimizer.update": { "total": 94460.81442339938, "count": 21639, "self": 94460.81442339938 } } } } } } }, "trainer_threads": { "total": 5.1000097300857306e-06, "count": 1, "self": 5.1000097300857306e-06 }, "TrainerController._save_models": { "total": 2.3520931000239216, "count": 1, "self": 1.230845500016585, "children": { "RLTrainer._checkpoint": { "total": 1.1212476000073366, "count": 1, "self": 1.1212476000073366 } } } } } } }