{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.8532294034957886, "min": 1.7865139245986938, "max": 3.295811414718628, "count": 5000 }, "SoccerTwos.Policy.Entropy.sum": { "value": 38487.8671875, "min": 5800.6279296875, "max": 115795.28125, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 65.48, "min": 43.442477876106196, "max": 999.0, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19644.0, "min": 13432.0, "max": 30056.0, "count": 5000 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1671.0176464921628, "min": 1195.1253736477672, "max": 1764.9751663169422, "count": 4968 }, "SoccerTwos.Self-play.ELO.sum": { "value": 250652.64697382442, "min": 2391.7283571473363, "max": 381643.94451162743, "count": 4968 }, "SoccerTwos.Step.mean": { "value": 49999920.0, "min": 9046.0, "max": 49999920.0, "count": 5000 }, "SoccerTwos.Step.sum": { "value": 49999920.0, "min": 9046.0, "max": 49999920.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.03229290619492531, "min": -0.14206135272979736, "max": 0.1776188760995865, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -4.8762288093566895, "min": -20.314773559570312, "max": 28.028095245361328, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.03124230168759823, "min": -0.14606380462646484, "max": 0.1839049607515335, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -4.717587471008301, "min": -20.88712501525879, "max": 27.312236785888672, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.005737748367107467, "min": -0.5714285714285714, "max": 0.6006048743317767, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 0.8664000034332275, "min": -66.25439995527267, "max": 66.174800157547, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.005737748367107467, "min": -0.5714285714285714, "max": 0.6006048743317767, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 0.8664000034332275, "min": -66.25439995527267, "max": 66.174800157547, "count": 5000 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.011845738316090622, "min": 0.007440140519611305, "max": 0.018005548412475036, "count": 1216 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.011845738316090622, "min": 0.007440140519611305, "max": 0.018005548412475036, "count": 1216 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.09918897251288096, "min": 2.523424664104823e-05, "max": 0.1382937341928482, "count": 1216 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.09918897251288096, "min": 2.523424664104823e-05, "max": 0.1382937341928482, "count": 1216 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.10142053862412771, "min": 2.476414823225544e-05, "max": 0.1417475367585818, "count": 1216 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.10142053862412771, "min": 2.476414823225544e-05, "max": 0.1417475367585818, "count": 1216 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 1216 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 1216 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.15000000000000002, "min": 0.15000000000000002, "max": 0.15000000000000002, "count": 1216 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.15000000000000002, "min": 0.15000000000000002, "max": 0.15000000000000002, "count": 1216 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 1216 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 1216 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1675622187", "python_version": "3.8.16 (default, Jan 17 2023, 22:25:28) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "C:\\Users\\ramon\\anaconda3\\envs\\rl_football_gpu\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwosGpu50M --no-graphics --torch-device=cuda", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "1.13.1+cu117", "numpy_version": "1.21.2", "end_time_seconds": "1676229765" }, "total": 607578.1839447999, "count": 1, "self": 6.685378099908121, "children": { "run_training.setup": { "total": 0.6113058999999978, "count": 1, "self": 0.6113058999999978 }, "TrainerController.start_learning": { "total": 607570.8872608, "count": 1, "self": 423.8880830210401, "children": { "TrainerController._reset_env": { "total": 70.75092309986466, "count": 143, "self": 70.75092309986466 }, "TrainerController.advance": { "total": 607074.5658364792, "count": 3426394, "self": 455.34990817680955, "children": { "env_step": { "total": 443559.17826784507, "count": 3426394, "self": 282288.46389600344, "children": { "SubprocessEnvManager._take_step": { "total": 160998.49036457582, "count": 3426394, "self": 2865.982247389882, "children": { "TorchPolicy.evaluate": { "total": 158132.50811718593, "count": 6269072, "self": 158132.50811718593 } } }, "workers": { "total": 272.2240072658506, "count": 3426394, "self": 0.0, "children": { "worker_root": { "total": 606947.3217270984, "count": 3426394, "is_parallel": true, "self": 381055.083762191, "children": { "steps_from_proto": { "total": 1.6658315005754716, "count": 286, "is_parallel": true, "self": 0.34884800013656303, "children": { "_process_rank_one_or_two_observation": { "total": 1.3169835004389085, "count": 1144, "is_parallel": true, "self": 1.3169835004389085 } } }, "UnityEnvironment.step": { "total": 225890.57213340682, "count": 3426394, "is_parallel": true, "self": 11142.004610689182, "children": { "UnityEnvironment._generate_step_input": { "total": 9678.473195383567, "count": 3426394, "is_parallel": true, "self": 9678.473195383567 }, "communicator.exchange": { "total": 165653.08175009323, "count": 3426394, "is_parallel": true, "self": 165653.08175009323 }, "steps_from_proto": { "total": 39417.01257724084, "count": 6852788, "is_parallel": true, "self": 8358.596127365636, "children": { "_process_rank_one_or_two_observation": { "total": 31058.416449875207, "count": 27411152, "is_parallel": true, "self": 31058.416449875207 } } } } } } } } } } }, "trainer_advance": { "total": 163060.03766045737, "count": 3426394, "self": 2095.0413615892176, "children": { "process_trajectory": { "total": 78211.21985016807, "count": 3426394, "self": 78086.00886026803, "children": { "RLTrainer._checkpoint": { "total": 125.21098990003793, "count": 100, "self": 125.21098990003793 } } }, "_update_policy": { "total": 82753.7764487001, "count": 1216, "self": 57088.19903259386, "children": { "TorchPOCAOptimizer.update": { "total": 25665.577416106236, "count": 36480, "self": 25665.577416106236 } } } } } } }, "trainer_threads": { "total": 7.00005330145359e-06, "count": 1, "self": 7.00005330145359e-06 }, "TrainerController._save_models": { "total": 1.682411199901253, "count": 1, "self": 0.3388715998735279, "children": { "RLTrainer._checkpoint": { "total": 1.343539600027725, "count": 1, "self": 1.343539600027725 } } } } } } }