{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 2.4635698795318604, "min": 2.446720838546753, "max": 3.295762777328491, "count": 538 }, "SoccerTwos.Policy.Entropy.sum": { "value": 45802.69140625, "min": 16417.015625, "max": 117394.859375, "count": 538 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 73.38235294117646, "min": 58.57142857142857, "max": 999.0, "count": 538 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19960.0, "min": 16060.0, "max": 23728.0, "count": 538 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1443.1961367673673, "min": 1180.0585366420014, "max": 1443.1961367673673, "count": 381 }, "SoccerTwos.Self-play.ELO.sum": { "value": 196274.67460036196, "min": 2360.117073284003, "max": 235878.3833372058, "count": 381 }, "SoccerTwos.Step.mean": { "value": 5379836.0, "min": 9730.0, "max": 5379836.0, "count": 538 }, "SoccerTwos.Step.sum": { "value": 5379836.0, "min": 9730.0, "max": 5379836.0, "count": 538 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.06327828019857407, "min": -0.033435847610235214, "max": 0.17925363779067993, "count": 538 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 8.54256820678711, "min": -4.279788494110107, "max": 21.68968963623047, "count": 538 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.0657651424407959, "min": -0.039436567574739456, "max": 0.17373915016651154, "count": 538 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 8.878293991088867, "min": -5.04788064956665, "max": 21.022438049316406, "count": 538 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 538 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 538 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.11454221849088315, "min": -0.6784666644202338, "max": 0.4112533370653788, "count": 538 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 15.463199496269226, "min": -32.52720022201538, "max": 49.219600200653076, "count": 538 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.11454221849088315, "min": -0.6784666644202338, "max": 0.4112533370653788, "count": 538 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 15.463199496269226, "min": -32.52720022201538, "max": 49.219600200653076, "count": 538 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 538 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 538 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.01657080788863823, "min": 0.010172825842164456, "max": 0.023306229035370052, "count": 251 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.01657080788863823, "min": 0.010172825842164456, "max": 0.023306229035370052, "count": 251 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.08066019043326378, "min": 1.6107967966879262e-08, "max": 0.0869602623085181, "count": 251 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.08066019043326378, "min": 1.6107967966879262e-08, "max": 0.0869602623085181, "count": 251 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.08196957533558209, "min": 1.901390097221641e-08, "max": 0.0884813460210959, "count": 251 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.08196957533558209, "min": 1.901390097221641e-08, "max": 0.0884813460210959, "count": 251 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 251 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 251 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 251 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 251 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 251 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 251 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1707768727", "python_version": "3.10.11 (tags/v3.10.11:7d4cc5a, Apr 5 2023, 00:38:17) [MSC v.1929 64 bit (AMD64)]", "command_line_arguments": "C:\\Users\\boyima\\Documents\\source\\projects\\huggingface_reinforcement_learning\\unit-7-multi-agents-RL\\.venv-310\\Scripts\\mlagents-learn .\\config\\poca\\SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.2.0+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1707778860" }, "total": 10133.190361400135, "count": 1, "self": 0.4750309002120048, "children": { "run_training.setup": { "total": 0.18059429991990328, "count": 1, "self": 0.18059429991990328 }, "TrainerController.start_learning": { "total": 10132.534736200003, "count": 1, "self": 6.037219664081931, "children": { "TrainerController._reset_env": { "total": 7.811581298941746, "count": 27, "self": 7.811581298941746 }, "TrainerController.advance": { "total": 10118.557489536935, "count": 351746, "self": 6.191700093680993, "children": { "env_step": { "total": 4697.140038458165, "count": 351746, "self": 3669.804710606113, "children": { "SubprocessEnvManager._take_step": { "total": 1023.3092603802215, "count": 351746, "self": 39.312477950239554, "children": { "TorchPolicy.evaluate": { "total": 983.996782429982, "count": 691996, "self": 983.996782429982 } } }, "workers": { "total": 4.026067471830174, "count": 351746, "self": 0.0, "children": { "worker_root": { "total": 10113.101719118422, "count": 351746, "is_parallel": true, "self": 7177.556545335101, "children": { "steps_from_proto": { "total": 0.032745200442150235, "count": 54, "is_parallel": true, "self": 0.0067990003153681755, "children": { "_process_rank_one_or_two_observation": { "total": 0.02594620012678206, "count": 216, "is_parallel": true, "self": 0.02594620012678206 } } }, "UnityEnvironment.step": { "total": 2935.512428582879, "count": 351746, "is_parallel": true, "self": 141.4528527527582, "children": { "UnityEnvironment._generate_step_input": { "total": 124.9565692790784, "count": 351746, "is_parallel": true, "self": 124.9565692790784 }, "communicator.exchange": { "total": 2254.9616139177233, "count": 351746, "is_parallel": true, "self": 2254.9616139177233 }, "steps_from_proto": { "total": 414.1413926333189, "count": 703492, "is_parallel": true, "self": 83.2990423021838, "children": { "_process_rank_one_or_two_observation": { "total": 330.8423503311351, "count": 2813968, "is_parallel": true, "self": 330.8423503311351 } } } } } } } } } } }, "trainer_advance": { "total": 5415.22575098509, "count": 351746, "self": 57.044643357396126, "children": { "process_trajectory": { "total": 743.4585713285487, "count": 351746, "self": 742.381257928675, "children": { "RLTrainer._checkpoint": { "total": 1.077313399873674, "count": 10, "self": 1.077313399873674 } } }, "_update_policy": { "total": 4614.722536299145, "count": 252, "self": 563.7134744019713, "children": { "TorchPOCAOptimizer.update": { "total": 4051.0090618971735, "count": 7537, "self": 4051.0090618971735 } } } } } } }, "trainer_threads": { "total": 1.400010660290718e-06, "count": 1, "self": 1.400010660290718e-06 }, "TrainerController._save_models": { "total": 0.1284443000331521, "count": 1, "self": 0.011575200129300356, "children": { "RLTrainer._checkpoint": { "total": 0.11686909990385175, "count": 1, "self": 0.11686909990385175 } } } } } } }