{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 2.319329023361206, "min": 2.2426493167877197, "max": 2.546591281890869, "count": 846 }, "SoccerTwos.Policy.Entropy.sum": { "value": 47128.765625, "min": 27355.8984375, "max": 55454.2734375, "count": 846 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 52.858695652173914, "min": 43.234234234234236, "max": 96.23076923076923, "count": 846 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19452.0, "min": 9440.0, "max": 20892.0, "count": 846 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1597.3496499606313, "min": 1528.9726333877204, "max": 1622.7613583211412, "count": 846 }, "SoccerTwos.Self-play.ELO.sum": { "value": 293912.33559275616, "min": 132162.47816857527, "max": 354704.1019926546, "count": 846 }, "SoccerTwos.Step.mean": { "value": 26549973.0, "min": 18099990.0, "max": 26549973.0, "count": 846 }, "SoccerTwos.Step.sum": { "value": 26549973.0, "min": 18099990.0, "max": 26549973.0, "count": 846 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.020791135728359222, "min": -0.10922888666391373, "max": 0.09116566926240921, "count": 846 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -3.846360206604004, "min": -16.016483306884766, "max": 18.324298858642578, "count": 846 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.027282260358333588, "min": -0.11557411402463913, "max": 0.09426523000001907, "count": 846 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -5.047218322753906, "min": -16.411523818969727, "max": 18.947311401367188, "count": 846 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 846 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 846 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.0008000019434336069, "min": -0.3872473694776234, "max": 0.4122607398916174, "count": 846 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 0.14800035953521729, "min": -58.861600160598755, "max": 55.65519988536835, "count": 846 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.0008000019434336069, "min": -0.3872473694776234, "max": 0.4122607398916174, "count": 846 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 0.14800035953521729, "min": -58.861600160598755, "max": 55.65519988536835, "count": 846 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 846 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 846 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.015358561110770098, "min": 0.012239268567645923, "max": 0.024514615524094552, "count": 410 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.015358561110770098, "min": 0.012239268567645923, "max": 0.024514615524094552, "count": 410 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.07129455804824829, "min": 0.05349739296361804, "max": 0.0858179870992899, "count": 410 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.07129455804824829, "min": 0.05349739296361804, "max": 0.0858179870992899, "count": 410 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.07738243732601405, "min": 0.058256584126502274, "max": 0.09445315822958947, "count": 410 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.07738243732601405, "min": 0.058256584126502274, "max": 0.09445315822958947, "count": 410 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.00025, "min": 0.00025, "max": 0.00025, "count": 410 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.00025, "min": 0.00025, "max": 0.00025, "count": 410 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.15, "min": 0.15, "max": 0.15, "count": 410 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.15, "min": 0.15, "max": 0.15, "count": 410 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.0075, "min": 0.0075, "max": 0.0075, "count": 410 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.0075, "min": 0.0075, "max": 0.0075, "count": 410 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1680315414", "python_version": "3.9.16 (main, Feb 7 2023, 23:31:36) \n[Clang 13.1.6 (clang-1316.0.21.2.5)]", "command_line_arguments": "/Users/anka/Desktop/hf_rl_course_unit_7/venv/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos-x --no-graphics --resume", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "1.11.0", "numpy_version": "1.21.2", "end_time_seconds": "1680355127" }, "total": 39713.07136125, "count": 1, "self": 0.1786857499901089, "children": { "run_training.setup": { "total": 0.014558417000000046, "count": 1, "self": 0.014558417000000046 }, "TrainerController.start_learning": { "total": 39712.878117083004, "count": 1, "self": 7.381975717806199, "children": { "TrainerController._reset_env": { "total": 4.275026204978197, "count": 67, "self": 4.275026204978197 }, "TrainerController.advance": { "total": 39701.10095036921, "count": 586893, "self": 6.583793859623256, "children": { "env_step": { "total": 29611.540615212303, "count": 586893, "self": 28397.38233668174, "children": { "SubprocessEnvManager._take_step": { "total": 1209.8429209536998, "count": 586893, "self": 30.833679700648418, "children": { "TorchPolicy.evaluate": { "total": 1179.0092412530514, "count": 1063110, "self": 1179.0092412530514 } } }, "workers": { "total": 4.315357576863366, "count": 586892, "self": 0.0, "children": { "worker_root": { "total": 39700.37010287445, "count": 586892, "is_parallel": true, "self": 12010.099248116821, "children": { "steps_from_proto": { "total": 0.10787853601039021, "count": 134, "is_parallel": true, "self": 0.01206490508244995, "children": { "_process_rank_one_or_two_observation": { "total": 0.09581363092794026, "count": 536, "is_parallel": true, "self": 0.09581363092794026 } } }, "UnityEnvironment.step": { "total": 27690.162976221614, "count": 586892, "is_parallel": true, "self": 70.42657904663429, "children": { "UnityEnvironment._generate_step_input": { "total": 489.7488024688307, "count": 586892, "is_parallel": true, "self": 489.7488024688307 }, "communicator.exchange": { "total": 26163.053725194375, "count": 586892, "is_parallel": true, "self": 26163.053725194375 }, "steps_from_proto": { "total": 966.9338695117734, "count": 1173784, "is_parallel": true, "self": 105.80709412985425, "children": { "_process_rank_one_or_two_observation": { "total": 861.1267753819192, "count": 4695136, "is_parallel": true, "self": 861.1267753819192 } } } } } } } } } } }, "trainer_advance": { "total": 10082.976541297285, "count": 586892, "self": 51.728877312485565, "children": { "process_trajectory": { "total": 1932.552331481859, "count": 586892, "self": 1930.6788891058588, "children": { "RLTrainer._checkpoint": { "total": 1.8734423760001846, "count": 17, "self": 1.8734423760001846 } } }, "_update_policy": { "total": 8098.695332502941, "count": 410, "self": 1009.7272583499107, "children": { "TorchPOCAOptimizer.update": { "total": 7088.96807415303, "count": 16400, "self": 7088.96807415303 } } } } } } }, "trainer_threads": { "total": 5.410038284026086e-07, "count": 1, "self": 5.410038284026086e-07 }, "TrainerController._save_models": { "total": 0.12016425000183517, "count": 1, "self": 0.0013245840018498711, "children": { "RLTrainer._checkpoint": { "total": 0.1188396659999853, "count": 1, "self": 0.1188396659999853 } } } } } } }