{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 3.208897352218628, "min": 3.167426347732544, "max": 3.295755386352539, "count": 50 }, "SoccerTwos.Policy.Entropy.sum": { "value": 72084.671875, "min": 3157.7578125, "max": 105464.1640625, "count": 50 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 723.8333333333334, "min": 400.5833333333333, "max": 999.0, "count": 50 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 17372.0, "min": 12368.0, "max": 28232.0, "count": 50 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1215.3275069722, "min": 1198.0140976835446, "max": 1216.0444001507303, "count": 46 }, "SoccerTwos.Self-play.ELO.sum": { "value": 9722.6200557776, "min": 2396.5239063568742, "max": 19400.876835675528, "count": 46 }, "SoccerTwos.Step.mean": { "value": 499466.0, "min": 9138.0, "max": 499466.0, "count": 50 }, "SoccerTwos.Step.sum": { "value": 499466.0, "min": 9138.0, "max": 499466.0, "count": 50 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.0007608662126585841, "min": -0.08160314708948135, "max": 0.0031201052479445934, "count": 50 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -0.009891260415315628, "min": -1.3871639966964722, "max": 0.03445547819137573, "count": 50 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.0007399116875603795, "min": -0.08157574385404587, "max": 0.0027977179270237684, "count": 50 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -0.009618852287530899, "min": -1.3866405487060547, "max": 0.03021223656833172, "count": 50 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 50 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 50 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.32898461532134277, "min": -0.4532631559199409, "max": 0.32513333757718405, "count": 50 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -4.276799999177456, "min": -8.611999962478876, "max": 7.261200055480003, "count": 50 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.32898461532134277, "min": -0.4532631559199409, "max": 0.32513333757718405, "count": 50 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -4.276799999177456, "min": -8.611999962478876, "max": 7.261200055480003, "count": 50 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 50 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 50 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.016160426136048046, "min": 0.013953804352786392, "max": 0.02250218946040453, "count": 23 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.016160426136048046, "min": 0.013953804352786392, "max": 0.02250218946040453, "count": 23 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.0015458971553016453, "min": 0.0005459430918563158, "max": 0.007379330756763617, "count": 23 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.0015458971553016453, "min": 0.0005459430918563158, "max": 0.007379330756763617, "count": 23 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.0015502585699626555, "min": 0.0005527794239848542, "max": 0.007383338843161861, "count": 23 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.0015502585699626555, "min": 0.0005527794239848542, "max": 0.007383338843161861, "count": 23 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 23 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 23 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 23 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 23 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 23 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 23 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1685366789", "python_version": "3.9.16 (main, Mar 8 2023, 10:39:24) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "D:\\Anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.0.1+cpu", "numpy_version": "1.21.2", "end_time_seconds": "1685368269" }, "total": 1480.7160249, "count": 1, "self": 0.33426360000021305, "children": { "run_training.setup": { "total": 0.20417030000000036, "count": 1, "self": 0.20417030000000036 }, "TrainerController.start_learning": { "total": 1480.177591, "count": 1, "self": 0.7939259999991464, "children": { "TrainerController._reset_env": { "total": 4.3953076000000495, "count": 3, "self": 4.3953076000000495 }, "TrainerController.advance": { "total": 1474.8158319000008, "count": 32685, "self": 0.8752609000316625, "children": { "env_step": { "total": 663.036887699983, "count": 32685, "self": 526.0957164999761, "children": { "SubprocessEnvManager._take_step": { "total": 136.38368390000687, "count": 32685, "self": 4.607681099998558, "children": { "TorchPolicy.evaluate": { "total": 131.7760028000083, "count": 64830, "self": 131.7760028000083 } } }, "workers": { "total": 0.5574873000000959, "count": 32685, "self": 0.0, "children": { "worker_root": { "total": 1474.1840769999976, "count": 32685, "is_parallel": true, "self": 1051.4892170999976, "children": { "steps_from_proto": { "total": 0.00575940000008579, "count": 6, "is_parallel": true, "self": 0.0011389000000932015, "children": { "_process_rank_one_or_two_observation": { "total": 0.004620499999992589, "count": 24, "is_parallel": true, "self": 0.004620499999992589 } } }, "UnityEnvironment.step": { "total": 422.6891004999999, "count": 32685, "is_parallel": true, "self": 19.718969899969352, "children": { "UnityEnvironment._generate_step_input": { "total": 18.785363399996477, "count": 32685, "is_parallel": true, "self": 18.785363399996477 }, "communicator.exchange": { "total": 318.18374580001216, "count": 32685, "is_parallel": true, "self": 318.18374580001216 }, "steps_from_proto": { "total": 66.00102140002193, "count": 65370, "is_parallel": true, "self": 13.732501200086695, "children": { "_process_rank_one_or_two_observation": { "total": 52.26852019993523, "count": 261480, "is_parallel": true, "self": 52.26852019993523 } } } } } } } } } } }, "trainer_advance": { "total": 810.903683299986, "count": 32685, "self": 5.57341389999317, "children": { "process_trajectory": { "total": 111.55046469999257, "count": 32685, "self": 111.3059452999924, "children": { "RLTrainer._checkpoint": { "total": 0.24451940000017203, "count": 1, "self": 0.24451940000017203 } } }, "_update_policy": { "total": 693.7798047000002, "count": 23, "self": 84.76063819999786, "children": { "TorchPOCAOptimizer.update": { "total": 609.0191665000024, "count": 690, "self": 609.0191665000024 } } } } } } }, "TrainerController._save_models": { "total": 0.17252549999989242, "count": 1, "self": 3.2899999951041536e-05, "children": { "RLTrainer._checkpoint": { "total": 0.17249259999994138, "count": 1, "self": 0.17249259999994138 } } } } } } }