{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.8345582485198975, "min": 1.8345582485198975, "max": 3.29571533203125, "count": 500 }, "SoccerTwos.Policy.Entropy.sum": { "value": 37513.046875, "min": 23554.9921875, "max": 113724.3046875, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 45.351851851851855, "min": 37.56, "max": 999.0, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19592.0, "min": 16280.0, "max": 24800.0, "count": 500 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1598.71792161027, "min": 1196.6261703485754, "max": 1607.8513640708836, "count": 495 }, "SoccerTwos.Self-play.ELO.sum": { "value": 345323.0710678183, "min": 2395.8083475909125, "max": 388103.431183394, "count": 495 }, "SoccerTwos.Step.mean": { "value": 4999966.0, "min": 9110.0, "max": 4999966.0, "count": 500 }, "SoccerTwos.Step.sum": { "value": 4999966.0, "min": 9110.0, "max": 4999966.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.045681633055210114, "min": -0.09758434444665909, "max": 0.18215981125831604, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 9.821551322937012, "min": -19.028947830200195, "max": 28.353229522705078, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.049040328711271286, "min": -0.09659469127655029, "max": 0.18255679309368134, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 10.543670654296875, "min": -18.178857803344727, "max": 28.05614471435547, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.13328744173049928, "min": -0.4922352959127987, "max": 0.6207773584239887, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 28.656799972057343, "min": -63.51879966259003, "max": 65.80239999294281, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.13328744173049928, "min": -0.4922352959127987, "max": 0.6207773584239887, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 28.656799972057343, "min": -63.51879966259003, "max": 65.80239999294281, "count": 500 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.014742925442988053, "min": 0.011436986700088407, "max": 0.022447683431285743, "count": 240 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.014742925442988053, "min": 0.011436986700088407, "max": 0.022447683431285743, "count": 240 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.1022038884460926, "min": 0.0008298497724657257, "max": 0.12449491620063782, "count": 240 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.1022038884460926, "min": 0.0008298497724657257, "max": 0.12449491620063782, "count": 240 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.1036163051923116, "min": 0.0008341283629609582, "max": 0.12725822851061822, "count": 240 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.1036163051923116, "min": 0.0008341283629609582, "max": 0.12725822851061822, "count": 240 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 240 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 240 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 240 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 240 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 240 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 240 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1676168213", "python_version": "3.8.16 (default, Jan 17 2023, 16:42:09) \n[Clang 14.0.6 ]", "command_line_arguments": "/Users/pete.pittawat/opt/anaconda3/envs/hf-drl-course/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.app --run-id=SoccerTwos --no-graphics", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "1.8.1", "numpy_version": "1.21.2", "end_time_seconds": "1676187112" }, "total": 18899.134539717, "count": 1, "self": 0.45259521499974653, "children": { "run_training.setup": { "total": 0.03957640099999993, "count": 1, "self": 0.03957640099999993 }, "TrainerController.start_learning": { "total": 18898.642368101002, "count": 1, "self": 7.61635909060351, "children": { "TrainerController._reset_env": { "total": 4.582837958998065, "count": 25, "self": 4.582837958998065 }, "TrainerController.advance": { "total": 18886.2798147144, "count": 341641, "self": 7.892173009298858, "children": { "env_step": { "total": 5466.5364729704925, "count": 341641, "self": 4473.875193116512, "children": { "SubprocessEnvManager._take_step": { "total": 988.1228790321961, "count": 341641, "self": 40.42745911132329, "children": { "TorchPolicy.evaluate": { "total": 947.6954199208728, "count": 630384, "self": 947.6954199208728 } } }, "workers": { "total": 4.5384008217849425, "count": 341641, "self": 0.0, "children": { "worker_root": { "total": 18881.021428684817, "count": 341641, "is_parallel": true, "self": 15246.04621531512, "children": { "steps_from_proto": { "total": 0.051542525997126276, "count": 50, "is_parallel": true, "self": 0.010604309995462025, "children": { "_process_rank_one_or_two_observation": { "total": 0.04093821600166425, "count": 200, "is_parallel": true, "self": 0.04093821600166425 } } }, "UnityEnvironment.step": { "total": 3634.9236708437, "count": 341641, "is_parallel": true, "self": 193.6370189763602, "children": { "UnityEnvironment._generate_step_input": { "total": 125.1309502954296, "count": 341641, "is_parallel": true, "self": 125.1309502954296 }, "communicator.exchange": { "total": 2705.5918678845146, "count": 341641, "is_parallel": true, "self": 2705.5918678845146 }, "steps_from_proto": { "total": 610.5638336873956, "count": 683282, "is_parallel": true, "self": 126.25119566731627, "children": { "_process_rank_one_or_two_observation": { "total": 484.3126380200793, "count": 2733128, "is_parallel": true, "self": 484.3126380200793 } } } } } } } } } } }, "trainer_advance": { "total": 13411.851168734609, "count": 341641, "self": 46.26743475312469, "children": { "process_trajectory": { "total": 1768.981924547457, "count": 341641, "self": 1767.2585868154608, "children": { "RLTrainer._checkpoint": { "total": 1.7233377319962528, "count": 10, "self": 1.7233377319962528 } } }, "_update_policy": { "total": 11596.601809434027, "count": 240, "self": 754.1266790701047, "children": { "TorchPOCAOptimizer.update": { "total": 10842.475130363922, "count": 7200, "self": 10842.475130363922 } } } } } } }, "trainer_threads": { "total": 1.353000698145479e-06, "count": 1, "self": 1.353000698145479e-06 }, "TrainerController._save_models": { "total": 0.16335498400076176, "count": 1, "self": 0.0022506280001834966, "children": { "RLTrainer._checkpoint": { "total": 0.16110435600057826, "count": 1, "self": 0.16110435600057826 } } } } } } }