{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.7867881059646606, "min": 1.7753045558929443, "max": 3.2957375049591064, "count": 505 }, "SoccerTwos.Policy.Entropy.sum": { "value": 32476.66015625, "min": 32474.951171875, "max": 119972.7890625, "count": 505 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 54.30769230769231, "min": 42.531531531531535, "max": 999.0, "count": 505 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19768.0, "min": 13724.0, "max": 26524.0, "count": 505 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1574.0307408686213, "min": 1201.9865418121897, "max": 1589.7626448224648, "count": 503 }, "SoccerTwos.Self-play.ELO.sum": { "value": 286473.5948380891, "min": 2407.7507100120824, "max": 350126.4509267537, "count": 503 }, "SoccerTwos.Step.mean": { "value": 5049956.0, "min": 9280.0, "max": 5049956.0, "count": 505 }, "SoccerTwos.Step.sum": { "value": 5049956.0, "min": 9280.0, "max": 5049956.0, "count": 505 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.009472889825701714, "min": -0.10301780700683594, "max": 0.18000946938991547, "count": 505 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -1.7240660190582275, "min": -16.468795776367188, "max": 25.510509490966797, "count": 505 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.010175843723118305, "min": -0.10737248510122299, "max": 0.18018360435962677, "count": 505 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -1.852003574371338, "min": -16.32061767578125, "max": 23.243684768676758, "count": 505 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 505 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 505 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.08910549472976517, "min": -0.5384615384615384, "max": 0.5356640040874481, "count": 505 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 16.21720004081726, "min": -50.72160005569458, "max": 54.37079989910126, "count": 505 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.08910549472976517, "min": -0.5384615384615384, "max": 0.5356640040874481, "count": 505 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 16.21720004081726, "min": -50.72160005569458, "max": 54.37079989910126, "count": 505 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 505 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 505 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.01447999767406145, "min": 0.011813740432747485, "max": 0.02383224367319296, "count": 244 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.01447999767406145, "min": 0.011813740432747485, "max": 0.02383224367319296, "count": 244 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.11063176492849985, "min": 0.0016214567605250826, "max": 0.12025941461324692, "count": 244 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.11063176492849985, "min": 0.0016214567605250826, "max": 0.12025941461324692, "count": 244 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.11189820567766826, "min": 0.0016232936623661468, "max": 0.12271515553196272, "count": 244 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.11189820567766826, "min": 0.0016232936623661468, "max": 0.12271515553196272, "count": 244 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 244 }, 
"SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 244 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 244 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 244 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 244 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 244 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1680176989", "python_version": "3.9.16 (main, Mar 8 2023, 10:39:24) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "C:\\Users\\WooSeong\\anaconda3\\envs\\hfcourse\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos4 --no-graphics", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.0.0+cu118", "numpy_version": "1.21.2", "end_time_seconds": "1680199247" }, "total": 22258.463152300003, "count": 1, "self": 0.13924649999898975, "children": { "run_training.setup": { "total": 0.15325260000000007, "count": 1, "self": 0.15325260000000007 }, "TrainerController.start_learning": { "total": 22258.170653200003, "count": 1, "self": 7.005177698778425, "children": { "TrainerController._reset_env": { "total": 6.2183117999955115, "count": 26, "self": 6.2183117999955115 }, "TrainerController.advance": { "total": 22244.755225601228, "count": 346388, "self": 7.42111850242145, "children": { "env_step": { "total": 6415.790634899113, "count": 346388, "self": 3769.558152198421, "children": { "SubprocessEnvManager._take_step": { "total": 2641.8515483004594, "count": 346388, "self": 49.8298176989274, "children": { "TorchPolicy.evaluate": { "total": 2592.021730601532, "count": 635990, "self": 2592.021730601532 } } }, "workers": { "total": 4.380934400232464, "count": 346387, "self": 0.0, "children": { "worker_root": { "total": 22243.851631800106, "count": 346387, "is_parallel": true, "self": 19256.399175100203, "children": { "steps_from_proto": { "total": 0.037188900002417036, "count": 52, "is_parallel": true, "self": 0.007830400017094341, "children": { "_process_rank_one_or_two_observation": { "total": 0.029358499985322695, "count": 208, "is_parallel": true, "self": 0.029358499985322695 } } }, "UnityEnvironment.step": { "total": 2987.4152677999004, "count": 346387, "is_parallel": true, "self": 146.199240800167, "children": { "UnityEnvironment._generate_step_input": { "total": 114.76311209962118, "count": 346387, "is_parallel": true, "self": 114.76311209962118 }, "communicator.exchange": { "total": 2258.4872728993228, "count": 346387, "is_parallel": true, "self": 2258.4872728993228 }, "steps_from_proto": { "total": 467.9656420007896, "count": 692774, "is_parallel": true, "self": 100.17413229970703, "children": { "_process_rank_one_or_two_observation": { "total": 367.7915097010826, "count": 2771096, "is_parallel": true, "self": 367.7915097010826 } } } } } } } } } } }, "trainer_advance": { "total": 15821.543472199693, "count": 346387, "self": 50.5853824984315, "children": { "process_trajectory": { "total": 14442.577576801266, "count": 346387, "self": 14440.08503390127, "children": { "RLTrainer._checkpoint": { "total": 2.4925428999960104, 
"count": 10, "self": 2.4925428999960104 } } }, "_update_policy": { "total": 1328.3805128999961, "count": 244, "self": 688.7566654999027, "children": { "TorchPOCAOptimizer.update": { "total": 639.6238474000934, "count": 7329, "self": 639.6238474000934 } } } } } } }, "trainer_threads": { "total": 1.8000027921516448e-06, "count": 1, "self": 1.8000027921516448e-06 }, "TrainerController._save_models": { "total": 0.19193629999790573, "count": 1, "self": 0.0033741999977792148, "children": { "RLTrainer._checkpoint": { "total": 0.18856210000012652, "count": 1, "self": 0.18856210000012652 } } } } } } }