{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 2.246558904647827, "min": 2.180464267730713, "max": 3.2956936359405518, "count": 467 }, "SoccerTwos.Policy.Entropy.sum": { "value": 17541.1328125, "min": 12763.185546875, "max": 130325.0546875, "count": 467 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 999.0, "min": 431.7, "max": 999.0, "count": 467 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19980.0, "min": 16444.0, "max": 23888.0, "count": 467 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1164.4766612136252, "min": 1164.4539061327255, "max": 1212.146926227059, "count": 235 }, "SoccerTwos.Self-play.ELO.sum": { "value": 2328.9533224272504, "min": 2328.907812265451, "max": 19380.76733320289, "count": 235 }, "SoccerTwos.Step.mean": { "value": 4669986.0, "min": 9556.0, "max": 4669986.0, "count": 467 }, "SoccerTwos.Step.sum": { "value": 4669986.0, "min": 9556.0, "max": 4669986.0, "count": 467 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 4.44004763267003e-05, "min": -0.04444378241896629, "max": 0.017183450981974602, "count": 467 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 0.00044400474871508777, "min": -0.6234198808670044, "max": 0.22842879593372345, "count": 467 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 5.451093238661997e-05, "min": -0.04154488816857338, "max": 0.020218756049871445, "count": 467 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 0.0005451093311421573, "min": -0.6231732964515686, "max": 0.2628438174724579, "count": 467 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 467 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 467 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.0, "min": -0.5714285714285714, "max": 0.27735384610983044, "count": 467 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 0.0, "min": -8.108399987220764, "max": 3.6055999994277954, "count": 467 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.0, "min": -0.5714285714285714, "max": 0.27735384610983044, "count": 467 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 0.0, "min": -8.108399987220764, "max": 3.6055999994277954, "count": 467 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 467 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 467 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.01915971445851028, "min": 0.010397530502814334, "max": 0.022740037518087775, "count": 215 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.01915971445851028, "min": 0.010397530502814334, "max": 0.022740037518087775, "count": 215 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 1.1836813603357163e-08, "min": 8.404479305094507e-09, "max": 0.007623026318227252, "count": 215 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 1.1836813603357163e-08, "min": 8.404479305094507e-09, "max": 0.007623026318227252, "count": 215 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 1.5459194605919416e-08, "min": 1.1199814646071598e-08, "max": 0.007787136780098081, "count": 215 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 1.5459194605919416e-08, "min": 1.1199814646071598e-08, "max": 0.007787136780098081, "count": 215 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 215 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 215 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 215 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 215 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 215 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 215 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1676745156", "python_version": "3.9.16 (main, Jan 11 2023, 10:02:19) \n[Clang 14.0.6 ]", "command_line_arguments": "/Users/benjamyu/miniconda3/envs/rl-hf/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "1.11.0", "numpy_version": "1.21.2", "end_time_seconds": "1676765523" }, "total": 20366.928450792, "count": 1, "self": 1.21946124999522, "children": { "run_training.setup": { "total": 0.0632994999999994, "count": 1, "self": 0.0632994999999994 }, "TrainerController.start_learning": { "total": 20365.645690042, "count": 1, "self": 4.228794656544778, "children": { "TrainerController._reset_env": { "total": 7.885637377999636, "count": 24, "self": 7.885637377999636 }, "TrainerController.advance": { "total": 20353.318331049457, "count": 304896, "self": 4.072074226824043, "children": { "env_step": { "total": 5172.137980149988, "count": 304896, "self": 4321.1738946759, "children": { "SubprocessEnvManager._take_step": { "total": 847.6111284407115, "count": 304896, "self": 22.195572720167434, "children": { "TorchPolicy.evaluate": { "total": 825.415555720544, "count": 605274, "self": 825.415555720544 } } }, "workers": { "total": 3.3529570333767182, "count": 304896, "self": 0.0, "children": { "worker_root": { "total": 20322.668936004953, "count": 304896, "is_parallel": true, "self": 16547.030998394643, "children": { "steps_from_proto": { "total": 0.0380018359954466, "count": 48, "is_parallel": true, "self": 0.006850168996551886, "children": { "_process_rank_one_or_two_observation": { "total": 0.03115166699889471, "count": 192, "is_parallel": true, "self": 0.03115166699889471 } } }, "UnityEnvironment.step": { "total": 3775.5999357743153, "count": 304896, "is_parallel": true, "self": 131.12347890814226, "children": { "UnityEnvironment._generate_step_input": { "total": 88.00337538107925, "count": 304896, "is_parallel": true, "self": 88.00337538107925 }, "communicator.exchange": { "total": 3131.1549675839174, "count": 304896, "is_parallel": true, "self": 3131.1549675839174 }, "steps_from_proto": { "total": 425.3181139011764, "count": 609792, "is_parallel": true, "self": 69.90772824317503, "children": { "_process_rank_one_or_two_observation": { "total": 355.41038565800136, "count": 2439168, "is_parallel": true, "self": 355.41038565800136 } } } } } } } } } } }, "trainer_advance": { "total": 15177.108276672645, "count": 304896, "self": 45.48369586689296, "children": { "process_trajectory": { "total": 1733.126378299721, "count": 304896, "self": 1731.6378164237237, "children": { "RLTrainer._checkpoint": { "total": 1.4885618759972203, "count": 9, "self": 1.4885618759972203 } } }, "_update_policy": { "total": 13398.49820250603, "count": 216, "self": 584.6017068210185, "children": { "TorchPOCAOptimizer.update": { "total": 12813.896495685012, "count": 6462, "self": 12813.896495685012 } } } } } } }, "trainer_threads": { "total": 1.2079981388524175e-06, "count": 1, "self": 1.2079981388524175e-06 }, "TrainerController._save_models": { "total": 0.21292575000188663, "count": 1, "self": 0.0031720410006528255, "children": { "RLTrainer._checkpoint": { "total": 0.2097537090012338, "count": 1, "self": 0.2097537090012338 } } } } } } }