{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 3.0820088386535645,
"min": 3.080142021179199,
"max": 3.1789779663085938,
"count": 50
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 55328.22265625,
"min": 30703.568359375,
"max": 168894.65625,
"count": 50
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 152.6875,
"min": 77.43076923076923,
"max": 602.4444444444445,
"count": 50
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19544.0,
"min": 17172.0,
"max": 22292.0,
"count": 50
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1271.6599122089142,
"min": 1234.019769996244,
"max": 1273.610556766995,
"count": 50
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 81386.23438137051,
"min": 17347.169238825554,
"max": 157919.77380727566,
"count": 50
},
"SoccerTwos.Step.mean": {
"value": 2499524.0,
"min": 2009958.0,
"max": 2499524.0,
"count": 50
},
"SoccerTwos.Step.sum": {
"value": 2499524.0,
"min": 2009958.0,
"max": 2499524.0,
"count": 50
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.018472956493496895,
"min": -0.0750662088394165,
"max": 0.10949645191431046,
"count": 50
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -1.2007421255111694,
"min": -3.9785091876983643,
"max": 8.321730613708496,
"count": 50
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.018297752365469933,
"min": -0.0762500911951065,
"max": 0.11113806813955307,
"count": 50
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -1.1893539428710938,
"min": -4.041254997253418,
"max": 8.446493148803711,
"count": 50
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 50
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 50
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.007852308566753681,
"min": -0.6564380955838022,
"max": 0.3738789487826197,
"count": 50
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -0.5104000568389893,
"min": -23.584999933838844,
"max": 28.414800107479095,
"count": 50
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.007852308566753681,
"min": -0.6564380955838022,
"max": 0.3738789487826197,
"count": 50
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -0.5104000568389893,
"min": -23.584999933838844,
"max": 28.414800107479095,
"count": 50
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.016747980133732198,
"min": 0.01471505636072834,
"max": 0.020302366859687025,
"count": 12
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.016747980133732198,
"min": 0.01471505636072834,
"max": 0.020302366859687025,
"count": 12
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.04490260016173124,
"min": 0.02003281582146883,
"max": 0.04490260016173124,
"count": 12
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.04490260016173124,
"min": 0.02003281582146883,
"max": 0.04490260016173124,
"count": 12
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.045197689719498155,
"min": 0.020334542123600842,
"max": 0.045197689719498155,
"count": 12
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.045197689719498155,
"min": 0.020334542123600842,
"max": 0.045197689719498155,
"count": 12
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 2.3929997607999495e-07,
"min": 2.3929997607999495e-07,
"max": 0.00018227368177264005,
"count": 12
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 2.3929997607999495e-07,
"min": 2.3929997607999495e-07,
"max": 0.00018227368177264005,
"count": 12
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.10002392,
"min": 0.10002392,
"max": 0.11822736,
"count": 12
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.10002392,
"min": 0.10002392,
"max": 0.11822736,
"count": 12
},
"SoccerTwos.Policy.Beta.mean": {
"value": 1.1193607999999976e-05,
"min": 1.1193607999999976e-05,
"max": 0.0009195452640000003,
"count": 12
},
"SoccerTwos.Policy.Beta.sum": {
"value": 1.1193607999999976e-05,
"min": 1.1193607999999976e-05,
"max": 0.0009195452640000003,
"count": 12
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1679443647",
"python_version": "3.8.16 (default, Jan 17 2023, 22:25:28) [MSC v.1916 64 bit (AMD64)]",
"command_line_arguments": "C:\\Users\\Alex\\.conda\\envs\\rl\\Scripts\\mlagents-learn config\\poca\\SoccerTwos-v14.yaml --env=training-envs-executables\\SoccerTwos\\SoccerTwos.exe --run-id SoccerTwos-v14 --resume --num-envs 4",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.13.1+cpu",
"numpy_version": "1.21.2",
"end_time_seconds": "1679444668"
},
"total": 1020.41041083,
"count": 1,
"self": 5.674179650000042,
"children": {
"run_training.setup": {
"total": 1.2012053130000004,
"count": 1,
"self": 1.2012053130000004
},
"TrainerController.start_learning": {
"total": 1013.5350258670001,
"count": 1,
"self": 0.9519695710039286,
"children": {
"TrainerController._reset_env": {
"total": 9.30957995899992,
"count": 6,
"self": 9.30957995899992
},
"TrainerController.advance": {
"total": 1003.1411077249961,
"count": 22333,
"self": 0.9486213009989797,
"children": {
"env_step": {
"total": 457.5472085699962,
"count": 22333,
"self": 163.36864032799485,
"children": {
"SubprocessEnvManager._take_step": {
"total": 293.54175182599204,
"count": 36474,
"self": 8.090062221005155,
"children": {
"TorchPolicy.evaluate": {
"total": 285.4516896049869,
"count": 70660,
"self": 285.4516896049869
}
}
},
"workers": {
"total": 0.6368164160093066,
"count": 22333,
"self": 0.0,
"children": {
"worker_root": {
"total": 4015.303667475977,
"count": 36461,
"is_parallel": true,
"self": 3114.639849649964,
"children": {
"steps_from_proto": {
"total": 0.0529860279998573,
"count": 42,
"is_parallel": true,
"self": 0.01028839500015799,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.04269763299969931,
"count": 168,
"is_parallel": true,
"self": 0.04269763299969931
}
}
},
"UnityEnvironment.step": {
"total": 900.610831798013,
"count": 36461,
"is_parallel": true,
"self": 29.133655422004495,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 25.389325713002457,
"count": 36461,
"is_parallel": true,
"self": 25.389325713002457
},
"communicator.exchange": {
"total": 737.9791143640006,
"count": 36461,
"is_parallel": true,
"self": 737.9791143640006
},
"steps_from_proto": {
"total": 108.10873629900551,
"count": 72922,
"is_parallel": true,
"self": 20.798446526975013,
"children": {
"_process_rank_one_or_two_observation": {
"total": 87.3102897720305,
"count": 291688,
"is_parallel": true,
"self": 87.3102897720305
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 544.6452778540009,
"count": 22333,
"self": 6.472290260995692,
"children": {
"process_trajectory": {
"total": 129.88502738300502,
"count": 22333,
"self": 129.60068238800505,
"children": {
"RLTrainer._checkpoint": {
"total": 0.28434499499996946,
"count": 1,
"self": 0.28434499499996946
}
}
},
"_update_policy": {
"total": 408.2879602100002,
"count": 12,
"self": 69.46690098100015,
"children": {
"TorchPOCAOptimizer.update": {
"total": 338.82105922900007,
"count": 480,
"self": 338.82105922900007
}
}
}
}
}
}
},
"trainer_threads": {
"total": 9.049999789567664e-07,
"count": 1,
"self": 9.049999789567664e-07
},
"TrainerController._save_models": {
"total": 0.13236770700007128,
"count": 1,
"self": 0.0021686460000864827,
"children": {
"RLTrainer._checkpoint": {
"total": 0.1301990609999848,
"count": 1,
"self": 0.1301990609999848
}
}
}
}
}
}
}