{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.621476411819458,
"min": 1.599655270576477,
"max": 1.9477595090866089,
"count": 150
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 34660.6796875,
"min": 30395.7421875,
"max": 41759.96484375,
"count": 150
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 66.56756756756756,
"min": 50.19191919191919,
"max": 95.0,
"count": 150
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19704.0,
"min": 18652.0,
"max": 20716.0,
"count": 150
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1553.832368792811,
"min": 1438.6034688325988,
"max": 1558.6803524496643,
"count": 150
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 229967.19058133604,
"min": 153464.30024476539,
"max": 290506.93140322325,
"count": 150
},
"SoccerTwos.Step.mean": {
"value": 6499974.0,
"min": 5009904.0,
"max": 6499974.0,
"count": 150
},
"SoccerTwos.Step.sum": {
"value": 6499974.0,
"min": 5009904.0,
"max": 6499974.0,
"count": 150
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.02823818288743496,
"min": -0.057631928473711014,
"max": 0.13623426854610443,
"count": 150
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -4.151012897491455,
"min": -8.648056030273438,
"max": 22.206186294555664,
"count": 150
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.03926898166537285,
"min": -0.06547373533248901,
"max": 0.13748744130134583,
"count": 150
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -5.77254056930542,
"min": -9.543401718139648,
"max": 22.41045379638672,
"count": 150
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 150
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 150
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.18199727810969968,
"min": -0.2652120010058085,
"max": 0.2832584614937122,
"count": 150
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -26.753599882125854,
"min": -39.78180015087128,
"max": 40.7763996720314,
"count": 150
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.18199727810969968,
"min": -0.2652120010058085,
"max": 0.2832584614937122,
"count": 150
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -26.753599882125854,
"min": -39.78180015087128,
"max": 40.7763996720314,
"count": 150
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 150
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 150
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.01814786336872203,
"min": 0.015074035020613033,
"max": 0.024353331723250448,
"count": 72
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.01814786336872203,
"min": 0.015074035020613033,
"max": 0.024353331723250448,
"count": 72
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.09703146864970526,
"min": 0.06234600332876047,
"max": 0.0975521872440974,
"count": 72
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.09703146864970526,
"min": 0.06234600332876047,
"max": 0.0975521872440974,
"count": 72
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.09953429227073987,
"min": 0.0642737081895272,
"max": 0.09958388383189837,
"count": 72
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.09953429227073987,
"min": 0.0642737081895272,
"max": 0.09958388383189837,
"count": 72
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0006,
"min": 0.0006,
"max": 0.0006,
"count": 72
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0006,
"min": 0.0006,
"max": 0.0006,
"count": 72
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 72
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 72
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.003000000000000001,
"min": 0.003000000000000001,
"max": 0.003000000000000001,
"count": 72
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.003000000000000001,
"min": 0.003000000000000001,
"max": 0.003000000000000001,
"count": 72
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1677373403",
"python_version": "3.9.16 (main, Jan 11 2023, 16:16:36) [MSC v.1916 64 bit (AMD64)]",
"command_line_arguments": "C:\\Users\\paezd\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos-v1 --no-graphics --resume",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.13.1+cpu",
"numpy_version": "1.21.2",
"end_time_seconds": "1677385506"
},
"total": 12103.1833268,
"count": 1,
"self": 2.0050473999981477,
"children": {
"run_training.setup": {
"total": 0.3158820000000002,
"count": 1,
"self": 0.3158820000000002
},
"TrainerController.start_learning": {
"total": 12100.8623974,
"count": 1,
"self": 4.47683140008121,
"children": {
"TrainerController._reset_env": {
"total": 18.566587699999026,
"count": 9,
"self": 18.566587699999026
},
"TrainerController.advance": {
"total": 12077.57374429992,
"count": 102988,
"self": 4.422820999532632,
"children": {
"env_step": {
"total": 3476.5240094003584,
"count": 102988,
"self": 2640.1449541002703,
"children": {
"SubprocessEnvManager._take_step": {
"total": 833.3578471001533,
"count": 102988,
"self": 28.887965800097618,
"children": {
"TorchPolicy.evaluate": {
"total": 804.4698813000557,
"count": 188310,
"self": 804.4698813000557
}
}
},
"workers": {
"total": 3.0212081999349465,
"count": 102988,
"self": 0.0,
"children": {
"worker_root": {
"total": 12076.058875999917,
"count": 102988,
"is_parallel": true,
"self": 9961.220950799936,
"children": {
"steps_from_proto": {
"total": 0.1401172999994138,
"count": 18,
"is_parallel": true,
"self": 0.00887049999415268,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.13124680000526112,
"count": 72,
"is_parallel": true,
"self": 0.13124680000526112
}
}
},
"UnityEnvironment.step": {
"total": 2114.6978078999814,
"count": 102988,
"is_parallel": true,
"self": 135.72561270021993,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 121.5406726002225,
"count": 102988,
"is_parallel": true,
"self": 121.5406726002225
},
"communicator.exchange": {
"total": 1438.9251074,
"count": 102988,
"is_parallel": true,
"self": 1438.9251074
},
"steps_from_proto": {
"total": 418.5064151995391,
"count": 205976,
"is_parallel": true,
"self": 81.44533939962383,
"children": {
"_process_rank_one_or_two_observation": {
"total": 337.0610757999153,
"count": 823904,
"is_parallel": true,
"self": 337.0610757999153
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 8596.626913900029,
"count": 102988,
"self": 28.26380390000668,
"children": {
"process_trajectory": {
"total": 1681.3388057000157,
"count": 102988,
"self": 1678.296906700015,
"children": {
"RLTrainer._checkpoint": {
"total": 3.0418990000007398,
"count": 3,
"self": 3.0418990000007398
}
}
},
"_update_policy": {
"total": 6887.024304300006,
"count": 72,
"self": 376.37169940000695,
"children": {
"TorchPOCAOptimizer.update": {
"total": 6510.6526048999995,
"count": 2160,
"self": 6510.6526048999995
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.3999997463542968e-06,
"count": 1,
"self": 1.3999997463542968e-06
},
"TrainerController._save_models": {
"total": 0.24523260000023583,
"count": 1,
"self": 0.012290099999518134,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2329425000007177,
"count": 1,
"self": 0.2329425000007177
}
}
}
}
}
}
}