{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.8532294034957886,
"min": 1.7865139245986938,
"max": 3.295811414718628,
"count": 5000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 38487.8671875,
"min": 5800.6279296875,
"max": 115795.28125,
"count": 5000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 65.48,
"min": 43.442477876106196,
"max": 999.0,
"count": 5000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19644.0,
"min": 13432.0,
"max": 30056.0,
"count": 5000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1671.0176464921628,
"min": 1195.1253736477672,
"max": 1764.9751663169422,
"count": 4968
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 250652.64697382442,
"min": 2391.7283571473363,
"max": 381643.94451162743,
"count": 4968
},
"SoccerTwos.Step.mean": {
"value": 49999920.0,
"min": 9046.0,
"max": 49999920.0,
"count": 5000
},
"SoccerTwos.Step.sum": {
"value": 49999920.0,
"min": 9046.0,
"max": 49999920.0,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.03229290619492531,
"min": -0.14206135272979736,
"max": 0.1776188760995865,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -4.8762288093566895,
"min": -20.314773559570312,
"max": 28.028095245361328,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.03124230168759823,
"min": -0.14606380462646484,
"max": 0.1839049607515335,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -4.717587471008301,
"min": -20.88712501525879,
"max": 27.312236785888672,
"count": 5000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 5000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.005737748367107467,
"min": -0.5714285714285714,
"max": 0.6006048743317767,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 0.8664000034332275,
"min": -66.25439995527267,
"max": 66.174800157547,
"count": 5000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.005737748367107467,
"min": -0.5714285714285714,
"max": 0.6006048743317767,
"count": 5000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 0.8664000034332275,
"min": -66.25439995527267,
"max": 66.174800157547,
"count": 5000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 5000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 5000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.011845738316090622,
"min": 0.007440140519611305,
"max": 0.018005548412475036,
"count": 1216
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.011845738316090622,
"min": 0.007440140519611305,
"max": 0.018005548412475036,
"count": 1216
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.09918897251288096,
"min": 2.523424664104823e-05,
"max": 0.1382937341928482,
"count": 1216
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.09918897251288096,
"min": 2.523424664104823e-05,
"max": 0.1382937341928482,
"count": 1216
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.10142053862412771,
"min": 2.476414823225544e-05,
"max": 0.1417475367585818,
"count": 1216
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.10142053862412771,
"min": 2.476414823225544e-05,
"max": 0.1417475367585818,
"count": 1216
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 1216
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 1216
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.15000000000000002,
"min": 0.15000000000000002,
"max": 0.15000000000000002,
"count": 1216
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.15000000000000002,
"min": 0.15000000000000002,
"max": 0.15000000000000002,
"count": 1216
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 1216
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 1216
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1675622187",
"python_version": "3.8.16 (default, Jan 17 2023, 22:25:28) [MSC v.1916 64 bit (AMD64)]",
"command_line_arguments": "C:\\Users\\ramon\\anaconda3\\envs\\rl_football_gpu\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwosGpu50M --no-graphics --torch-device=cuda",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.13.1+cu117",
"numpy_version": "1.21.2",
"end_time_seconds": "1676229765"
},
"total": 607578.1839447999,
"count": 1,
"self": 6.685378099908121,
"children": {
"run_training.setup": {
"total": 0.6113058999999978,
"count": 1,
"self": 0.6113058999999978
},
"TrainerController.start_learning": {
"total": 607570.8872608,
"count": 1,
"self": 423.8880830210401,
"children": {
"TrainerController._reset_env": {
"total": 70.75092309986466,
"count": 143,
"self": 70.75092309986466
},
"TrainerController.advance": {
"total": 607074.5658364792,
"count": 3426394,
"self": 455.34990817680955,
"children": {
"env_step": {
"total": 443559.17826784507,
"count": 3426394,
"self": 282288.46389600344,
"children": {
"SubprocessEnvManager._take_step": {
"total": 160998.49036457582,
"count": 3426394,
"self": 2865.982247389882,
"children": {
"TorchPolicy.evaluate": {
"total": 158132.50811718593,
"count": 6269072,
"self": 158132.50811718593
}
}
},
"workers": {
"total": 272.2240072658506,
"count": 3426394,
"self": 0.0,
"children": {
"worker_root": {
"total": 606947.3217270984,
"count": 3426394,
"is_parallel": true,
"self": 381055.083762191,
"children": {
"steps_from_proto": {
"total": 1.6658315005754716,
"count": 286,
"is_parallel": true,
"self": 0.34884800013656303,
"children": {
"_process_rank_one_or_two_observation": {
"total": 1.3169835004389085,
"count": 1144,
"is_parallel": true,
"self": 1.3169835004389085
}
}
},
"UnityEnvironment.step": {
"total": 225890.57213340682,
"count": 3426394,
"is_parallel": true,
"self": 11142.004610689182,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 9678.473195383567,
"count": 3426394,
"is_parallel": true,
"self": 9678.473195383567
},
"communicator.exchange": {
"total": 165653.08175009323,
"count": 3426394,
"is_parallel": true,
"self": 165653.08175009323
},
"steps_from_proto": {
"total": 39417.01257724084,
"count": 6852788,
"is_parallel": true,
"self": 8358.596127365636,
"children": {
"_process_rank_one_or_two_observation": {
"total": 31058.416449875207,
"count": 27411152,
"is_parallel": true,
"self": 31058.416449875207
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 163060.03766045737,
"count": 3426394,
"self": 2095.0413615892176,
"children": {
"process_trajectory": {
"total": 78211.21985016807,
"count": 3426394,
"self": 78086.00886026803,
"children": {
"RLTrainer._checkpoint": {
"total": 125.21098990003793,
"count": 100,
"self": 125.21098990003793
}
}
},
"_update_policy": {
"total": 82753.7764487001,
"count": 1216,
"self": 57088.19903259386,
"children": {
"TorchPOCAOptimizer.update": {
"total": 25665.577416106236,
"count": 36480,
"self": 25665.577416106236
}
}
}
}
}
}
},
"trainer_threads": {
"total": 7.00005330145359e-06,
"count": 1,
"self": 7.00005330145359e-06
},
"TrainerController._save_models": {
"total": 1.682411199901253,
"count": 1,
"self": 0.3388715998735279,
"children": {
"RLTrainer._checkpoint": {
"total": 1.343539600027725,
"count": 1,
"self": 1.343539600027725
}
}
}
}
}
}
}