{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 3.2140536308288574,
"min": 3.1371610164642334,
"max": 3.295743227005005,
"count": 1000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 87216.5625,
"min": 14859.255859375,
"max": 123189.3359375,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 689.75,
"min": 372.57142857142856,
"max": 999.0,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 22072.0,
"min": 15984.0,
"max": 25980.0,
"count": 1000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1187.6966732241428,
"min": 1153.841539058977,
"max": 1200.1767817874518,
"count": 840
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 9501.573385793143,
"min": 2307.683078117954,
"max": 28693.76147327935,
"count": 840
},
"SoccerTwos.Step.mean": {
"value": 9999782.0,
"min": 9010.0,
"max": 9999782.0,
"count": 1000
},
"SoccerTwos.Step.sum": {
"value": 9999782.0,
"min": 9010.0,
"max": 9999782.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.01656908541917801,
"min": -0.0285890344530344,
"max": 0.018183618783950806,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -0.24853627383708954,
"min": -0.6358370780944824,
"max": 0.3091215193271637,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.014030274003744125,
"min": -0.02842695266008377,
"max": 0.006985607091337442,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -0.21045410633087158,
"min": -0.5299453735351562,
"max": 0.10471726953983307,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.11269332567850748,
"min": -0.625,
"max": 0.43426666657129925,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 1.6903998851776123,
"min": -11.209600031375885,
"max": 7.816799998283386,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.11269332567850748,
"min": -0.625,
"max": 0.43426666657129925,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 1.6903998851776123,
"min": -11.209600031375885,
"max": 7.816799998283386,
"count": 1000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.009222518963118395,
"min": 0.0018380905045584466,
"max": 0.016796006510655086,
"count": 466
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.009222518963118395,
"min": 0.0018380905045584466,
"max": 0.016796006510655086,
"count": 466
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.002672747204390665,
"min": 8.787856226414684e-07,
"max": 0.01493195490911603,
"count": 466
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.002672747204390665,
"min": 8.787856226414684e-07,
"max": 0.01493195490911603,
"count": 466
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.0026594311154137054,
"min": 5.442700323025443e-07,
"max": 0.01496061822399497,
"count": 466
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.0026594311154137054,
"min": 5.442700323025443e-07,
"max": 0.01496061822399497,
"count": 466
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 466
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 466
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.19999999999999998,
"min": 0.19999999999999998,
"max": 0.19999999999999998,
"count": 466
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.19999999999999998,
"min": 0.19999999999999998,
"max": 0.19999999999999998,
"count": 466
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005,
"min": 0.005,
"max": 0.005,
"count": 466
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005,
"min": 0.005,
"max": 0.005,
"count": 466
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1714021025",
"python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]",
"command_line_arguments": "/opt/conda/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --force",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.3.0+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1714042856"
},
"total": 21830.642082236,
"count": 1,
"self": 0.3244513259996893,
"children": {
"run_training.setup": {
"total": 0.023126756000010573,
"count": 1,
"self": 0.023126756000010573
},
"TrainerController.start_learning": {
"total": 21830.294504154,
"count": 1,
"self": 17.681241414793476,
"children": {
"TrainerController._reset_env": {
"total": 7.963951644991766,
"count": 50,
"self": 7.963951644991766
},
"TrainerController.advance": {
"total": 21804.44504638521,
"count": 650905,
"self": 18.84673021924391,
"children": {
"env_step": {
"total": 16890.455054930666,
"count": 650905,
"self": 12782.116950416483,
"children": {
"SubprocessEnvManager._take_step": {
"total": 4097.01059741472,
"count": 650905,
"self": 126.73519772110421,
"children": {
"TorchPolicy.evaluate": {
"total": 3970.275399693616,
"count": 1291756,
"self": 3970.275399693616
}
}
},
"workers": {
"total": 11.327507099463446,
"count": 650905,
"self": 0.0,
"children": {
"worker_root": {
"total": 21802.707380226264,
"count": 650905,
"is_parallel": true,
"self": 11594.57311845469,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0045256380001319485,
"count": 2,
"is_parallel": true,
"self": 0.0011403580003843672,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0033852799997475813,
"count": 8,
"is_parallel": true,
"self": 0.0033852799997475813
}
}
},
"UnityEnvironment.step": {
"total": 0.0403048900000158,
"count": 1,
"is_parallel": true,
"self": 0.0009176909998132032,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0010829500001818815,
"count": 1,
"is_parallel": true,
"self": 0.0010829500001818815
},
"communicator.exchange": {
"total": 0.03505178000000342,
"count": 1,
"is_parallel": true,
"self": 0.03505178000000342
},
"steps_from_proto": {
"total": 0.0032524690000172995,
"count": 2,
"is_parallel": true,
"self": 0.0006490769999345503,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0026033920000827493,
"count": 8,
"is_parallel": true,
"self": 0.0026033920000827493
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 10207.980765756565,
"count": 650904,
"is_parallel": true,
"self": 507.85822469908635,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 446.99434314346354,
"count": 650904,
"is_parallel": true,
"self": 446.99434314346354
},
"communicator.exchange": {
"total": 7188.0503721799605,
"count": 650904,
"is_parallel": true,
"self": 7188.0503721799605
},
"steps_from_proto": {
"total": 2065.077825734055,
"count": 1301808,
"is_parallel": true,
"self": 359.3358679757,
"children": {
"_process_rank_one_or_two_observation": {
"total": 1705.7419577583548,
"count": 5207232,
"is_parallel": true,
"self": 1705.7419577583548
}
}
}
}
},
"steps_from_proto": {
"total": 0.1534960150088409,
"count": 98,
"is_parallel": true,
"self": 0.02746741501323413,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.12602859999560678,
"count": 392,
"is_parallel": true,
"self": 0.12602859999560678
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 4895.143261235304,
"count": 650905,
"self": 152.33847000950027,
"children": {
"process_trajectory": {
"total": 1271.8683250818606,
"count": 650905,
"self": 1267.4993024908624,
"children": {
"RLTrainer._checkpoint": {
"total": 4.369022590998156,
"count": 20,
"self": 4.369022590998156
}
}
},
"_update_policy": {
"total": 3470.936466143943,
"count": 466,
"self": 2705.017264349991,
"children": {
"TorchPOCAOptimizer.update": {
"total": 765.9192017939522,
"count": 2796,
"self": 765.9192017939522
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.200998667627573e-06,
"count": 1,
"self": 1.200998667627573e-06
},
"TrainerController._save_models": {
"total": 0.20426350800335058,
"count": 1,
"self": 0.001985904003959149,
"children": {
"RLTrainer._checkpoint": {
"total": 0.20227760399939143,
"count": 1,
"self": 0.20227760399939143
}
}
}
}
}
}
}