Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use yesbut/ppo-pyramids-training with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use yesbut/ppo-pyramids-training with ml-agents:
mlagents-load-from-hf --repo-id="yesbut/ppo-pyramids-training" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.41336187720298767, | |
| "min": 0.41336187720298767, | |
| "max": 1.4649927616119385, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 12381.0146484375, | |
| "min": 12381.0146484375, | |
| "max": 44442.01953125, | |
| "count": 33 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 989987.0, | |
| "min": 29909.0, | |
| "max": 989987.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 989987.0, | |
| "min": 29909.0, | |
| "max": 989987.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.4611879289150238, | |
| "min": -0.08653777837753296, | |
| "max": 0.5208510756492615, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 127.2878646850586, | |
| "min": -21.115217208862305, | |
| "max": 144.27574157714844, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.0827915370464325, | |
| "min": -0.020135240629315376, | |
| "max": 0.2369966059923172, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 22.8504638671875, | |
| "min": -5.456650257110596, | |
| "max": 57.116180419921875, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.06954284608310302, | |
| "min": 0.06541757567417032, | |
| "max": 0.07559310506136778, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.9735998451634422, | |
| "min": 0.5782482939776992, | |
| "max": 1.058303470859149, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.01547441167993496, | |
| "min": 0.0006549689626029119, | |
| "max": 0.016060077420358237, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.21664176351908943, | |
| "min": 0.009169565476440766, | |
| "max": 0.2248410838850153, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 7.481947506049992e-06, | |
| "min": 7.481947506049992e-06, | |
| "max": 0.0002947665392444875, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 0.0001047472650846999, | |
| "min": 0.0001047472650846999, | |
| "max": 0.0036331999889333997, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.10249395000000001, | |
| "min": 0.10249395000000001, | |
| "max": 0.1982555125, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 1.4349153000000001, | |
| "min": 1.4349153000000001, | |
| "max": 2.611066600000001, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 0.00025914560499999976, | |
| "min": 0.00025914560499999976, | |
| "max": 0.00982572569875, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.003628038469999997, | |
| "min": 0.003628038469999997, | |
| "max": 0.12112555333999998, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.014167736284434795, | |
| "min": 0.014167736284434795, | |
| "max": 0.3874890208244324, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.19834831357002258, | |
| "min": 0.19834831357002258, | |
| "max": 3.099912166595459, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 409.88461538461536, | |
| "min": 366.975, | |
| "max": 990.2666666666667, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 31971.0, | |
| "min": 17475.0, | |
| "max": 33463.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 1.458477897303445, | |
| "min": -0.9246250505093485, | |
| "max": 1.5830149767920374, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 112.30279809236526, | |
| "min": -29.588001616299152, | |
| "max": 129.21399794518948, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 1.458477897303445, | |
| "min": -0.9246250505093485, | |
| "max": 1.5830149767920374, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 112.30279809236526, | |
| "min": -29.588001616299152, | |
| "max": 129.21399794518948, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.060503237912894, | |
| "min": 0.0541872761779814, | |
| "max": 7.393877701212962, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 4.658749319292838, | |
| "min": 4.309561093163211, | |
| "max": 133.08979862183332, | |
| "count": 33 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 33 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1737039256", | |
| "python_version": "3.10.12 (main, Nov 6 2024, 20:22:13) [GCC 11.4.0]", | |
| "command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.5.1+cu124", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1737041473" | |
| }, | |
| "total": 2217.250903149, | |
| "count": 1, | |
| "self": 0.6431621699998686, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.02193374899997025, | |
| "count": 1, | |
| "self": 0.02193374899997025 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 2216.5858072300002, | |
| "count": 1, | |
| "self": 1.4333999779391888, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 3.036220076999996, | |
| "count": 1, | |
| "self": 3.036220076999996 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 2212.027598170061, | |
| "count": 63806, | |
| "self": 1.5035490330851644, | |
| "children": { | |
| "env_step": { | |
| "total": 1513.254193532992, | |
| "count": 63806, | |
| "self": 1356.5088433809983, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 155.8851694540142, | |
| "count": 63806, | |
| "self": 4.693354754020561, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 151.19181469999364, | |
| "count": 62564, | |
| "self": 151.19181469999364 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 0.8601806979794446, | |
| "count": 63806, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 2211.2968053100417, | |
| "count": 63806, | |
| "is_parallel": true, | |
| "self": 968.987262491059, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.005746649000002435, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.004351683000095363, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0013949659999070718, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0013949659999070718 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.047842519000028005, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005632550000882475, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.0004649149999522706, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0004649149999522706 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.045036253000034776, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.045036253000034776 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0017780959999527113, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005030499997928928, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0012750460001598185, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0012750460001598185 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 1242.3095428189827, | |
| "count": 63805, | |
| "is_parallel": true, | |
| "self": 32.21548038704668, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 22.983295438992513, | |
| "count": 63805, | |
| "is_parallel": true, | |
| "self": 22.983295438992513 | |
| }, | |
| "communicator.exchange": { | |
| "total": 1090.358355256982, | |
| "count": 63805, | |
| "is_parallel": true, | |
| "self": 1090.358355256982 | |
| }, | |
| "steps_from_proto": { | |
| "total": 96.75241173596157, | |
| "count": 63805, | |
| "is_parallel": true, | |
| "self": 19.48196411892627, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 77.2704476170353, | |
| "count": 510440, | |
| "is_parallel": true, | |
| "self": 77.2704476170353 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 697.269855603984, | |
| "count": 63806, | |
| "self": 2.728400674074919, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 129.63196941690978, | |
| "count": 63806, | |
| "self": 129.37337858891, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.2585908279997966, | |
| "count": 2, | |
| "self": 0.2585908279997966 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 564.9094855129993, | |
| "count": 459, | |
| "self": 314.3957293169875, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 250.51375619601174, | |
| "count": 22791, | |
| "self": 250.51375619601174 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 8.219999472203199e-07, | |
| "count": 1, | |
| "self": 8.219999472203199e-07 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.08858818299995619, | |
| "count": 1, | |
| "self": 0.0015602419998685946, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.0870279410000876, | |
| "count": 1, | |
| "self": 0.0870279410000876 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |