{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.20098835229873657,
"min": 0.1905842274427414,
"max": 0.3273382782936096,
"count": 41
},
"Pyramids.Policy.Entropy.sum": {
"value": 6016.787109375,
"min": 4658.0146484375,
"max": 9935.37109375,
"count": 41
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 242.45,
"min": 212.58778625954199,
"max": 347.2087912087912,
"count": 41
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29094.0,
"min": 9657.0,
"max": 32101.0,
"count": 41
},
"Pyramids.Step.mean": {
"value": 2999998.0,
"min": 1799931.0,
"max": 2999998.0,
"count": 41
},
"Pyramids.Step.sum": {
"value": 2999998.0,
"min": 1799931.0,
"max": 2999998.0,
"count": 41
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.7147245407104492,
"min": 0.5930624604225159,
"max": 0.8102691173553467,
"count": 41
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 207.98483276367188,
"min": 74.12632751464844,
"max": 240.64993286132812,
"count": 41
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.014350265264511108,
"min": -0.008211393840610981,
"max": 0.029343511909246445,
"count": 41
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 4.17592716217041,
"min": -2.3977270126342773,
"max": 8.421587944030762,
"count": 41
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.7242083196838698,
"min": 1.4989076720161751,
"max": 1.7874121977627733,
"count": 41
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 206.90499836206436,
"min": 62.34299957752228,
"max": 234.1509979069233,
"count": 41
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.7242083196838698,
"min": 1.4989076720161751,
"max": 1.7874121977627733,
"count": 41
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 206.90499836206436,
"min": 62.34299957752228,
"max": 234.1509979069233,
"count": 41
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.020705985120730474,
"min": 0.018862549592183194,
"max": 0.031432837420119666,
"count": 41
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.484718214487657,
"min": 0.7802353762090206,
"max": 3.111850904591847,
"count": 41
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.0707786365126681,
"min": 0.06456055554112286,
"max": 0.07226113428580848,
"count": 41
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9909009111773533,
"min": 0.4180760901654139,
"max": 1.0358603657017846,
"count": 41
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.01765750899184717,
"min": 0.011768245224500007,
"max": 0.01780292463183327,
"count": 41
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.24720512588586036,
"min": 0.08460816983521606,
"max": 0.25633822100159404,
"count": 41
},
"Pyramids.Policy.LearningRate.mean": {
"value": 1.549992340511902e-06,
"min": 1.549992340511902e-06,
"max": 0.00012063059312315555,
"count": 41
},
"Pyramids.Policy.LearningRate.sum": {
"value": 2.1699892767166628e-05,
"min": 2.1699892767166628e-05,
"max": 0.0016875695374771332,
"count": 41
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10051663095238096,
"min": 0.10051663095238096,
"max": 0.14021017777777778,
"count": 41
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4072328333333335,
"min": 0.8412610666666667,
"max": 2.062522866666667,
"count": 41
},
"Pyramids.Policy.Beta.mean": {
"value": 6.161143214285705e-05,
"min": 6.161143214285705e-05,
"max": 0.004026996759999999,
"count": 41
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0008625600499999988,
"min": 0.0008625600499999988,
"max": 0.05634603438000001,
"count": 41
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.008187584578990936,
"min": 0.007832600735127926,
"max": 0.009348301216959953,
"count": 41
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.11462618410587311,
"min": 0.046995606273412704,
"max": 0.13564544916152954,
"count": 41
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 41
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 41
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1656126923",
"python_version": "3.8.13 (default, Mar 28 2022, 11:38:47) \n[GCC 7.5.0]",
"command_line_arguments": "/home/fork/anaconda3/bin/mlagents-learn config/ppo/PyramidsRND.yaml --env=trained-envs-executables/Linux/Pyramids/Pyramids --run-id=First Training --no-graphics --resume --torch-device cuda:0",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.23.0",
"end_time_seconds": "1656128907"
},
"total": 1983.833760952999,
"count": 1,
"self": 0.3226226089991542,
"children": {
"run_training.setup": {
"total": 0.01962070199988375,
"count": 1,
"self": 0.01962070199988375
},
"TrainerController.start_learning": {
"total": 1983.491517642,
"count": 1,
"self": 1.7160227289095928,
"children": {
"TrainerController._reset_env": {
"total": 2.8484658879988274,
"count": 1,
"self": 2.8484658879988274
},
"TrainerController.advance": {
"total": 1978.8429443240893,
"count": 79452,
"self": 1.793405745362179,
"children": {
"env_step": {
"total": 1253.4297236697494,
"count": 79452,
"self": 1128.186176941099,
"children": {
"SubprocessEnvManager._take_step": {
"total": 124.21656780874218,
"count": 79452,
"self": 4.788859733611389,
"children": {
"TorchPolicy.evaluate": {
"total": 119.4277080751308,
"count": 75910,
"self": 44.28934661207131,
"children": {
"TorchPolicy.sample_actions": {
"total": 75.13836146305948,
"count": 75910,
"self": 75.13836146305948
}
}
}
}
},
"workers": {
"total": 1.026978919908288,
"count": 79452,
"self": 0.0,
"children": {
"worker_root": {
"total": 1980.6225938820353,
"count": 79452,
"is_parallel": true,
"self": 965.2931823529743,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.001265589000468026,
"count": 1,
"is_parallel": true,
"self": 0.00041130900353891775,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0008542799969291082,
"count": 8,
"is_parallel": true,
"self": 0.0008542799969291082
}
}
},
"UnityEnvironment.step": {
"total": 0.031843670998568996,
"count": 1,
"is_parallel": true,
"self": 0.0002871819979191059,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00023742400117043871,
"count": 1,
"is_parallel": true,
"self": 0.00023742400117043871
},
"communicator.exchange": {
"total": 0.03044660399973509,
"count": 1,
"is_parallel": true,
"self": 0.03044660399973509
},
"steps_from_proto": {
"total": 0.0008724609997443622,
"count": 1,
"is_parallel": true,
"self": 0.0002752759992290521,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0005971850005153101,
"count": 8,
"is_parallel": true,
"self": 0.0005971850005153101
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1015.329411529061,
"count": 79451,
"is_parallel": true,
"self": 24.373401482240297,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 19.148204889042972,
"count": 79451,
"is_parallel": true,
"self": 19.148204889042972
},
"communicator.exchange": {
"total": 893.1059470398413,
"count": 79451,
"is_parallel": true,
"self": 893.1059470398413
},
"steps_from_proto": {
"total": 78.70185811793635,
"count": 79451,
"is_parallel": true,
"self": 19.52036945575128,
"children": {
"_process_rank_one_or_two_observation": {
"total": 59.181488662185075,
"count": 635608,
"is_parallel": true,
"self": 59.181488662185075
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 723.6198149089778,
"count": 79452,
"self": 3.109344066671838,
"children": {
"process_trajectory": {
"total": 161.9423343703038,
"count": 79452,
"self": 161.65118073230406,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2911536379997415,
"count": 3,
"self": 0.2911536379997415
}
}
},
"_update_policy": {
"total": 558.5681364720022,
"count": 574,
"self": 177.202636010099,
"children": {
"TorchPPOOptimizer.update": {
"total": 381.36550046190314,
"count": 27627,
"self": 381.36550046190314
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.040005923248827e-07,
"count": 1,
"self": 8.040005923248827e-07
},
"TrainerController._save_models": {
"total": 0.08408389700161933,
"count": 1,
"self": 0.0014261760024965042,
"children": {
"RLTrainer._checkpoint": {
"total": 0.08265772099912283,
"count": 1,
"self": 0.08265772099912283
}
}
}
}
}
}
}