{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 3.13865327835083,
"min": 3.1184699535369873,
"max": 3.295724630355835,
"count": 174
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 75327.6796875,
"min": 8172.7509765625,
"max": 121404.875,
"count": 174
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 568.3333333333334,
"min": 415.0833333333333,
"max": 999.0,
"count": 174
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 20460.0,
"min": 12352.0,
"max": 28824.0,
"count": 174
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1208.6874788488428,
"min": 1195.0119942657307,
"max": 1211.7387103589842,
"count": 149
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 12086.874788488429,
"min": 2392.0913402347423,
"max": 21656.394109701538,
"count": 149
},
"SoccerTwos.Step.mean": {
"value": 1739006.0,
"min": 9428.0,
"max": 1739006.0,
"count": 174
},
"SoccerTwos.Step.sum": {
"value": 1739006.0,
"min": 9428.0,
"max": 1739006.0,
"count": 174
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.005756152793765068,
"min": -0.022118547931313515,
"max": 0.07352998852729797,
"count": 174
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -0.10361074656248093,
"min": -0.3876350522041321,
"max": 0.8983725905418396,
"count": 174
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.003773884614929557,
"min": -0.021019132807850838,
"max": 0.07347673922777176,
"count": 174
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -0.06792992353439331,
"min": -0.37691545486450195,
"max": 1.0024032592773438,
"count": 174
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 174
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 174
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.08762221866183811,
"min": -0.6985199999995529,
"max": 0.2861714208764689,
"count": 174
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -1.577199935913086,
"min": -13.97039999999106,
"max": 4.006399892270565,
"count": 174
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.08762221866183811,
"min": -0.6985199999995529,
"max": 0.2861714208764689,
"count": 174
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -1.577199935913086,
"min": -13.97039999999106,
"max": 4.006399892270565,
"count": 174
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 174
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 174
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.01524579579612085,
"min": 0.01284766798041043,
"max": 0.022898333434325954,
"count": 81
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.01524579579612085,
"min": 0.01284766798041043,
"max": 0.022898333434325954,
"count": 81
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.006111118981304268,
"min": 1.475393185046414e-06,
"max": 0.006771525507792831,
"count": 81
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.006111118981304268,
"min": 1.475393185046414e-06,
"max": 0.006771525507792831,
"count": 81
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.0063759771020462114,
"min": 1.693538663024204e-06,
"max": 0.006791041713828842,
"count": 81
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.0063759771020462114,
"min": 1.693538663024204e-06,
"max": 0.006791041713828842,
"count": 81
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 81
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 81
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 81
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 81
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 81
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 81
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1675412754",
"python_version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/linux/SoccerTwos --run-id=SoccerTwos --no-graphics",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.21.6",
"end_time_seconds": "1675416108"
},
"total": 3353.8572786990003,
"count": 1,
"self": 0.4323556340009418,
"children": {
"run_training.setup": {
"total": 0.10472424199997477,
"count": 1,
"self": 0.10472424199997477
},
"TrainerController.start_learning": {
"total": 3353.3201988229994,
"count": 1,
"self": 2.086154936998355,
"children": {
"TrainerController._reset_env": {
"total": 11.20037014900015,
"count": 9,
"self": 11.20037014900015
},
"TrainerController.advance": {
"total": 3340.0320035490013,
"count": 113761,
"self": 2.5202104279446758,
"children": {
"env_step": {
"total": 2674.3521779700045,
"count": 113761,
"self": 2104.1555076680584,
"children": {
"SubprocessEnvManager._take_step": {
"total": 568.8089097259116,
"count": 113761,
"self": 16.39184769083488,
"children": {
"TorchPolicy.evaluate": {
"total": 552.4170620350767,
"count": 225792,
"self": 110.85575536414171,
"children": {
"TorchPolicy.sample_actions": {
"total": 441.561306670935,
"count": 225792,
"self": 441.561306670935
}
}
}
}
},
"workers": {
"total": 1.3877605760344522,
"count": 113760,
"self": 0.0,
"children": {
"worker_root": {
"total": 3347.3966001419344,
"count": 113760,
"is_parallel": true,
"self": 1544.8072518199463,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.005906793999997717,
"count": 2,
"is_parallel": true,
"self": 0.0029945040000711742,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.002912289999926543,
"count": 8,
"is_parallel": true,
"self": 0.002912289999926543
}
}
},
"UnityEnvironment.step": {
"total": 0.03844282500006102,
"count": 1,
"is_parallel": true,
"self": 0.0009653170002366096,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0008431359999576671,
"count": 1,
"is_parallel": true,
"self": 0.0008431359999576671
},
"communicator.exchange": {
"total": 0.033245288999978584,
"count": 1,
"is_parallel": true,
"self": 0.033245288999978584
},
"steps_from_proto": {
"total": 0.00338908299988816,
"count": 2,
"is_parallel": true,
"self": 0.0007276889997456237,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0026613940001425362,
"count": 8,
"is_parallel": true,
"self": 0.0026613940001425362
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1802.5710184839886,
"count": 113759,
"is_parallel": true,
"self": 105.93082383185265,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 78.72828792108965,
"count": 113759,
"is_parallel": true,
"self": 78.72828792108965
},
"communicator.exchange": {
"total": 1224.1004175170137,
"count": 113759,
"is_parallel": true,
"self": 1224.1004175170137
},
"steps_from_proto": {
"total": 393.81148921403235,
"count": 227518,
"is_parallel": true,
"self": 71.60639723309589,
"children": {
"_process_rank_one_or_two_observation": {
"total": 322.20509198093646,
"count": 910072,
"is_parallel": true,
"self": 322.20509198093646
}
}
}
}
},
"steps_from_proto": {
"total": 0.018329837999544907,
"count": 16,
"is_parallel": true,
"self": 0.003950473998884263,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.014379364000660644,
"count": 64,
"is_parallel": true,
"self": 0.014379364000660644
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 663.159615151052,
"count": 113760,
"self": 17.41899373713113,
"children": {
"process_trajectory": {
"total": 198.91624469292117,
"count": 113760,
"self": 198.25468435792084,
"children": {
"RLTrainer._checkpoint": {
"total": 0.6615603350003312,
"count": 3,
"self": 0.6615603350003312
}
}
},
"_update_policy": {
"total": 446.8243767209997,
"count": 81,
"self": 266.771387442001,
"children": {
"TorchPOCAOptimizer.update": {
"total": 180.05298927899867,
"count": 2430,
"self": 180.05298927899867
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.264999809791334e-06,
"count": 1,
"self": 1.264999809791334e-06
},
"TrainerController._save_models": {
"total": 0.0016689229996700305,
"count": 1,
"self": 4.0902999899117276e-05,
"children": {
"RLTrainer._checkpoint": {
"total": 0.0016280199997709133,
"count": 1,
"self": 0.0016280199997709133
}
}
}
}
}
}
}
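
The JSON above is the run-log summary that ML-Agents writes at the end of training: "gauges" holds the last value plus min/max/count of each logged metric (policy entropy, self-play ELO, losses, learning rate, and so on), while the nested "children" blocks form a hierarchical wall-clock timer tree for the run. As a minimal sketch only, assuming the block is saved to a file on disk (the path below is a placeholder, not part of this upload), it could be loaded and summarized with the Python standard library:

import json

# Minimal sketch: load the ML-Agents run-log JSON shown above and summarize it.
# The path below is a placeholder/assumption -- point it at wherever this file
# is stored (ML-Agents typically writes it under results/<run-id>/run_logs/).
TIMERS_PATH = "run_logs/timers.json"

with open(TIMERS_PATH) as f:
    root = json.load(f)

# Each gauge records the final value plus min/max/count of a training metric.
for name, gauge in root["gauges"].items():
    print(f"{name}: value={gauge['value']:.4f} "
          f"min={gauge['min']:.4f} max={gauge['max']:.4f} (count={gauge['count']})")

# The remaining keys form a hierarchical timer tree: every node has a total
# wall-clock time in seconds, a call count, and optional nested "children".
def walk(node, name="root", depth=0):
    print(f"{'  ' * depth}{name}: {node.get('total', 0.0):.1f}s "
          f"over {node.get('count', 0)} call(s)")
    for child_name, child in node.get("children", {}).items():
        walk(child, child_name, depth + 1)

walk(root)

Walking the timer tree in the data above shows, for example, that communicator.exchange accounts for roughly 1224 s of the ~3354 s total, i.e. most of the wall-clock time is spent exchanging steps with the Unity environment rather than in the POCA update itself (~447 s in _update_policy).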