{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 0.9153541326522827,
"min": 0.9153541326522827,
"max": 3.29571533203125,
"count": 500
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 18629.287109375,
"min": 13145.2314453125,
"max": 129414.515625,
"count": 500
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 183.1851851851852,
"min": 126.23809523809524,
"max": 999.0,
"count": 500
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19784.0,
"min": 16288.0,
"max": 24516.0,
"count": 500
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1307.64352444372,
"min": 1194.1476205484157,
"max": 1309.2882202775756,
"count": 361
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 65382.176222186,
"min": 2388.2952410968314,
"max": 109172.03978153125,
"count": 361
},
"SoccerTwos.Step.mean": {
"value": 4999186.0,
"min": 9274.0,
"max": 4999186.0,
"count": 500
},
"SoccerTwos.Step.sum": {
"value": 4999186.0,
"min": 9274.0,
"max": 4999186.0,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.010785561054944992,
"min": -0.07360236346721649,
"max": 0.025284575298428535,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": 0.582420289516449,
"min": -3.4593112468719482,
"max": 1.16034734249115,
"count": 500
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 500
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.07471111058085053,
"min": -0.6740705896826351,
"max": 0.5166999936103821,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -4.034399971365929,
"min": -23.275600016117096,
"max": 20.34040015935898,
"count": 500
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 500
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 500
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.017419481910959198,
"min": 0.011446637076248104,
"max": 0.024658437251734236,
"count": 233
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.017419481910959198,
"min": 0.011446637076248104,
"max": 0.024658437251734236,
"count": 233
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.033856865887840586,
"min": 1.0313411136545862e-05,
"max": 0.03894931549827258,
"count": 233
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.033856865887840586,
"min": 1.0313411136545862e-05,
"max": 0.03894931549827258,
"count": 233
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 233
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 233
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 233
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 233
},
"SoccerTwos.Policy.Beta.mean": {
"value": 9.999999999999998e-05,
"min": 9.999999999999998e-05,
"max": 9.999999999999998e-05,
"count": 233
},
"SoccerTwos.Policy.Beta.sum": {
"value": 9.999999999999998e-05,
"min": 9.999999999999998e-05,
"max": 9.999999999999998e-05,
"count": 233
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1685371306",
"python_version": "3.10.6 (main, Oct 24 2022, 16:07:47) [GCC 11.2.0]",
"command_line_arguments": "/home/abhishek/anaconda3/envs/swasti_icra_rush/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos_2",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0+cu102",
"numpy_version": "1.21.2",
"end_time_seconds": "1685393613"
},
"total": 22306.65333478,
"count": 1,
"self": 0.4270627129881177,
"children": {
"run_training.setup": {
"total": 0.030367953004315495,
"count": 1,
"self": 0.030367953004315495
},
"TrainerController.start_learning": {
"total": 22306.195904114007,
"count": 1,
"self": 13.268425302987453,
"children": {
"TrainerController._reset_env": {
"total": 8.24428695201641,
"count": 25,
"self": 8.24428695201641
},
"TrainerController.advance": {
"total": 22284.515690080996,
"count": 326205,
"self": 14.881455402908614,
"children": {
"env_step": {
"total": 17317.932538829045,
"count": 326205,
"self": 14788.598816663347,
"children": {
"SubprocessEnvManager._take_step": {
"total": 2520.409768861762,
"count": 326205,
"self": 118.52726861099654,
"children": {
"TorchPolicy.evaluate": {
"total": 2401.8825002507656,
"count": 644050,
"self": 2401.8825002507656
}
}
},
"workers": {
"total": 8.923953303936287,
"count": 326205,
"self": 0.0,
"children": {
"worker_root": {
"total": 22279.42205907659,
"count": 326205,
"is_parallel": true,
"self": 9222.530670115884,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.00623098699725233,
"count": 2,
"is_parallel": true,
"self": 0.0013409140083240345,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.004890072988928296,
"count": 8,
"is_parallel": true,
"self": 0.004890072988928296
}
}
},
"UnityEnvironment.step": {
"total": 0.043433031998574734,
"count": 1,
"is_parallel": true,
"self": 0.0010604150011204183,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.001187104993732646,
"count": 1,
"is_parallel": true,
"self": 0.001187104993732646
},
"communicator.exchange": {
"total": 0.038330529001541436,
"count": 1,
"is_parallel": true,
"self": 0.038330529001541436
},
"steps_from_proto": {
"total": 0.0028549830021802336,
"count": 2,
"is_parallel": true,
"self": 0.0006769739993615076,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.002178009002818726,
"count": 8,
"is_parallel": true,
"self": 0.002178009002818726
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 13056.791545148692,
"count": 326204,
"is_parallel": true,
"self": 469.30854420507967,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 311.4045191811747,
"count": 326204,
"is_parallel": true,
"self": 311.4045191811747
},
"communicator.exchange": {
"total": 11008.626855160197,
"count": 326204,
"is_parallel": true,
"self": 11008.626855160197
},
"steps_from_proto": {
"total": 1267.4516266022401,
"count": 652408,
"is_parallel": true,
"self": 246.88081760508067,
"children": {
"_process_rank_one_or_two_observation": {
"total": 1020.5708089971595,
"count": 2609632,
"is_parallel": true,
"self": 1020.5708089971595
}
}
}
}
},
"steps_from_proto": {
"total": 0.09984381201502401,
"count": 48,
"is_parallel": true,
"self": 0.01956795902515296,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.08027585298987105,
"count": 192,
"is_parallel": true,
"self": 0.08027585298987105
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 4951.701695849042,
"count": 326205,
"self": 126.8585418540315,
"children": {
"process_trajectory": {
"total": 745.27830365095,
"count": 326205,
"self": 742.6751607059705,
"children": {
"RLTrainer._checkpoint": {
"total": 2.6031429449794814,
"count": 10,
"self": 2.6031429449794814
}
}
},
"_update_policy": {
"total": 4079.5648503440607,
"count": 233,
"self": 1261.3051723325625,
"children": {
"TorchPPOOptimizer.update": {
"total": 2818.259678011498,
"count": 6990,
"self": 2818.259678011498
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.3630051398649812e-06,
"count": 1,
"self": 1.3630051398649812e-06
},
"TrainerController._save_models": {
"total": 0.16750041500199586,
"count": 1,
"self": 0.002384867999353446,
"children": {
"RLTrainer._checkpoint": {
"total": 0.16511554700264242,
"count": 1,
"self": 0.16511554700264242
}
}
}
}
}
}
}