{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.621476411819458,
"min": 1.599655270576477,
"max": 1.9477595090866089,
"count": 150
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 34660.6796875,
"min": 30395.7421875,
"max": 41759.96484375,
"count": 150
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 66.56756756756756,
"min": 50.19191919191919,
"max": 95.0,
"count": 150
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19704.0,
"min": 18652.0,
"max": 20716.0,
"count": 150
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1553.832368792811,
"min": 1438.6034688325988,
"max": 1558.6803524496643,
"count": 150
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 229967.19058133604,
"min": 153464.30024476539,
"max": 290506.93140322325,
"count": 150
},
"SoccerTwos.Step.mean": {
"value": 6499974.0,
"min": 5009904.0,
"max": 6499974.0,
"count": 150
},
"SoccerTwos.Step.sum": {
"value": 6499974.0,
"min": 5009904.0,
"max": 6499974.0,
"count": 150
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.02823818288743496,
"min": -0.057631928473711014,
"max": 0.13623426854610443,
"count": 150
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -4.151012897491455,
"min": -8.648056030273438,
"max": 22.206186294555664,
"count": 150
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.03926898166537285,
"min": -0.06547373533248901,
"max": 0.13748744130134583,
"count": 150
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -5.77254056930542,
"min": -9.543401718139648,
"max": 22.41045379638672,
"count": 150
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 150
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 150
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.18199727810969968,
"min": -0.2652120010058085,
"max": 0.2832584614937122,
"count": 150
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -26.753599882125854,
"min": -39.78180015087128,
"max": 40.7763996720314,
"count": 150
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.18199727810969968,
"min": -0.2652120010058085,
"max": 0.2832584614937122,
"count": 150
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -26.753599882125854,
"min": -39.78180015087128,
"max": 40.7763996720314,
"count": 150
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 150
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 150
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.01814786336872203,
"min": 0.015074035020613033,
"max": 0.024353331723250448,
"count": 72
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.01814786336872203,
"min": 0.015074035020613033,
"max": 0.024353331723250448,
"count": 72
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.09703146864970526,
"min": 0.06234600332876047,
"max": 0.0975521872440974,
"count": 72
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.09703146864970526,
"min": 0.06234600332876047,
"max": 0.0975521872440974,
"count": 72
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.09953429227073987,
"min": 0.0642737081895272,
"max": 0.09958388383189837,
"count": 72
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.09953429227073987,
"min": 0.0642737081895272,
"max": 0.09958388383189837,
"count": 72
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0006,
"min": 0.0006,
"max": 0.0006,
"count": 72
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0006,
"min": 0.0006,
"max": 0.0006,
"count": 72
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 72
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 72
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.003000000000000001,
"min": 0.003000000000000001,
"max": 0.003000000000000001,
"count": 72
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.003000000000000001,
"min": 0.003000000000000001,
"max": 0.003000000000000001,
"count": 72
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1677373403",
"python_version": "3.9.16 (main, Jan 11 2023, 16:16:36) [MSC v.1916 64 bit (AMD64)]",
"command_line_arguments": "C:\\Users\\paezd\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos-v1 --no-graphics --resume",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.13.1+cpu",
"numpy_version": "1.21.2",
"end_time_seconds": "1677385506"
},
"total": 12103.1833268,
"count": 1,
"self": 2.0050473999981477,
"children": {
"run_training.setup": {
"total": 0.3158820000000002,
"count": 1,
"self": 0.3158820000000002
},
"TrainerController.start_learning": {
"total": 12100.8623974,
"count": 1,
"self": 4.47683140008121,
"children": {
"TrainerController._reset_env": {
"total": 18.566587699999026,
"count": 9,
"self": 18.566587699999026
},
"TrainerController.advance": {
"total": 12077.57374429992,
"count": 102988,
"self": 4.422820999532632,
"children": {
"env_step": {
"total": 3476.5240094003584,
"count": 102988,
"self": 2640.1449541002703,
"children": {
"SubprocessEnvManager._take_step": {
"total": 833.3578471001533,
"count": 102988,
"self": 28.887965800097618,
"children": {
"TorchPolicy.evaluate": {
"total": 804.4698813000557,
"count": 188310,
"self": 804.4698813000557
}
}
},
"workers": {
"total": 3.0212081999349465,
"count": 102988,
"self": 0.0,
"children": {
"worker_root": {
"total": 12076.058875999917,
"count": 102988,
"is_parallel": true,
"self": 9961.220950799936,
"children": {
"steps_from_proto": {
"total": 0.1401172999994138,
"count": 18,
"is_parallel": true,
"self": 0.00887049999415268,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.13124680000526112,
"count": 72,
"is_parallel": true,
"self": 0.13124680000526112
}
}
},
"UnityEnvironment.step": {
"total": 2114.6978078999814,
"count": 102988,
"is_parallel": true,
"self": 135.72561270021993,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 121.5406726002225,
"count": 102988,
"is_parallel": true,
"self": 121.5406726002225
},
"communicator.exchange": {
"total": 1438.9251074,
"count": 102988,
"is_parallel": true,
"self": 1438.9251074
},
"steps_from_proto": {
"total": 418.5064151995391,
"count": 205976,
"is_parallel": true,
"self": 81.44533939962383,
"children": {
"_process_rank_one_or_two_observation": {
"total": 337.0610757999153,
"count": 823904,
"is_parallel": true,
"self": 337.0610757999153
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 8596.626913900029,
"count": 102988,
"self": 28.26380390000668,
"children": {
"process_trajectory": {
"total": 1681.3388057000157,
"count": 102988,
"self": 1678.296906700015,
"children": {
"RLTrainer._checkpoint": {
"total": 3.0418990000007398,
"count": 3,
"self": 3.0418990000007398
}
}
},
"_update_policy": {
"total": 6887.024304300006,
"count": 72,
"self": 376.37169940000695,
"children": {
"TorchPOCAOptimizer.update": {
"total": 6510.6526048999995,
"count": 2160,
"self": 6510.6526048999995
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.3999997463542968e-06,
"count": 1,
"self": 1.3999997463542968e-06
},
"TrainerController._save_models": {
"total": 0.24523260000023583,
"count": 1,
"self": 0.012290099999518134,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2329425000007177,
"count": 1,
"self": 0.2329425000007177
}
}
}
}
}
}
}