{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 2.075274705886841,
"min": 1.9769662618637085,
"max": 2.1356256008148193,
"count": 100
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 44361.0703125,
"min": 37098.953125,
"max": 47907.8203125,
"count": 100
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 61.65,
"min": 58.48192771084337,
"max": 92.0909090909091,
"count": 100
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19728.0,
"min": 18868.0,
"max": 20624.0,
"count": 100
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1524.5437885884257,
"min": 1483.835614572962,
"max": 1546.1724846117934,
"count": 100
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 243927.00617414812,
"min": 159117.6904437962,
"max": 262436.1696640847,
"count": 100
},
"SoccerTwos.Step.mean": {
"value": 7999987.0,
"min": 7009937.0,
"max": 7999987.0,
"count": 100
},
"SoccerTwos.Step.sum": {
"value": 7999987.0,
"min": 7009937.0,
"max": 7999987.0,
"count": 100
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.03398745507001877,
"min": -0.09914720058441162,
"max": 0.13625411689281464,
"count": 100
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -5.437993049621582,
"min": -14.673786163330078,
"max": 19.211830139160156,
"count": 100
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.03569308668375015,
"min": -0.09992273896932602,
"max": 0.1381271630525589,
"count": 100
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -5.710893630981445,
"min": -14.788565635681152,
"max": 19.475929260253906,
"count": 100
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 100
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 100
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.12503499761223794,
"min": -0.2998081628157168,
"max": 0.31528510692271783,
"count": 100
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -20.00559961795807,
"min": -44.07179993391037,
"max": 44.45520007610321,
"count": 100
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.12503499761223794,
"min": -0.2998081628157168,
"max": 0.31528510692271783,
"count": 100
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -20.00559961795807,
"min": -44.07179993391037,
"max": 44.45520007610321,
"count": 100
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.030147571766671415,
"min": 0.01926161778101232,
"max": 0.030572262380155736,
"count": 96
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.030147571766671415,
"min": 0.01926161778101232,
"max": 0.030572262380155736,
"count": 96
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.07682948814084133,
"min": 0.06336750593036414,
"max": 0.09296437638501326,
"count": 96
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.07682948814084133,
"min": 0.06336750593036414,
"max": 0.09296437638501326,
"count": 96
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.08455546280990044,
"min": 0.06803815892587105,
"max": 0.09906086499492327,
"count": 96
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.08455546280990044,
"min": 0.06803815892587105,
"max": 0.09906086499492327,
"count": 96
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.00010000000000000003,
"min": 0.00010000000000000003,
"max": 0.00010000000000000003,
"count": 96
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.00010000000000000003,
"min": 0.00010000000000000003,
"max": 0.00010000000000000003,
"count": 96
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.30000000000000004,
"min": 0.30000000000000004,
"max": 0.30000000000000004,
"count": 96
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.30000000000000004,
"min": 0.30000000000000004,
"max": 0.30000000000000004,
"count": 96
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.010000000000000002,
"min": 0.010000000000000002,
"max": 0.010000000000000002,
"count": 96
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.010000000000000002,
"min": 0.010000000000000002,
"max": 0.010000000000000002,
"count": 96
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1718528867",
"python_version": "3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn config/poca/SoccerTwos.yaml --env=training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --resume",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.3.0+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1718531380"
},
"total": 2513.460860014,
"count": 1,
"self": 0.4401936670001305,
"children": {
"run_training.setup": {
"total": 0.05471528700036288,
"count": 1,
"self": 0.05471528700036288
},
"TrainerController.start_learning": {
"total": 2512.9659510599995,
"count": 1,
"self": 1.490757568070876,
"children": {
"TrainerController._reset_env": {
"total": 3.2961464449999767,
"count": 6,
"self": 3.2961464449999767
},
"TrainerController.advance": {
"total": 2507.874844134928,
"count": 68266,
"self": 1.6193601259265051,
"children": {
"env_step": {
"total": 1724.4243835979332,
"count": 68266,
"self": 1315.2181860693017,
"children": {
"SubprocessEnvManager._take_step": {
"total": 408.35218750368676,
"count": 68266,
"self": 10.641594466639617,
"children": {
"TorchPolicy.evaluate": {
"total": 397.71059303704715,
"count": 125734,
"self": 397.71059303704715
}
}
},
"workers": {
"total": 0.8540100249447278,
"count": 68266,
"self": 0.0,
"children": {
"worker_root": {
"total": 2508.0644017046725,
"count": 68266,
"is_parallel": true,
"self": 1395.3086283079301,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0045858560006308835,
"count": 2,
"is_parallel": true,
"self": 0.0010015730003942735,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.00358428300023661,
"count": 8,
"is_parallel": true,
"self": 0.00358428300023661
}
}
},
"UnityEnvironment.step": {
"total": 0.08159472399984224,
"count": 1,
"is_parallel": true,
"self": 0.0012141500001234817,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.000845034999656491,
"count": 1,
"is_parallel": true,
"self": 0.000845034999656491
},
"communicator.exchange": {
"total": 0.07599764299993694,
"count": 1,
"is_parallel": true,
"self": 0.07599764299993694
},
"steps_from_proto": {
"total": 0.003537896000125329,
"count": 2,
"is_parallel": true,
"self": 0.0007051100001262967,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0028327859999990324,
"count": 8,
"is_parallel": true,
"self": 0.0028327859999990324
}
}
}
}
}
}
},
"steps_from_proto": {
"total": 0.014440796003327705,
"count": 10,
"is_parallel": true,
"self": 0.002628185005960404,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.011812610997367301,
"count": 40,
"is_parallel": true,
"self": 0.011812610997367301
}
}
},
"UnityEnvironment.step": {
"total": 1112.741332600739,
"count": 68265,
"is_parallel": true,
"self": 69.125073950775,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 46.844077149783516,
"count": 68265,
"is_parallel": true,
"self": 46.844077149783516
},
"communicator.exchange": {
"total": 779.4261883112313,
"count": 68265,
"is_parallel": true,
"self": 779.4261883112313
},
"steps_from_proto": {
"total": 217.34599318894925,
"count": 136530,
"is_parallel": true,
"self": 36.30777866155131,
"children": {
"_process_rank_one_or_two_observation": {
"total": 181.03821452739794,
"count": 546120,
"is_parallel": true,
"self": 181.03821452739794
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 781.8311004110683,
"count": 68266,
"self": 11.795448302335899,
"children": {
"process_trajectory": {
"total": 212.03827831372382,
"count": 68266,
"self": 211.50231669172354,
"children": {
"RLTrainer._checkpoint": {
"total": 0.5359616220002863,
"count": 2,
"self": 0.5359616220002863
}
}
},
"_update_policy": {
"total": 557.9973737950086,
"count": 96,
"self": 306.94090527201297,
"children": {
"TorchPOCAOptimizer.update": {
"total": 251.0564685229956,
"count": 5760,
"self": 251.0564685229956
}
}
}
}
}
}
},
"trainer_threads": {
"total": 9.480008884565905e-07,
"count": 1,
"self": 9.480008884565905e-07
},
"TrainerController._save_models": {
"total": 0.30420196399973065,
"count": 1,
"self": 0.0056103690003510565,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2985915949993796,
"count": 1,
"self": 0.2985915949993796
}
}
}
}
}
}
}