Armageddon

2M steps

672f46c verified 7 months ago

20.2 kB

	{
	"name": "root",
	"gauges": {
	"SoccerTwos.Policy.Entropy.mean": {
	"value": 2.328767776489258,
	"min": 2.264260768890381,
	"max": 2.597733497619629,
	"count": 100
	},
	"SoccerTwos.Policy.Entropy.sum": {
	"value": 46128.234375,
	"min": 39270.3828125,
	"max": 55239.19140625,
	"count": 100
	},
	"SoccerTwos.Environment.EpisodeLength.mean": {
	"value": 73.82089552238806,
	"min": 52.670212765957444,
	"max": 97.68627450980392,
	"count": 100
	},
	"SoccerTwos.Environment.EpisodeLength.sum": {
	"value": 19784.0,
	"min": 18692.0,
	"max": 20432.0,
	"count": 100
	},
	"SoccerTwos.Self-play.ELO.mean": {
	"value": 1464.6681308739328,
	"min": 1340.6953797169615,
	"max": 1467.0538949370675,
	"count": 100
	},
	"SoccerTwos.Self-play.ELO.sum": {
	"value": 196265.52953710698,
	"min": 137347.69714895534,
	"max": 264689.652539715,
	"count": 100
	},
	"SoccerTwos.Step.mean": {
	"value": 1999965.0,
	"min": 1009992.0,
	"max": 1999965.0,
	"count": 100
	},
	"SoccerTwos.Step.sum": {
	"value": 1999965.0,
	"min": 1009992.0,
	"max": 1999965.0,
	"count": 100
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
	"value": -0.016363728791475296,
	"min": -0.10192890465259552,
	"max": 0.1753804236650467,
	"count": 100
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
	"value": -2.1763758659362793,
	"min": -17.124055862426758,
	"max": 23.541648864746094,
	"count": 100
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
	"value": -0.014355599880218506,
	"min": -0.1014343872666359,
	"max": 0.1741393357515335,
	"count": 100
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
	"value": -1.909294843673706,
	"min": -17.040977478027344,
	"max": 23.85595703125,
	"count": 100
	},
	"SoccerTwos.Environment.CumulativeReward.mean": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 100
	},
	"SoccerTwos.Environment.CumulativeReward.sum": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 100
	},
	"SoccerTwos.Policy.ExtrinsicReward.mean": {
	"value": -0.10087518763721437,
	"min": -0.2200330554946395,
	"max": 0.35805128221837884,
	"count": 100
	},
	"SoccerTwos.Policy.ExtrinsicReward.sum": {
	"value": -13.416399955749512,
	"min": -33.48040008544922,
	"max": 41.89200001955032,
	"count": 100
	},
	"SoccerTwos.Environment.GroupCumulativeReward.mean": {
	"value": -0.10087518763721437,
	"min": -0.2200330554946395,
	"max": 0.35805128221837884,
	"count": 100
	},
	"SoccerTwos.Environment.GroupCumulativeReward.sum": {
	"value": -13.416399955749512,
	"min": -33.48040008544922,
	"max": 41.89200001955032,
	"count": 100
	},
	"SoccerTwos.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 100
	},
	"SoccerTwos.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 100
	},
	"SoccerTwos.Losses.PolicyLoss.mean": {
	"value": 0.024560289937411047,
	"min": 0.019290680067206266,
	"max": 0.030502982162094364,
	"count": 96
	},
	"SoccerTwos.Losses.PolicyLoss.sum": {
	"value": 0.024560289937411047,
	"min": 0.019290680067206266,
	"max": 0.030502982162094364,
	"count": 96
	},
	"SoccerTwos.Losses.ValueLoss.mean": {
	"value": 0.0893861286342144,
	"min": 0.06532500262061755,
	"max": 0.10287798456847667,
	"count": 96
	},
	"SoccerTwos.Losses.ValueLoss.sum": {
	"value": 0.0893861286342144,
	"min": 0.06532500262061755,
	"max": 0.10287798456847667,
	"count": 96
	},
	"SoccerTwos.Losses.BaselineLoss.mean": {
	"value": 0.09144253581762314,
	"min": 0.06658829804509878,
	"max": 0.10604632832109928,
	"count": 96
	},
	"SoccerTwos.Losses.BaselineLoss.sum": {
	"value": 0.09144253581762314,
	"min": 0.06658829804509878,
	"max": 0.10604632832109928,
	"count": 96
	},
	"SoccerTwos.Policy.LearningRate.mean": {
	"value": 0.00010000000000000003,
	"min": 0.00010000000000000003,
	"max": 0.00010000000000000003,
	"count": 96
	},
	"SoccerTwos.Policy.LearningRate.sum": {
	"value": 0.00010000000000000003,
	"min": 0.00010000000000000003,
	"max": 0.00010000000000000003,
	"count": 96
	},
	"SoccerTwos.Policy.Epsilon.mean": {
	"value": 0.30000000000000004,
	"min": 0.30000000000000004,
	"max": 0.30000000000000004,
	"count": 96
	},
	"SoccerTwos.Policy.Epsilon.sum": {
	"value": 0.30000000000000004,
	"min": 0.30000000000000004,
	"max": 0.30000000000000004,
	"count": 96
	},
	"SoccerTwos.Policy.Beta.mean": {
	"value": 0.010000000000000002,
	"min": 0.010000000000000002,
	"max": 0.010000000000000002,
	"count": 96
	},
	"SoccerTwos.Policy.Beta.sum": {
	"value": 0.010000000000000002,
	"min": 0.010000000000000002,
	"max": 0.010000000000000002,
	"count": 96
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1718191559",
	"python_version": "3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]",
	"command_line_arguments": "/usr/local/bin/mlagents-learn config/poca/SoccerTwos.yaml --env=training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --resume",
	"mlagents_version": "1.1.0.dev0",
	"mlagents_envs_version": "1.1.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "2.3.0+cu121",
	"numpy_version": "1.23.5",
	"end_time_seconds": "1718194236"
	},
	"total": 2677.5249057149995,
	"count": 1,
	"self": 0.4454774019995966,
	"children": {
	"run_training.setup": {
	"total": 0.05204561299979105,
	"count": 1,
	"self": 0.05204561299979105
	},
	"TrainerController.start_learning": {
	"total": 2677.0273827,
	"count": 1,
	"self": 1.826815037187771,
	"children": {
	"TrainerController._reset_env": {
	"total": 2.400645704001363,
	"count": 6,
	"self": 2.400645704001363
	},
	"TrainerController.advance": {
	"total": 2672.4072354908103,
	"count": 68436,
	"self": 1.806063156755954,
	"children": {
	"env_step": {
	"total": 1855.4611228438935,
	"count": 68436,
	"self": 1422.0476411730265,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 432.360649040987,
	"count": 68436,
	"self": 11.123637882056755,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 421.2370111589303,
	"count": 125582,
	"self": 421.2370111589303
	}
	}
	},
	"workers": {
	"total": 1.0528326298799584,
	"count": 68436,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 2671.1950876310816,
	"count": 68436,
	"is_parallel": true,
	"self": 1475.9357132810305,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.003691028000048391,
	"count": 2,
	"is_parallel": true,
	"self": 0.0009521160000076634,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0027389120000407274,
	"count": 8,
	"is_parallel": true,
	"self": 0.0027389120000407274
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.04350786299983156,
	"count": 1,
	"is_parallel": true,
	"self": 0.001218514000356663,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.000951165999595105,
	"count": 1,
	"is_parallel": true,
	"self": 0.000951165999595105
	},
	"communicator.exchange": {
	"total": 0.03761735099988073,
	"count": 1,
	"is_parallel": true,
	"self": 0.03761735099988073
	},
	"steps_from_proto": {
	"total": 0.0037208319999990636,
	"count": 2,
	"is_parallel": true,
	"self": 0.0006640930005232804,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.003056738999475783,
	"count": 8,
	"is_parallel": true,
	"self": 0.003056738999475783
	}
	}
	}
	}
	}
	}
	},
	"steps_from_proto": {
	"total": 0.012021573999390966,
	"count": 10,
	"is_parallel": true,
	"self": 0.0024884319968805357,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.00953314200251043,
	"count": 40,
	"is_parallel": true,
	"self": 0.00953314200251043
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 1195.2473527760517,
	"count": 68435,
	"is_parallel": true,
	"self": 72.91794219366466,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 50.31317736098026,
	"count": 68435,
	"is_parallel": true,
	"self": 50.31317736098026
	},
	"communicator.exchange": {
	"total": 836.5022911791707,
	"count": 68435,
	"is_parallel": true,
	"self": 836.5022911791707
	},
	"steps_from_proto": {
	"total": 235.51394204223607,
	"count": 136870,
	"is_parallel": true,
	"self": 39.84623565183392,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 195.66770639040215,
	"count": 547480,
	"is_parallel": true,
	"self": 195.66770639040215
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 815.1400494901609,
	"count": 68436,
	"self": 13.141514201188784,
	"children": {
	"process_trajectory": {
	"total": 230.12784679496872,
	"count": 68436,
	"self": 229.48975947596864,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.6380873190000784,
	"count": 2,
	"self": 0.6380873190000784
	}
	}
	},
	"_update_policy": {
	"total": 571.8706884940034,
	"count": 96,
	"self": 315.4452915630277,
	"children": {
	"TorchPOCAOptimizer.update": {
	"total": 256.4253969309757,
	"count": 5760,
	"self": 256.4253969309757
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 1.2270002116565593e-06,
	"count": 1,
	"self": 1.2270002116565593e-06
	},
	"TrainerController._save_models": {
	"total": 0.3926852410004358,
	"count": 1,
	"self": 0.005133809000653855,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.3875514319997819,
	"count": 1,
	"self": 0.3875514319997819
	}
	}
	}
	}
	}
	}
	}