{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 3.2140536308288574,
"min": 3.1371610164642334,
"max": 3.295743227005005,
"count": 1000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 87216.5625,
"min": 14859.255859375,
"max": 123189.3359375,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 689.75,
"min": 372.57142857142856,
"max": 999.0,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 22072.0,
"min": 15984.0,
"max": 25980.0,
"count": 1000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1187.6966732241428,
"min": 1153.841539058977,
"max": 1200.1767817874518,
"count": 840
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 9501.573385793143,
"min": 2307.683078117954,
"max": 28693.76147327935,
"count": 840
},
"SoccerTwos.Step.mean": {
"value": 9999782.0,
"min": 9010.0,
"max": 9999782.0,
"count": 1000
},
"SoccerTwos.Step.sum": {
"value": 9999782.0,
"min": 9010.0,
"max": 9999782.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.01656908541917801,
"min": -0.0285890344530344,
"max": 0.018183618783950806,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -0.24853627383708954,
"min": -0.6358370780944824,
"max": 0.3091215193271637,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.014030274003744125,
"min": -0.02842695266008377,
"max": 0.006985607091337442,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -0.21045410633087158,
"min": -0.5299453735351562,
"max": 0.10471726953983307,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.11269332567850748,
"min": -0.625,
"max": 0.43426666657129925,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 1.6903998851776123,
"min": -11.209600031375885,
"max": 7.816799998283386,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.11269332567850748,
"min": -0.625,
"max": 0.43426666657129925,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 1.6903998851776123,
"min": -11.209600031375885,
"max": 7.816799998283386,
"count": 1000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.009222518963118395,
"min": 0.0018380905045584466,
"max": 0.016796006510655086,
"count": 466
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.009222518963118395,
"min": 0.0018380905045584466,
"max": 0.016796006510655086,
"count": 466
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.002672747204390665,
"min": 8.787856226414684e-07,
"max": 0.01493195490911603,
"count": 466
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.002672747204390665,
"min": 8.787856226414684e-07,
"max": 0.01493195490911603,
"count": 466
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.0026594311154137054,
"min": 5.442700323025443e-07,
"max": 0.01496061822399497,
"count": 466
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.0026594311154137054,
"min": 5.442700323025443e-07,
"max": 0.01496061822399497,
"count": 466
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 466
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 466
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.19999999999999998,
"min": 0.19999999999999998,
"max": 0.19999999999999998,
"count": 466
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.19999999999999998,
"min": 0.19999999999999998,
"max": 0.19999999999999998,
"count": 466
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005,
"min": 0.005,
"max": 0.005,
"count": 466
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005,
"min": 0.005,
"max": 0.005,
"count": 466
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1714021025",
"python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]",
"command_line_arguments": "/opt/conda/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --force",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.3.0+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1714042856"
},
"total": 21830.642082236,
"count": 1,
"self": 0.3244513259996893,
"children": {
"run_training.setup": {
"total": 0.023126756000010573,
"count": 1,
"self": 0.023126756000010573
},
"TrainerController.start_learning": {
"total": 21830.294504154,
"count": 1,
"self": 17.681241414793476,
"children": {
"TrainerController._reset_env": {
"total": 7.963951644991766,
"count": 50,
"self": 7.963951644991766
},
"TrainerController.advance": {
"total": 21804.44504638521,
"count": 650905,
"self": 18.84673021924391,
"children": {
"env_step": {
"total": 16890.455054930666,
"count": 650905,
"self": 12782.116950416483,
"children": {
"SubprocessEnvManager._take_step": {
"total": 4097.01059741472,
"count": 650905,
"self": 126.73519772110421,
"children": {
"TorchPolicy.evaluate": {
"total": 3970.275399693616,
"count": 1291756,
"self": 3970.275399693616
}
}
},
"workers": {
"total": 11.327507099463446,
"count": 650905,
"self": 0.0,
"children": {
"worker_root": {
"total": 21802.707380226264,
"count": 650905,
"is_parallel": true,
"self": 11594.57311845469,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0045256380001319485,
"count": 2,
"is_parallel": true,
"self": 0.0011403580003843672,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0033852799997475813,
"count": 8,
"is_parallel": true,
"self": 0.0033852799997475813
}
}
},
"UnityEnvironment.step": {
"total": 0.0403048900000158,
"count": 1,
"is_parallel": true,
"self": 0.0009176909998132032,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0010829500001818815,
"count": 1,
"is_parallel": true,
"self": 0.0010829500001818815
},
"communicator.exchange": {
"total": 0.03505178000000342,
"count": 1,
"is_parallel": true,
"self": 0.03505178000000342
},
"steps_from_proto": {
"total": 0.0032524690000172995,
"count": 2,
"is_parallel": true,
"self": 0.0006490769999345503,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0026033920000827493,
"count": 8,
"is_parallel": true,
"self": 0.0026033920000827493
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 10207.980765756565,
"count": 650904,
"is_parallel": true,
"self": 507.85822469908635,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 446.99434314346354,
"count": 650904,
"is_parallel": true,
"self": 446.99434314346354
},
"communicator.exchange": {
"total": 7188.0503721799605,
"count": 650904,
"is_parallel": true,
"self": 7188.0503721799605
},
"steps_from_proto": {
"total": 2065.077825734055,
"count": 1301808,
"is_parallel": true,
"self": 359.3358679757,
"children": {
"_process_rank_one_or_two_observation": {
"total": 1705.7419577583548,
"count": 5207232,
"is_parallel": true,
"self": 1705.7419577583548
}
}
}
}
},
"steps_from_proto": {
"total": 0.1534960150088409,
"count": 98,
"is_parallel": true,
"self": 0.02746741501323413,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.12602859999560678,
"count": 392,
"is_parallel": true,
"self": 0.12602859999560678
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 4895.143261235304,
"count": 650905,
"self": 152.33847000950027,
"children": {
"process_trajectory": {
"total": 1271.8683250818606,
"count": 650905,
"self": 1267.4993024908624,
"children": {
"RLTrainer._checkpoint": {
"total": 4.369022590998156,
"count": 20,
"self": 4.369022590998156
}
}
},
"_update_policy": {
"total": 3470.936466143943,
"count": 466,
"self": 2705.017264349991,
"children": {
"TorchPOCAOptimizer.update": {
"total": 765.9192017939522,
"count": 2796,
"self": 765.9192017939522
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.200998667627573e-06,
"count": 1,
"self": 1.200998667627573e-06
},
"TrainerController._save_models": {
"total": 0.20426350800335058,
"count": 1,
"self": 0.001985904003959149,
"children": {
"RLTrainer._checkpoint": {
"total": 0.20227760399939143,
"count": 1,
"self": 0.20227760399939143
}
}
}
}
}
}
}