poca-SoccerTwos/run_logs/timers.json
{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.6099094152450562,
"min": 1.5207527875900269,
"max": 2.384139060974121,
"count": 1000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 32404.255859375,
"min": 27570.814453125,
"max": 50887.06640625,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 63.177215189873415,
"min": 37.93076923076923,
"max": 77.25,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19964.0,
"min": 18560.0,
"max": 20964.0,
"count": 1000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1630.5526151676804,
"min": 1380.5516471633684,
"max": 1658.2405356683296,
"count": 1000
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 257627.31319649352,
"min": 196305.03927465307,
"max": 420312.2336165031,
"count": 1000
},
"SoccerTwos.Step.mean": {
"value": 14999989.0,
"min": 5009930.0,
"max": 14999989.0,
"count": 1000
},
"SoccerTwos.Step.sum": {
"value": 14999989.0,
"min": 5009930.0,
"max": 14999989.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.02849951945245266,
"min": -0.10880010575056076,
"max": 0.11400023102760315,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -4.502923965454102,
"min": -19.710926055908203,
"max": 22.08706283569336,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.024522004649043083,
"min": -0.11366839706897736,
"max": 0.10873111337423325,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -3.874476671218872,
"min": -20.68764877319336,
"max": 22.398609161376953,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.05733291069163552,
"min": -0.32792437817919906,
"max": 0.3206224626398341,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -9.058599889278412,
"min": -65.91280001401901,
"max": 59.95640051364899,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.05733291069163552,
"min": -0.32792437817919906,
"max": 0.3206224626398341,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -9.058599889278412,
"min": -65.91280001401901,
"max": 59.95640051364899,
"count": 1000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.0149743233230159,
"min": 0.011443423058760041,
"max": 0.026090294665967424,
"count": 486
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.0149743233230159,
"min": 0.011443423058760041,
"max": 0.026090294665967424,
"count": 486
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.10135304778814316,
"min": 0.08815950925151507,
"max": 0.12917640407880146,
"count": 486
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.10135304778814316,
"min": 0.08815950925151507,
"max": 0.12917640407880146,
"count": 486
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.10274179776509602,
"min": 0.08971414019664128,
"max": 0.13080705429116884,
"count": 486
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.10274179776509602,
"min": 0.08971414019664128,
"max": 0.13080705429116884,
"count": 486
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 486
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 486
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 486
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 486
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 486
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 486
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1684779147",
"python_version": "3.9.16 | packaged by conda-forge | (main, Feb 1 2023, 21:39:03) \n[GCC 11.3.0]",
"command_line_arguments": "ml-agents/mlagents/trainers/learn.py ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --resume --torch-device=cuda",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.0.1+cu118",
"numpy_version": "1.21.2",
"end_time_seconds": "1684784885"
},
"total": 5738.293310943998,
"count": 1,
"self": 0.16640525999901,
"children": {
"run_training.setup": {
"total": 0.006291360001341673,
"count": 1,
"self": 0.006291360001341673
},
"TrainerController.start_learning": {
"total": 5738.120614323998,
"count": 1,
"self": 5.686786572623532,
"children": {
"TrainerController._reset_env": {
"total": 1.5485932720075652,
"count": 51,
"self": 1.5485932720075652
},
"TrainerController.advance": {
"total": 5730.810328043368,
"count": 700812,
"self": 5.292680743667006,
"children": {
"env_step": {
"total": 4235.785418268359,
"count": 700812,
"self": 3243.6642487515273,
"children": {
"SubprocessEnvManager._take_step": {
"total": 989.048752669034,
"count": 700812,
"self": 36.69071319253635,
"children": {
"TorchPolicy.evaluate": {
"total": 952.3580394764977,
"count": 1253978,
"self": 952.3580394764977
}
}
},
"workers": {
"total": 3.0724168477972853,
"count": 700812,
"self": 0.0,
"children": {
"worker_root": {
"total": 5730.651753197661,
"count": 700812,
"is_parallel": true,
"self": 3042.8718115734773,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0015056480042403564,
"count": 2,
"is_parallel": true,
"self": 0.00041794100252445787,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0010877070017158985,
"count": 8,
"is_parallel": true,
"self": 0.0010877070017158985
}
}
},
"UnityEnvironment.step": {
"total": 0.011163593997480348,
"count": 1,
"is_parallel": true,
"self": 0.0002702169949770905,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.000198353998712264,
"count": 1,
"is_parallel": true,
"self": 0.000198353998712264
},
"communicator.exchange": {
"total": 0.009959303002688102,
"count": 1,
"is_parallel": true,
"self": 0.009959303002688102
},
"steps_from_proto": {
"total": 0.0007357200011028908,
"count": 2,
"is_parallel": true,
"self": 0.0001429900039511267,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0005927299971517641,
"count": 8,
"is_parallel": true,
"self": 0.0005927299971517641
}
}
}
}
}
}
},
"steps_from_proto": {
"total": 0.03402553797059227,
"count": 100,
"is_parallel": true,
"self": 0.006169039937958587,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.02785649803263368,
"count": 400,
"is_parallel": true,
"self": 0.02785649803263368
}
}
},
"UnityEnvironment.step": {
"total": 2687.7459160862127,
"count": 700811,
"is_parallel": true,
"self": 164.7059383653468,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 87.95222477414063,
"count": 700811,
"is_parallel": true,
"self": 87.95222477414063
},
"communicator.exchange": {
"total": 2005.0622043327712,
"count": 700811,
"is_parallel": true,
"self": 2005.0622043327712
},
"steps_from_proto": {
"total": 430.025548613954,
"count": 1401622,
"is_parallel": true,
"self": 77.20184635687838,
"children": {
"_process_rank_one_or_two_observation": {
"total": 352.82370225707564,
"count": 5606488,
"is_parallel": true,
"self": 352.82370225707564
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 1489.7322290313423,
"count": 700812,
"self": 38.19949424334118,
"children": {
"process_trajectory": {
"total": 649.0925412990582,
"count": 700812,
"self": 647.4561120400504,
"children": {
"RLTrainer._checkpoint": {
"total": 1.6364292590078549,
"count": 20,
"self": 1.6364292590078549
}
}
},
"_update_policy": {
"total": 802.4401934889429,
"count": 486,
"self": 560.5196833143345,
"children": {
"TorchPOCAOptimizer.update": {
"total": 241.92051017460835,
"count": 14580,
"self": 241.92051017460835
}
}
}
}
}
}
},
"trainer_threads": {
"total": 4.2300234781578183e-07,
"count": 1,
"self": 4.2300234781578183e-07
},
"TrainerController._save_models": {
"total": 0.07490601299650734,
"count": 1,
"self": 0.0016170749950106256,
"children": {
"RLTrainer._checkpoint": {
"total": 0.07328893800149672,
"count": 1,
"self": 0.07328893800149672
}
}
}
}
}
}
}