First Push

8c6a737 verified 2 months ago

20.2 kB

	{
	"name": "root",
	"gauges": {
	"SoccerTwos.Policy.Entropy.mean": {
	"value": 1.6403228044509888,
	"min": 1.5934513807296753,
	"max": 3.2957165241241455,
	"count": 9999
	},
	"SoccerTwos.Policy.Entropy.sum": {
	"value": 17111.84765625,
	"min": 2063.12744140625,
	"max": 105462.828125,
	"count": 9999
	},
	"SoccerTwos.Environment.EpisodeLength.mean": {
	"value": 92.6896551724138,
	"min": 42.08620689655172,
	"max": 999.0,
	"count": 9999
	},
	"SoccerTwos.Environment.EpisodeLength.sum": {
	"value": 10752.0,
	"min": 6360.0,
	"max": 26120.0,
	"count": 9999
	},
	"SoccerTwos.Self-play.ELO.mean": {
	"value": 1897.4883414858139,
	"min": 1195.3066567684584,
	"max": 1923.8329193841842,
	"count": 9784
	},
	"SoccerTwos.Self-play.ELO.sum": {
	"value": 110054.3238061772,
	"min": 2390.613313536917,
	"max": 206197.91650201185,
	"count": 9784
	},
	"SoccerTwos.Step.mean": {
	"value": 49999732.0,
	"min": 4078.0,
	"max": 49999732.0,
	"count": 10000
	},
	"SoccerTwos.Step.sum": {
	"value": 49999732.0,
	"min": 4078.0,
	"max": 49999732.0,
	"count": 10000
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
	"value": -0.07850736379623413,
	"min": -0.17166879773139954,
	"max": 0.23496553301811218,
	"count": 10000
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
	"value": -4.47491979598999,
	"min": -13.403179168701172,
	"max": 21.11491584777832,
	"count": 10000
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
	"value": -0.07782874256372452,
	"min": -0.17562894523143768,
	"max": 0.23829385638237,
	"count": 10000
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
	"value": -4.4362382888793945,
	"min": -13.46865177154541,
	"max": 21.446447372436523,
	"count": 10000
	},
	"SoccerTwos.Environment.CumulativeReward.mean": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 10000
	},
	"SoccerTwos.Environment.CumulativeReward.sum": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 10000
	},
	"SoccerTwos.Policy.ExtrinsicReward.mean": {
	"value": -0.30045964843348455,
	"min": -1.0,
	"max": 0.715462502092123,
	"count": 10000
	},
	"SoccerTwos.Policy.ExtrinsicReward.sum": {
	"value": -17.126199960708618,
	"min": -43.7535999417305,
	"max": 44.99379986524582,
	"count": 10000
	},
	"SoccerTwos.Environment.GroupCumulativeReward.mean": {
	"value": -0.30045964843348455,
	"min": -1.0,
	"max": 0.715462502092123,
	"count": 10000
	},
	"SoccerTwos.Environment.GroupCumulativeReward.sum": {
	"value": -17.126199960708618,
	"min": -43.7535999417305,
	"max": 44.99379986524582,
	"count": 10000
	},
	"SoccerTwos.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 10000
	},
	"SoccerTwos.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 10000
	},
	"SoccerTwos.Losses.PolicyLoss.mean": {
	"value": 0.024088964767215656,
	"min": 0.015893725545320193,
	"max": 0.0269867768289987,
	"count": 487
	},
	"SoccerTwos.Losses.PolicyLoss.sum": {
	"value": 0.024088964767215656,
	"min": 0.015893725545320193,
	"max": 0.0269867768289987,
	"count": 487
	},
	"SoccerTwos.Losses.ValueLoss.mean": {
	"value": 0.06855518159766992,
	"min": 0.0005633096817473415,
	"max": 0.08216111165781816,
	"count": 487
	},
	"SoccerTwos.Losses.ValueLoss.sum": {
	"value": 0.06855518159766992,
	"min": 0.0005633096817473415,
	"max": 0.08216111165781816,
	"count": 487
	},
	"SoccerTwos.Losses.BaselineLoss.mean": {
	"value": 0.0712700811525186,
	"min": 0.0006828123525095482,
	"max": 0.08849437306324641,
	"count": 487
	},
	"SoccerTwos.Losses.BaselineLoss.sum": {
	"value": 0.0712700811525186,
	"min": 0.0006828123525095482,
	"max": 0.08849437306324641,
	"count": 487
	},
	"SoccerTwos.Policy.LearningRate.mean": {
	"value": 0.00030000000000000003,
	"min": 0.00030000000000000003,
	"max": 0.00030000000000000003,
	"count": 487
	},
	"SoccerTwos.Policy.LearningRate.sum": {
	"value": 0.00030000000000000003,
	"min": 0.00030000000000000003,
	"max": 0.00030000000000000003,
	"count": 487
	},
	"SoccerTwos.Policy.Epsilon.mean": {
	"value": 0.19999999999999996,
	"min": 0.19999999999999996,
	"max": 0.19999999999999996,
	"count": 487
	},
	"SoccerTwos.Policy.Epsilon.sum": {
	"value": 0.19999999999999996,
	"min": 0.19999999999999996,
	"max": 0.19999999999999996,
	"count": 487
	},
	"SoccerTwos.Policy.Beta.mean": {
	"value": 0.005,
	"min": 0.005,
	"max": 0.005,
	"count": 487
	},
	"SoccerTwos.Policy.Beta.sum": {
	"value": 0.005,
	"min": 0.005,
	"max": 0.005,
	"count": 487
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1736979011",
	"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
	"command_line_arguments": "/home/jake/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos --run-id=SoccerTwosV1 --no-graphics",
	"mlagents_version": "1.2.0.dev0",
	"mlagents_envs_version": "1.2.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "2.5.1+cu124",
	"numpy_version": "1.23.5",
	"end_time_seconds": "1737042753"
	},
	"total": 63742.176217397966,
	"count": 1,
	"self": 10.004164291953202,
	"children": {
	"run_training.setup": {
	"total": 0.009325877006631345,
	"count": 1,
	"self": 0.009325877006631345
	},
	"TrainerController.start_learning": {
	"total": 63732.162727229006,
	"count": 1,
	"self": 37.988382418232504,
	"children": {
	"TrainerController._reset_env": {
	"total": 5.989011199388187,
	"count": 250,
	"self": 5.989011199388187
	},
	"TrainerController.advance": {
	"total": 63688.07022734132,
	"count": 3397057,
	"self": 37.626744013337884,
	"children": {
	"env_step": {
	"total": 50256.522446545714,
	"count": 3397057,
	"self": 30449.064977659087,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 19781.671317706874,
	"count": 3397057,
	"self": 270.7169931862736,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 19510.9543245206,
	"count": 6290090,
	"self": 19510.9543245206
	}
	}
	},
	"workers": {
	"total": 25.78615117975278,
	"count": 3397057,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 63650.172107425984,
	"count": 3397057,
	"is_parallel": true,
	"self": 37492.45927646983,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.0017958719981834292,
	"count": 2,
	"is_parallel": true,
	"self": 0.000766964047215879,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0010289079509675503,
	"count": 8,
	"is_parallel": true,
	"self": 0.0010289079509675503
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.01545187400188297,
	"count": 1,
	"is_parallel": true,
	"self": 0.0002709131222218275,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.00020565098384395242,
	"count": 1,
	"is_parallel": true,
	"self": 0.00020565098384395242
	},
	"communicator.exchange": {
	"total": 0.014144759974442422,
	"count": 1,
	"is_parallel": true,
	"self": 0.014144759974442422
	},
	"steps_from_proto": {
	"total": 0.000830549921374768,
	"count": 2,
	"is_parallel": true,
	"self": 0.0002017549704760313,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0006287949508987367,
	"count": 8,
	"is_parallel": true,
	"self": 0.0006287949508987367
	}
	}
	}
	}
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 26157.526439543988,
	"count": 3397056,
	"is_parallel": true,
	"self": 844.6909932030248,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 513.4647607574589,
	"count": 3397056,
	"is_parallel": true,
	"self": 513.4647607574589
	},
	"communicator.exchange": {
	"total": 22464.49313636101,
	"count": 3397056,
	"is_parallel": true,
	"self": 22464.49313636101
	},
	"steps_from_proto": {
	"total": 2334.877549222496,
	"count": 6794112,
	"is_parallel": true,
	"self": 485.19663266302086,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 1849.680916559475,
	"count": 27176448,
	"is_parallel": true,
	"self": 1849.680916559475
	}
	}
	}
	}
	},
	"steps_from_proto": {
	"total": 0.1863914121640846,
	"count": 498,
	"is_parallel": true,
	"self": 0.03845423803431913,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.14793717412976548,
	"count": 1992,
	"is_parallel": true,
	"self": 0.14793717412976548
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 13393.921036782267,
	"count": 3397057,
	"self": 283.00673896825174,
	"children": {
	"process_trajectory": {
	"total": 8087.140385998588,
	"count": 3397057,
	"self": 8072.611676132423,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 14.528709866164718,
	"count": 100,
	"self": 14.528709866164718
	}
	}
	},
	"_update_policy": {
	"total": 5023.773911815428,
	"count": 487,
	"self": 2790.993342328409,
	"children": {
	"TorchPOCAOptimizer.update": {
	"total": 2232.7805694870185,
	"count": 73050,
	"self": 2232.7805694870185
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 4.520406946539879e-07,
	"count": 1,
	"self": 4.520406946539879e-07
	},
	"TrainerController._save_models": {
	"total": 0.1151058180257678,
	"count": 1,
	"self": 0.002308555005583912,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.11279726302018389,
	"count": 1,
	"self": 0.11279726302018389
	}
	}
	}
	}
	}
	}
	}