{
"name": "root",
"gauges": {
"Huggy.Policy.Entropy.mean": {
"value": 1.4082895517349243,
"min": 1.4082895517349243,
"max": 1.4283841848373413,
"count": 40
},
"Huggy.Policy.Entropy.sum": {
"value": 71089.046875,
"min": 69400.234375,
"max": 75671.7421875,
"count": 40
},
"Huggy.Environment.EpisodeLength.mean": {
"value": 99.56626506024097,
"min": 73.78923766816143,
"max": 406.18548387096774,
"count": 40
},
"Huggy.Environment.EpisodeLength.sum": {
"value": 49584.0,
"min": 49260.0,
"max": 50367.0,
"count": 40
},
"Huggy.Step.mean": {
"value": 1999998.0,
"min": 49593.0,
"max": 1999998.0,
"count": 40
},
"Huggy.Step.sum": {
"value": 1999998.0,
"min": 49593.0,
"max": 1999998.0,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.mean": {
"value": 2.400049924850464,
"min": -0.008008349686861038,
"max": 2.506079912185669,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.sum": {
"value": 1195.224853515625,
"min": -0.9770187139511108,
"max": 1640.710205078125,
"count": 40
},
"Huggy.Environment.CumulativeReward.mean": {
"value": 3.658458121689448,
"min": 1.9253230507745118,
"max": 4.06927930105191,
"count": 40
},
"Huggy.Environment.CumulativeReward.sum": {
"value": 1821.912144601345,
"min": 234.88941219449043,
"max": 2593.123188138008,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.mean": {
"value": 3.658458121689448,
"min": 1.9253230507745118,
"max": 4.06927930105191,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.sum": {
"value": 1821.912144601345,
"min": 234.88941219449043,
"max": 2593.123188138008,
"count": 40
},
"Huggy.Losses.PolicyLoss.mean": {
"value": 0.018393828983729085,
"min": 0.01334685300438044,
"max": 0.020955035622076443,
"count": 40
},
"Huggy.Losses.PolicyLoss.sum": {
"value": 0.055181486951187256,
"min": 0.02669370600876088,
"max": 0.055181486951187256,
"count": 40
},
"Huggy.Losses.ValueLoss.mean": {
"value": 0.04728402727180057,
"min": 0.02498480547219515,
"max": 0.06280782675991456,
"count": 40
},
"Huggy.Losses.ValueLoss.sum": {
"value": 0.1418520818154017,
"min": 0.0499696109443903,
"max": 0.18467632954319319,
"count": 40
},
"Huggy.Policy.LearningRate.mean": {
"value": 3.5473488175833345e-06,
"min": 3.5473488175833345e-06,
"max": 0.000295281226572925,
"count": 40
},
"Huggy.Policy.LearningRate.sum": {
"value": 1.0642046452750003e-05,
"min": 1.0642046452750003e-05,
"max": 0.00084416506861165,
"count": 40
},
"Huggy.Policy.Epsilon.mean": {
"value": 0.10118241666666666,
"min": 0.10118241666666666,
"max": 0.198427075,
"count": 40
},
"Huggy.Policy.Epsilon.sum": {
"value": 0.30354725,
"min": 0.20761380000000002,
"max": 0.5813883500000001,
"count": 40
},
"Huggy.Policy.Beta.mean": {
"value": 6.900259166666671e-05,
"min": 6.900259166666671e-05,
"max": 0.004921511042500002,
"count": 40
},
"Huggy.Policy.Beta.sum": {
"value": 0.00020700777500000012,
"min": 0.00020700777500000012,
"max": 0.014071278665000003,
"count": 40
},
"Huggy.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
},
"Huggy.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1696972091",
"python_version": "3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/Huggy.yaml --env=./trained-envs-executables/linux/Huggy/Huggy --run-id=Huggy --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.0.1+cu118",
"numpy_version": "1.21.2",
"end_time_seconds": "1696976641"
},
"total": 4550.475618324999,
"count": 1,
"self": 0.703714654999203,
"children": {
"run_training.setup": {
"total": 0.06835360100001253,
"count": 1,
"self": 0.06835360100001253
},
"TrainerController.start_learning": {
"total": 4549.703550069,
"count": 1,
"self": 8.7179147192237,
"children": {
"TrainerController._reset_env": {
"total": 1.060523761000013,
"count": 1,
"self": 1.060523761000013
},
"TrainerController.advance": {
"total": 4539.806648957777,
"count": 232827,
"self": 8.791913871789802,
"children": {
"env_step": {
"total": 2862.441346406943,
"count": 232827,
"self": 2399.3870169378197,
"children": {
"SubprocessEnvManager._take_step": {
"total": 457.52774218513355,
"count": 232827,
"self": 30.46612475425286,
"children": {
"TorchPolicy.evaluate": {
"total": 427.0616174308807,
"count": 223127,
"self": 427.0616174308807
}
}
},
"workers": {
"total": 5.526587283989528,
"count": 232827,
"self": 0.0,
"children": {
"worker_root": {
"total": 4535.365509181896,
"count": 232827,
"is_parallel": true,
"self": 2611.50194717994,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0012589910000144755,
"count": 1,
"is_parallel": true,
"self": 0.0003712650000124995,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.000887726000001976,
"count": 2,
"is_parallel": true,
"self": 0.000887726000001976
}
}
},
"UnityEnvironment.step": {
"total": 0.039940495999985615,
"count": 1,
"is_parallel": true,
"self": 0.0004504270000040833,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00026092100000596474,
"count": 1,
"is_parallel": true,
"self": 0.00026092100000596474
},
"communicator.exchange": {
"total": 0.038195975999997245,
"count": 1,
"is_parallel": true,
"self": 0.038195975999997245
},
"steps_from_proto": {
"total": 0.0010331719999783218,
"count": 1,
"is_parallel": true,
"self": 0.00030411599996682526,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0007290560000114965,
"count": 2,
"is_parallel": true,
"self": 0.0007290560000114965
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1923.8635620019563,
"count": 232826,
"is_parallel": true,
"self": 60.192265424314655,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 92.94172612486193,
"count": 232826,
"is_parallel": true,
"self": 92.94172612486193
},
"communicator.exchange": {
"total": 1621.1989336478096,
"count": 232826,
"is_parallel": true,
"self": 1621.1989336478096
},
"steps_from_proto": {
"total": 149.53063680497024,
"count": 232826,
"is_parallel": true,
"self": 49.40733984407092,
"children": {
"_process_rank_one_or_two_observation": {
"total": 100.12329696089932,
"count": 465652,
"is_parallel": true,
"self": 100.12329696089932
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 1668.573388679044,
"count": 232827,
"self": 12.794686383012504,
"children": {
"process_trajectory": {
"total": 245.78478593403364,
"count": 232827,
"self": 244.5044351940342,
"children": {
"RLTrainer._checkpoint": {
"total": 1.2803507399994487,
"count": 10,
"self": 1.2803507399994487
}
}
},
"_update_policy": {
"total": 1409.993916361998,
"count": 97,
"self": 448.68876859299826,
"children": {
"TorchPPOOptimizer.update": {
"total": 961.3051477689996,
"count": 2910,
"self": 961.3051477689996
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.0559997463133186e-06,
"count": 1,
"self": 1.0559997463133186e-06
},
"TrainerController._save_models": {
"total": 0.11846157499985566,
"count": 1,
"self": 0.0054400259996327804,
"children": {
"RLTrainer._checkpoint": {
"total": 0.11302154900022288,
"count": 1,
"self": 0.11302154900022288
}
}
}
}
}
}
}