{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.22929097712039948,
"min": 0.22147920727729797,
"max": 1.4221351146697998,
"count": 50
},
"Pyramids.Policy.Entropy.sum": {
"value": 6838.3740234375,
"min": 6690.44384765625,
"max": 43141.890625,
"count": 50
},
"Pyramids.Step.mean": {
"value": 1499980.0,
"min": 29952.0,
"max": 1499980.0,
"count": 50
},
"Pyramids.Step.sum": {
"value": 1499980.0,
"min": 29952.0,
"max": 1499980.0,
"count": 50
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.7262492179870605,
"min": -0.12259096652269363,
"max": 0.7778822183609009,
"count": 50
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 212.791015625,
"min": -29.544422149658203,
"max": 229.47525024414062,
"count": 50
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.017438266426324844,
"min": -0.03241197392344475,
"max": 0.3020504117012024,
"count": 50
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 5.10941219329834,
"min": -8.62158489227295,
"max": 71.58594512939453,
"count": 50
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06806238504941575,
"min": 0.06455917499456633,
"max": 0.0738861463420714,
"count": 50
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9528733906918205,
"min": 0.4872314462469337,
"max": 1.0771813474888867,
"count": 50
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.014367812083773536,
"min": 9.688859022965231e-05,
"max": 0.01645606311822554,
"count": 50
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.2011493691728295,
"min": 0.0013564402632151324,
"max": 0.24551326486135963,
"count": 50
},
"Pyramids.Policy.LearningRate.mean": {
"value": 3.1416703813809517e-06,
"min": 3.1416703813809517e-06,
"max": 0.00029676708679192377,
"count": 50
},
"Pyramids.Policy.LearningRate.sum": {
"value": 4.398338533933333e-05,
"min": 4.398338533933333e-05,
"max": 0.0036545653818115995,
"count": 50
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10104719047619048,
"min": 0.10104719047619048,
"max": 0.19892236190476195,
"count": 50
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4146606666666668,
"min": 1.3794090666666667,
"max": 2.6181884000000006,
"count": 50
},
"Pyramids.Policy.Beta.mean": {
"value": 0.00011461432857142858,
"min": 0.00011461432857142858,
"max": 0.009892343954285714,
"count": 50
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0016046006,
"min": 0.0016046006,
"max": 0.12183702115999999,
"count": 50
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.010168899782001972,
"min": 0.010024704039096832,
"max": 0.45344170928001404,
"count": 50
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.14236459136009216,
"min": 0.14034585654735565,
"max": 3.1740920543670654,
"count": 50
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 251.75,
"min": 228.50393700787401,
"max": 999.0,
"count": 50
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 30210.0,
"min": 15984.0,
"max": 32781.0,
"count": 50
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.7494297351472634,
"min": -1.0000000521540642,
"max": 1.770267701759113,
"count": 50
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 211.68099795281887,
"min": -32.000001668930054,
"max": 224.82399812340736,
"count": 50
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.7494297351472634,
"min": -1.0000000521540642,
"max": 1.770267701759113,
"count": 50
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 211.68099795281887,
"min": -32.000001668930054,
"max": 224.82399812340736,
"count": 50
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.02620454387095168,
"min": 0.024720477220198087,
"max": 9.579654056578875,
"count": 50
},
"Pyramids.Policy.RndReward.sum": {
"value": 3.1707498083851533,
"min": 3.0282883491308894,
"max": 153.274464905262,
"count": 50
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1685866891",
"python_version": "3.10.11 (main, Apr 5 2023, 14:15:10) [GCC 9.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=PyramidsRND --no-graphics",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0+cu102",
"numpy_version": "1.21.2",
"end_time_seconds": "1685870442"
},
"total": 3550.9206053549997,
"count": 1,
"self": 0.5753164249999827,
"children": {
"run_training.setup": {
"total": 0.042803211999853374,
"count": 1,
"self": 0.042803211999853374
},
"TrainerController.start_learning": {
"total": 3550.302485718,
"count": 1,
"self": 2.2334032709973144,
"children": {
"TrainerController._reset_env": {
"total": 3.6784017749996565,
"count": 1,
"self": 3.6784017749996565
},
"TrainerController.advance": {
"total": 3544.291227039002,
"count": 96710,
"self": 2.2578433160128952,
"children": {
"env_step": {
"total": 2607.718581934908,
"count": 96710,
"self": 2435.4058637560693,
"children": {
"SubprocessEnvManager._take_step": {
"total": 170.97957328087705,
"count": 96710,
"self": 7.344103831932443,
"children": {
"TorchPolicy.evaluate": {
"total": 163.6354694489446,
"count": 93814,
"self": 163.6354694489446
}
}
},
"workers": {
"total": 1.3331448979615743,
"count": 96710,
"self": 0.0,
"children": {
"worker_root": {
"total": 3542.1563782999174,
"count": 96710,
"is_parallel": true,
"self": 1285.950373642822,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0018310929999643122,
"count": 1,
"is_parallel": true,
"self": 0.0005710499990527751,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001260043000911537,
"count": 8,
"is_parallel": true,
"self": 0.001260043000911537
}
}
},
"UnityEnvironment.step": {
"total": 0.04741607699997985,
"count": 1,
"is_parallel": true,
"self": 0.0005924960005359026,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0005117929999869375,
"count": 1,
"is_parallel": true,
"self": 0.0005117929999869375
},
"communicator.exchange": {
"total": 0.044369594999807305,
"count": 1,
"is_parallel": true,
"self": 0.044369594999807305
},
"steps_from_proto": {
"total": 0.0019421929996497056,
"count": 1,
"is_parallel": true,
"self": 0.00039331900097749894,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0015488739986722067,
"count": 8,
"is_parallel": true,
"self": 0.0015488739986722067
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 2256.2060046570955,
"count": 96709,
"is_parallel": true,
"self": 50.8518132531608,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 36.58727748590809,
"count": 96709,
"is_parallel": true,
"self": 36.58727748590809
},
"communicator.exchange": {
"total": 2006.9956509900571,
"count": 96709,
"is_parallel": true,
"self": 2006.9956509900571
},
"steps_from_proto": {
"total": 161.77126292796947,
"count": 96709,
"is_parallel": true,
"self": 33.15267456284437,
"children": {
"_process_rank_one_or_two_observation": {
"total": 128.6185883651251,
"count": 773672,
"is_parallel": true,
"self": 128.6185883651251
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 934.3148017880812,
"count": 96710,
"self": 4.4016466042608045,
"children": {
"process_trajectory": {
"total": 170.44975956981625,
"count": 96710,
"self": 170.0590800878158,
"children": {
"RLTrainer._checkpoint": {
"total": 0.39067948200045066,
"count": 3,
"self": 0.39067948200045066
}
}
},
"_update_policy": {
"total": 759.4633956140042,
"count": 683,
"self": 485.4348679558998,
"children": {
"TorchPPOOptimizer.update": {
"total": 274.0285276581044,
"count": 34194,
"self": 274.0285276581044
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.0680005289032124e-06,
"count": 1,
"self": 1.0680005289032124e-06
},
"TrainerController._save_models": {
"total": 0.09945256500031974,
"count": 1,
"self": 0.001604840000254626,
"children": {
"RLTrainer._checkpoint": {
"total": 0.09784772500006511,
"count": 1,
"self": 0.09784772500006511
}
}
}
}
}
}
}