poca-SoccerTwos/run_logs/timers.json
{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 2.1323177814483643,
"min": 2.0925920009613037,
"max": 3.2958126068115234,
"count": 750
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 41759.3125,
"min": 18373.73046875,
"max": 135839.296875,
"count": 750
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 54.022222222222226,
"min": 46.308411214953274,
"max": 999.0,
"count": 750
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19448.0,
"min": 14020.0,
"max": 28512.0,
"count": 750
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1442.7982170999321,
"min": 1175.8767981610217,
"max": 1442.7982170999321,
"count": 396
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 259703.6790779878,
"min": 2353.2776183580017,
"max": 298886.27177008847,
"count": 396
},
"SoccerTwos.Step.mean": {
"value": 7499947.0,
"min": 9272.0,
"max": 7499947.0,
"count": 750
},
"SoccerTwos.Step.sum": {
"value": 7499947.0,
"min": 9272.0,
"max": 7499947.0,
"count": 750
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": 0.08528007566928864,
"min": -0.08902060240507126,
"max": 0.16198357939720154,
"count": 750
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": 15.435693740844727,
"min": -12.81896686553955,
"max": 28.185142517089844,
"count": 750
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.09465673565864563,
"min": -0.08676186203956604,
"max": 0.17199347913265228,
"count": 750
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": 17.132869720458984,
"min": -12.493707656860352,
"max": 29.926864624023438,
"count": 750
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 750
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 750
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.1420729272273364,
"min": -0.4910444418589274,
"max": 0.4178551661557165,
"count": 750
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 25.715199828147888,
"min": -37.96260005235672,
"max": 58.661200165748596,
"count": 750
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.1420729272273364,
"min": -0.4910444418589274,
"max": 0.4178551661557165,
"count": 750
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 25.715199828147888,
"min": -37.96260005235672,
"max": 58.661200165748596,
"count": 750
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 750
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 750
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.013896760379429907,
"min": 0.011988342530094087,
"max": 0.02492272947759678,
"count": 350
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.013896760379429907,
"min": 0.011988342530094087,
"max": 0.02492272947759678,
"count": 350
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.0895109735429287,
"min": 3.223394425451905e-09,
"max": 0.09912891238927841,
"count": 350
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.0895109735429287,
"min": 3.223394425451905e-09,
"max": 0.09912891238927841,
"count": 350
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.09168058733145396,
"min": 3.2062370462308347e-09,
"max": 0.10227111205458642,
"count": 350
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.09168058733145396,
"min": 3.2062370462308347e-09,
"max": 0.10227111205458642,
"count": 350
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 350
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 350
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 350
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 350
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 350
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 350
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1675478238",
"python_version": "3.8.16 (default, Jan 17 2023, 23:13:24) \n[GCC 11.2.0]",
"command_line_arguments": "/home/keshan/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --force",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.21.2",
"end_time_seconds": "1675487805"
},
"total": 9567.807344482,
"count": 1,
"self": 0.5436419460002071,
"children": {
"run_training.setup": {
"total": 0.008059904999981882,
"count": 1,
"self": 0.008059904999981882
},
"TrainerController.start_learning": {
"total": 9567.255642631,
"count": 1,
"self": 9.405608434972237,
"children": {
"TrainerController._reset_env": {
"total": 4.7793283859953135,
"count": 38,
"self": 4.7793283859953135
},
"TrainerController.advance": {
"total": 9552.876563591033,
"count": 494663,
"self": 9.747094787091555,
"children": {
"env_step": {
"total": 6833.046095705896,
"count": 494663,
"self": 4830.561937479352,
"children": {
"SubprocessEnvManager._take_step": {
"total": 1996.823041652454,
"count": 494663,
"self": 53.83810939587397,
"children": {
"TorchPolicy.evaluate": {
"total": 1942.9849322565801,
"count": 963856,
"self": 1942.9849322565801
}
}
},
"workers": {
"total": 5.661116574089419,
"count": 494663,
"self": 0.0,
"children": {
"worker_root": {
"total": 9551.001550163073,
"count": 494663,
"is_parallel": true,
"self": 5748.170677710079,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0019828280001092935,
"count": 2,
"is_parallel": true,
"self": 0.0007948390004912653,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0011879889996180282,
"count": 8,
"is_parallel": true,
"self": 0.0011879889996180282
}
}
},
"UnityEnvironment.step": {
"total": 0.017460377000020344,
"count": 1,
"is_parallel": true,
"self": 0.00040050200004770886,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0003641219999508394,
"count": 1,
"is_parallel": true,
"self": 0.0003641219999508394
},
"communicator.exchange": {
"total": 0.015327287999980399,
"count": 1,
"is_parallel": true,
"self": 0.015327287999980399
},
"steps_from_proto": {
"total": 0.0013684650000413967,
"count": 2,
"is_parallel": true,
"self": 0.0002882799999497365,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0010801850000916602,
"count": 8,
"is_parallel": true,
"self": 0.0010801850000916602
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 3802.782827069997,
"count": 494662,
"is_parallel": true,
"self": 235.98976235914142,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 168.4696023440173,
"count": 494662,
"is_parallel": true,
"self": 168.4696023440173
},
"communicator.exchange": {
"total": 2696.776058992281,
"count": 494662,
"is_parallel": true,
"self": 2696.776058992281
},
"steps_from_proto": {
"total": 701.5474033745572,
"count": 989324,
"is_parallel": true,
"self": 150.17200161879146,
"children": {
"_process_rank_one_or_two_observation": {
"total": 551.3754017557658,
"count": 3957296,
"is_parallel": true,
"self": 551.3754017557658
}
}
}
}
},
"steps_from_proto": {
"total": 0.04804538299708838,
"count": 74,
"is_parallel": true,
"self": 0.010409197989019958,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.037636185008068423,
"count": 296,
"is_parallel": true,
"self": 0.037636185008068423
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 2710.0833730980457,
"count": 494663,
"self": 63.375225928014515,
"children": {
"process_trajectory": {
"total": 710.8399682470418,
"count": 494663,
"self": 706.4292853540411,
"children": {
"RLTrainer._checkpoint": {
"total": 4.41068289300074,
"count": 15,
"self": 4.41068289300074
}
}
},
"_update_policy": {
"total": 1935.8681789229895,
"count": 350,
"self": 816.7539139299679,
"children": {
"TorchPOCAOptimizer.update": {
"total": 1119.1142649930216,
"count": 10500,
"self": 1119.1142649930216
}
}
}
}
}
}
},
"trainer_threads": {
"total": 5.410001904238015e-07,
"count": 1,
"self": 5.410001904238015e-07
},
"TrainerController._save_models": {
"total": 0.1941416779991414,
"count": 1,
"self": 0.005221129998972174,
"children": {
"RLTrainer._checkpoint": {
"total": 0.18892054800016922,
"count": 1,
"self": 0.18892054800016922
}
}
}
}
}
}
}
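
For quick inspection, here is a minimal, self-contained Python sketch that reads this timers.json back in, prints the gauge statistics, and walks the nested timer tree. The file path and the 1-second reporting threshold are assumptions for illustration, not part of the log itself.

import json
from pathlib import Path

# Assumed location of this file; adjust to wherever the log is stored.
TIMERS_PATH = Path("run_logs/timers.json")

def print_timer_tree(node, name="root", depth=0, min_seconds=1.0):
    # Print each timer node whose cumulative total exceeds min_seconds,
    # indenting by nesting depth to mirror the JSON hierarchy.
    total = node.get("total", 0.0)
    if total >= min_seconds:
        print(f"{'  ' * depth}{name}: {total:.1f}s across {node.get('count', 0)} calls")
    for child_name, child in node.get("children", {}).items():
        print_timer_tree(child, child_name, depth + 1, min_seconds)

data = json.loads(TIMERS_PATH.read_text())

# Each gauge stores the latest value plus a running min/max/count.
for gauge_name, gauge in data["gauges"].items():
    print(f"{gauge_name}: value={gauge['value']:.4f} "
          f"min={gauge['min']:.4f} max={gauge['max']:.4f} count={gauge['count']}")

# The remainder of the file is a hierarchical timing tree rooted at "root".
print_timer_tree(data, data.get("name", "root"))

On this run, such a walk would show, for instance, that communicator.exchange (about 2697 s) accounts for most of the env_step time within the roughly 9568 s total.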