2x l2 res lin
25407e7
{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 0.657804012298584,
"min": 0.6439549922943115,
"max": 0.9220815896987915,
"count": 4000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 13324.4775390625,
"min": 9013.736328125,
"max": 31204.041015625,
"count": 4000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 70.84057971014492,
"min": 37.41984732824427,
"max": 118.0952380952381,
"count": 4000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19552.0,
"min": 16444.0,
"max": 22664.0,
"count": 4000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1785.7870992616793,
"min": 1665.5704364111905,
"max": 1848.444050226308,
"count": 4000
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 246438.61969811172,
"min": 149123.18746302405,
"max": 451733.0875595351,
"count": 4000
},
"SoccerTwos.Step.mean": {
"value": 99999950.0,
"min": 60009926.0,
"max": 99999950.0,
"count": 4000
},
"SoccerTwos.Step.sum": {
"value": 99999950.0,
"min": 60009926.0,
"max": 99999950.0,
"count": 4000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.02403123490512371,
"min": -0.12348798662424088,
"max": 0.05139973759651184,
"count": 4000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -3.316310405731201,
"min": -20.992958068847656,
"max": 7.915559768676758,
"count": 4000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.023414287716150284,
"min": -0.12312929332256317,
"max": 0.05094476044178009,
"count": 4000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -3.2311716079711914,
"min": -20.93198013305664,
"max": 7.845493316650391,
"count": 4000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 4000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 4000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.04548695899438167,
"min": -0.4755757586522536,
"max": 0.3431596885355868,
"count": 4000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -6.27720034122467,
"min": -62.77600014209747,
"max": 51.101200103759766,
"count": 4000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.04548695899438167,
"min": -0.4755757586522536,
"max": 0.3431596885355868,
"count": 4000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -6.27720034122467,
"min": -62.77600014209747,
"max": 51.101200103759766,
"count": 4000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 4000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 4000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.012369273196478995,
"min": 0.00639950543481973,
"max": 0.018580751745806385,
"count": 973
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.012369273196478995,
"min": 0.00639950543481973,
"max": 0.018580751745806385,
"count": 973
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.09552693540851274,
"min": 0.07807423248887062,
"max": 0.11450178325176238,
"count": 973
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.09552693540851274,
"min": 0.07807423248887062,
"max": 0.11450178325176238,
"count": 973
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.09561899180213611,
"min": 0.07871665557225545,
"max": 0.11573187485337258,
"count": 973
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.09561899180213611,
"min": 0.07871665557225545,
"max": 0.11573187485337258,
"count": 973
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 1.0323999147947604e-08,
"min": 1.0323999147947604e-08,
"max": 0.00047950611604116196,
"count": 973
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 1.0323999147947604e-08,
"min": 1.0323999147947604e-08,
"max": 0.00047950611604116196,
"count": 973
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.100000852,
"min": 0.100000852,
"max": 0.13995883800000003,
"count": 973
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.100000852,
"min": 0.100000852,
"max": 0.13995883800000003,
"count": 973
},
"SoccerTwos.Policy.Beta.mean": {
"value": 1.0042514799999778e-05,
"min": 1.0042514799999778e-05,
"max": 0.0020039460162,
"count": 973
},
"SoccerTwos.Policy.Beta.sum": {
"value": 1.0042514799999778e-05,
"min": 1.0042514799999778e-05,
"max": 0.0020039460162,
"count": 973
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1676307162",
"python_version": "3.9.16 (main, Jan 11 2023, 16:05:54) \n[GCC 11.2.0]",
"command_line_arguments": "/home/dfm/anaconda3/envs/hf-rl-scr2s/bin/mlagents-learn ./config/poca/SoccerTwos-2x-12-resnet-l2-linear.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --results-dir results-2x-12-resnet-l2-linear --num-envs=7 --resume --run-id=SoccerTwos",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0+cu102",
"numpy_version": "1.21.2",
"end_time_seconds": "1676346654"
},
"total": 39491.56532316399,
"count": 1,
"self": 0.37074283976107836,
"children": {
"run_training.setup": {
"total": 0.02767869713716209,
"count": 1,
"self": 0.02767869713716209
},
"TrainerController.start_learning": {
"total": 39491.16690162709,
"count": 1,
"self": 33.59568607318215,
"children": {
"TrainerController._reset_env": {
"total": 27.265360637335107,
"count": 201,
"self": 27.265360637335107
},
"TrainerController.advance": {
"total": 39430.04803913669,
"count": 930879,
"self": 27.246289231115952,
"children": {
"env_step": {
"total": 20429.227719880873,
"count": 930879,
"self": 7700.056544515071,
"children": {
"SubprocessEnvManager._take_step": {
"total": 12695.632731482387,
"count": 2814983,
"self": 386.68545385845937,
"children": {
"TorchPolicy.evaluate": {
"total": 12308.947277623927,
"count": 5174268,
"self": 12308.947277623927
}
}
},
"workers": {
"total": 33.53844388341531,
"count": 930879,
"self": 0.0,
"children": {
"worker_root": {
"total": 276178.97632250306,
"count": 2814280,
"is_parallel": true,
"self": 212880.03933615354,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.003266595071181655,
"count": 2,
"is_parallel": true,
"self": 0.0009026317857205868,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.002363963285461068,
"count": 8,
"is_parallel": true,
"self": 0.002363963285461068
}
}
},
"UnityEnvironment.step": {
"total": 0.04986436199396849,
"count": 1,
"is_parallel": true,
"self": 0.001236666925251484,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0010303319431841373,
"count": 1,
"is_parallel": true,
"self": 0.0010303319431841373
},
"communicator.exchange": {
"total": 0.0449853150639683,
"count": 1,
"is_parallel": true,
"self": 0.0449853150639683
},
"steps_from_proto": {
"total": 0.0026120480615645647,
"count": 2,
"is_parallel": true,
"self": 0.0004060741048306227,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.002205973956733942,
"count": 8,
"is_parallel": true,
"self": 0.002205973956733942
}
}
}
}
}
}
},
"steps_from_proto": {
"total": 5.926313872681931,
"count": 2800,
"is_parallel": true,
"self": 0.8897827882319689,
"children": {
"_process_rank_one_or_two_observation": {
"total": 5.036531084449962,
"count": 11200,
"is_parallel": true,
"self": 5.036531084449962
}
}
},
"UnityEnvironment.step": {
"total": 63293.01067247684,
"count": 2814279,
"is_parallel": true,
"self": 5000.369204876246,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 2897.470194905065,
"count": 2814279,
"is_parallel": true,
"self": 2897.470194905065
},
"communicator.exchange": {
"total": 42666.08549953322,
"count": 2814279,
"is_parallel": true,
"self": 42666.08549953322
},
"steps_from_proto": {
"total": 12729.085773162311,
"count": 5628558,
"is_parallel": true,
"self": 1811.1029579455499,
"children": {
"_process_rank_one_or_two_observation": {
"total": 10917.982815216761,
"count": 22514232,
"is_parallel": true,
"self": 10917.982815216761
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 18973.574030024698,
"count": 930879,
"self": 424.5758848800324,
"children": {
"process_trajectory": {
"total": 7482.0648319923785,
"count": 930879,
"self": 7462.430964581668,
"children": {
"RLTrainer._checkpoint": {
"total": 19.63386741071008,
"count": 80,
"self": 19.63386741071008
}
}
},
"_update_policy": {
"total": 11066.933313152287,
"count": 973,
"self": 7924.29891131958,
"children": {
"TorchPOCAOptimizer.update": {
"total": 3142.634401832707,
"count": 29190,
"self": 3142.634401832707
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.0898802429437637e-06,
"count": 1,
"self": 1.0898802429437637e-06
},
"TrainerController._save_models": {
"total": 0.2578146900050342,
"count": 1,
"self": 0.0019481771159917116,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2558665128890425,
"count": 1,
"self": 0.2558665128890425
}
}
}
}
}
}
}
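
The JSON above is the timer/gauge summary that ML-Agents writes at the end of a run: each entry under "gauges" records the most recent value of a metric together with its running min, max, and update count, and the nested "children" tree breaks down where wall-clock time was spent. The snippet below is a minimal sketch of how one might inspect it; it assumes the file is saved locally as run_logs/timers.json (the usual location inside an ML-Agents results directory) -- the path and the walk() helper are illustrative, not part of the mlagents tooling.

import json

# Assumed local path; adjust to wherever this file lives in your results directory.
with open("run_logs/timers.json") as f:
    timers = json.load(f)

# Each gauge stores the latest value plus its running min/max and an update count.
for name, gauge in timers["gauges"].items():
    print(f"{name}: value={gauge['value']:.4f} "
          f"(min={gauge['min']:.4f}, max={gauge['max']:.4f}, count={gauge['count']})")

# The timer tree is nested under "children"; walk it to see where time went.
def walk(node, name="root", depth=0):
    print(f"{'  ' * depth}{name}: {node.get('total', 0.0):.1f}s over {node.get('count', 0)} calls")
    for child_name, child in node.get("children", {}).items():
        walk(child, child_name, depth + 1)

walk(timers)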