{ "best_metric": 1.3073620796203613, "best_model_checkpoint": "mobilebert_sa_pre-training-complete/checkpoint-300000", "epoch": 41.98740377886634, "global_step": 300000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.882544181393798e-05, "loss": 1.6028, "step": 7145 }, { "epoch": 1.0, "eval_accuracy": 0.6935334549025108, "eval_loss": 1.4525387287139893, "eval_runtime": 1.4716, "eval_samples_per_second": 325.49, "eval_steps_per_second": 10.193, "step": 7145 }, { "epoch": 2.0, "learning_rate": 4.763421140380127e-05, "loss": 1.5524, "step": 14290 }, { "epoch": 2.0, "eval_accuracy": 0.6992782005371531, "eval_loss": 1.437490463256836, "eval_runtime": 1.5211, "eval_samples_per_second": 314.9, "eval_steps_per_second": 9.861, "step": 14290 }, { "epoch": 3.0, "learning_rate": 4.6442980993664556e-05, "loss": 1.5323, "step": 21435 }, { "epoch": 3.0, "eval_accuracy": 0.6993441976976554, "eval_loss": 1.4193694591522217, "eval_runtime": 1.4759, "eval_samples_per_second": 324.542, "eval_steps_per_second": 10.163, "step": 21435 }, { "epoch": 4.0, "learning_rate": 4.5251750583527844e-05, "loss": 1.5191, "step": 28580 }, { "epoch": 4.0, "eval_accuracy": 0.7026513032777716, "eval_loss": 1.4109910726547241, "eval_runtime": 1.4968, "eval_samples_per_second": 320.019, "eval_steps_per_second": 10.021, "step": 28580 }, { "epoch": 5.0, "learning_rate": 4.406052017339113e-05, "loss": 1.5025, "step": 35725 }, { "epoch": 5.0, "eval_accuracy": 0.7013675690761931, "eval_loss": 1.4167572259902954, "eval_runtime": 1.4782, "eval_samples_per_second": 324.039, "eval_steps_per_second": 10.147, "step": 35725 }, { "epoch": 6.0, "learning_rate": 4.286928976325442e-05, "loss": 1.4902, "step": 42870 }, { "epoch": 6.0, "eval_accuracy": 0.7011720396863318, "eval_loss": 1.3931331634521484, "eval_runtime": 1.4734, "eval_samples_per_second": 325.107, "eval_steps_per_second": 10.181, "step": 42870 }, { "epoch": 7.0, "learning_rate": 4.167805935311771e-05, "loss": 1.4813, "step": 50015 }, { "epoch": 7.0, "eval_accuracy": 0.7056545531078995, "eval_loss": 1.3738043308258057, "eval_runtime": 1.4644, "eval_samples_per_second": 327.106, "eval_steps_per_second": 10.243, "step": 50015 }, { "epoch": 8.0, "learning_rate": 4.0486828942981e-05, "loss": 1.4751, "step": 57160 }, { "epoch": 8.0, "eval_accuracy": 0.6995995407320283, "eval_loss": 1.4237422943115234, "eval_runtime": 1.459, "eval_samples_per_second": 328.317, "eval_steps_per_second": 10.281, "step": 57160 }, { "epoch": 9.0, "learning_rate": 3.929559853284429e-05, "loss": 1.4689, "step": 64305 }, { "epoch": 9.0, "eval_accuracy": 0.704691011235955, "eval_loss": 1.3969331979751587, "eval_runtime": 1.6056, "eval_samples_per_second": 298.322, "eval_steps_per_second": 9.342, "step": 64305 }, { "epoch": 10.0, "learning_rate": 3.8104368122707576e-05, "loss": 1.4626, "step": 71450 }, { "epoch": 10.0, "eval_accuracy": 0.7067709060449532, "eval_loss": 1.391621470451355, "eval_runtime": 1.4719, "eval_samples_per_second": 325.421, "eval_steps_per_second": 10.191, "step": 71450 }, { "epoch": 11.0, "learning_rate": 3.691313771257086e-05, "loss": 1.4566, "step": 78595 }, { "epoch": 11.0, "eval_accuracy": 0.7071985535088711, "eval_loss": 1.3686023950576782, "eval_runtime": 1.4629, "eval_samples_per_second": 327.432, "eval_steps_per_second": 10.254, "step": 78595 }, { "epoch": 12.0, "learning_rate": 3.572190730243415e-05, "loss": 1.451, "step": 85740 }, { "epoch": 12.0, "eval_accuracy": 0.7060222091689743, "eval_loss": 1.3811498880386353, "eval_runtime": 1.4641, "eval_samples_per_second": 327.173, "eval_steps_per_second": 10.246, "step": 85740 }, { "epoch": 13.0, "learning_rate": 3.453067689229744e-05, "loss": 1.4478, "step": 92885 }, { "epoch": 13.0, "eval_accuracy": 0.7091579355840124, "eval_loss": 1.3597520589828491, "eval_runtime": 1.4632, "eval_samples_per_second": 327.355, "eval_steps_per_second": 10.251, "step": 92885 }, { "epoch": 14.0, "learning_rate": 3.3339446482160726e-05, "loss": 1.4441, "step": 100030 }, { "epoch": 14.0, "eval_accuracy": 0.7054075191330094, "eval_loss": 1.3789618015289307, "eval_runtime": 1.4621, "eval_samples_per_second": 327.608, "eval_steps_per_second": 10.259, "step": 100030 }, { "epoch": 15.0, "learning_rate": 3.214821607202401e-05, "loss": 1.4379, "step": 107175 }, { "epoch": 15.0, "eval_accuracy": 0.7065809145017066, "eval_loss": 1.379388451576233, "eval_runtime": 1.5875, "eval_samples_per_second": 301.725, "eval_steps_per_second": 9.449, "step": 107175 }, { "epoch": 16.0, "learning_rate": 3.09569856618873e-05, "loss": 1.4353, "step": 114320 }, { "epoch": 16.0, "eval_accuracy": 0.710198236648509, "eval_loss": 1.3609341382980347, "eval_runtime": 1.4593, "eval_samples_per_second": 328.244, "eval_steps_per_second": 10.279, "step": 114320 }, { "epoch": 17.0, "learning_rate": 2.976575525175058e-05, "loss": 1.43, "step": 121465 }, { "epoch": 17.0, "eval_accuracy": 0.7083252258512857, "eval_loss": 1.3685261011123657, "eval_runtime": 1.4875, "eval_samples_per_second": 322.019, "eval_steps_per_second": 10.084, "step": 121465 }, { "epoch": 18.0, "learning_rate": 2.857452484161387e-05, "loss": 1.4278, "step": 128610 }, { "epoch": 18.0, "eval_accuracy": 0.7036037555518075, "eval_loss": 1.3953258991241455, "eval_runtime": 1.4616, "eval_samples_per_second": 327.715, "eval_steps_per_second": 10.262, "step": 128610 }, { "epoch": 19.0, "learning_rate": 2.7383294431477156e-05, "loss": 1.4219, "step": 135755 }, { "epoch": 19.0, "eval_accuracy": 0.7085320020194088, "eval_loss": 1.3756214380264282, "eval_runtime": 1.4616, "eval_samples_per_second": 327.73, "eval_steps_per_second": 10.263, "step": 135755 }, { "epoch": 20.0, "learning_rate": 2.6192064021340444e-05, "loss": 1.4197, "step": 142900 }, { "epoch": 20.0, "eval_accuracy": 0.7089573167311684, "eval_loss": 1.3597127199172974, "eval_runtime": 1.4718, "eval_samples_per_second": 325.445, "eval_steps_per_second": 10.191, "step": 142900 }, { "epoch": 21.0, "learning_rate": 2.5000833611203735e-05, "loss": 1.4169, "step": 150045 }, { "epoch": 21.0, "eval_accuracy": 0.7060544426179265, "eval_loss": 1.367296576499939, "eval_runtime": 1.4625, "eval_samples_per_second": 327.518, "eval_steps_per_second": 10.256, "step": 150045 }, { "epoch": 22.0, "learning_rate": 2.3809603201067022e-05, "loss": 1.4146, "step": 157190 }, { "epoch": 22.0, "eval_accuracy": 0.707288269036104, "eval_loss": 1.3753403425216675, "eval_runtime": 1.4573, "eval_samples_per_second": 328.688, "eval_steps_per_second": 10.293, "step": 157190 }, { "epoch": 23.0, "learning_rate": 2.2618372790930313e-05, "loss": 1.4109, "step": 164335 }, { "epoch": 23.0, "eval_accuracy": 0.7081938623386121, "eval_loss": 1.3696134090423584, "eval_runtime": 1.4581, "eval_samples_per_second": 328.502, "eval_steps_per_second": 10.287, "step": 164335 }, { "epoch": 24.0, "learning_rate": 2.14271423807936e-05, "loss": 1.4073, "step": 171480 }, { "epoch": 24.0, "eval_accuracy": 0.7092472511981956, "eval_loss": 1.356264352798462, "eval_runtime": 1.4561, "eval_samples_per_second": 328.957, "eval_steps_per_second": 10.301, "step": 171480 }, { "epoch": 25.0, "learning_rate": 2.0235911970656888e-05, "loss": 1.4054, "step": 178625 }, { "epoch": 25.0, "eval_accuracy": 0.7103286516069584, "eval_loss": 1.371171474456787, "eval_runtime": 1.475, "eval_samples_per_second": 324.736, "eval_steps_per_second": 10.169, "step": 178625 }, { "epoch": 26.0, "learning_rate": 1.9044681560520176e-05, "loss": 1.402, "step": 185770 }, { "epoch": 26.0, "eval_accuracy": 0.7112762628520339, "eval_loss": 1.3528329133987427, "eval_runtime": 1.467, "eval_samples_per_second": 326.525, "eval_steps_per_second": 10.225, "step": 185770 }, { "epoch": 27.0, "learning_rate": 1.7853451150383463e-05, "loss": 1.4001, "step": 192915 }, { "epoch": 27.0, "eval_accuracy": 0.712307605886979, "eval_loss": 1.336666226387024, "eval_runtime": 1.4596, "eval_samples_per_second": 328.179, "eval_steps_per_second": 10.277, "step": 192915 }, { "epoch": 28.0, "learning_rate": 1.666222074024675e-05, "loss": 1.397, "step": 200060 }, { "epoch": 28.0, "eval_accuracy": 0.7117655307810966, "eval_loss": 1.3508223295211792, "eval_runtime": 1.458, "eval_samples_per_second": 328.539, "eval_steps_per_second": 10.288, "step": 200060 }, { "epoch": 29.0, "learning_rate": 1.5470990330110038e-05, "loss": 1.3955, "step": 207205 }, { "epoch": 29.0, "eval_accuracy": 0.7116529947185077, "eval_loss": 1.3571882247924805, "eval_runtime": 1.6349, "eval_samples_per_second": 292.987, "eval_steps_per_second": 9.175, "step": 207205 }, { "epoch": 30.0, "learning_rate": 1.4279759919973326e-05, "loss": 1.3937, "step": 214350 }, { "epoch": 30.0, "eval_accuracy": 0.7095319458838688, "eval_loss": 1.356575846672058, "eval_runtime": 1.4657, "eval_samples_per_second": 326.804, "eval_steps_per_second": 10.234, "step": 214350 }, { "epoch": 31.0, "learning_rate": 1.3088529509836615e-05, "loss": 1.3901, "step": 221495 }, { "epoch": 31.0, "eval_accuracy": 0.7116992819935238, "eval_loss": 1.3515229225158691, "eval_runtime": 1.461, "eval_samples_per_second": 327.859, "eval_steps_per_second": 10.267, "step": 221495 }, { "epoch": 32.0, "learning_rate": 1.18972990996999e-05, "loss": 1.3874, "step": 228640 }, { "epoch": 32.0, "eval_accuracy": 0.7118393529493795, "eval_loss": 1.3445274829864502, "eval_runtime": 1.4728, "eval_samples_per_second": 325.229, "eval_steps_per_second": 10.185, "step": 228640 }, { "epoch": 33.0, "learning_rate": 1.0706068689563188e-05, "loss": 1.386, "step": 235785 }, { "epoch": 33.0, "eval_accuracy": 0.7097090095131505, "eval_loss": 1.361108660697937, "eval_runtime": 1.4621, "eval_samples_per_second": 327.607, "eval_steps_per_second": 10.259, "step": 235785 }, { "epoch": 34.0, "learning_rate": 9.514838279426476e-06, "loss": 1.3833, "step": 242930 }, { "epoch": 34.0, "eval_accuracy": 0.7086746246959827, "eval_loss": 1.350243091583252, "eval_runtime": 1.4812, "eval_samples_per_second": 323.387, "eval_steps_per_second": 10.127, "step": 242930 }, { "epoch": 35.0, "learning_rate": 8.323607869289763e-06, "loss": 1.3822, "step": 250075 }, { "epoch": 35.0, "eval_accuracy": 0.7108018854610629, "eval_loss": 1.3657063245773315, "eval_runtime": 1.4712, "eval_samples_per_second": 325.58, "eval_steps_per_second": 10.196, "step": 250075 }, { "epoch": 36.0, "learning_rate": 7.132377459153051e-06, "loss": 1.3797, "step": 257220 }, { "epoch": 36.0, "eval_accuracy": 0.7107789319595755, "eval_loss": 1.3575541973114014, "eval_runtime": 1.4667, "eval_samples_per_second": 326.589, "eval_steps_per_second": 10.227, "step": 257220 }, { "epoch": 37.0, "learning_rate": 5.941147049016339e-06, "loss": 1.3793, "step": 264365 }, { "epoch": 37.0, "eval_accuracy": 0.710604865960802, "eval_loss": 1.3471879959106445, "eval_runtime": 1.4747, "eval_samples_per_second": 324.802, "eval_steps_per_second": 10.171, "step": 264365 }, { "epoch": 38.0, "learning_rate": 4.749916638879627e-06, "loss": 1.3763, "step": 271510 }, { "epoch": 38.0, "eval_accuracy": 0.7155870445344129, "eval_loss": 1.3322880268096924, "eval_runtime": 1.4923, "eval_samples_per_second": 320.979, "eval_steps_per_second": 10.052, "step": 271510 }, { "epoch": 39.0, "learning_rate": 3.5586862287429143e-06, "loss": 1.3762, "step": 278655 }, { "epoch": 39.0, "eval_accuracy": 0.7144579664629017, "eval_loss": 1.3325406312942505, "eval_runtime": 1.6301, "eval_samples_per_second": 293.852, "eval_steps_per_second": 9.202, "step": 278655 }, { "epoch": 40.0, "learning_rate": 2.3674558186062022e-06, "loss": 1.3748, "step": 285800 }, { "epoch": 40.0, "eval_accuracy": 0.7138002117109589, "eval_loss": 1.3242748975753784, "eval_runtime": 1.4707, "eval_samples_per_second": 325.685, "eval_steps_per_second": 10.199, "step": 285800 }, { "epoch": 41.0, "learning_rate": 1.17622540846949e-06, "loss": 1.3733, "step": 292945 }, { "epoch": 41.0, "eval_accuracy": 0.7170023313951855, "eval_loss": 1.3217717409133911, "eval_runtime": 1.459, "eval_samples_per_second": 328.301, "eval_steps_per_second": 10.281, "step": 292945 }, { "epoch": 41.99, "learning_rate": 0.0, "loss": 1.3722, "step": 300000 }, { "epoch": 41.99, "eval_accuracy": 0.7186174960946218, "eval_loss": 1.3073620796203613, "eval_runtime": 1.4662, "eval_samples_per_second": 326.688, "eval_steps_per_second": 10.23, "step": 300000 }, { "epoch": 41.99, "step": 300000, "total_flos": 9.562938924439962e+17, "train_loss": 1.4300982942708333, "train_runtime": 103608.4476, "train_samples_per_second": 92.657, "train_steps_per_second": 2.896 } ], "max_steps": 300000, "num_train_epochs": 42, "total_flos": 9.562938924439962e+17, "trial_name": null, "trial_params": null }