gokuls's picture
End of training
561d147
{
"best_metric": 1.3073620796203613,
"best_model_checkpoint": "mobilebert_sa_pre-training-complete/checkpoint-300000",
"epoch": 41.98740377886634,
"global_step": 300000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 4.882544181393798e-05,
"loss": 1.6028,
"step": 7145
},
{
"epoch": 1.0,
"eval_accuracy": 0.6935334549025108,
"eval_loss": 1.4525387287139893,
"eval_runtime": 1.4716,
"eval_samples_per_second": 325.49,
"eval_steps_per_second": 10.193,
"step": 7145
},
{
"epoch": 2.0,
"learning_rate": 4.763421140380127e-05,
"loss": 1.5524,
"step": 14290
},
{
"epoch": 2.0,
"eval_accuracy": 0.6992782005371531,
"eval_loss": 1.437490463256836,
"eval_runtime": 1.5211,
"eval_samples_per_second": 314.9,
"eval_steps_per_second": 9.861,
"step": 14290
},
{
"epoch": 3.0,
"learning_rate": 4.6442980993664556e-05,
"loss": 1.5323,
"step": 21435
},
{
"epoch": 3.0,
"eval_accuracy": 0.6993441976976554,
"eval_loss": 1.4193694591522217,
"eval_runtime": 1.4759,
"eval_samples_per_second": 324.542,
"eval_steps_per_second": 10.163,
"step": 21435
},
{
"epoch": 4.0,
"learning_rate": 4.5251750583527844e-05,
"loss": 1.5191,
"step": 28580
},
{
"epoch": 4.0,
"eval_accuracy": 0.7026513032777716,
"eval_loss": 1.4109910726547241,
"eval_runtime": 1.4968,
"eval_samples_per_second": 320.019,
"eval_steps_per_second": 10.021,
"step": 28580
},
{
"epoch": 5.0,
"learning_rate": 4.406052017339113e-05,
"loss": 1.5025,
"step": 35725
},
{
"epoch": 5.0,
"eval_accuracy": 0.7013675690761931,
"eval_loss": 1.4167572259902954,
"eval_runtime": 1.4782,
"eval_samples_per_second": 324.039,
"eval_steps_per_second": 10.147,
"step": 35725
},
{
"epoch": 6.0,
"learning_rate": 4.286928976325442e-05,
"loss": 1.4902,
"step": 42870
},
{
"epoch": 6.0,
"eval_accuracy": 0.7011720396863318,
"eval_loss": 1.3931331634521484,
"eval_runtime": 1.4734,
"eval_samples_per_second": 325.107,
"eval_steps_per_second": 10.181,
"step": 42870
},
{
"epoch": 7.0,
"learning_rate": 4.167805935311771e-05,
"loss": 1.4813,
"step": 50015
},
{
"epoch": 7.0,
"eval_accuracy": 0.7056545531078995,
"eval_loss": 1.3738043308258057,
"eval_runtime": 1.4644,
"eval_samples_per_second": 327.106,
"eval_steps_per_second": 10.243,
"step": 50015
},
{
"epoch": 8.0,
"learning_rate": 4.0486828942981e-05,
"loss": 1.4751,
"step": 57160
},
{
"epoch": 8.0,
"eval_accuracy": 0.6995995407320283,
"eval_loss": 1.4237422943115234,
"eval_runtime": 1.459,
"eval_samples_per_second": 328.317,
"eval_steps_per_second": 10.281,
"step": 57160
},
{
"epoch": 9.0,
"learning_rate": 3.929559853284429e-05,
"loss": 1.4689,
"step": 64305
},
{
"epoch": 9.0,
"eval_accuracy": 0.704691011235955,
"eval_loss": 1.3969331979751587,
"eval_runtime": 1.6056,
"eval_samples_per_second": 298.322,
"eval_steps_per_second": 9.342,
"step": 64305
},
{
"epoch": 10.0,
"learning_rate": 3.8104368122707576e-05,
"loss": 1.4626,
"step": 71450
},
{
"epoch": 10.0,
"eval_accuracy": 0.7067709060449532,
"eval_loss": 1.391621470451355,
"eval_runtime": 1.4719,
"eval_samples_per_second": 325.421,
"eval_steps_per_second": 10.191,
"step": 71450
},
{
"epoch": 11.0,
"learning_rate": 3.691313771257086e-05,
"loss": 1.4566,
"step": 78595
},
{
"epoch": 11.0,
"eval_accuracy": 0.7071985535088711,
"eval_loss": 1.3686023950576782,
"eval_runtime": 1.4629,
"eval_samples_per_second": 327.432,
"eval_steps_per_second": 10.254,
"step": 78595
},
{
"epoch": 12.0,
"learning_rate": 3.572190730243415e-05,
"loss": 1.451,
"step": 85740
},
{
"epoch": 12.0,
"eval_accuracy": 0.7060222091689743,
"eval_loss": 1.3811498880386353,
"eval_runtime": 1.4641,
"eval_samples_per_second": 327.173,
"eval_steps_per_second": 10.246,
"step": 85740
},
{
"epoch": 13.0,
"learning_rate": 3.453067689229744e-05,
"loss": 1.4478,
"step": 92885
},
{
"epoch": 13.0,
"eval_accuracy": 0.7091579355840124,
"eval_loss": 1.3597520589828491,
"eval_runtime": 1.4632,
"eval_samples_per_second": 327.355,
"eval_steps_per_second": 10.251,
"step": 92885
},
{
"epoch": 14.0,
"learning_rate": 3.3339446482160726e-05,
"loss": 1.4441,
"step": 100030
},
{
"epoch": 14.0,
"eval_accuracy": 0.7054075191330094,
"eval_loss": 1.3789618015289307,
"eval_runtime": 1.4621,
"eval_samples_per_second": 327.608,
"eval_steps_per_second": 10.259,
"step": 100030
},
{
"epoch": 15.0,
"learning_rate": 3.214821607202401e-05,
"loss": 1.4379,
"step": 107175
},
{
"epoch": 15.0,
"eval_accuracy": 0.7065809145017066,
"eval_loss": 1.379388451576233,
"eval_runtime": 1.5875,
"eval_samples_per_second": 301.725,
"eval_steps_per_second": 9.449,
"step": 107175
},
{
"epoch": 16.0,
"learning_rate": 3.09569856618873e-05,
"loss": 1.4353,
"step": 114320
},
{
"epoch": 16.0,
"eval_accuracy": 0.710198236648509,
"eval_loss": 1.3609341382980347,
"eval_runtime": 1.4593,
"eval_samples_per_second": 328.244,
"eval_steps_per_second": 10.279,
"step": 114320
},
{
"epoch": 17.0,
"learning_rate": 2.976575525175058e-05,
"loss": 1.43,
"step": 121465
},
{
"epoch": 17.0,
"eval_accuracy": 0.7083252258512857,
"eval_loss": 1.3685261011123657,
"eval_runtime": 1.4875,
"eval_samples_per_second": 322.019,
"eval_steps_per_second": 10.084,
"step": 121465
},
{
"epoch": 18.0,
"learning_rate": 2.857452484161387e-05,
"loss": 1.4278,
"step": 128610
},
{
"epoch": 18.0,
"eval_accuracy": 0.7036037555518075,
"eval_loss": 1.3953258991241455,
"eval_runtime": 1.4616,
"eval_samples_per_second": 327.715,
"eval_steps_per_second": 10.262,
"step": 128610
},
{
"epoch": 19.0,
"learning_rate": 2.7383294431477156e-05,
"loss": 1.4219,
"step": 135755
},
{
"epoch": 19.0,
"eval_accuracy": 0.7085320020194088,
"eval_loss": 1.3756214380264282,
"eval_runtime": 1.4616,
"eval_samples_per_second": 327.73,
"eval_steps_per_second": 10.263,
"step": 135755
},
{
"epoch": 20.0,
"learning_rate": 2.6192064021340444e-05,
"loss": 1.4197,
"step": 142900
},
{
"epoch": 20.0,
"eval_accuracy": 0.7089573167311684,
"eval_loss": 1.3597127199172974,
"eval_runtime": 1.4718,
"eval_samples_per_second": 325.445,
"eval_steps_per_second": 10.191,
"step": 142900
},
{
"epoch": 21.0,
"learning_rate": 2.5000833611203735e-05,
"loss": 1.4169,
"step": 150045
},
{
"epoch": 21.0,
"eval_accuracy": 0.7060544426179265,
"eval_loss": 1.367296576499939,
"eval_runtime": 1.4625,
"eval_samples_per_second": 327.518,
"eval_steps_per_second": 10.256,
"step": 150045
},
{
"epoch": 22.0,
"learning_rate": 2.3809603201067022e-05,
"loss": 1.4146,
"step": 157190
},
{
"epoch": 22.0,
"eval_accuracy": 0.707288269036104,
"eval_loss": 1.3753403425216675,
"eval_runtime": 1.4573,
"eval_samples_per_second": 328.688,
"eval_steps_per_second": 10.293,
"step": 157190
},
{
"epoch": 23.0,
"learning_rate": 2.2618372790930313e-05,
"loss": 1.4109,
"step": 164335
},
{
"epoch": 23.0,
"eval_accuracy": 0.7081938623386121,
"eval_loss": 1.3696134090423584,
"eval_runtime": 1.4581,
"eval_samples_per_second": 328.502,
"eval_steps_per_second": 10.287,
"step": 164335
},
{
"epoch": 24.0,
"learning_rate": 2.14271423807936e-05,
"loss": 1.4073,
"step": 171480
},
{
"epoch": 24.0,
"eval_accuracy": 0.7092472511981956,
"eval_loss": 1.356264352798462,
"eval_runtime": 1.4561,
"eval_samples_per_second": 328.957,
"eval_steps_per_second": 10.301,
"step": 171480
},
{
"epoch": 25.0,
"learning_rate": 2.0235911970656888e-05,
"loss": 1.4054,
"step": 178625
},
{
"epoch": 25.0,
"eval_accuracy": 0.7103286516069584,
"eval_loss": 1.371171474456787,
"eval_runtime": 1.475,
"eval_samples_per_second": 324.736,
"eval_steps_per_second": 10.169,
"step": 178625
},
{
"epoch": 26.0,
"learning_rate": 1.9044681560520176e-05,
"loss": 1.402,
"step": 185770
},
{
"epoch": 26.0,
"eval_accuracy": 0.7112762628520339,
"eval_loss": 1.3528329133987427,
"eval_runtime": 1.467,
"eval_samples_per_second": 326.525,
"eval_steps_per_second": 10.225,
"step": 185770
},
{
"epoch": 27.0,
"learning_rate": 1.7853451150383463e-05,
"loss": 1.4001,
"step": 192915
},
{
"epoch": 27.0,
"eval_accuracy": 0.712307605886979,
"eval_loss": 1.336666226387024,
"eval_runtime": 1.4596,
"eval_samples_per_second": 328.179,
"eval_steps_per_second": 10.277,
"step": 192915
},
{
"epoch": 28.0,
"learning_rate": 1.666222074024675e-05,
"loss": 1.397,
"step": 200060
},
{
"epoch": 28.0,
"eval_accuracy": 0.7117655307810966,
"eval_loss": 1.3508223295211792,
"eval_runtime": 1.458,
"eval_samples_per_second": 328.539,
"eval_steps_per_second": 10.288,
"step": 200060
},
{
"epoch": 29.0,
"learning_rate": 1.5470990330110038e-05,
"loss": 1.3955,
"step": 207205
},
{
"epoch": 29.0,
"eval_accuracy": 0.7116529947185077,
"eval_loss": 1.3571882247924805,
"eval_runtime": 1.6349,
"eval_samples_per_second": 292.987,
"eval_steps_per_second": 9.175,
"step": 207205
},
{
"epoch": 30.0,
"learning_rate": 1.4279759919973326e-05,
"loss": 1.3937,
"step": 214350
},
{
"epoch": 30.0,
"eval_accuracy": 0.7095319458838688,
"eval_loss": 1.356575846672058,
"eval_runtime": 1.4657,
"eval_samples_per_second": 326.804,
"eval_steps_per_second": 10.234,
"step": 214350
},
{
"epoch": 31.0,
"learning_rate": 1.3088529509836615e-05,
"loss": 1.3901,
"step": 221495
},
{
"epoch": 31.0,
"eval_accuracy": 0.7116992819935238,
"eval_loss": 1.3515229225158691,
"eval_runtime": 1.461,
"eval_samples_per_second": 327.859,
"eval_steps_per_second": 10.267,
"step": 221495
},
{
"epoch": 32.0,
"learning_rate": 1.18972990996999e-05,
"loss": 1.3874,
"step": 228640
},
{
"epoch": 32.0,
"eval_accuracy": 0.7118393529493795,
"eval_loss": 1.3445274829864502,
"eval_runtime": 1.4728,
"eval_samples_per_second": 325.229,
"eval_steps_per_second": 10.185,
"step": 228640
},
{
"epoch": 33.0,
"learning_rate": 1.0706068689563188e-05,
"loss": 1.386,
"step": 235785
},
{
"epoch": 33.0,
"eval_accuracy": 0.7097090095131505,
"eval_loss": 1.361108660697937,
"eval_runtime": 1.4621,
"eval_samples_per_second": 327.607,
"eval_steps_per_second": 10.259,
"step": 235785
},
{
"epoch": 34.0,
"learning_rate": 9.514838279426476e-06,
"loss": 1.3833,
"step": 242930
},
{
"epoch": 34.0,
"eval_accuracy": 0.7086746246959827,
"eval_loss": 1.350243091583252,
"eval_runtime": 1.4812,
"eval_samples_per_second": 323.387,
"eval_steps_per_second": 10.127,
"step": 242930
},
{
"epoch": 35.0,
"learning_rate": 8.323607869289763e-06,
"loss": 1.3822,
"step": 250075
},
{
"epoch": 35.0,
"eval_accuracy": 0.7108018854610629,
"eval_loss": 1.3657063245773315,
"eval_runtime": 1.4712,
"eval_samples_per_second": 325.58,
"eval_steps_per_second": 10.196,
"step": 250075
},
{
"epoch": 36.0,
"learning_rate": 7.132377459153051e-06,
"loss": 1.3797,
"step": 257220
},
{
"epoch": 36.0,
"eval_accuracy": 0.7107789319595755,
"eval_loss": 1.3575541973114014,
"eval_runtime": 1.4667,
"eval_samples_per_second": 326.589,
"eval_steps_per_second": 10.227,
"step": 257220
},
{
"epoch": 37.0,
"learning_rate": 5.941147049016339e-06,
"loss": 1.3793,
"step": 264365
},
{
"epoch": 37.0,
"eval_accuracy": 0.710604865960802,
"eval_loss": 1.3471879959106445,
"eval_runtime": 1.4747,
"eval_samples_per_second": 324.802,
"eval_steps_per_second": 10.171,
"step": 264365
},
{
"epoch": 38.0,
"learning_rate": 4.749916638879627e-06,
"loss": 1.3763,
"step": 271510
},
{
"epoch": 38.0,
"eval_accuracy": 0.7155870445344129,
"eval_loss": 1.3322880268096924,
"eval_runtime": 1.4923,
"eval_samples_per_second": 320.979,
"eval_steps_per_second": 10.052,
"step": 271510
},
{
"epoch": 39.0,
"learning_rate": 3.5586862287429143e-06,
"loss": 1.3762,
"step": 278655
},
{
"epoch": 39.0,
"eval_accuracy": 0.7144579664629017,
"eval_loss": 1.3325406312942505,
"eval_runtime": 1.6301,
"eval_samples_per_second": 293.852,
"eval_steps_per_second": 9.202,
"step": 278655
},
{
"epoch": 40.0,
"learning_rate": 2.3674558186062022e-06,
"loss": 1.3748,
"step": 285800
},
{
"epoch": 40.0,
"eval_accuracy": 0.7138002117109589,
"eval_loss": 1.3242748975753784,
"eval_runtime": 1.4707,
"eval_samples_per_second": 325.685,
"eval_steps_per_second": 10.199,
"step": 285800
},
{
"epoch": 41.0,
"learning_rate": 1.17622540846949e-06,
"loss": 1.3733,
"step": 292945
},
{
"epoch": 41.0,
"eval_accuracy": 0.7170023313951855,
"eval_loss": 1.3217717409133911,
"eval_runtime": 1.459,
"eval_samples_per_second": 328.301,
"eval_steps_per_second": 10.281,
"step": 292945
},
{
"epoch": 41.99,
"learning_rate": 0.0,
"loss": 1.3722,
"step": 300000
},
{
"epoch": 41.99,
"eval_accuracy": 0.7186174960946218,
"eval_loss": 1.3073620796203613,
"eval_runtime": 1.4662,
"eval_samples_per_second": 326.688,
"eval_steps_per_second": 10.23,
"step": 300000
},
{
"epoch": 41.99,
"step": 300000,
"total_flos": 9.562938924439962e+17,
"train_loss": 1.4300982942708333,
"train_runtime": 103608.4476,
"train_samples_per_second": 92.657,
"train_steps_per_second": 2.896
}
],
"max_steps": 300000,
"num_train_epochs": 42,
"total_flos": 9.562938924439962e+17,
"trial_name": null,
"trial_params": null
}