roberta-large-movie-genre / trainer_state.json
Shiro's picture
Upload 13 files
514d950
raw
history blame contribute delete
No virus
62.3 kB
{
"best_metric": 0.7719072164948454,
"best_model_checkpoint": "roberta-large-movies/checkpoint-72500",
"epoch": 30.0,
"global_step": 83910,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18,
"learning_rate": 4.970504111548088e-05,
"loss": 1.7698,
"step": 500
},
{
"epoch": 0.18,
"eval_accuracy": 0.6738421395955643,
"eval_loss": 1.6167851686477661,
"eval_runtime": 0.8246,
"eval_samples_per_second": 606.37,
"eval_steps_per_second": 38.808,
"step": 500
},
{
"epoch": 0.36,
"learning_rate": 4.94082946013586e-05,
"loss": 1.7761,
"step": 1000
},
{
"epoch": 0.36,
"eval_accuracy": 0.6829508196721311,
"eval_loss": 1.6522468328475952,
"eval_runtime": 0.7873,
"eval_samples_per_second": 635.049,
"eval_steps_per_second": 40.643,
"step": 1000
},
{
"epoch": 0.54,
"learning_rate": 4.9110356334167565e-05,
"loss": 1.7626,
"step": 1500
},
{
"epoch": 0.54,
"eval_accuracy": 0.6660117878192534,
"eval_loss": 1.6534239053726196,
"eval_runtime": 0.7869,
"eval_samples_per_second": 635.425,
"eval_steps_per_second": 40.667,
"step": 1500
},
{
"epoch": 0.72,
"learning_rate": 4.8812418066976524e-05,
"loss": 1.7602,
"step": 2000
},
{
"epoch": 0.72,
"eval_accuracy": 0.6787299419597133,
"eval_loss": 1.6575504541397095,
"eval_runtime": 0.7882,
"eval_samples_per_second": 634.385,
"eval_steps_per_second": 40.601,
"step": 2000
},
{
"epoch": 0.89,
"learning_rate": 4.851447979978549e-05,
"loss": 1.7587,
"step": 2500
},
{
"epoch": 0.89,
"eval_accuracy": 0.6772697150430749,
"eval_loss": 1.6266298294067383,
"eval_runtime": 0.7893,
"eval_samples_per_second": 633.509,
"eval_steps_per_second": 40.545,
"step": 2500
},
{
"epoch": 1.07,
"learning_rate": 4.821654153259445e-05,
"loss": 1.7047,
"step": 3000
},
{
"epoch": 1.07,
"eval_accuracy": 0.6851971557853911,
"eval_loss": 1.605985164642334,
"eval_runtime": 0.8181,
"eval_samples_per_second": 611.179,
"eval_steps_per_second": 39.115,
"step": 3000
},
{
"epoch": 1.25,
"learning_rate": 4.791860326540341e-05,
"loss": 1.6782,
"step": 3500
},
{
"epoch": 1.25,
"eval_accuracy": 0.6906354515050167,
"eval_loss": 1.599035382270813,
"eval_runtime": 0.8184,
"eval_samples_per_second": 610.967,
"eval_steps_per_second": 39.102,
"step": 3500
},
{
"epoch": 1.43,
"learning_rate": 4.7620664998212375e-05,
"loss": 1.6733,
"step": 4000
},
{
"epoch": 1.43,
"eval_accuracy": 0.6967426710097719,
"eval_loss": 1.5377483367919922,
"eval_runtime": 0.819,
"eval_samples_per_second": 610.521,
"eval_steps_per_second": 39.073,
"step": 4000
},
{
"epoch": 1.61,
"learning_rate": 4.7322726731021334e-05,
"loss": 1.6664,
"step": 4500
},
{
"epoch": 1.61,
"eval_accuracy": 0.6746607762701168,
"eval_loss": 1.6434643268585205,
"eval_runtime": 0.7966,
"eval_samples_per_second": 627.631,
"eval_steps_per_second": 40.168,
"step": 4500
},
{
"epoch": 1.79,
"learning_rate": 4.70247884638303e-05,
"loss": 1.6719,
"step": 5000
},
{
"epoch": 1.79,
"eval_accuracy": 0.6907181571815718,
"eval_loss": 1.483905553817749,
"eval_runtime": 0.7989,
"eval_samples_per_second": 625.841,
"eval_steps_per_second": 40.054,
"step": 5000
},
{
"epoch": 1.97,
"learning_rate": 4.672685019663926e-05,
"loss": 1.6502,
"step": 5500
},
{
"epoch": 1.97,
"eval_accuracy": 0.6896661367249602,
"eval_loss": 1.535127878189087,
"eval_runtime": 0.823,
"eval_samples_per_second": 607.558,
"eval_steps_per_second": 38.884,
"step": 5500
},
{
"epoch": 2.15,
"learning_rate": 4.642891192944822e-05,
"loss": 1.6233,
"step": 6000
},
{
"epoch": 2.15,
"eval_accuracy": 0.6763219939373526,
"eval_loss": 1.6817570924758911,
"eval_runtime": 0.7881,
"eval_samples_per_second": 634.403,
"eval_steps_per_second": 40.602,
"step": 6000
},
{
"epoch": 2.32,
"learning_rate": 4.6130973662257184e-05,
"loss": 1.6127,
"step": 6500
},
{
"epoch": 2.32,
"eval_accuracy": 0.685335059889932,
"eval_loss": 1.5865211486816406,
"eval_runtime": 0.787,
"eval_samples_per_second": 635.291,
"eval_steps_per_second": 40.659,
"step": 6500
},
{
"epoch": 2.5,
"learning_rate": 4.5833035395066143e-05,
"loss": 1.6274,
"step": 7000
},
{
"epoch": 2.5,
"eval_accuracy": 0.7003633961017509,
"eval_loss": 1.5004233121871948,
"eval_runtime": 0.8009,
"eval_samples_per_second": 624.318,
"eval_steps_per_second": 39.956,
"step": 7000
},
{
"epoch": 2.68,
"learning_rate": 4.553628888094387e-05,
"loss": 1.601,
"step": 7500
},
{
"epoch": 2.68,
"eval_accuracy": 0.6929970129439097,
"eval_loss": 1.452188491821289,
"eval_runtime": 0.7898,
"eval_samples_per_second": 633.056,
"eval_steps_per_second": 40.516,
"step": 7500
},
{
"epoch": 2.86,
"learning_rate": 4.523835061375284e-05,
"loss": 1.6123,
"step": 8000
},
{
"epoch": 2.86,
"eval_accuracy": 0.689419795221843,
"eval_loss": 1.5370689630508423,
"eval_runtime": 0.8546,
"eval_samples_per_second": 585.05,
"eval_steps_per_second": 37.443,
"step": 8000
},
{
"epoch": 3.04,
"learning_rate": 4.4940412346561796e-05,
"loss": 1.6074,
"step": 8500
},
{
"epoch": 3.04,
"eval_accuracy": 0.6952157912345266,
"eval_loss": 1.5342369079589844,
"eval_runtime": 0.8214,
"eval_samples_per_second": 608.68,
"eval_steps_per_second": 38.956,
"step": 8500
},
{
"epoch": 3.22,
"learning_rate": 4.4642474079370755e-05,
"loss": 1.563,
"step": 9000
},
{
"epoch": 3.22,
"eval_accuracy": 0.6875834445927904,
"eval_loss": 1.568178415298462,
"eval_runtime": 0.8488,
"eval_samples_per_second": 589.06,
"eval_steps_per_second": 37.7,
"step": 9000
},
{
"epoch": 3.4,
"learning_rate": 4.4344535812179714e-05,
"loss": 1.5746,
"step": 9500
},
{
"epoch": 3.4,
"eval_accuracy": 0.6957663275352806,
"eval_loss": 1.5704632997512817,
"eval_runtime": 0.852,
"eval_samples_per_second": 586.84,
"eval_steps_per_second": 37.558,
"step": 9500
},
{
"epoch": 3.58,
"learning_rate": 4.404778929805745e-05,
"loss": 1.5539,
"step": 10000
},
{
"epoch": 3.58,
"eval_accuracy": 0.7040711597673623,
"eval_loss": 1.4710707664489746,
"eval_runtime": 0.85,
"eval_samples_per_second": 588.248,
"eval_steps_per_second": 37.648,
"step": 10000
},
{
"epoch": 3.75,
"learning_rate": 4.374985103086641e-05,
"loss": 1.578,
"step": 10500
},
{
"epoch": 3.75,
"eval_accuracy": 0.6888888888888889,
"eval_loss": 1.5465725660324097,
"eval_runtime": 0.8902,
"eval_samples_per_second": 561.645,
"eval_steps_per_second": 35.945,
"step": 10500
},
{
"epoch": 3.93,
"learning_rate": 4.345191276367537e-05,
"loss": 1.5492,
"step": 11000
},
{
"epoch": 3.93,
"eval_accuracy": 0.6968894771674388,
"eval_loss": 1.4628891944885254,
"eval_runtime": 0.8368,
"eval_samples_per_second": 597.487,
"eval_steps_per_second": 38.239,
"step": 11000
},
{
"epoch": 4.11,
"learning_rate": 4.3153974496484326e-05,
"loss": 1.5291,
"step": 11500
},
{
"epoch": 4.11,
"eval_accuracy": 0.7200132538104705,
"eval_loss": 1.4264894723892212,
"eval_runtime": 0.8798,
"eval_samples_per_second": 568.319,
"eval_steps_per_second": 36.372,
"step": 11500
},
{
"epoch": 4.29,
"learning_rate": 4.285603622929329e-05,
"loss": 1.5079,
"step": 12000
},
{
"epoch": 4.29,
"eval_accuracy": 0.6966074313408723,
"eval_loss": 1.5052707195281982,
"eval_runtime": 0.8186,
"eval_samples_per_second": 610.796,
"eval_steps_per_second": 39.091,
"step": 12000
},
{
"epoch": 4.47,
"learning_rate": 4.255809796210226e-05,
"loss": 1.5283,
"step": 12500
},
{
"epoch": 4.47,
"eval_accuracy": 0.6902654867256637,
"eval_loss": 1.5257039070129395,
"eval_runtime": 0.8002,
"eval_samples_per_second": 624.861,
"eval_steps_per_second": 39.991,
"step": 12500
},
{
"epoch": 4.65,
"learning_rate": 4.226015969491122e-05,
"loss": 1.5141,
"step": 13000
},
{
"epoch": 4.65,
"eval_accuracy": 0.6949898442789438,
"eval_loss": 1.5063292980194092,
"eval_runtime": 0.8654,
"eval_samples_per_second": 577.759,
"eval_steps_per_second": 36.977,
"step": 13000
},
{
"epoch": 4.83,
"learning_rate": 4.1962221427720176e-05,
"loss": 1.4979,
"step": 13500
},
{
"epoch": 4.83,
"eval_accuracy": 0.6955945677376615,
"eval_loss": 1.5636450052261353,
"eval_runtime": 0.8149,
"eval_samples_per_second": 613.582,
"eval_steps_per_second": 39.269,
"step": 13500
},
{
"epoch": 5.01,
"learning_rate": 4.1664283160529136e-05,
"loss": 1.5294,
"step": 14000
},
{
"epoch": 5.01,
"eval_accuracy": 0.6835193696651346,
"eval_loss": 1.587847113609314,
"eval_runtime": 0.8296,
"eval_samples_per_second": 602.733,
"eval_steps_per_second": 38.575,
"step": 14000
},
{
"epoch": 5.18,
"learning_rate": 4.13663448933381e-05,
"loss": 1.4641,
"step": 14500
},
{
"epoch": 5.18,
"eval_accuracy": 0.6962067807989258,
"eval_loss": 1.5574804544448853,
"eval_runtime": 0.81,
"eval_samples_per_second": 617.287,
"eval_steps_per_second": 39.506,
"step": 14500
},
{
"epoch": 5.36,
"learning_rate": 4.106840662614707e-05,
"loss": 1.4754,
"step": 15000
},
{
"epoch": 5.36,
"eval_accuracy": 0.7006847081838931,
"eval_loss": 1.4779187440872192,
"eval_runtime": 0.8312,
"eval_samples_per_second": 601.557,
"eval_steps_per_second": 38.5,
"step": 15000
},
{
"epoch": 5.54,
"learning_rate": 4.077046835895603e-05,
"loss": 1.4696,
"step": 15500
},
{
"epoch": 5.54,
"eval_accuracy": 0.6965271015903928,
"eval_loss": 1.451996922492981,
"eval_runtime": 0.7909,
"eval_samples_per_second": 632.19,
"eval_steps_per_second": 40.46,
"step": 15500
},
{
"epoch": 5.72,
"learning_rate": 4.0472530091764986e-05,
"loss": 1.4655,
"step": 16000
},
{
"epoch": 5.72,
"eval_accuracy": 0.683049147442327,
"eval_loss": 1.6320295333862305,
"eval_runtime": 0.8309,
"eval_samples_per_second": 601.76,
"eval_steps_per_second": 38.513,
"step": 16000
},
{
"epoch": 5.9,
"learning_rate": 4.0174591824573945e-05,
"loss": 1.4792,
"step": 16500
},
{
"epoch": 5.9,
"eval_accuracy": 0.7134165866154338,
"eval_loss": 1.415226697921753,
"eval_runtime": 0.8575,
"eval_samples_per_second": 583.097,
"eval_steps_per_second": 37.318,
"step": 16500
},
{
"epoch": 6.08,
"learning_rate": 3.98772494339173e-05,
"loss": 1.4379,
"step": 17000
},
{
"epoch": 6.08,
"eval_accuracy": 0.7041935483870968,
"eval_loss": 1.4900156259536743,
"eval_runtime": 0.8413,
"eval_samples_per_second": 594.352,
"eval_steps_per_second": 38.039,
"step": 17000
},
{
"epoch": 6.26,
"learning_rate": 3.957931116672626e-05,
"loss": 1.4281,
"step": 17500
},
{
"epoch": 6.26,
"eval_accuracy": 0.6989864864864865,
"eval_loss": 1.5407416820526123,
"eval_runtime": 0.8677,
"eval_samples_per_second": 576.232,
"eval_steps_per_second": 36.879,
"step": 17500
},
{
"epoch": 6.44,
"learning_rate": 3.928137289953522e-05,
"loss": 1.436,
"step": 18000
},
{
"epoch": 6.44,
"eval_accuracy": 0.6914175506268081,
"eval_loss": 1.534258246421814,
"eval_runtime": 0.843,
"eval_samples_per_second": 593.143,
"eval_steps_per_second": 37.961,
"step": 18000
},
{
"epoch": 6.61,
"learning_rate": 3.8983434632344176e-05,
"loss": 1.4342,
"step": 18500
},
{
"epoch": 6.61,
"eval_accuracy": 0.7023696682464455,
"eval_loss": 1.5323561429977417,
"eval_runtime": 0.7874,
"eval_samples_per_second": 635.024,
"eval_steps_per_second": 40.642,
"step": 18500
},
{
"epoch": 6.79,
"learning_rate": 3.868549636515314e-05,
"loss": 1.4176,
"step": 19000
},
{
"epoch": 6.79,
"eval_accuracy": 0.7132913490222075,
"eval_loss": 1.4485751390457153,
"eval_runtime": 0.8567,
"eval_samples_per_second": 583.665,
"eval_steps_per_second": 37.355,
"step": 19000
},
{
"epoch": 6.97,
"learning_rate": 3.838755809796211e-05,
"loss": 1.4308,
"step": 19500
},
{
"epoch": 6.97,
"eval_accuracy": 0.7031503734978889,
"eval_loss": 1.4598056077957153,
"eval_runtime": 0.79,
"eval_samples_per_second": 632.872,
"eval_steps_per_second": 40.504,
"step": 19500
},
{
"epoch": 7.15,
"learning_rate": 3.809021570730545e-05,
"loss": 1.4014,
"step": 20000
},
{
"epoch": 7.15,
"eval_accuracy": 0.6938435940099834,
"eval_loss": 1.575023889541626,
"eval_runtime": 0.8292,
"eval_samples_per_second": 603.024,
"eval_steps_per_second": 38.594,
"step": 20000
},
{
"epoch": 7.33,
"learning_rate": 3.779227744011441e-05,
"loss": 1.3661,
"step": 20500
},
{
"epoch": 7.33,
"eval_accuracy": 0.6985221674876847,
"eval_loss": 1.5403505563735962,
"eval_runtime": 0.8319,
"eval_samples_per_second": 601.063,
"eval_steps_per_second": 38.468,
"step": 20500
},
{
"epoch": 7.51,
"learning_rate": 3.7494935049457754e-05,
"loss": 1.3857,
"step": 21000
},
{
"epoch": 7.51,
"eval_accuracy": 0.7037155669442665,
"eval_loss": 1.4692307710647583,
"eval_runtime": 0.8177,
"eval_samples_per_second": 611.5,
"eval_steps_per_second": 39.136,
"step": 21000
},
{
"epoch": 7.69,
"learning_rate": 3.719699678226672e-05,
"loss": 1.3846,
"step": 21500
},
{
"epoch": 7.69,
"eval_accuracy": 0.6941445861956166,
"eval_loss": 1.5511342287063599,
"eval_runtime": 0.7898,
"eval_samples_per_second": 633.076,
"eval_steps_per_second": 40.517,
"step": 21500
},
{
"epoch": 7.87,
"learning_rate": 3.689905851507568e-05,
"loss": 1.3867,
"step": 22000
},
{
"epoch": 7.87,
"eval_accuracy": 0.6925124792013311,
"eval_loss": 1.5321439504623413,
"eval_runtime": 0.8379,
"eval_samples_per_second": 596.713,
"eval_steps_per_second": 38.19,
"step": 22000
},
{
"epoch": 8.04,
"learning_rate": 3.660112024788464e-05,
"loss": 1.3658,
"step": 22500
},
{
"epoch": 8.04,
"eval_accuracy": 0.7020917678812416,
"eval_loss": 1.5499885082244873,
"eval_runtime": 0.8209,
"eval_samples_per_second": 609.075,
"eval_steps_per_second": 38.981,
"step": 22500
},
{
"epoch": 8.22,
"learning_rate": 3.6303181980693604e-05,
"loss": 1.3406,
"step": 23000
},
{
"epoch": 8.22,
"eval_accuracy": 0.6959503592423253,
"eval_loss": 1.523918628692627,
"eval_runtime": 0.8298,
"eval_samples_per_second": 602.525,
"eval_steps_per_second": 38.562,
"step": 23000
},
{
"epoch": 8.4,
"learning_rate": 3.600524371350256e-05,
"loss": 1.3405,
"step": 23500
},
{
"epoch": 8.4,
"eval_accuracy": 0.7055256064690026,
"eval_loss": 1.4414023160934448,
"eval_runtime": 0.8516,
"eval_samples_per_second": 587.105,
"eval_steps_per_second": 37.575,
"step": 23500
},
{
"epoch": 8.58,
"learning_rate": 3.570730544631153e-05,
"loss": 1.3373,
"step": 24000
},
{
"epoch": 8.58,
"eval_accuracy": 0.6784238957737527,
"eval_loss": 1.599377155303955,
"eval_runtime": 0.791,
"eval_samples_per_second": 632.109,
"eval_steps_per_second": 40.455,
"step": 24000
},
{
"epoch": 8.76,
"learning_rate": 3.540936717912049e-05,
"loss": 1.3527,
"step": 24500
},
{
"epoch": 8.76,
"eval_accuracy": 0.6970387243735763,
"eval_loss": 1.5105814933776855,
"eval_runtime": 0.8594,
"eval_samples_per_second": 581.797,
"eval_steps_per_second": 37.235,
"step": 24500
},
{
"epoch": 8.94,
"learning_rate": 3.511142891192945e-05,
"loss": 1.3436,
"step": 25000
},
{
"epoch": 8.94,
"eval_accuracy": 0.7079758500158881,
"eval_loss": 1.471426010131836,
"eval_runtime": 0.8427,
"eval_samples_per_second": 593.355,
"eval_steps_per_second": 37.975,
"step": 25000
},
{
"epoch": 9.12,
"learning_rate": 3.4813490644738414e-05,
"loss": 1.3069,
"step": 25500
},
{
"epoch": 9.12,
"eval_accuracy": 0.6953099376844867,
"eval_loss": 1.4990392923355103,
"eval_runtime": 0.8575,
"eval_samples_per_second": 583.12,
"eval_steps_per_second": 37.32,
"step": 25500
},
{
"epoch": 9.3,
"learning_rate": 3.451555237754737e-05,
"loss": 1.2969,
"step": 26000
},
{
"epoch": 9.3,
"eval_accuracy": 0.6964285714285714,
"eval_loss": 1.4809668064117432,
"eval_runtime": 0.8312,
"eval_samples_per_second": 601.512,
"eval_steps_per_second": 38.497,
"step": 26000
},
{
"epoch": 9.47,
"learning_rate": 3.421761411035634e-05,
"loss": 1.3009,
"step": 26500
},
{
"epoch": 9.47,
"eval_accuracy": 0.6875602700096431,
"eval_loss": 1.5964903831481934,
"eval_runtime": 0.8752,
"eval_samples_per_second": 571.296,
"eval_steps_per_second": 36.563,
"step": 26500
},
{
"epoch": 9.65,
"learning_rate": 3.392086759623406e-05,
"loss": 1.3227,
"step": 27000
},
{
"epoch": 9.65,
"eval_accuracy": 0.7013662979830839,
"eval_loss": 1.429559588432312,
"eval_runtime": 0.7904,
"eval_samples_per_second": 632.561,
"eval_steps_per_second": 40.484,
"step": 27000
},
{
"epoch": 9.83,
"learning_rate": 3.3622929329043025e-05,
"loss": 1.3259,
"step": 27500
},
{
"epoch": 9.83,
"eval_accuracy": 0.7189224277831873,
"eval_loss": 1.413652777671814,
"eval_runtime": 0.8134,
"eval_samples_per_second": 614.697,
"eval_steps_per_second": 39.341,
"step": 27500
},
{
"epoch": 10.01,
"learning_rate": 3.3324991061851985e-05,
"loss": 1.3131,
"step": 28000
},
{
"epoch": 10.01,
"eval_accuracy": 0.7019570099454604,
"eval_loss": 1.534200668334961,
"eval_runtime": 0.8056,
"eval_samples_per_second": 620.653,
"eval_steps_per_second": 39.722,
"step": 28000
},
{
"epoch": 10.19,
"learning_rate": 3.3027052794660944e-05,
"loss": 1.271,
"step": 28500
},
{
"epoch": 10.19,
"eval_accuracy": 0.711340206185567,
"eval_loss": 1.470828890800476,
"eval_runtime": 0.7815,
"eval_samples_per_second": 639.779,
"eval_steps_per_second": 40.946,
"step": 28500
},
{
"epoch": 10.37,
"learning_rate": 3.272911452746991e-05,
"loss": 1.2684,
"step": 29000
},
{
"epoch": 10.37,
"eval_accuracy": 0.7045747422680413,
"eval_loss": 1.4341672658920288,
"eval_runtime": 0.7954,
"eval_samples_per_second": 628.629,
"eval_steps_per_second": 40.232,
"step": 29000
},
{
"epoch": 10.55,
"learning_rate": 3.2431176260278876e-05,
"loss": 1.2767,
"step": 29500
},
{
"epoch": 10.55,
"eval_accuracy": 0.709353000335233,
"eval_loss": 1.4703407287597656,
"eval_runtime": 0.8179,
"eval_samples_per_second": 611.351,
"eval_steps_per_second": 39.126,
"step": 29500
},
{
"epoch": 10.73,
"learning_rate": 3.2133237993087835e-05,
"loss": 1.2861,
"step": 30000
},
{
"epoch": 10.73,
"eval_accuracy": 0.7308937823834197,
"eval_loss": 1.3323109149932861,
"eval_runtime": 0.7855,
"eval_samples_per_second": 636.523,
"eval_steps_per_second": 40.737,
"step": 30000
},
{
"epoch": 10.9,
"learning_rate": 3.1835299725896794e-05,
"loss": 1.2617,
"step": 30500
},
{
"epoch": 10.9,
"eval_accuracy": 0.7003344481605351,
"eval_loss": 1.4562044143676758,
"eval_runtime": 0.7951,
"eval_samples_per_second": 628.826,
"eval_steps_per_second": 40.245,
"step": 30500
},
{
"epoch": 11.08,
"learning_rate": 3.153736145870575e-05,
"loss": 1.2551,
"step": 31000
},
{
"epoch": 11.08,
"eval_accuracy": 0.7169689119170984,
"eval_loss": 1.4361472129821777,
"eval_runtime": 0.8647,
"eval_samples_per_second": 578.22,
"eval_steps_per_second": 37.006,
"step": 31000
},
{
"epoch": 11.26,
"learning_rate": 3.124001906804911e-05,
"loss": 1.2404,
"step": 31500
},
{
"epoch": 11.26,
"eval_accuracy": 0.7034617896799478,
"eval_loss": 1.4536628723144531,
"eval_runtime": 0.7907,
"eval_samples_per_second": 632.325,
"eval_steps_per_second": 40.469,
"step": 31500
},
{
"epoch": 11.44,
"learning_rate": 3.0942080800858066e-05,
"loss": 1.2562,
"step": 32000
},
{
"epoch": 11.44,
"eval_accuracy": 0.7132209980557356,
"eval_loss": 1.4038574695587158,
"eval_runtime": 0.7924,
"eval_samples_per_second": 631.001,
"eval_steps_per_second": 40.384,
"step": 32000
},
{
"epoch": 11.62,
"learning_rate": 3.0644142533667025e-05,
"loss": 1.2489,
"step": 32500
},
{
"epoch": 11.62,
"eval_accuracy": 0.706418918918919,
"eval_loss": 1.4372212886810303,
"eval_runtime": 0.8024,
"eval_samples_per_second": 623.122,
"eval_steps_per_second": 39.88,
"step": 32500
},
{
"epoch": 11.8,
"learning_rate": 3.0346204266475984e-05,
"loss": 1.2406,
"step": 33000
},
{
"epoch": 11.8,
"eval_accuracy": 0.7087442472057857,
"eval_loss": 1.4926137924194336,
"eval_runtime": 0.8525,
"eval_samples_per_second": 586.532,
"eval_steps_per_second": 37.538,
"step": 33000
},
{
"epoch": 11.98,
"learning_rate": 3.0048265999284947e-05,
"loss": 1.2285,
"step": 33500
},
{
"epoch": 11.98,
"eval_accuracy": 0.7152005392652511,
"eval_loss": 1.4080321788787842,
"eval_runtime": 0.8108,
"eval_samples_per_second": 616.703,
"eval_steps_per_second": 39.469,
"step": 33500
},
{
"epoch": 12.16,
"learning_rate": 2.9750327732093913e-05,
"loss": 1.2213,
"step": 34000
},
{
"epoch": 12.16,
"eval_accuracy": 0.7170240415854451,
"eval_loss": 1.403072476387024,
"eval_runtime": 0.8459,
"eval_samples_per_second": 591.089,
"eval_steps_per_second": 37.83,
"step": 34000
},
{
"epoch": 12.33,
"learning_rate": 2.9452389464902875e-05,
"loss": 1.1998,
"step": 34500
},
{
"epoch": 12.33,
"eval_accuracy": 0.7222584856396866,
"eval_loss": 1.3541438579559326,
"eval_runtime": 0.7909,
"eval_samples_per_second": 632.16,
"eval_steps_per_second": 40.458,
"step": 34500
},
{
"epoch": 12.51,
"learning_rate": 2.9154451197711835e-05,
"loss": 1.2184,
"step": 35000
},
{
"epoch": 12.51,
"eval_accuracy": 0.7308441558441559,
"eval_loss": 1.3629957437515259,
"eval_runtime": 0.8716,
"eval_samples_per_second": 573.677,
"eval_steps_per_second": 36.715,
"step": 35000
},
{
"epoch": 12.69,
"learning_rate": 2.8856512930520797e-05,
"loss": 1.2195,
"step": 35500
},
{
"epoch": 12.69,
"eval_accuracy": 0.7281362594169669,
"eval_loss": 1.312456488609314,
"eval_runtime": 0.852,
"eval_samples_per_second": 586.847,
"eval_steps_per_second": 37.558,
"step": 35500
},
{
"epoch": 12.87,
"learning_rate": 2.8558574663329756e-05,
"loss": 1.2178,
"step": 36000
},
{
"epoch": 12.87,
"eval_accuracy": 0.7119236883942767,
"eval_loss": 1.4257023334503174,
"eval_runtime": 0.8597,
"eval_samples_per_second": 581.571,
"eval_steps_per_second": 37.221,
"step": 36000
},
{
"epoch": 13.05,
"learning_rate": 2.8260636396138722e-05,
"loss": 1.1918,
"step": 36500
},
{
"epoch": 13.05,
"eval_accuracy": 0.7152686762778506,
"eval_loss": 1.4108035564422607,
"eval_runtime": 0.9192,
"eval_samples_per_second": 543.96,
"eval_steps_per_second": 34.813,
"step": 36500
},
{
"epoch": 13.23,
"learning_rate": 2.7963294005482066e-05,
"loss": 1.1664,
"step": 37000
},
{
"epoch": 13.23,
"eval_accuracy": 0.7226588081204977,
"eval_loss": 1.3577048778533936,
"eval_runtime": 0.7887,
"eval_samples_per_second": 633.948,
"eval_steps_per_second": 40.573,
"step": 37000
},
{
"epoch": 13.41,
"learning_rate": 2.7665355738291028e-05,
"loss": 1.1754,
"step": 37500
},
{
"epoch": 13.41,
"eval_accuracy": 0.720593191776205,
"eval_loss": 1.377700924873352,
"eval_runtime": 0.8445,
"eval_samples_per_second": 592.06,
"eval_steps_per_second": 37.892,
"step": 37500
},
{
"epoch": 13.59,
"learning_rate": 2.7367417471099987e-05,
"loss": 1.1855,
"step": 38000
},
{
"epoch": 13.59,
"eval_accuracy": 0.7354008578027054,
"eval_loss": 1.350059151649475,
"eval_runtime": 0.8109,
"eval_samples_per_second": 616.607,
"eval_steps_per_second": 39.463,
"step": 38000
},
{
"epoch": 13.76,
"learning_rate": 2.7070075080443334e-05,
"loss": 1.1644,
"step": 38500
},
{
"epoch": 13.76,
"eval_accuracy": 0.7206685953069752,
"eval_loss": 1.374656081199646,
"eval_runtime": 0.8397,
"eval_samples_per_second": 595.482,
"eval_steps_per_second": 38.111,
"step": 38500
},
{
"epoch": 13.94,
"learning_rate": 2.6772136813252297e-05,
"loss": 1.1709,
"step": 39000
},
{
"epoch": 13.94,
"eval_accuracy": 0.7183739837398374,
"eval_loss": 1.3703839778900146,
"eval_runtime": 0.8025,
"eval_samples_per_second": 623.038,
"eval_steps_per_second": 39.874,
"step": 39000
},
{
"epoch": 14.12,
"learning_rate": 2.6474198546061256e-05,
"loss": 1.1613,
"step": 39500
},
{
"epoch": 14.12,
"eval_accuracy": 0.7246875,
"eval_loss": 1.4306718111038208,
"eval_runtime": 0.8499,
"eval_samples_per_second": 588.275,
"eval_steps_per_second": 37.65,
"step": 39500
},
{
"epoch": 14.3,
"learning_rate": 2.617626027887022e-05,
"loss": 1.1443,
"step": 40000
},
{
"epoch": 14.3,
"eval_accuracy": 0.7220978573712824,
"eval_loss": 1.3189983367919922,
"eval_runtime": 0.7903,
"eval_samples_per_second": 632.651,
"eval_steps_per_second": 40.49,
"step": 40000
},
{
"epoch": 14.48,
"learning_rate": 2.5878322011679178e-05,
"loss": 1.1356,
"step": 40500
},
{
"epoch": 14.48,
"eval_accuracy": 0.7331329325317302,
"eval_loss": 1.3287793397903442,
"eval_runtime": 0.7921,
"eval_samples_per_second": 631.257,
"eval_steps_per_second": 40.4,
"step": 40500
},
{
"epoch": 14.66,
"learning_rate": 2.5580383744488147e-05,
"loss": 1.1493,
"step": 41000
},
{
"epoch": 14.66,
"eval_accuracy": 0.7240227196792516,
"eval_loss": 1.3504801988601685,
"eval_runtime": 0.8432,
"eval_samples_per_second": 592.975,
"eval_steps_per_second": 37.95,
"step": 41000
},
{
"epoch": 14.84,
"learning_rate": 2.5283041353831487e-05,
"loss": 1.1417,
"step": 41500
},
{
"epoch": 14.84,
"eval_accuracy": 0.7320369149637442,
"eval_loss": 1.31459379196167,
"eval_runtime": 0.8272,
"eval_samples_per_second": 604.463,
"eval_steps_per_second": 38.686,
"step": 41500
},
{
"epoch": 15.02,
"learning_rate": 2.498569896317483e-05,
"loss": 1.1349,
"step": 42000
},
{
"epoch": 15.02,
"eval_accuracy": 0.7333114107201578,
"eval_loss": 1.3545522689819336,
"eval_runtime": 0.8634,
"eval_samples_per_second": 579.106,
"eval_steps_per_second": 37.063,
"step": 42000
},
{
"epoch": 15.19,
"learning_rate": 2.4687760695983793e-05,
"loss": 1.1169,
"step": 42500
},
{
"epoch": 15.19,
"eval_accuracy": 0.7246922024623803,
"eval_loss": 1.37086021900177,
"eval_runtime": 0.8611,
"eval_samples_per_second": 580.685,
"eval_steps_per_second": 37.164,
"step": 42500
},
{
"epoch": 15.37,
"learning_rate": 2.4390418305327136e-05,
"loss": 1.1187,
"step": 43000
},
{
"epoch": 15.37,
"eval_accuracy": 0.7217795484727756,
"eval_loss": 1.4242717027664185,
"eval_runtime": 0.8265,
"eval_samples_per_second": 604.985,
"eval_steps_per_second": 38.719,
"step": 43000
},
{
"epoch": 15.55,
"learning_rate": 2.4092480038136102e-05,
"loss": 1.118,
"step": 43500
},
{
"epoch": 15.55,
"eval_accuracy": 0.7264245251582806,
"eval_loss": 1.3835431337356567,
"eval_runtime": 0.8374,
"eval_samples_per_second": 597.064,
"eval_steps_per_second": 38.212,
"step": 43500
},
{
"epoch": 15.73,
"learning_rate": 2.379454177094506e-05,
"loss": 1.1165,
"step": 44000
},
{
"epoch": 15.73,
"eval_accuracy": 0.7253818654533637,
"eval_loss": 1.3239895105361938,
"eval_runtime": 0.8499,
"eval_samples_per_second": 588.29,
"eval_steps_per_second": 37.651,
"step": 44000
},
{
"epoch": 15.91,
"learning_rate": 2.3496603503754024e-05,
"loss": 1.114,
"step": 44500
},
{
"epoch": 15.91,
"eval_accuracy": 0.7382113821138211,
"eval_loss": 1.3263858556747437,
"eval_runtime": 0.8424,
"eval_samples_per_second": 593.546,
"eval_steps_per_second": 37.987,
"step": 44500
},
{
"epoch": 16.09,
"learning_rate": 2.3198665236562986e-05,
"loss": 1.105,
"step": 45000
},
{
"epoch": 16.09,
"eval_accuracy": 0.7333548804137039,
"eval_loss": 1.3213739395141602,
"eval_runtime": 0.8677,
"eval_samples_per_second": 576.224,
"eval_steps_per_second": 36.878,
"step": 45000
},
{
"epoch": 16.27,
"learning_rate": 2.2900726969371946e-05,
"loss": 1.0924,
"step": 45500
},
{
"epoch": 16.27,
"eval_accuracy": 0.7282392026578073,
"eval_loss": 1.384667992591858,
"eval_runtime": 0.9421,
"eval_samples_per_second": 530.704,
"eval_steps_per_second": 33.965,
"step": 45500
},
{
"epoch": 16.45,
"learning_rate": 2.260278870218091e-05,
"loss": 1.0915,
"step": 46000
},
{
"epoch": 16.45,
"eval_accuracy": 0.7317073170731707,
"eval_loss": 1.3603721857070923,
"eval_runtime": 0.7951,
"eval_samples_per_second": 628.874,
"eval_steps_per_second": 40.248,
"step": 46000
},
{
"epoch": 16.62,
"learning_rate": 2.230485043498987e-05,
"loss": 1.0968,
"step": 46500
},
{
"epoch": 16.62,
"eval_accuracy": 0.7319177173191772,
"eval_loss": 1.3539705276489258,
"eval_runtime": 0.8815,
"eval_samples_per_second": 567.187,
"eval_steps_per_second": 36.3,
"step": 46500
},
{
"epoch": 16.8,
"learning_rate": 2.2006912167798833e-05,
"loss": 1.0772,
"step": 47000
},
{
"epoch": 16.8,
"eval_accuracy": 0.7306332369013179,
"eval_loss": 1.2475004196166992,
"eval_runtime": 0.8301,
"eval_samples_per_second": 602.308,
"eval_steps_per_second": 38.548,
"step": 47000
},
{
"epoch": 16.98,
"learning_rate": 2.1708973900607796e-05,
"loss": 1.0975,
"step": 47500
},
{
"epoch": 16.98,
"eval_accuracy": 0.7448207826372903,
"eval_loss": 1.2635700702667236,
"eval_runtime": 0.8269,
"eval_samples_per_second": 604.655,
"eval_steps_per_second": 38.698,
"step": 47500
},
{
"epoch": 17.16,
"learning_rate": 2.1411035633416755e-05,
"loss": 1.0708,
"step": 48000
},
{
"epoch": 17.16,
"eval_accuracy": 0.7182085648904871,
"eval_loss": 1.4056382179260254,
"eval_runtime": 0.8973,
"eval_samples_per_second": 557.236,
"eval_steps_per_second": 35.663,
"step": 48000
},
{
"epoch": 17.34,
"learning_rate": 2.111309736622572e-05,
"loss": 1.0654,
"step": 48500
},
{
"epoch": 17.34,
"eval_accuracy": 0.727630285152409,
"eval_loss": 1.3769292831420898,
"eval_runtime": 0.8377,
"eval_samples_per_second": 596.886,
"eval_steps_per_second": 38.201,
"step": 48500
},
{
"epoch": 17.52,
"learning_rate": 2.081515909903468e-05,
"loss": 1.0676,
"step": 49000
},
{
"epoch": 17.52,
"eval_accuracy": 0.7224234441883438,
"eval_loss": 1.33571457862854,
"eval_runtime": 0.7909,
"eval_samples_per_second": 632.166,
"eval_steps_per_second": 40.459,
"step": 49000
},
{
"epoch": 17.7,
"learning_rate": 2.0517220831843643e-05,
"loss": 1.0507,
"step": 49500
},
{
"epoch": 17.7,
"eval_accuracy": 0.712369109947644,
"eval_loss": 1.4087713956832886,
"eval_runtime": 0.7955,
"eval_samples_per_second": 628.504,
"eval_steps_per_second": 40.224,
"step": 49500
},
{
"epoch": 17.88,
"learning_rate": 2.0219282564652605e-05,
"loss": 1.0424,
"step": 50000
},
{
"epoch": 17.88,
"eval_accuracy": 0.7314667515112949,
"eval_loss": 1.3146371841430664,
"eval_runtime": 0.7881,
"eval_samples_per_second": 634.428,
"eval_steps_per_second": 40.603,
"step": 50000
},
{
"epoch": 18.06,
"learning_rate": 1.9921344297461568e-05,
"loss": 1.0524,
"step": 50500
},
{
"epoch": 18.06,
"eval_accuracy": 0.7393395319012503,
"eval_loss": 1.28960382938385,
"eval_runtime": 0.8581,
"eval_samples_per_second": 582.683,
"eval_steps_per_second": 37.292,
"step": 50500
},
{
"epoch": 18.23,
"learning_rate": 1.962340603027053e-05,
"loss": 1.0349,
"step": 51000
},
{
"epoch": 18.23,
"eval_accuracy": 0.7191558441558441,
"eval_loss": 1.3986730575561523,
"eval_runtime": 0.7904,
"eval_samples_per_second": 632.599,
"eval_steps_per_second": 40.486,
"step": 51000
},
{
"epoch": 18.41,
"learning_rate": 1.932546776307949e-05,
"loss": 1.0217,
"step": 51500
},
{
"epoch": 18.41,
"eval_accuracy": 0.7380645161290322,
"eval_loss": 1.2937612533569336,
"eval_runtime": 0.8575,
"eval_samples_per_second": 583.089,
"eval_steps_per_second": 37.318,
"step": 51500
},
{
"epoch": 18.59,
"learning_rate": 1.9028125372422833e-05,
"loss": 1.0238,
"step": 52000
},
{
"epoch": 18.59,
"eval_accuracy": 0.738654650788542,
"eval_loss": 1.296163558959961,
"eval_runtime": 0.8423,
"eval_samples_per_second": 593.617,
"eval_steps_per_second": 37.992,
"step": 52000
},
{
"epoch": 18.77,
"learning_rate": 1.87301871052318e-05,
"loss": 1.0292,
"step": 52500
},
{
"epoch": 18.77,
"eval_accuracy": 0.737131757850437,
"eval_loss": 1.3194587230682373,
"eval_runtime": 0.8232,
"eval_samples_per_second": 607.358,
"eval_steps_per_second": 38.871,
"step": 52500
},
{
"epoch": 18.95,
"learning_rate": 1.8433440591109523e-05,
"loss": 1.0426,
"step": 53000
},
{
"epoch": 18.95,
"eval_accuracy": 0.7411687025420931,
"eval_loss": 1.2835460901260376,
"eval_runtime": 0.7859,
"eval_samples_per_second": 636.221,
"eval_steps_per_second": 40.718,
"step": 53000
},
{
"epoch": 19.13,
"learning_rate": 1.8135502323918486e-05,
"loss": 1.0196,
"step": 53500
},
{
"epoch": 19.13,
"eval_accuracy": 0.747275204359673,
"eval_loss": 1.234621524810791,
"eval_runtime": 0.8361,
"eval_samples_per_second": 597.997,
"eval_steps_per_second": 38.272,
"step": 53500
},
{
"epoch": 19.31,
"learning_rate": 1.7837564056727445e-05,
"loss": 1.012,
"step": 54000
},
{
"epoch": 19.31,
"eval_accuracy": 0.7338292367399741,
"eval_loss": 1.3665757179260254,
"eval_runtime": 0.8157,
"eval_samples_per_second": 612.938,
"eval_steps_per_second": 39.228,
"step": 54000
},
{
"epoch": 19.49,
"learning_rate": 1.753962578953641e-05,
"loss": 1.0256,
"step": 54500
},
{
"epoch": 19.49,
"eval_accuracy": 0.7364842991259307,
"eval_loss": 1.3140363693237305,
"eval_runtime": 0.7949,
"eval_samples_per_second": 628.974,
"eval_steps_per_second": 40.254,
"step": 54500
},
{
"epoch": 19.66,
"learning_rate": 1.724168752234537e-05,
"loss": 0.9824,
"step": 55000
},
{
"epoch": 19.66,
"eval_accuracy": 0.7416496250852079,
"eval_loss": 1.2764383554458618,
"eval_runtime": 0.8178,
"eval_samples_per_second": 611.417,
"eval_steps_per_second": 39.131,
"step": 55000
},
{
"epoch": 19.84,
"learning_rate": 1.6943749255154336e-05,
"loss": 1.0048,
"step": 55500
},
{
"epoch": 19.84,
"eval_accuracy": 0.7487891507910881,
"eval_loss": 1.2514091730117798,
"eval_runtime": 0.8164,
"eval_samples_per_second": 612.474,
"eval_steps_per_second": 39.198,
"step": 55500
},
{
"epoch": 20.02,
"learning_rate": 1.6645810987963295e-05,
"loss": 0.9947,
"step": 56000
},
{
"epoch": 20.02,
"eval_accuracy": 0.7431572246976448,
"eval_loss": 1.3350915908813477,
"eval_runtime": 0.7912,
"eval_samples_per_second": 631.988,
"eval_steps_per_second": 40.447,
"step": 56000
},
{
"epoch": 20.2,
"learning_rate": 1.634846859730664e-05,
"loss": 0.977,
"step": 56500
},
{
"epoch": 20.2,
"eval_accuracy": 0.7451045469631596,
"eval_loss": 1.2854044437408447,
"eval_runtime": 0.8499,
"eval_samples_per_second": 588.28,
"eval_steps_per_second": 37.65,
"step": 56500
},
{
"epoch": 20.38,
"learning_rate": 1.60505303301156e-05,
"loss": 0.9862,
"step": 57000
},
{
"epoch": 20.38,
"eval_accuracy": 0.7285475792988314,
"eval_loss": 1.366584300994873,
"eval_runtime": 0.816,
"eval_samples_per_second": 612.774,
"eval_steps_per_second": 39.218,
"step": 57000
},
{
"epoch": 20.56,
"learning_rate": 1.5752592062924564e-05,
"loss": 0.9699,
"step": 57500
},
{
"epoch": 20.56,
"eval_accuracy": 0.7347811780190853,
"eval_loss": 1.3123427629470825,
"eval_runtime": 0.7779,
"eval_samples_per_second": 642.731,
"eval_steps_per_second": 41.135,
"step": 57500
},
{
"epoch": 20.74,
"learning_rate": 1.5454653795733526e-05,
"loss": 0.977,
"step": 58000
},
{
"epoch": 20.74,
"eval_accuracy": 0.7254770672915969,
"eval_loss": 1.3425793647766113,
"eval_runtime": 0.8285,
"eval_samples_per_second": 603.485,
"eval_steps_per_second": 38.623,
"step": 58000
},
{
"epoch": 20.92,
"learning_rate": 1.5157311405076868e-05,
"loss": 0.9749,
"step": 58500
},
{
"epoch": 20.92,
"eval_accuracy": 0.7296604740550929,
"eval_loss": 1.3763371706008911,
"eval_runtime": 0.7855,
"eval_samples_per_second": 636.556,
"eval_steps_per_second": 40.74,
"step": 58500
},
{
"epoch": 21.09,
"learning_rate": 1.4859373137885832e-05,
"loss": 0.9505,
"step": 59000
},
{
"epoch": 21.09,
"eval_accuracy": 0.7434469200524246,
"eval_loss": 1.2372225522994995,
"eval_runtime": 0.7967,
"eval_samples_per_second": 627.592,
"eval_steps_per_second": 40.166,
"step": 59000
},
{
"epoch": 21.27,
"learning_rate": 1.4561434870694793e-05,
"loss": 0.9438,
"step": 59500
},
{
"epoch": 21.27,
"eval_accuracy": 0.7159090909090909,
"eval_loss": 1.433412790298462,
"eval_runtime": 0.7929,
"eval_samples_per_second": 630.567,
"eval_steps_per_second": 40.356,
"step": 59500
},
{
"epoch": 21.45,
"learning_rate": 1.4263496603503754e-05,
"loss": 0.944,
"step": 60000
},
{
"epoch": 21.45,
"eval_accuracy": 0.7507936507936508,
"eval_loss": 1.269033432006836,
"eval_runtime": 0.8274,
"eval_samples_per_second": 604.314,
"eval_steps_per_second": 38.676,
"step": 60000
},
{
"epoch": 21.63,
"learning_rate": 1.3965558336312718e-05,
"loss": 0.9427,
"step": 60500
},
{
"epoch": 21.63,
"eval_accuracy": 0.7485941118094608,
"eval_loss": 1.2185914516448975,
"eval_runtime": 0.7923,
"eval_samples_per_second": 631.05,
"eval_steps_per_second": 40.387,
"step": 60500
},
{
"epoch": 21.81,
"learning_rate": 1.3667620069121679e-05,
"loss": 0.9553,
"step": 61000
},
{
"epoch": 21.81,
"eval_accuracy": 0.726882430647292,
"eval_loss": 1.3940554857254028,
"eval_runtime": 0.7961,
"eval_samples_per_second": 628.083,
"eval_steps_per_second": 40.197,
"step": 61000
},
{
"epoch": 21.99,
"learning_rate": 1.3369681801930641e-05,
"loss": 0.9571,
"step": 61500
},
{
"epoch": 21.99,
"eval_accuracy": 0.7273940607273941,
"eval_loss": 1.4162867069244385,
"eval_runtime": 0.791,
"eval_samples_per_second": 632.128,
"eval_steps_per_second": 40.456,
"step": 61500
},
{
"epoch": 22.17,
"learning_rate": 1.3071743534739602e-05,
"loss": 0.932,
"step": 62000
},
{
"epoch": 22.17,
"eval_accuracy": 0.7522727272727273,
"eval_loss": 1.2717351913452148,
"eval_runtime": 0.796,
"eval_samples_per_second": 628.103,
"eval_steps_per_second": 40.199,
"step": 62000
},
{
"epoch": 22.35,
"learning_rate": 1.2773805267548563e-05,
"loss": 0.9166,
"step": 62500
},
{
"epoch": 22.35,
"eval_accuracy": 0.73956326268465,
"eval_loss": 1.217714786529541,
"eval_runtime": 0.8289,
"eval_samples_per_second": 603.185,
"eval_steps_per_second": 38.604,
"step": 62500
},
{
"epoch": 22.52,
"learning_rate": 1.2475867000357526e-05,
"loss": 0.9301,
"step": 63000
},
{
"epoch": 22.52,
"eval_accuracy": 0.7377950210151956,
"eval_loss": 1.3264496326446533,
"eval_runtime": 0.8524,
"eval_samples_per_second": 586.56,
"eval_steps_per_second": 37.54,
"step": 63000
},
{
"epoch": 22.7,
"learning_rate": 1.2177928733166488e-05,
"loss": 0.9351,
"step": 63500
},
{
"epoch": 22.7,
"eval_accuracy": 0.752010292698617,
"eval_loss": 1.2570440769195557,
"eval_runtime": 0.785,
"eval_samples_per_second": 636.94,
"eval_steps_per_second": 40.764,
"step": 63500
},
{
"epoch": 22.88,
"learning_rate": 1.1879990465975451e-05,
"loss": 0.9211,
"step": 64000
},
{
"epoch": 22.88,
"eval_accuracy": 0.75,
"eval_loss": 1.2638896703720093,
"eval_runtime": 0.8753,
"eval_samples_per_second": 571.265,
"eval_steps_per_second": 36.561,
"step": 64000
},
{
"epoch": 23.06,
"learning_rate": 1.1582052198784414e-05,
"loss": 0.9211,
"step": 64500
},
{
"epoch": 23.06,
"eval_accuracy": 0.7605543022881083,
"eval_loss": 1.2376515865325928,
"eval_runtime": 0.7946,
"eval_samples_per_second": 629.265,
"eval_steps_per_second": 40.273,
"step": 64500
},
{
"epoch": 23.24,
"learning_rate": 1.1284113931593374e-05,
"loss": 0.9196,
"step": 65000
},
{
"epoch": 23.24,
"eval_accuracy": 0.7485168094924193,
"eval_loss": 1.2738728523254395,
"eval_runtime": 0.8576,
"eval_samples_per_second": 583.036,
"eval_steps_per_second": 37.314,
"step": 65000
},
{
"epoch": 23.42,
"learning_rate": 1.098677154093672e-05,
"loss": 0.9062,
"step": 65500
},
{
"epoch": 23.42,
"eval_accuracy": 0.7365366010964205,
"eval_loss": 1.3262896537780762,
"eval_runtime": 0.8401,
"eval_samples_per_second": 595.164,
"eval_steps_per_second": 38.09,
"step": 65500
},
{
"epoch": 23.6,
"learning_rate": 1.068883327374568e-05,
"loss": 0.8965,
"step": 66000
},
{
"epoch": 23.6,
"eval_accuracy": 0.7455209024552091,
"eval_loss": 1.2814128398895264,
"eval_runtime": 0.778,
"eval_samples_per_second": 642.691,
"eval_steps_per_second": 41.132,
"step": 66000
},
{
"epoch": 23.78,
"learning_rate": 1.0392086759623406e-05,
"loss": 0.9004,
"step": 66500
},
{
"epoch": 23.78,
"eval_accuracy": 0.7561779242174629,
"eval_loss": 1.2108628749847412,
"eval_runtime": 0.8669,
"eval_samples_per_second": 576.736,
"eval_steps_per_second": 36.911,
"step": 66500
},
{
"epoch": 23.95,
"learning_rate": 1.0094148492432369e-05,
"loss": 0.9094,
"step": 67000
},
{
"epoch": 23.95,
"eval_accuracy": 0.7528089887640449,
"eval_loss": 1.2629289627075195,
"eval_runtime": 0.8653,
"eval_samples_per_second": 577.859,
"eval_steps_per_second": 36.983,
"step": 67000
},
{
"epoch": 24.13,
"learning_rate": 9.79621022524133e-06,
"loss": 0.8937,
"step": 67500
},
{
"epoch": 24.13,
"eval_accuracy": 0.7375168690958165,
"eval_loss": 1.2770532369613647,
"eval_runtime": 0.8492,
"eval_samples_per_second": 588.814,
"eval_steps_per_second": 37.684,
"step": 67500
},
{
"epoch": 24.31,
"learning_rate": 9.498271958050292e-06,
"loss": 0.8711,
"step": 68000
},
{
"epoch": 24.31,
"eval_accuracy": 0.7353233830845771,
"eval_loss": 1.3746039867401123,
"eval_runtime": 0.7929,
"eval_samples_per_second": 630.629,
"eval_steps_per_second": 40.36,
"step": 68000
},
{
"epoch": 24.49,
"learning_rate": 9.200333690859255e-06,
"loss": 0.8972,
"step": 68500
},
{
"epoch": 24.49,
"eval_accuracy": 0.7453750420450723,
"eval_loss": 1.2529133558273315,
"eval_runtime": 0.8497,
"eval_samples_per_second": 588.462,
"eval_steps_per_second": 37.662,
"step": 68500
},
{
"epoch": 24.67,
"learning_rate": 8.902395423668217e-06,
"loss": 0.8863,
"step": 69000
},
{
"epoch": 24.67,
"eval_accuracy": 0.7359154929577465,
"eval_loss": 1.3219196796417236,
"eval_runtime": 0.8149,
"eval_samples_per_second": 613.598,
"eval_steps_per_second": 39.27,
"step": 69000
},
{
"epoch": 24.85,
"learning_rate": 8.604457156477178e-06,
"loss": 0.8823,
"step": 69500
},
{
"epoch": 24.85,
"eval_accuracy": 0.7367235275185066,
"eval_loss": 1.313620924949646,
"eval_runtime": 0.8311,
"eval_samples_per_second": 601.621,
"eval_steps_per_second": 38.504,
"step": 69500
},
{
"epoch": 25.03,
"learning_rate": 8.306518889286139e-06,
"loss": 0.8759,
"step": 70000
},
{
"epoch": 25.03,
"eval_accuracy": 0.7427812811151676,
"eval_loss": 1.3151708841323853,
"eval_runtime": 0.7986,
"eval_samples_per_second": 626.093,
"eval_steps_per_second": 40.07,
"step": 70000
},
{
"epoch": 25.21,
"learning_rate": 8.008580622095102e-06,
"loss": 0.8722,
"step": 70500
},
{
"epoch": 25.21,
"eval_accuracy": 0.7569644572526417,
"eval_loss": 1.3108021020889282,
"eval_runtime": 0.8281,
"eval_samples_per_second": 603.782,
"eval_steps_per_second": 38.642,
"step": 70500
},
{
"epoch": 25.38,
"learning_rate": 7.710642354904064e-06,
"loss": 0.8548,
"step": 71000
},
{
"epoch": 25.38,
"eval_accuracy": 0.7367716008037508,
"eval_loss": 1.3503183126449585,
"eval_runtime": 0.7871,
"eval_samples_per_second": 635.233,
"eval_steps_per_second": 40.655,
"step": 71000
},
{
"epoch": 25.56,
"learning_rate": 7.412704087713027e-06,
"loss": 0.8728,
"step": 71500
},
{
"epoch": 25.56,
"eval_accuracy": 0.7402768622280818,
"eval_loss": 1.3091211318969727,
"eval_runtime": 0.8581,
"eval_samples_per_second": 582.712,
"eval_steps_per_second": 37.294,
"step": 71500
},
{
"epoch": 25.74,
"learning_rate": 7.114765820521989e-06,
"loss": 0.8633,
"step": 72000
},
{
"epoch": 25.74,
"eval_accuracy": 0.7416481069042317,
"eval_loss": 1.2952070236206055,
"eval_runtime": 0.8515,
"eval_samples_per_second": 587.213,
"eval_steps_per_second": 37.582,
"step": 72000
},
{
"epoch": 25.92,
"learning_rate": 6.816827553330949e-06,
"loss": 0.8612,
"step": 72500
},
{
"epoch": 25.92,
"eval_accuracy": 0.7719072164948454,
"eval_loss": 1.1612097024917603,
"eval_runtime": 0.7967,
"eval_samples_per_second": 627.618,
"eval_steps_per_second": 40.168,
"step": 72500
},
{
"epoch": 26.1,
"learning_rate": 6.5194851626742935e-06,
"loss": 0.8677,
"step": 73000
},
{
"epoch": 26.1,
"eval_accuracy": 0.7449731903485255,
"eval_loss": 1.2855061292648315,
"eval_runtime": 0.8112,
"eval_samples_per_second": 616.391,
"eval_steps_per_second": 39.449,
"step": 73000
},
{
"epoch": 26.28,
"learning_rate": 6.2221427720176384e-06,
"loss": 0.8526,
"step": 73500
},
{
"epoch": 26.28,
"eval_accuracy": 0.7544929396662388,
"eval_loss": 1.297914981842041,
"eval_runtime": 0.8472,
"eval_samples_per_second": 590.203,
"eval_steps_per_second": 37.773,
"step": 73500
},
{
"epoch": 26.46,
"learning_rate": 5.9242045048266e-06,
"loss": 0.8594,
"step": 74000
},
{
"epoch": 26.46,
"eval_accuracy": 0.7598070739549839,
"eval_loss": 1.2569819688796997,
"eval_runtime": 0.7923,
"eval_samples_per_second": 631.066,
"eval_steps_per_second": 40.388,
"step": 74000
},
{
"epoch": 26.64,
"learning_rate": 5.626266237635562e-06,
"loss": 0.8481,
"step": 74500
},
{
"epoch": 26.64,
"eval_accuracy": 0.7491992312620115,
"eval_loss": 1.2336714267730713,
"eval_runtime": 0.8668,
"eval_samples_per_second": 576.839,
"eval_steps_per_second": 36.918,
"step": 74500
},
{
"epoch": 26.81,
"learning_rate": 5.3283279704445245e-06,
"loss": 0.855,
"step": 75000
},
{
"epoch": 26.81,
"eval_accuracy": 0.7443507588532884,
"eval_loss": 1.2874828577041626,
"eval_runtime": 0.7926,
"eval_samples_per_second": 630.803,
"eval_steps_per_second": 40.371,
"step": 75000
},
{
"epoch": 26.99,
"learning_rate": 5.030389703253486e-06,
"loss": 0.835,
"step": 75500
},
{
"epoch": 26.99,
"eval_accuracy": 0.7584731819677526,
"eval_loss": 1.2270281314849854,
"eval_runtime": 0.8172,
"eval_samples_per_second": 611.826,
"eval_steps_per_second": 39.157,
"step": 75500
},
{
"epoch": 27.17,
"learning_rate": 4.732451436062448e-06,
"loss": 0.8309,
"step": 76000
},
{
"epoch": 27.17,
"eval_accuracy": 0.7389322916666666,
"eval_loss": 1.2539992332458496,
"eval_runtime": 0.8357,
"eval_samples_per_second": 598.292,
"eval_steps_per_second": 38.291,
"step": 76000
},
{
"epoch": 27.35,
"learning_rate": 4.43451316887141e-06,
"loss": 0.8326,
"step": 76500
},
{
"epoch": 27.35,
"eval_accuracy": 0.7374631268436578,
"eval_loss": 1.3610546588897705,
"eval_runtime": 0.7953,
"eval_samples_per_second": 628.676,
"eval_steps_per_second": 40.235,
"step": 76500
},
{
"epoch": 27.53,
"learning_rate": 4.136574901680372e-06,
"loss": 0.8398,
"step": 77000
},
{
"epoch": 27.53,
"eval_accuracy": 0.7504918032786885,
"eval_loss": 1.2247506380081177,
"eval_runtime": 0.859,
"eval_samples_per_second": 582.099,
"eval_steps_per_second": 37.254,
"step": 77000
},
{
"epoch": 27.71,
"learning_rate": 3.838636634489334e-06,
"loss": 0.8304,
"step": 77500
},
{
"epoch": 27.71,
"eval_accuracy": 0.7607282184655396,
"eval_loss": 1.2403171062469482,
"eval_runtime": 0.9471,
"eval_samples_per_second": 527.922,
"eval_steps_per_second": 33.787,
"step": 77500
},
{
"epoch": 27.89,
"learning_rate": 3.5406983672982957e-06,
"loss": 0.8373,
"step": 78000
},
{
"epoch": 27.89,
"eval_accuracy": 0.7611295681063123,
"eval_loss": 1.1708660125732422,
"eval_runtime": 0.8284,
"eval_samples_per_second": 603.609,
"eval_steps_per_second": 38.631,
"step": 78000
},
{
"epoch": 28.07,
"learning_rate": 3.2427601001072583e-06,
"loss": 0.8462,
"step": 78500
},
{
"epoch": 28.07,
"eval_accuracy": 0.7508185985592666,
"eval_loss": 1.289104700088501,
"eval_runtime": 0.8603,
"eval_samples_per_second": 581.16,
"eval_steps_per_second": 37.194,
"step": 78500
},
{
"epoch": 28.24,
"learning_rate": 2.945417709450602e-06,
"loss": 0.8259,
"step": 79000
},
{
"epoch": 28.24,
"eval_accuracy": 0.7500814597588791,
"eval_loss": 1.2452012300491333,
"eval_runtime": 0.8046,
"eval_samples_per_second": 621.394,
"eval_steps_per_second": 39.769,
"step": 79000
},
{
"epoch": 28.42,
"learning_rate": 2.647479442259564e-06,
"loss": 0.8334,
"step": 79500
},
{
"epoch": 28.42,
"eval_accuracy": 0.746810598626104,
"eval_loss": 1.2985996007919312,
"eval_runtime": 0.9197,
"eval_samples_per_second": 543.676,
"eval_steps_per_second": 34.795,
"step": 79500
},
{
"epoch": 28.6,
"learning_rate": 2.349541175068526e-06,
"loss": 0.8115,
"step": 80000
},
{
"epoch": 28.6,
"eval_accuracy": 0.7514638906961614,
"eval_loss": 1.2879589796066284,
"eval_runtime": 0.7986,
"eval_samples_per_second": 626.129,
"eval_steps_per_second": 40.072,
"step": 80000
},
{
"epoch": 28.78,
"learning_rate": 2.0516029078774876e-06,
"loss": 0.8205,
"step": 80500
},
{
"epoch": 28.78,
"eval_accuracy": 0.75615359369872,
"eval_loss": 1.2727956771850586,
"eval_runtime": 0.8652,
"eval_samples_per_second": 577.899,
"eval_steps_per_second": 36.986,
"step": 80500
},
{
"epoch": 28.96,
"learning_rate": 1.7536646406864498e-06,
"loss": 0.8261,
"step": 81000
},
{
"epoch": 28.96,
"eval_accuracy": 0.7523561910952227,
"eval_loss": 1.2660555839538574,
"eval_runtime": 0.7893,
"eval_samples_per_second": 633.494,
"eval_steps_per_second": 40.544,
"step": 81000
},
{
"epoch": 29.14,
"learning_rate": 1.4563222500297937e-06,
"loss": 0.8299,
"step": 81500
},
{
"epoch": 29.14,
"eval_accuracy": 0.7486106570774763,
"eval_loss": 1.25924813747406,
"eval_runtime": 0.8513,
"eval_samples_per_second": 587.342,
"eval_steps_per_second": 37.59,
"step": 81500
},
{
"epoch": 29.32,
"learning_rate": 1.1583839828387559e-06,
"loss": 0.8276,
"step": 82000
},
{
"epoch": 29.32,
"eval_accuracy": 0.7529644268774703,
"eval_loss": 1.2325080633163452,
"eval_runtime": 0.8587,
"eval_samples_per_second": 582.291,
"eval_steps_per_second": 37.267,
"step": 82000
},
{
"epoch": 29.5,
"learning_rate": 8.604457156477178e-07,
"loss": 0.8112,
"step": 82500
},
{
"epoch": 29.5,
"eval_accuracy": 0.7477890599410416,
"eval_loss": 1.3154096603393555,
"eval_runtime": 0.8166,
"eval_samples_per_second": 612.267,
"eval_steps_per_second": 39.185,
"step": 82500
},
{
"epoch": 29.67,
"learning_rate": 5.625074484566799e-07,
"loss": 0.8111,
"step": 83000
},
{
"epoch": 29.67,
"eval_accuracy": 0.740531561461794,
"eval_loss": 1.3342524766921997,
"eval_runtime": 0.8076,
"eval_samples_per_second": 619.083,
"eval_steps_per_second": 39.621,
"step": 83000
},
{
"epoch": 29.85,
"learning_rate": 2.645691812656418e-07,
"loss": 0.8148,
"step": 83500
},
{
"epoch": 29.85,
"eval_accuracy": 0.7484622855292975,
"eval_loss": 1.2806158065795898,
"eval_runtime": 0.8122,
"eval_samples_per_second": 615.596,
"eval_steps_per_second": 39.398,
"step": 83500
},
{
"epoch": 30.0,
"step": 83910,
"total_flos": 3.583580261367381e+17,
"train_loss": 1.1746184680817338,
"train_runtime": 16410.0948,
"train_samples_per_second": 163.619,
"train_steps_per_second": 5.113
}
],
"max_steps": 83910,
"num_train_epochs": 30,
"total_flos": 3.583580261367381e+17,
"trial_name": null,
"trial_params": null
}